[llvm] cd83d9f - [AArch64][SVE] Implement remaining floating-point arithmetic intrinsics

Kerry McLaughlin via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 14 04:14:14 PST 2019


Author: Kerry McLaughlin
Date: 2019-11-14T11:59:00Z
New Revision: cd83d9ff5c9085706338a09fcef0c708955c3d23

URL: https://github.com/llvm/llvm-project/commit/cd83d9ff5c9085706338a09fcef0c708955c3d23
DIFF: https://github.com/llvm/llvm-project/commit/cd83d9ff5c9085706338a09fcef0c708955c3d23.diff

LOG: [AArch64][SVE] Implement remaining floating-point arithmetic intrinsics

Summary:
Adds intrinsics for the following operations (an illustrative IR call is sketched after the list):
  - fabs & fneg
  - fexpa
  - frint[a|i|m|n|p|x|z]
  - frecpe, frecps & frecpx
  - fsqrt, frsqrte & frsqrts
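
The merged intrinsics above (fabs, fneg, frint*, frecpx, fsqrt) take the
value used for inactive lanes as their first operand, followed by the
governing predicate and the active input. A minimal sketch of the calling
convention (the function name @fabs_example is illustrative, not part of
this patch):

  define <vscale x 4 x float> @fabs_example(<vscale x 4 x float> %merge,
                                            <vscale x 4 x i1> %pg,
                                            <vscale x 4 x float> %op) {
    ; Active lanes get fabs(%op); inactive lanes are taken from %merge.
    %res = call <vscale x 4 x float> @llvm.aarch64.sve.fabs.nxv4f32(<vscale x 4 x float> %merge,
                                                                    <vscale x 4 x i1> %pg,
                                                                    <vscale x 4 x float> %op)
    ret <vscale x 4 x float> %res
  }

  declare <vscale x 4 x float> @llvm.aarch64.sve.fabs.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, <vscale x 4 x float>)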

Reviewers: huntergr, sdesmalen, dancgr, mgudim

Reviewed By: sdesmalen

Subscribers: tschuett, kristof.beyls, hiraditya, rkruppe, psnobl, cameron.mcinally, cfe-commits, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D69800

Added: 
    

Modified: 
    llvm/include/llvm/IR/IntrinsicsAArch64.td
    llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
    llvm/lib/Target/AArch64/SVEInstrFormats.td
    llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 9260c7302bb9..3808f26ff83f 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -824,6 +824,11 @@ let TargetPrefix = "aarch64" in {  // All intrinsics start with "llvm.aarch64.".
                  llvm_i32_ty],
                 [IntrNoMem]>;
 
+  class AdvSIMD_SVE_EXPA_Intrinsic
+    : Intrinsic<[llvm_anyvector_ty],
+                [LLVMVectorOfBitcastsToInt<0>],
+                [IntrNoMem]>;
+
   class AdvSIMD_SVE_PUNPKHI_Intrinsic
     : Intrinsic<[LLVMHalfElementsVectorType<0>],
                 [llvm_anyvector_ty],
@@ -966,12 +971,14 @@ def int_aarch64_sve_uxtw : AdvSIMD_Merged1VectorArg_Intrinsic;
 //
 
 def int_aarch64_sve_fabd       : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_fabs       : AdvSIMD_Merged1VectorArg_Intrinsic;
 def int_aarch64_sve_fadd       : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_fcadd      : AdvSIMD_SVE_CADD_Intrinsic;
 def int_aarch64_sve_fcmla      : AdvSIMD_SVE_CMLA_Intrinsic;
 def int_aarch64_sve_fcmla_lane : AdvSIMD_SVE_CMLA_LANE_Intrinsic;
 def int_aarch64_sve_fdiv       : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_fdivr      : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_fexpa_x    : AdvSIMD_SVE_EXPA_Intrinsic;
 def int_aarch64_sve_fmad       : AdvSIMD_Pred3VectorArg_Intrinsic;
 def int_aarch64_sve_fmax       : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_fmaxnm     : AdvSIMD_Pred2VectorArg_Intrinsic;
@@ -984,12 +991,26 @@ def int_aarch64_sve_fmls_lane  : AdvSIMD_3VectorArgIndexed_Intrinsic;
 def int_aarch64_sve_fmsb       : AdvSIMD_Pred3VectorArg_Intrinsic;
 def int_aarch64_sve_fmul       : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_fmulx      : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_fneg       : AdvSIMD_Merged1VectorArg_Intrinsic;
 def int_aarch64_sve_fmul_lane  : AdvSIMD_2VectorArgIndexed_Intrinsic;
 def int_aarch64_sve_fnmad      : AdvSIMD_Pred3VectorArg_Intrinsic;
 def int_aarch64_sve_fnmla      : AdvSIMD_Pred3VectorArg_Intrinsic;
 def int_aarch64_sve_fnmls      : AdvSIMD_Pred3VectorArg_Intrinsic;
 def int_aarch64_sve_fnmsb      : AdvSIMD_Pred3VectorArg_Intrinsic;
+def int_aarch64_sve_frecpe_x   : AdvSIMD_1VectorArg_Intrinsic;
+def int_aarch64_sve_frecps_x   : AdvSIMD_2VectorArg_Intrinsic;
+def int_aarch64_sve_frecpx     : AdvSIMD_Merged1VectorArg_Intrinsic;
+def int_aarch64_sve_frinta     : AdvSIMD_Merged1VectorArg_Intrinsic;
+def int_aarch64_sve_frinti     : AdvSIMD_Merged1VectorArg_Intrinsic;
+def int_aarch64_sve_frintm     : AdvSIMD_Merged1VectorArg_Intrinsic;
+def int_aarch64_sve_frintn     : AdvSIMD_Merged1VectorArg_Intrinsic;
+def int_aarch64_sve_frintp     : AdvSIMD_Merged1VectorArg_Intrinsic;
+def int_aarch64_sve_frintx     : AdvSIMD_Merged1VectorArg_Intrinsic;
+def int_aarch64_sve_frintz     : AdvSIMD_Merged1VectorArg_Intrinsic;
+def int_aarch64_sve_frsqrte_x  : AdvSIMD_1VectorArg_Intrinsic;
+def int_aarch64_sve_frsqrts_x  : AdvSIMD_2VectorArg_Intrinsic;
 def int_aarch64_sve_fscale     : AdvSIMD_SVE_SCALE_Intrinsic;
+def int_aarch64_sve_fsqrt      : AdvSIMD_Merged1VectorArg_Intrinsic;
 def int_aarch64_sve_fsub       : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_fsubr      : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_ftmad_x    : AdvSIMD_2VectorArgIndexed_Intrinsic;
@@ -1002,12 +1023,6 @@ def int_aarch64_sve_ftssel_x   : AdvSIMD_SVE_TSMUL_Intrinsic;
 
 def int_aarch64_sve_fcvtzs_i32f16 : Builtin_SVCVT<"svcvt_s32_f16_m", llvm_nxv4i32_ty, llvm_nxv8f16_ty>;
 
-//
-// Floating-point arithmetic
-//
-def int_aarch64_sve_frecps_x: AdvSIMD_2VectorArg_Intrinsic;
-def int_aarch64_sve_frsqrts_x: AdvSIMD_2VectorArg_Intrinsic;
-
 //
 // Predicate operations
 //

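The new AdvSIMD_SVE_EXPA_Intrinsic class pairs each floating-point result
type with the integer vector of the same element count and width via
LLVMVectorOfBitcastsToInt<0>. For instance, mirroring the declarations in
the test changes below:

  ; nxv4i32 operand -> nxv4f32 result; likewise nxv8i16 -> nxv8f16 and
  ; nxv2i64 -> nxv2f64.
  declare <vscale x 4 x float> @llvm.aarch64.sve.fexpa.x.nxv4f32(<vscale x 4 x i32>)
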
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index b2f7cfb9da4c..b1686fe0d470 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -102,8 +102,8 @@ let Predicates = [HasSVE] in {
   defm CNT_ZPmZ  : sve_int_un_pred_arit_1<   0b010, "cnt",  int_aarch64_sve_cnt>;
   defm CNOT_ZPmZ : sve_int_un_pred_arit_1<   0b011, "cnot", int_aarch64_sve_cnot>;
   defm NOT_ZPmZ  : sve_int_un_pred_arit_1<   0b110, "not",  int_aarch64_sve_not>;
-  defm FABS_ZPmZ : sve_int_un_pred_arit_1_fp<0b100, "fabs">;
-  defm FNEG_ZPmZ : sve_int_un_pred_arit_1_fp<0b101, "fneg">;
+  defm FABS_ZPmZ : sve_int_un_pred_arit_1_fp<0b100, "fabs", int_aarch64_sve_fabs>;
+  defm FNEG_ZPmZ : sve_int_un_pred_arit_1_fp<0b101, "fneg", int_aarch64_sve_fneg>;
 
   defm SMAX_ZPmZ : sve_int_bin_pred_arit_1<0b000, "smax", int_aarch64_sve_smax>;
   defm UMAX_ZPmZ : sve_int_bin_pred_arit_1<0b001, "umax", int_aarch64_sve_umax>;
@@ -112,8 +112,8 @@ let Predicates = [HasSVE] in {
   defm SABD_ZPmZ : sve_int_bin_pred_arit_1<0b100, "sabd", int_aarch64_sve_sabd>;
   defm UABD_ZPmZ : sve_int_bin_pred_arit_1<0b101, "uabd", int_aarch64_sve_uabd>;
 
-  defm FRECPE_ZZ  : sve_fp_2op_u_zd<0b110, "frecpe">;
-  defm FRSQRTE_ZZ : sve_fp_2op_u_zd<0b111, "frsqrte">;
+  defm FRECPE_ZZ  : sve_fp_2op_u_zd<0b110, "frecpe",  int_aarch64_sve_frecpe_x>;
+  defm FRSQRTE_ZZ : sve_fp_2op_u_zd<0b111, "frsqrte", int_aarch64_sve_frsqrte_x>;
 
   defm FADD_ZPmI    : sve_fp_2op_i_p_zds<0b000, "fadd", sve_fpimm_half_one>;
   defm FSUB_ZPmI    : sve_fp_2op_i_p_zds<0b001, "fsub", sve_fpimm_half_one>;
@@ -222,9 +222,7 @@ let Predicates = [HasSVE] in {
   defm MOVPRFX_ZPzZ : sve_int_movprfx_pred_zero<0b000, "movprfx">;
   defm MOVPRFX_ZPmZ : sve_int_movprfx_pred_merge<0b001, "movprfx">;
   def MOVPRFX_ZZ : sve_int_bin_cons_misc_0_c<0b00000001, "movprfx", ZPRAny>;
-  def FEXPA_ZZ_H : sve_int_bin_cons_misc_0_c<0b01000000, "fexpa", ZPR16>;
-  def FEXPA_ZZ_S : sve_int_bin_cons_misc_0_c<0b10000000, "fexpa", ZPR32>;
-  def FEXPA_ZZ_D : sve_int_bin_cons_misc_0_c<0b11000000, "fexpa", ZPR64>;
+  defm FEXPA_ZZ : sve_int_bin_cons_misc_0_c_fexpa<"fexpa", int_aarch64_sve_fexpa_x>;
 
   def BRKPA_PPzPP  : sve_int_brkp<0b00, "brkpa">;
   def BRKPAS_PPzPP : sve_int_brkp<0b10, "brkpas">;
@@ -926,15 +924,15 @@ let Predicates = [HasSVE] in {
   def FCVTZS_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111110, "fcvtzs", ZPR64, ZPR64, ElementSizeD>;
   def FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111111, "fcvtzu", ZPR64, ZPR64, ElementSizeD>;
 
-  defm FRINTN_ZPmZ : sve_fp_2op_p_zd_HSD<0b00000, "frintn">;
-  defm FRINTP_ZPmZ : sve_fp_2op_p_zd_HSD<0b00001, "frintp">;
-  defm FRINTM_ZPmZ : sve_fp_2op_p_zd_HSD<0b00010, "frintm">;
-  defm FRINTZ_ZPmZ : sve_fp_2op_p_zd_HSD<0b00011, "frintz">;
-  defm FRINTA_ZPmZ : sve_fp_2op_p_zd_HSD<0b00100, "frinta">;
-  defm FRINTX_ZPmZ : sve_fp_2op_p_zd_HSD<0b00110, "frintx">;
-  defm FRINTI_ZPmZ : sve_fp_2op_p_zd_HSD<0b00111, "frinti">;
-  defm FRECPX_ZPmZ : sve_fp_2op_p_zd_HSD<0b01100, "frecpx">;
-  defm FSQRT_ZPmZ  : sve_fp_2op_p_zd_HSD<0b01101, "fsqrt">;
+  defm FRINTN_ZPmZ : sve_fp_2op_p_zd_HSD<0b00000, "frintn", int_aarch64_sve_frintn>;
+  defm FRINTP_ZPmZ : sve_fp_2op_p_zd_HSD<0b00001, "frintp", int_aarch64_sve_frintp>;
+  defm FRINTM_ZPmZ : sve_fp_2op_p_zd_HSD<0b00010, "frintm", int_aarch64_sve_frintm>;
+  defm FRINTZ_ZPmZ : sve_fp_2op_p_zd_HSD<0b00011, "frintz", int_aarch64_sve_frintz>;
+  defm FRINTA_ZPmZ : sve_fp_2op_p_zd_HSD<0b00100, "frinta", int_aarch64_sve_frinta>;
+  defm FRINTX_ZPmZ : sve_fp_2op_p_zd_HSD<0b00110, "frintx", int_aarch64_sve_frintx>;
+  defm FRINTI_ZPmZ : sve_fp_2op_p_zd_HSD<0b00111, "frinti", int_aarch64_sve_frinti>;
+  defm FRECPX_ZPmZ : sve_fp_2op_p_zd_HSD<0b01100, "frecpx", int_aarch64_sve_frecpx>;
+  defm FSQRT_ZPmZ  : sve_fp_2op_p_zd_HSD<0b01101, "fsqrt",  int_aarch64_sve_fsqrt>;
 
   // InstAliases
   def : InstAlias<"mov $Zd, $Zn",

diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 05c396f78bf2..c82e78162f5e 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -1830,10 +1830,14 @@ class sve_fp_2op_p_zd<bits<7> opc, string asm, RegisterOperand i_zprtype,
   let ElementSize = size;
 }
 
-multiclass sve_fp_2op_p_zd_HSD<bits<5> opc, string asm> {
+multiclass sve_fp_2op_p_zd_HSD<bits<5> opc, string asm, SDPatternOperator op> {
   def _H : sve_fp_2op_p_zd<{ 0b01, opc }, asm, ZPR16, ZPR16, ElementSizeH>;
   def _S : sve_fp_2op_p_zd<{ 0b10, opc }, asm, ZPR32, ZPR32, ElementSizeS>;
   def _D : sve_fp_2op_p_zd<{ 0b11, opc }, asm, ZPR64, ZPR64, ElementSizeD>;
+
+  def : SVE_3_Op_Pat<nxv8f16, op, nxv8f16, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_3_Op_Pat<nxv4f32, op, nxv4f32, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
+  def : SVE_3_Op_Pat<nxv2f64, op, nxv2f64, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
 }
 
 multiclass sve2_fp_flogb<string asm> {
@@ -1863,10 +1867,14 @@ class sve_fp_2op_u_zd<bits<2> sz, bits<3> opc, string asm,
   let Inst{4-0}   = Zd;
 }
 
-multiclass sve_fp_2op_u_zd<bits<3> opc, string asm> {
+multiclass sve_fp_2op_u_zd<bits<3> opc, string asm, SDPatternOperator op> {
   def _H : sve_fp_2op_u_zd<0b01, opc, asm, ZPR16>;
   def _S : sve_fp_2op_u_zd<0b10, opc, asm, ZPR32>;
   def _D : sve_fp_2op_u_zd<0b11, opc, asm, ZPR64>;
+
+  def : SVE_1_Op_Pat<nxv8f16, op, nxv8f16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_1_Op_Pat<nxv4f32, op, nxv4f32, !cast<Instruction>(NAME # _S)>;
+  def : SVE_1_Op_Pat<nxv2f64, op, nxv2f64, !cast<Instruction>(NAME # _D)>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -3048,10 +3056,15 @@ multiclass sve_int_un_pred_arit_1<bits<3> opc, string asm,
   def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
 }
 
-multiclass sve_int_un_pred_arit_1_fp<bits<3> opc, string asm> {
+multiclass sve_int_un_pred_arit_1_fp<bits<3> opc, string asm,
+                                     SDPatternOperator op> {
   def _H : sve_int_un_pred_arit<0b01, { opc, 0b1 }, asm, ZPR16>;
   def _S : sve_int_un_pred_arit<0b10, { opc, 0b1 }, asm, ZPR32>;
   def _D : sve_int_un_pred_arit<0b11, { opc, 0b1 }, asm, ZPR64>;
+
+  def : SVE_3_Op_Pat<nxv8f16, op, nxv8f16, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_3_Op_Pat<nxv4f32, op, nxv4f32, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
+  def : SVE_3_Op_Pat<nxv2f64, op, nxv2f64, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -5723,6 +5736,16 @@ class sve_int_bin_cons_misc_0_c<bits<8> opc, string asm, ZPRRegOp zprty>
   let Inst{4-0}   = Zd;
 }
 
+multiclass sve_int_bin_cons_misc_0_c_fexpa<string asm, SDPatternOperator op> {
+  def _H : sve_int_bin_cons_misc_0_c<0b01000000, asm, ZPR16>;
+  def _S : sve_int_bin_cons_misc_0_c<0b10000000, asm, ZPR32>;
+  def _D : sve_int_bin_cons_misc_0_c<0b11000000, asm, ZPR64>;
+
+  def : SVE_1_Op_Pat<nxv8f16, op, nxv8i16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_1_Op_Pat<nxv4f32, op, nxv4i32, !cast<Instruction>(NAME # _S)>;
+  def : SVE_1_Op_Pat<nxv2f64, op, nxv2i64, !cast<Instruction>(NAME # _D)>;
+}
+
 //===----------------------------------------------------------------------===//
 // SVE Integer Reduction Group
 //===----------------------------------------------------------------------===//

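The SVE_1_Op_Pat and SVE_3_Op_Pat defs added above are what let
instruction selection match the new intrinsics onto these instruction
definitions. As a sketch (mirroring the tests below), an unpredicated
frsqrte call selects the half-precision variant:

  define <vscale x 8 x half> @frsqrte_example(<vscale x 8 x half> %a) {
    ; Matched by SVE_1_Op_Pat<nxv8f16, ...>; emitted as: frsqrte z0.h, z0.h
    %r = call <vscale x 8 x half> @llvm.aarch64.sve.frsqrte.x.nxv8f16(<vscale x 8 x half> %a)
    ret <vscale x 8 x half> %r
  }

  declare <vscale x 8 x half> @llvm.aarch64.sve.frsqrte.x.nxv8f16(<vscale x 8 x half>)
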
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll
index 8a3b2178ff97..16c8529a87be 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll
@@ -34,6 +34,40 @@ define <vscale x 2 x double> @fabd_d(<vscale x 2 x i1> %pg, <vscale x 2 x double
   ret <vscale x 2 x double> %out
 }
 
+;
+; FABS
+;
+
+define <vscale x 8 x half> @fabs_h(<vscale x 8 x half> %a, <vscale x 8 x i1> %pg, <vscale x 8 x half> %b) {
+; CHECK-LABEL: fabs_h:
+; CHECK: fabs z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fabs.nxv8f16(<vscale x 8 x half> %a,
+                                                                 <vscale x 8 x i1> %pg,
+                                                                 <vscale x 8 x half> %b)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @fabs_s(<vscale x 4 x float> %a, <vscale x 4 x i1> %pg, <vscale x 4 x float> %b) {
+; CHECK-LABEL: fabs_s:
+; CHECK: fabs z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fabs.nxv4f32(<vscale x 4 x float> %a,
+                                                                  <vscale x 4 x i1> %pg,
+                                                                  <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @fabs_d(<vscale x 2 x double> %a, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b) {
+; CHECK-LABEL: fabs_d:
+; CHECK: fabs z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fabs.nxv2f64(<vscale x 2 x double> %a,
+                                                                   <vscale x 2 x i1> %pg,
+                                                                   <vscale x 2 x double> %b)
+  ret <vscale x 2 x double> %out
+}
+
 ;
 ; FADD
 ;
@@ -241,6 +275,34 @@ define <vscale x 2 x double> @fdivr_d(<vscale x 2 x i1> %pg, <vscale x 2 x doubl
   ret <vscale x 2 x double> %out
 }
 
+;
+; FEXPA
+;
+
+define <vscale x 8 x half> @fexpa_h(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: fexpa_h:
+; CHECK: fexpa z0.h, z0.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fexpa.x.nxv8f16(<vscale x 8 x i16> %a)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @fexpa_s(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: fexpa_s:
+; CHECK: fexpa z0.s, z0.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fexpa.x.nxv4f32(<vscale x 4 x i32> %a)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @fexpa_d(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
+; CHECK-LABEL: fexpa_d:
+; CHECK: fexpa z0.d, z0.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fexpa.x.nxv2f64(<vscale x 2 x i64> %a)
+  ret <vscale x 2 x double> %out
+}
+
 ;
 ; FMAD
 ;
@@ -702,36 +764,36 @@ define <vscale x 2 x double> @fmulx_d(<vscale x 2 x i1> %pg, <vscale x 2 x doubl
 }
 
 ;
-; FSCALE
+; FNEG
 ;
 
-define <vscale x 8 x half> @fscale_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x i16> %b) {
-; CHECK-LABEL: fscale_h:
-; CHECK: fscale z0.h, p0/m, z0.h, z1.h
+define <vscale x 8 x half> @fneg_h(<vscale x 8 x half> %a, <vscale x 8 x i1> %pg, <vscale x 8 x half> %b) {
+; CHECK-LABEL: fneg_h:
+; CHECK: fneg z0.h, p0/m, z1.h
 ; CHECK-NEXT: ret
-  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fscale.nxv8f16(<vscale x 8 x i1> %pg,
-                                                                   <vscale x 8 x half> %a,
-                                                                   <vscale x 8 x i16> %b)
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fneg.nxv8f16(<vscale x 8 x half> %a,
+                                                                 <vscale x 8 x i1> %pg,
+                                                                 <vscale x 8 x half> %b)
   ret <vscale x 8 x half> %out
 }
 
-define <vscale x 4 x float> @fscale_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x i32> %b) {
-; CHECK-LABEL: fscale_s:
-; CHECK: fscale z0.s, p0/m, z0.s, z1.s
+define <vscale x 4 x float> @fneg_s(<vscale x 4 x float> %a, <vscale x 4 x i1> %pg, <vscale x 4 x float> %b) {
+; CHECK-LABEL: fneg_s:
+; CHECK: fneg z0.s, p0/m, z1.s
 ; CHECK-NEXT: ret
-  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fscale.nxv4f32(<vscale x 4 x i1> %pg,
-                                                                    <vscale x 4 x float> %a,
-                                                                    <vscale x 4 x i32> %b)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fneg.nxv4f32(<vscale x 4 x float> %a,
+                                                                  <vscale x 4 x i1> %pg,
+                                                                  <vscale x 4 x float> %b)
   ret <vscale x 4 x float> %out
 }
 
-define <vscale x 2 x double> @fscale_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x i64> %b) {
-; CHECK-LABEL: fscale_d:
-; CHECK: fscale z0.d, p0/m, z0.d, z1.d
+define <vscale x 2 x double> @fneg_d(<vscale x 2 x double> %a, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b) {
+; CHECK-LABEL: fneg_d:
+; CHECK: fneg z0.d, p0/m, z1.d
 ; CHECK-NEXT: ret
-  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fscale.nxv2f64(<vscale x 2 x i1> %pg,
-                                                                     <vscale x 2 x double> %a,
-                                                                     <vscale x 2 x i64> %b)
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fneg.nxv2f64(<vscale x 2 x double> %a,
+                                                                   <vscale x 2 x i1> %pg,
+                                                                   <vscale x 2 x double> %b)
   ret <vscale x 2 x double> %out
 }
 
@@ -883,6 +945,402 @@ define <vscale x 2 x double> @fnmsb_d(<vscale x 2 x i1> %pg, <vscale x 2 x doubl
   ret <vscale x 2 x double> %out
 }
 
+;
+; FRECPE
+;
+
+define <vscale x 8 x half> @frecpe_h(<vscale x 8 x half> %a) {
+; CHECK-LABEL: frecpe_h:
+; CHECK: frecpe z0.h, z0.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.frecpe.x.nxv8f16(<vscale x 8 x half> %a)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @frecpe_s(<vscale x 4 x float> %a) {
+; CHECK-LABEL: frecpe_s:
+; CHECK: frecpe z0.s, z0.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.frecpe.x.nxv4f32(<vscale x 4 x float> %a)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @frecpe_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
+; CHECK-LABEL: frecpe_d:
+; CHECK: frecpe z0.d, z0.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.frecpe.x.nxv2f64(<vscale x 2 x double> %a)
+  ret <vscale x 2 x double> %out
+}
+
+;
+; FRECPX
+;
+
+define <vscale x 8 x half> @frecpx_h(<vscale x 8 x half> %a, <vscale x 8 x i1> %pg, <vscale x 8 x half> %b) {
+; CHECK-LABEL: frecpx_h:
+; CHECK: frecpx z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.frecpx.nxv8f16(<vscale x 8 x half> %a,
+                                                                  <vscale x 8 x i1> %pg,
+                                                                  <vscale x 8 x half> %b)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @frecpx_s(<vscale x 4 x float> %a, <vscale x 4 x i1> %pg, <vscale x 4 x float> %b) {
+; CHECK-LABEL: frecpx_s:
+; CHECK: frecpx z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.frecpx.nxv4f32(<vscale x 4 x float> %a,
+                                                                   <vscale x 4 x i1> %pg,
+                                                                   <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @frecpx_d(<vscale x 2 x double> %a, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b) {
+; CHECK-LABEL: frecpx_d:
+; CHECK: frecpx z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.frecpx.nxv2f64(<vscale x 2 x double> %a,
+                                                                    <vscale x 2 x i1> %pg,
+                                                                    <vscale x 2 x double> %b)
+  ret <vscale x 2 x double> %out
+}
+
+;
+; FRINTA
+;
+
+define <vscale x 8 x half> @frinta_h(<vscale x 8 x half> %a, <vscale x 8 x i1> %pg, <vscale x 8 x half> %b) {
+; CHECK-LABEL: frinta_h:
+; CHECK: frinta z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.frinta.nxv8f16(<vscale x 8 x half> %a,
+                                                                   <vscale x 8 x i1> %pg,
+                                                                   <vscale x 8 x half> %b)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @frinta_s(<vscale x 4 x float> %a, <vscale x 4 x i1> %pg, <vscale x 4 x float> %b) {
+; CHECK-LABEL: frinta_s:
+; CHECK: frinta z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.frinta.nxv4f32(<vscale x 4 x float> %a,
+                                                                    <vscale x 4 x i1> %pg,
+                                                                    <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @frinta_d(<vscale x 2 x double> %a, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b) {
+; CHECK-LABEL: frinta_d:
+; CHECK: frinta z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.frinta.nxv2f64(<vscale x 2 x double> %a,
+                                                                     <vscale x 2 x i1> %pg,
+                                                                     <vscale x 2 x double> %b)
+  ret <vscale x 2 x double> %out
+}
+
+;
+; FRINTI
+;
+
+define <vscale x 8 x half> @frinti_h(<vscale x 8 x half> %a, <vscale x 8 x i1> %pg, <vscale x 8 x half> %b) {
+; CHECK-LABEL: frinti_h:
+; CHECK: frinti z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.frinti.nxv8f16(<vscale x 8 x half> %a,
+                                                                   <vscale x 8 x i1> %pg,
+                                                                   <vscale x 8 x half> %b)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @frinti_s(<vscale x 4 x float> %a, <vscale x 4 x i1> %pg, <vscale x 4 x float> %b) {
+; CHECK-LABEL: frinti_s:
+; CHECK: frinti z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.frinti.nxv4f32(<vscale x 4 x float> %a,
+                                                                    <vscale x 4 x i1> %pg,
+                                                                    <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @frinti_d(<vscale x 2 x double> %a, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b) {
+; CHECK-LABEL: frinti_d:
+; CHECK: frinti z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.frinti.nxv2f64(<vscale x 2 x double> %a,
+                                                                     <vscale x 2 x i1> %pg,
+                                                                     <vscale x 2 x double> %b)
+  ret <vscale x 2 x double> %out
+}
+
+;
+; FRINTM
+;
+
+define <vscale x 8 x half> @frintm_h(<vscale x 8 x half> %a, <vscale x 8 x i1> %pg, <vscale x 8 x half> %b) {
+; CHECK-LABEL: frintm_h:
+; CHECK: frintm z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.frintm.nxv8f16(<vscale x 8 x half> %a,
+                                                                   <vscale x 8 x i1> %pg,
+                                                                   <vscale x 8 x half> %b)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @frintm_s(<vscale x 4 x float> %a, <vscale x 4 x i1> %pg, <vscale x 4 x float> %b) {
+; CHECK-LABEL: frintm_s:
+; CHECK: frintm z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.frintm.nxv4f32(<vscale x 4 x float> %a,
+                                                                    <vscale x 4 x i1> %pg,
+                                                                    <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @frintm_d(<vscale x 2 x double> %a, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b) {
+; CHECK-LABEL: frintm_d:
+; CHECK: frintm z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.frintm.nxv2f64(<vscale x 2 x double> %a,
+                                                                     <vscale x 2 x i1> %pg,
+                                                                     <vscale x 2 x double> %b)
+  ret <vscale x 2 x double> %out
+}
+
+;
+; FRINTN
+;
+
+define <vscale x 8 x half> @frintn_h(<vscale x 8 x half> %a, <vscale x 8 x i1> %pg, <vscale x 8 x half> %b) {
+; CHECK-LABEL: frintn_h:
+; CHECK: frintn z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.frintn.nxv8f16(<vscale x 8 x half> %a,
+                                                                   <vscale x 8 x i1> %pg,
+                                                                   <vscale x 8 x half> %b)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @frintn_s(<vscale x 4 x float> %a, <vscale x 4 x i1> %pg, <vscale x 4 x float> %b) {
+; CHECK-LABEL: frintn_s:
+; CHECK: frintn z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.frintn.nxv4f32(<vscale x 4 x float> %a,
+                                                                    <vscale x 4 x i1> %pg,
+                                                                    <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @frintn_d(<vscale x 2 x double> %a, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b) {
+; CHECK-LABEL: frintn_d:
+; CHECK: frintn z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.frintn.nxv2f64(<vscale x 2 x double> %a,
+                                                                     <vscale x 2 x i1> %pg,
+                                                                     <vscale x 2 x double> %b)
+  ret <vscale x 2 x double> %out
+}
+
+;
+; FRINTP
+;
+
+define <vscale x 8 x half> @frintp_h(<vscale x 8 x half> %a, <vscale x 8 x i1> %pg, <vscale x 8 x half> %b) {
+; CHECK-LABEL: frintp_h:
+; CHECK: frintp z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.frintp.nxv8f16(<vscale x 8 x half> %a,
+                                                                   <vscale x 8 x i1> %pg,
+                                                                   <vscale x 8 x half> %b)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @frintp_s(<vscale x 4 x float> %a, <vscale x 4 x i1> %pg, <vscale x 4 x float> %b) {
+; CHECK-LABEL: frintp_s:
+; CHECK: frintp z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.frintp.nxv4f32(<vscale x 4 x float> %a,
+                                                                    <vscale x 4 x i1> %pg,
+                                                                    <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @frintp_d(<vscale x 2 x double> %a, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b) {
+; CHECK-LABEL: frintp_d:
+; CHECK: frintp z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.frintp.nxv2f64(<vscale x 2 x double> %a,
+                                                                     <vscale x 2 x i1> %pg,
+                                                                     <vscale x 2 x double> %b)
+  ret <vscale x 2 x double> %out
+}
+
+;
+; FRINTX
+;
+
+define <vscale x 8 x half> @frintx_h(<vscale x 8 x half> %a, <vscale x 8 x i1> %pg, <vscale x 8 x half> %b) {
+; CHECK-LABEL: frintx_h:
+; CHECK: frintx z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.frintx.nxv8f16(<vscale x 8 x half> %a,
+                                                                   <vscale x 8 x i1> %pg,
+                                                                   <vscale x 8 x half> %b)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @frintx_s(<vscale x 4 x float> %a, <vscale x 4 x i1> %pg, <vscale x 4 x float> %b) {
+; CHECK-LABEL: frintx_s:
+; CHECK: frintx z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.frintx.nxv4f32(<vscale x 4 x float> %a,
+                                                                    <vscale x 4 x i1> %pg,
+                                                                    <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @frintx_d(<vscale x 2 x double> %a, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b) {
+; CHECK-LABEL: frintx_d:
+; CHECK: frintx z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.frintx.nxv2f64(<vscale x 2 x double> %a,
+                                                                     <vscale x 2 x i1> %pg,
+                                                                     <vscale x 2 x double> %b)
+  ret <vscale x 2 x double> %out
+}
+
+;
+; FRINTZ
+;
+
+define <vscale x 8 x half> @frintz_h(<vscale x 8 x half> %a, <vscale x 8 x i1> %pg, <vscale x 8 x half> %b) {
+; CHECK-LABEL: frintz_h:
+; CHECK: frintz z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.frintz.nxv8f16(<vscale x 8 x half> %a,
+                                                                   <vscale x 8 x i1> %pg,
+                                                                   <vscale x 8 x half> %b)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @frintz_s(<vscale x 4 x float> %a, <vscale x 4 x i1> %pg, <vscale x 4 x float> %b) {
+; CHECK-LABEL: frintz_s:
+; CHECK: frintz z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.frintz.nxv4f32(<vscale x 4 x float> %a,
+                                                                    <vscale x 4 x i1> %pg,
+                                                                    <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @frintz_d(<vscale x 2 x double> %a, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b) {
+; CHECK-LABEL: frintz_d:
+; CHECK: frintz z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.frintz.nxv2f64(<vscale x 2 x double> %a,
+                                                                     <vscale x 2 x i1> %pg,
+                                                                     <vscale x 2 x double> %b)
+  ret <vscale x 2 x double> %out
+}
+
+;
+; FRSQRTE
+;
+
+define <vscale x 8 x half> @frsqrte_h(<vscale x 8 x half> %a) {
+; CHECK-LABEL: frsqrte_h:
+; CHECK: frsqrte z0.h, z0.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.frsqrte.x.nxv8f16(<vscale x 8 x half> %a)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @frsqrte_s(<vscale x 4 x float> %a) {
+; CHECK-LABEL: frsqrte_s:
+; CHECK: frsqrte z0.s, z0.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.frsqrte.x.nxv4f32(<vscale x 4 x float> %a)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @frsqrte_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
+; CHECK-LABEL: frsqrte_d:
+; CHECK: frsqrte z0.d, z0.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.frsqrte.x.nxv2f64(<vscale x 2 x double> %a)
+  ret <vscale x 2 x double> %out
+}
+
+;
+; FSCALE
+;
+
+define <vscale x 8 x half> @fscale_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: fscale_h:
+; CHECK: fscale z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fscale.nxv8f16(<vscale x 8 x i1> %pg,
+                                                                   <vscale x 8 x half> %a,
+                                                                   <vscale x 8 x i16> %b)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @fscale_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: fscale_s:
+; CHECK: fscale z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fscale.nxv4f32(<vscale x 4 x i1> %pg,
+                                                                    <vscale x 4 x float> %a,
+                                                                    <vscale x 4 x i32> %b)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @fscale_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: fscale_d:
+; CHECK: fscale z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fscale.nxv2f64(<vscale x 2 x i1> %pg,
+                                                                     <vscale x 2 x double> %a,
+                                                                     <vscale x 2 x i64> %b)
+  ret <vscale x 2 x double> %out
+}
+
+;
+; FSQRT
+;
+
+define <vscale x 8 x half> @fsqrt_h(<vscale x 8 x half> %a, <vscale x 8 x i1> %pg, <vscale x 8 x half> %b) {
+; CHECK-LABEL: fsqrt_h:
+; CHECK: fsqrt z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fsqrt.nxv8f16(<vscale x 8 x half> %a,
+                                                                  <vscale x 8 x i1> %pg,
+                                                                  <vscale x 8 x half> %b)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @fsqrt_s(<vscale x 4 x float> %a, <vscale x 4 x i1> %pg, <vscale x 4 x float> %b) {
+; CHECK-LABEL: fsqrt_s:
+; CHECK: fsqrt z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fsqrt.nxv4f32(<vscale x 4 x float> %a,
+                                                                   <vscale x 4 x i1> %pg,
+                                                                   <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @fsqrt_d(<vscale x 2 x double> %a, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b) {
+; CHECK-LABEL: fsqrt_d:
+; CHECK: fsqrt z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fsqrt.nxv2f64(<vscale x 2 x double> %a,
+                                                                    <vscale x 2 x i1> %pg,
+                                                                    <vscale x 2 x double> %b)
+  ret <vscale x 2 x double> %out
+}
+
 ;
 ; FSUB
 ;
@@ -1051,6 +1509,10 @@ declare <vscale x 8 x half> @llvm.aarch64.sve.fabd.nxv8f16(<vscale x 8 x i1>, <v
 declare <vscale x 4 x float> @llvm.aarch64.sve.fabd.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
 declare <vscale x 2 x double> @llvm.aarch64.sve.fabd.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
 
+declare <vscale x 8 x half> @llvm.aarch64.sve.fabs.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fabs.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fabs.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, <vscale x 2 x double>)
+
 declare <vscale x 8 x half> @llvm.aarch64.sve.fadd.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
 declare <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
 declare <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
@@ -1074,6 +1536,10 @@ declare <vscale x 8 x half> @llvm.aarch64.sve.fdivr.nxv8f16(<vscale x 8 x i1>, <
 declare <vscale x 4 x float> @llvm.aarch64.sve.fdivr.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
 declare <vscale x 2 x double> @llvm.aarch64.sve.fdivr.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
 
+declare <vscale x 8 x half> @llvm.aarch64.sve.fexpa.x.nxv8f16(<vscale x 8 x i16>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fexpa.x.nxv4f32(<vscale x 4 x i32>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fexpa.x.nxv2f64(<vscale x 2 x i64>)
+
 declare <vscale x 8 x half> @llvm.aarch64.sve.fmad.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
 declare <vscale x 4 x float> @llvm.aarch64.sve.fmad.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
 declare <vscale x 2 x double> @llvm.aarch64.sve.fmad.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
@@ -1126,6 +1592,10 @@ declare <vscale x 8 x half> @llvm.aarch64.sve.fmulx.nxv8f16(<vscale x 8 x i1>, <
 declare <vscale x 4 x float> @llvm.aarch64.sve.fmulx.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
 declare <vscale x 2 x double> @llvm.aarch64.sve.fmulx.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
 
+declare <vscale x 8 x half> @llvm.aarch64.sve.fneg.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fneg.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fneg.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, <vscale x 2 x double>)
+
 declare <vscale x 8 x half> @llvm.aarch64.sve.fnmad.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
 declare <vscale x 4 x float> @llvm.aarch64.sve.fnmad.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
 declare <vscale x 2 x double> @llvm.aarch64.sve.fnmad.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
@@ -1142,10 +1612,54 @@ declare <vscale x 8 x half> @llvm.aarch64.sve.fnmsb.nxv8f16(<vscale x 8 x i1>, <
 declare <vscale x 4 x float> @llvm.aarch64.sve.fnmsb.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
 declare <vscale x 2 x double> @llvm.aarch64.sve.fnmsb.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
 
+declare <vscale x 8 x half> @llvm.aarch64.sve.frecpe.x.nxv8f16(<vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.frecpe.x.nxv4f32(<vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.frecpe.x.nxv2f64(<vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.frecpx.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.frecpx.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.frecpx.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.frinta.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.frinta.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.frinta.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.frinti.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.frinti.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.frinti.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.frintm.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.frintm.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.frintm.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.frintn.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.frintn.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.frintn.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.frintp.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.frintp.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.frintp.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.frintx.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.frintx.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.frintx.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.frintz.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.frintz.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.frintz.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.frsqrte.x.nxv8f16(<vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.frsqrte.x.nxv4f32(<vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.frsqrte.x.nxv2f64(<vscale x 2 x double>)
+
 declare <vscale x 8 x half> @llvm.aarch64.sve.fscale.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x i16>)
 declare <vscale x 4 x float> @llvm.aarch64.sve.fscale.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x i32>)
 declare <vscale x 2 x double> @llvm.aarch64.sve.fscale.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x i64>)
 
+declare <vscale x 8 x half> @llvm.aarch64.sve.fsqrt.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fsqrt.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fsqrt.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, <vscale x 2 x double>)
+
 declare <vscale x 8 x half> @llvm.aarch64.sve.fsub.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
 declare <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
 declare <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)

