[llvm] 1ffcbe3 - [AArch64][SVE] Add lowering for rounding operations
Danilo C. Grael via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 4 08:18:27 PDT 2020
Author: Muhammad Asif Manzoor
Date: 2020-09-04T11:16:57-04:00
New Revision: 1ffcbe35ae0e136d7dd4cdd77eda306cc98b00e7
URL: https://github.com/llvm/llvm-project/commit/1ffcbe35ae0e136d7dd4cdd77eda306cc98b00e7
DIFF: https://github.com/llvm/llvm-project/commit/1ffcbe35ae0e136d7dd4cdd77eda306cc98b00e7.diff
LOG: [AArch64][SVE] Add lowering for rounding operations
Add the functionality to lower SVE rounding operations for the passthru variant.
Create a new test file covering all of the rounding operations.
Reviewed By: paulwalker-arm
Differential Revision: https://reviews.llvm.org/D86793
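In practice, scalable-vector calls to the generic rounding intrinsics (llvm.floor,
llvm.nearbyint, llvm.rint, llvm.round, llvm.roundeven, llvm.trunc) are now
custom-lowered to the new *_MERGE_PASSTHRU nodes, which select the predicated SVE
frint* instructions. A minimal sketch, mirroring the tests added below (the function
name floor_example is illustrative, not part of the patch):

; Compile with: llc -mtriple=aarch64-linux-gnu -mattr=+sve
define <vscale x 4 x float> @floor_example(<vscale x 4 x float> %a) {
  ; Expected codegen (roughly):
  ;   ptrue  p0.s
  ;   frintm z0.s, p0/m, z0.s
  ;   ret
  %res = call <vscale x 4 x float> @llvm.floor.nxv4f32(<vscale x 4 x float> %a)
  ret <vscale x 4 x float> %res
}

declare <vscale x 4 x float> @llvm.floor.nxv4f32(<vscale x 4 x float>)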
Added:
llvm/test/CodeGen/AArch64/sve-fp-rounding.ll
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.h
llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
llvm/test/CodeGen/AArch64/sve-fp.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index e1b79393f25f..063644716a65 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -139,6 +139,12 @@ static bool isMergePassthruOpcode(unsigned Opc) {
case AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU:
case AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU:
case AArch64ISD::FCEIL_MERGE_PASSTHRU:
+ case AArch64ISD::FFLOOR_MERGE_PASSTHRU:
+ case AArch64ISD::FNEARBYINT_MERGE_PASSTHRU:
+ case AArch64ISD::FRINT_MERGE_PASSTHRU:
+ case AArch64ISD::FROUND_MERGE_PASSTHRU:
+ case AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU:
+ case AArch64ISD::FTRUNC_MERGE_PASSTHRU:
return true;
}
}
@@ -976,6 +982,12 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FNEG, VT, Custom);
setOperationAction(ISD::FSUB, VT, Custom);
setOperationAction(ISD::FCEIL, VT, Custom);
+ setOperationAction(ISD::FFLOOR, VT, Custom);
+ setOperationAction(ISD::FNEARBYINT, VT, Custom);
+ setOperationAction(ISD::FRINT, VT, Custom);
+ setOperationAction(ISD::FROUND, VT, Custom);
+ setOperationAction(ISD::FROUNDEVEN, VT, Custom);
+ setOperationAction(ISD::FTRUNC, VT, Custom);
}
}
@@ -1482,6 +1494,12 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FCEIL_MERGE_PASSTHRU)
+ MAKE_CASE(AArch64ISD::FFLOOR_MERGE_PASSTHRU)
+ MAKE_CASE(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU)
+ MAKE_CASE(AArch64ISD::FRINT_MERGE_PASSTHRU)
+ MAKE_CASE(AArch64ISD::FROUND_MERGE_PASSTHRU)
+ MAKE_CASE(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU)
+ MAKE_CASE(AArch64ISD::FTRUNC_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::SETCC_MERGE_ZERO)
MAKE_CASE(AArch64ISD::ADC)
MAKE_CASE(AArch64ISD::SBC)
@@ -3346,6 +3364,24 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::aarch64_sve_frintp:
return DAG.getNode(AArch64ISD::FCEIL_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
+ case Intrinsic::aarch64_sve_frintm:
+ return DAG.getNode(AArch64ISD::FFLOOR_MERGE_PASSTHRU, dl, Op.getValueType(),
+ Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
+ case Intrinsic::aarch64_sve_frinti:
+ return DAG.getNode(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU, dl, Op.getValueType(),
+ Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
+ case Intrinsic::aarch64_sve_frintx:
+ return DAG.getNode(AArch64ISD::FRINT_MERGE_PASSTHRU, dl, Op.getValueType(),
+ Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
+ case Intrinsic::aarch64_sve_frinta:
+ return DAG.getNode(AArch64ISD::FROUND_MERGE_PASSTHRU, dl, Op.getValueType(),
+ Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
+ case Intrinsic::aarch64_sve_frintn:
+ return DAG.getNode(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU, dl, Op.getValueType(),
+ Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
+ case Intrinsic::aarch64_sve_frintz:
+ return DAG.getNode(AArch64ISD::FTRUNC_MERGE_PASSTHRU, dl, Op.getValueType(),
+ Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_convert_to_svbool: {
EVT OutVT = Op.getValueType();
EVT InVT = Op.getOperand(1).getValueType();
@@ -3645,6 +3681,18 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FNEG_MERGE_PASSTHRU);
case ISD::FCEIL:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FCEIL_MERGE_PASSTHRU);
+ case ISD::FFLOOR:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::FFLOOR_MERGE_PASSTHRU);
+ case ISD::FNEARBYINT:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::FNEARBYINT_MERGE_PASSTHRU);
+ case ISD::FRINT:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::FRINT_MERGE_PASSTHRU);
+ case ISD::FROUND:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::FROUND_MERGE_PASSTHRU);
+ case ISD::FROUNDEVEN:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU);
+ case ISD::FTRUNC:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::FTRUNC_MERGE_PASSTHRU);
case ISD::FP_ROUND:
case ISD::STRICT_FP_ROUND:
return LowerFP_ROUND(Op, DAG);
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 38caa6a48141..d6e511891752 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -96,7 +96,13 @@ enum NodeType : unsigned {
// Predicated instructions with the result of inactive lanes provided by the
// last operand.
FCEIL_MERGE_PASSTHRU,
+ FFLOOR_MERGE_PASSTHRU,
+ FNEARBYINT_MERGE_PASSTHRU,
FNEG_MERGE_PASSTHRU,
+ FRINT_MERGE_PASSTHRU,
+ FROUND_MERGE_PASSTHRU,
+ FROUNDEVEN_MERGE_PASSTHRU,
+ FTRUNC_MERGE_PASSTHRU,
SIGN_EXTEND_INREG_MERGE_PASSTHRU,
ZERO_EXTEND_INREG_MERGE_PASSTHRU,
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index eadf23dc4622..e01a34242a8d 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -203,6 +203,12 @@ def AArch64fneg_mt : SDNode<"AArch64ISD::FNEG_MERGE_PASSTHRU", SDT_AArch64Arith>
def AArch64sxt_mt : SDNode<"AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU", SDT_AArch64IntExtend>;
def AArch64uxt_mt : SDNode<"AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU", SDT_AArch64IntExtend>;
def AArch64frintp_mt : SDNode<"AArch64ISD::FCEIL_MERGE_PASSTHRU", SDT_AArch64Arith>;
+def AArch64frintm_mt : SDNode<"AArch64ISD::FFLOOR_MERGE_PASSTHRU", SDT_AArch64Arith>;
+def AArch64frinti_mt : SDNode<"AArch64ISD::FNEARBYINT_MERGE_PASSTHRU", SDT_AArch64Arith>;
+def AArch64frintx_mt : SDNode<"AArch64ISD::FRINT_MERGE_PASSTHRU", SDT_AArch64Arith>;
+def AArch64frinta_mt : SDNode<"AArch64ISD::FROUND_MERGE_PASSTHRU", SDT_AArch64Arith>;
+def AArch64frintn_mt : SDNode<"AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU", SDT_AArch64Arith>;
+def AArch64frintz_mt : SDNode<"AArch64ISD::FTRUNC_MERGE_PASSTHRU", SDT_AArch64Arith>;
def SDT_AArch64ReduceWithInit : SDTypeProfile<1, 3, [SDTCisVec<1>, SDTCisVec<3>]>;
def AArch64clasta_n : SDNode<"AArch64ISD::CLASTA_N", SDT_AArch64ReduceWithInit>;
@@ -1416,13 +1422,13 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
defm FCVTZS_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111110, "fcvtzs", ZPR64, ZPR64, int_aarch64_sve_fcvtzs, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>;
defm FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111111, "fcvtzu", ZPR64, ZPR64, int_aarch64_sve_fcvtzu, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>;
- defm FRINTN_ZPmZ : sve_fp_2op_p_zd_HSD<0b00000, "frintn", int_aarch64_sve_frintn>;
- defm FRINTP_ZPmZ : sve_fp_2op_p_zd_HSD<0b00001, "frintp", int_aarch64_sve_frintp, AArch64frintp_mt>;
- defm FRINTM_ZPmZ : sve_fp_2op_p_zd_HSD<0b00010, "frintm", int_aarch64_sve_frintm>;
- defm FRINTZ_ZPmZ : sve_fp_2op_p_zd_HSD<0b00011, "frintz", int_aarch64_sve_frintz>;
- defm FRINTA_ZPmZ : sve_fp_2op_p_zd_HSD<0b00100, "frinta", int_aarch64_sve_frinta>;
- defm FRINTX_ZPmZ : sve_fp_2op_p_zd_HSD<0b00110, "frintx", int_aarch64_sve_frintx>;
- defm FRINTI_ZPmZ : sve_fp_2op_p_zd_HSD<0b00111, "frinti", int_aarch64_sve_frinti>;
+ defm FRINTN_ZPmZ : sve_fp_2op_p_zd_HSD<0b00000, "frintn", null_frag, AArch64frintn_mt>;
+ defm FRINTP_ZPmZ : sve_fp_2op_p_zd_HSD<0b00001, "frintp", null_frag, AArch64frintp_mt>;
+ defm FRINTM_ZPmZ : sve_fp_2op_p_zd_HSD<0b00010, "frintm", null_frag, AArch64frintm_mt>;
+ defm FRINTZ_ZPmZ : sve_fp_2op_p_zd_HSD<0b00011, "frintz", null_frag, AArch64frintz_mt>;
+ defm FRINTA_ZPmZ : sve_fp_2op_p_zd_HSD<0b00100, "frinta", null_frag, AArch64frinta_mt>;
+ defm FRINTX_ZPmZ : sve_fp_2op_p_zd_HSD<0b00110, "frintx", null_frag, AArch64frintx_mt>;
+ defm FRINTI_ZPmZ : sve_fp_2op_p_zd_HSD<0b00111, "frinti", null_frag, AArch64frinti_mt>;
defm FRECPX_ZPmZ : sve_fp_2op_p_zd_HSD<0b01100, "frecpx", int_aarch64_sve_frecpx>;
defm FSQRT_ZPmZ : sve_fp_2op_p_zd_HSD<0b01101, "fsqrt", int_aarch64_sve_fsqrt>;
diff --git a/llvm/test/CodeGen/AArch64/sve-fp-rounding.ll b/llvm/test/CodeGen/AArch64/sve-fp-rounding.ll
new file mode 100644
index 000000000000..0a31271d3f8b
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-fp-rounding.ll
@@ -0,0 +1,485 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --extra_scrub
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+; FCEIL
+
+define <vscale x 8 x half> @frintp_nxv8f16(<vscale x 8 x half> %a) {
+; CHECK-LABEL: frintp_nxv8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: frintp z0.h, p0/m, z0.h
+; CHECK-NEXT: ret
+ %res = call <vscale x 8 x half> @llvm.ceil.nxv8f16(<vscale x 8 x half> %a)
+ ret <vscale x 8 x half> %res
+}
+
+define <vscale x 4 x half> @frintp_nxv4f16(<vscale x 4 x half> %a) {
+; CHECK-LABEL: frintp_nxv4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: frintp z0.h, p0/m, z0.h
+; CHECK-NEXT: ret
+ %res = call <vscale x 4 x half> @llvm.ceil.nxv4f16(<vscale x 4 x half> %a)
+ ret <vscale x 4 x half> %res
+}
+
+define <vscale x 2 x half> @frintp_nxv2f16(<vscale x 2 x half> %a) {
+; CHECK-LABEL: frintp_nxv2f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: frintp z0.h, p0/m, z0.h
+; CHECK-NEXT: ret
+ %res = call <vscale x 2 x half> @llvm.ceil.nxv2f16(<vscale x 2 x half> %a)
+ ret <vscale x 2 x half> %res
+}
+
+define <vscale x 4 x float> @frintp_nxv4f32(<vscale x 4 x float> %a) {
+; CHECK-LABEL: frintp_nxv4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: frintp z0.s, p0/m, z0.s
+; CHECK-NEXT: ret
+ %res = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> %a)
+ ret <vscale x 4 x float> %res
+}
+
+define <vscale x 2 x float> @frintp_nxv2f32(<vscale x 2 x float> %a) {
+; CHECK-LABEL: frintp_nxv2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: frintp z0.s, p0/m, z0.s
+; CHECK-NEXT: ret
+ %res = call <vscale x 2 x float> @llvm.ceil.nxv2f32(<vscale x 2 x float> %a)
+ ret <vscale x 2 x float> %res
+}
+
+define <vscale x 2 x double> @frintp_nxv2f64(<vscale x 2 x double> %a) {
+; CHECK-LABEL: frintp_nxv2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: frintp z0.d, p0/m, z0.d
+; CHECK-NEXT: ret
+ %res = call <vscale x 2 x double> @llvm.ceil.nxv2f64(<vscale x 2 x double> %a)
+ ret <vscale x 2 x double> %res
+}
+
+; FFLOOR
+
+define <vscale x 8 x half> @frintm_nxv8f16(<vscale x 8 x half> %a) {
+; CHECK-LABEL: frintm_nxv8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: frintm z0.h, p0/m, z0.h
+; CHECK-NEXT: ret
+ %res = call <vscale x 8 x half> @llvm.floor.nxv8f16(<vscale x 8 x half> %a)
+ ret <vscale x 8 x half> %res
+}
+
+define <vscale x 4 x half> @frintm_nxv4f16(<vscale x 4 x half> %a) {
+; CHECK-LABEL: frintm_nxv4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: frintm z0.h, p0/m, z0.h
+; CHECK-NEXT: ret
+ %res = call <vscale x 4 x half> @llvm.floor.nxv4f16(<vscale x 4 x half> %a)
+ ret <vscale x 4 x half> %res
+}
+
+define <vscale x 2 x half> @frintm_nxv2f16(<vscale x 2 x half> %a) {
+; CHECK-LABEL: frintm_nxv2f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: frintm z0.h, p0/m, z0.h
+; CHECK-NEXT: ret
+ %res = call <vscale x 2 x half> @llvm.floor.nxv2f16(<vscale x 2 x half> %a)
+ ret <vscale x 2 x half> %res
+}
+
+define <vscale x 4 x float> @frintm_nxv4f32(<vscale x 4 x float> %a) {
+; CHECK-LABEL: frintm_nxv4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: frintm z0.s, p0/m, z0.s
+; CHECK-NEXT: ret
+ %res = call <vscale x 4 x float> @llvm.floor.nxv4f32(<vscale x 4 x float> %a)
+ ret <vscale x 4 x float> %res
+}
+
+define <vscale x 2 x float> @frintm_nxv2f32(<vscale x 2 x float> %a) {
+; CHECK-LABEL: frintm_nxv2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: frintm z0.s, p0/m, z0.s
+; CHECK-NEXT: ret
+ %res = call <vscale x 2 x float> @llvm.floor.nxv2f32(<vscale x 2 x float> %a)
+ ret <vscale x 2 x float> %res
+}
+
+define <vscale x 2 x double> @frintm_nxv2f64(<vscale x 2 x double> %a) {
+; CHECK-LABEL: frintm_nxv2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: frintm z0.d, p0/m, z0.d
+; CHECK-NEXT: ret
+ %res = call <vscale x 2 x double> @llvm.floor.nxv2f64(<vscale x 2 x double> %a)
+ ret <vscale x 2 x double> %res
+}
+
+; FNEARBYINT
+
+define <vscale x 8 x half> @frinti_nxv8f16(<vscale x 8 x half> %a) {
+; CHECK-LABEL: frinti_nxv8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: frinti z0.h, p0/m, z0.h
+; CHECK-NEXT: ret
+ %res = call <vscale x 8 x half> @llvm.nearbyint.nxv8f16(<vscale x 8 x half> %a)
+ ret <vscale x 8 x half> %res
+}
+
+define <vscale x 4 x half> @frinti_nxv4f16(<vscale x 4 x half> %a) {
+; CHECK-LABEL: frinti_nxv4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: frinti z0.h, p0/m, z0.h
+; CHECK-NEXT: ret
+ %res = call <vscale x 4 x half> @llvm.nearbyint.nxv4f16(<vscale x 4 x half> %a)
+ ret <vscale x 4 x half> %res
+}
+
+define <vscale x 2 x half> @frinti_nxv2f16(<vscale x 2 x half> %a) {
+; CHECK-LABEL: frinti_nxv2f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: frinti z0.h, p0/m, z0.h
+; CHECK-NEXT: ret
+ %res = call <vscale x 2 x half> @llvm.nearbyint.nxv2f16(<vscale x 2 x half> %a)
+ ret <vscale x 2 x half> %res
+}
+
+define <vscale x 4 x float> @frinti_nxv4f32(<vscale x 4 x float> %a) {
+; CHECK-LABEL: frinti_nxv4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: frinti z0.s, p0/m, z0.s
+; CHECK-NEXT: ret
+ %res = call <vscale x 4 x float> @llvm.nearbyint.nxv4f32(<vscale x 4 x float> %a)
+ ret <vscale x 4 x float> %res
+}
+
+define <vscale x 2 x float> @frinti_nxv2f32(<vscale x 2 x float> %a) {
+; CHECK-LABEL: frinti_nxv2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: frinti z0.s, p0/m, z0.s
+; CHECK-NEXT: ret
+ %res = call <vscale x 2 x float> @llvm.nearbyint.nxv2f32(<vscale x 2 x float> %a)
+ ret <vscale x 2 x float> %res
+}
+
+define <vscale x 2 x double> @frinti_nxv2f64(<vscale x 2 x double> %a) {
+; CHECK-LABEL: frinti_nxv2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: frinti z0.d, p0/m, z0.d
+; CHECK-NEXT: ret
+ %res = call <vscale x 2 x double> @llvm.nearbyint.nxv2f64(<vscale x 2 x double> %a)
+ ret <vscale x 2 x double> %res
+}
+
+; FRINT
+
+define <vscale x 8 x half> @frintx_nxv8f16(<vscale x 8 x half> %a) {
+; CHECK-LABEL: frintx_nxv8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: frintx z0.h, p0/m, z0.h
+; CHECK-NEXT: ret
+ %res = call <vscale x 8 x half> @llvm.rint.nxv8f16(<vscale x 8 x half> %a)
+ ret <vscale x 8 x half> %res
+}
+
+define <vscale x 4 x half> @frintx_nxv4f16(<vscale x 4 x half> %a) {
+; CHECK-LABEL: frintx_nxv4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: frintx z0.h, p0/m, z0.h
+; CHECK-NEXT: ret
+ %res = call <vscale x 4 x half> @llvm.rint.nxv4f16(<vscale x 4 x half> %a)
+ ret <vscale x 4 x half> %res
+}
+
+define <vscale x 2 x half> @frintx_nxv2f16(<vscale x 2 x half> %a) {
+; CHECK-LABEL: frintx_nxv2f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: frintx z0.h, p0/m, z0.h
+; CHECK-NEXT: ret
+ %res = call <vscale x 2 x half> @llvm.rint.nxv2f16(<vscale x 2 x half> %a)
+ ret <vscale x 2 x half> %res
+}
+
+define <vscale x 4 x float> @frintx_nxv4f32(<vscale x 4 x float> %a) {
+; CHECK-LABEL: frintx_nxv4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: frintx z0.s, p0/m, z0.s
+; CHECK-NEXT: ret
+ %res = call <vscale x 4 x float> @llvm.rint.nxv4f32(<vscale x 4 x float> %a)
+ ret <vscale x 4 x float> %res
+}
+
+define <vscale x 2 x float> @frintx_nxv2f32(<vscale x 2 x float> %a) {
+; CHECK-LABEL: frintx_nxv2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: frintx z0.s, p0/m, z0.s
+; CHECK-NEXT: ret
+ %res = call <vscale x 2 x float> @llvm.rint.nxv2f32(<vscale x 2 x float> %a)
+ ret <vscale x 2 x float> %res
+}
+
+define <vscale x 2 x double> @frintx_nxv2f64(<vscale x 2 x double> %a) {
+; CHECK-LABEL: frintx_nxv2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: frintx z0.d, p0/m, z0.d
+; CHECK-NEXT: ret
+ %res = call <vscale x 2 x double> @llvm.rint.nxv2f64(<vscale x 2 x double> %a)
+ ret <vscale x 2 x double> %res
+}
+
+; ROUND
+
+define <vscale x 8 x half> @frinta_nxv8f16(<vscale x 8 x half> %a) {
+; CHECK-LABEL: frinta_nxv8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: frinta z0.h, p0/m, z0.h
+; CHECK-NEXT: ret
+ %res = call <vscale x 8 x half> @llvm.round.nxv8f16(<vscale x 8 x half> %a)
+ ret <vscale x 8 x half> %res
+}
+
+define <vscale x 4 x half> @frinta_nxv4f16(<vscale x 4 x half> %a) {
+; CHECK-LABEL: frinta_nxv4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: frinta z0.h, p0/m, z0.h
+; CHECK-NEXT: ret
+ %res = call <vscale x 4 x half> @llvm.round.nxv4f16(<vscale x 4 x half> %a)
+ ret <vscale x 4 x half> %res
+}
+
+define <vscale x 2 x half> @frinta_nxv2f16(<vscale x 2 x half> %a) {
+; CHECK-LABEL: frinta_nxv2f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: frinta z0.h, p0/m, z0.h
+; CHECK-NEXT: ret
+ %res = call <vscale x 2 x half> @llvm.round.nxv2f16(<vscale x 2 x half> %a)
+ ret <vscale x 2 x half> %res
+}
+
+define <vscale x 4 x float> @frinta_nxv4f32(<vscale x 4 x float> %a) {
+; CHECK-LABEL: frinta_nxv4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: frinta z0.s, p0/m, z0.s
+; CHECK-NEXT: ret
+ %res = call <vscale x 4 x float> @llvm.round.nxv4f32(<vscale x 4 x float> %a)
+ ret <vscale x 4 x float> %res
+}
+
+define <vscale x 2 x float> @frinta_nxv2f32(<vscale x 2 x float> %a) {
+; CHECK-LABEL: frinta_nxv2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: frinta z0.s, p0/m, z0.s
+; CHECK-NEXT: ret
+ %res = call <vscale x 2 x float> @llvm.round.nxv2f32(<vscale x 2 x float> %a)
+ ret <vscale x 2 x float> %res
+}
+
+define <vscale x 2 x double> @frinta_nxv2f64(<vscale x 2 x double> %a) {
+; CHECK-LABEL: frinta_nxv2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: frinta z0.d, p0/m, z0.d
+; CHECK-NEXT: ret
+ %res = call <vscale x 2 x double> @llvm.round.nxv2f64(<vscale x 2 x double> %a)
+ ret <vscale x 2 x double> %res
+}
+
+; ROUNDEVEN
+
+define <vscale x 8 x half> @frintn_nxv8f16(<vscale x 8 x half> %a) {
+; CHECK-LABEL: frintn_nxv8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: frintn z0.h, p0/m, z0.h
+; CHECK-NEXT: ret
+ %res = call <vscale x 8 x half> @llvm.roundeven.nxv8f16(<vscale x 8 x half> %a)
+ ret <vscale x 8 x half> %res
+}
+
+define <vscale x 4 x half> @frintn_nxv4f16(<vscale x 4 x half> %a) {
+; CHECK-LABEL: frintn_nxv4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: frintn z0.h, p0/m, z0.h
+; CHECK-NEXT: ret
+ %res = call <vscale x 4 x half> @llvm.roundeven.nxv4f16(<vscale x 4 x half> %a)
+ ret <vscale x 4 x half> %res
+}
+
+define <vscale x 2 x half> @frintn_nxv2f16(<vscale x 2 x half> %a) {
+; CHECK-LABEL: frintn_nxv2f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: frintn z0.h, p0/m, z0.h
+; CHECK-NEXT: ret
+ %res = call <vscale x 2 x half> @llvm.roundeven.nxv2f16(<vscale x 2 x half> %a)
+ ret <vscale x 2 x half> %res
+}
+
+define <vscale x 4 x float> @frintn_nxv4f32(<vscale x 4 x float> %a) {
+; CHECK-LABEL: frintn_nxv4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: frintn z0.s, p0/m, z0.s
+; CHECK-NEXT: ret
+ %res = call <vscale x 4 x float> @llvm.roundeven.nxv4f32(<vscale x 4 x float> %a)
+ ret <vscale x 4 x float> %res
+}
+
+define <vscale x 2 x float> @frintn_nxv2f32(<vscale x 2 x float> %a) {
+; CHECK-LABEL: frintn_nxv2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: frintn z0.s, p0/m, z0.s
+; CHECK-NEXT: ret
+ %res = call <vscale x 2 x float> @llvm.roundeven.nxv2f32(<vscale x 2 x float> %a)
+ ret <vscale x 2 x float> %res
+}
+
+define <vscale x 2 x double> @frintn_nxv2f64(<vscale x 2 x double> %a) {
+; CHECK-LABEL: frintn_nxv2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: frintn z0.d, p0/m, z0.d
+; CHECK-NEXT: ret
+ %res = call <vscale x 2 x double> @llvm.roundeven.nxv2f64(<vscale x 2 x double> %a)
+ ret <vscale x 2 x double> %res
+}
+
+; FTRUNC
+
+define <vscale x 8 x half> @frintz_nxv8f16(<vscale x 8 x half> %a) {
+; CHECK-LABEL: frintz_nxv8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: frintz z0.h, p0/m, z0.h
+; CHECK-NEXT: ret
+ %res = call <vscale x 8 x half> @llvm.trunc.nxv8f16(<vscale x 8 x half> %a)
+ ret <vscale x 8 x half> %res
+}
+
+define <vscale x 4 x half> @frintz_nxv4f16(<vscale x 4 x half> %a) {
+; CHECK-LABEL: frintz_nxv4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: frintz z0.h, p0/m, z0.h
+; CHECK-NEXT: ret
+ %res = call <vscale x 4 x half> @llvm.trunc.nxv4f16(<vscale x 4 x half> %a)
+ ret <vscale x 4 x half> %res
+}
+
+define <vscale x 2 x half> @frintz_nxv2f16(<vscale x 2 x half> %a) {
+; CHECK-LABEL: frintz_nxv2f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: frintz z0.h, p0/m, z0.h
+; CHECK-NEXT: ret
+ %res = call <vscale x 2 x half> @llvm.trunc.nxv2f16(<vscale x 2 x half> %a)
+ ret <vscale x 2 x half> %res
+}
+
+define <vscale x 4 x float> @frintz_nxv4f32(<vscale x 4 x float> %a) {
+; CHECK-LABEL: frintz_nxv4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: frintz z0.s, p0/m, z0.s
+; CHECK-NEXT: ret
+ %res = call <vscale x 4 x float> @llvm.trunc.nxv4f32(<vscale x 4 x float> %a)
+ ret <vscale x 4 x float> %res
+}
+
+define <vscale x 2 x float> @frintz_nxv2f32(<vscale x 2 x float> %a) {
+; CHECK-LABEL: frintz_nxv2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: frintz z0.s, p0/m, z0.s
+; CHECK-NEXT: ret
+ %res = call <vscale x 2 x float> @llvm.trunc.nxv2f32(<vscale x 2 x float> %a)
+ ret <vscale x 2 x float> %res
+}
+
+define <vscale x 2 x double> @frintz_nxv2f64(<vscale x 2 x double> %a) {
+; CHECK-LABEL: frintz_nxv2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: frintz z0.d, p0/m, z0.d
+; CHECK-NEXT: ret
+ %res = call <vscale x 2 x double> @llvm.trunc.nxv2f64(<vscale x 2 x double> %a)
+ ret <vscale x 2 x double> %res
+}
+
+declare <vscale x 8 x half> @llvm.ceil.nxv8f16( <vscale x 8 x half>)
+declare <vscale x 4 x half> @llvm.ceil.nxv4f16( <vscale x 4 x half>)
+declare <vscale x 2 x half> @llvm.ceil.nxv2f16( <vscale x 2 x half>)
+declare <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float>)
+declare <vscale x 2 x float> @llvm.ceil.nxv2f32(<vscale x 2 x float>)
+declare <vscale x 2 x double> @llvm.ceil.nxv2f64(<vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.floor.nxv8f16( <vscale x 8 x half>)
+declare <vscale x 4 x half> @llvm.floor.nxv4f16( <vscale x 4 x half>)
+declare <vscale x 2 x half> @llvm.floor.nxv2f16( <vscale x 2 x half>)
+declare <vscale x 4 x float> @llvm.floor.nxv4f32(<vscale x 4 x float>)
+declare <vscale x 2 x float> @llvm.floor.nxv2f32(<vscale x 2 x float>)
+declare <vscale x 2 x double> @llvm.floor.nxv2f64(<vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.nearbyint.nxv8f16( <vscale x 8 x half>)
+declare <vscale x 4 x half> @llvm.nearbyint.nxv4f16( <vscale x 4 x half>)
+declare <vscale x 2 x half> @llvm.nearbyint.nxv2f16( <vscale x 2 x half>)
+declare <vscale x 4 x float> @llvm.nearbyint.nxv4f32(<vscale x 4 x float>)
+declare <vscale x 2 x float> @llvm.nearbyint.nxv2f32(<vscale x 2 x float>)
+declare <vscale x 2 x double> @llvm.nearbyint.nxv2f64(<vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.rint.nxv8f16( <vscale x 8 x half>)
+declare <vscale x 4 x half> @llvm.rint.nxv4f16( <vscale x 4 x half>)
+declare <vscale x 2 x half> @llvm.rint.nxv2f16( <vscale x 2 x half>)
+declare <vscale x 4 x float> @llvm.rint.nxv4f32(<vscale x 4 x float>)
+declare <vscale x 2 x float> @llvm.rint.nxv2f32(<vscale x 2 x float>)
+declare <vscale x 2 x double> @llvm.rint.nxv2f64(<vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.round.nxv8f16( <vscale x 8 x half>)
+declare <vscale x 4 x half> @llvm.round.nxv4f16( <vscale x 4 x half>)
+declare <vscale x 2 x half> @llvm.round.nxv2f16( <vscale x 2 x half>)
+declare <vscale x 4 x float> @llvm.round.nxv4f32(<vscale x 4 x float>)
+declare <vscale x 2 x float> @llvm.round.nxv2f32(<vscale x 2 x float>)
+declare <vscale x 2 x double> @llvm.round.nxv2f64(<vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.roundeven.nxv8f16( <vscale x 8 x half>)
+declare <vscale x 4 x half> @llvm.roundeven.nxv4f16( <vscale x 4 x half>)
+declare <vscale x 2 x half> @llvm.roundeven.nxv2f16( <vscale x 2 x half>)
+declare <vscale x 4 x float> @llvm.roundeven.nxv4f32(<vscale x 4 x float>)
+declare <vscale x 2 x float> @llvm.roundeven.nxv2f32(<vscale x 2 x float>)
+declare <vscale x 2 x double> @llvm.roundeven.nxv2f64(<vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.trunc.nxv8f16( <vscale x 8 x half>)
+declare <vscale x 4 x half> @llvm.trunc.nxv4f16( <vscale x 4 x half>)
+declare <vscale x 2 x half> @llvm.trunc.nxv2f16( <vscale x 2 x half>)
+declare <vscale x 4 x float> @llvm.trunc.nxv4f32(<vscale x 4 x float>)
+declare <vscale x 2 x float> @llvm.trunc.nxv2f32(<vscale x 2 x float>)
+declare <vscale x 2 x double> @llvm.trunc.nxv2f64(<vscale x 2 x double>)
diff --git a/llvm/test/CodeGen/AArch64/sve-fp.ll b/llvm/test/CodeGen/AArch64/sve-fp.ll
index 2afecdfc826d..e4aea2847bc4 100644
--- a/llvm/test/CodeGen/AArch64/sve-fp.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fp.ll
@@ -480,62 +480,6 @@ define void @float_copy(<vscale x 4 x float>* %P1, <vscale x 4 x float>* %P2) {
ret void
}
-; FCEIL
-
-define <vscale x 8 x half> @frintp_nxv8f16(<vscale x 8 x half> %a) {
-; CHECK-LABEL: frintp_nxv8f16:
-; CHECK: ptrue p0.h
-; CHECK-NEXT: frintp z0.h, p0/m, z0.h
-; CHECK-NEXT: ret
- %res = call <vscale x 8 x half> @llvm.ceil.nxv8f16(<vscale x 8 x half> %a)
- ret <vscale x 8 x half> %res
-}
-
-define <vscale x 4 x half> @frintp_nxv4f16(<vscale x 4 x half> %a) {
-; CHECK-LABEL: frintp_nxv4f16:
-; CHECK: ptrue p0.s
-; CHECK-NEXT: frintp z0.h, p0/m, z0.h
-; CHECK-NEXT: ret
- %res = call <vscale x 4 x half> @llvm.ceil.nxv4f16(<vscale x 4 x half> %a)
- ret <vscale x 4 x half> %res
-}
-
-define <vscale x 2 x half> @frintp_nxv2f16(<vscale x 2 x half> %a) {
-; CHECK-LABEL: frintp_nxv2f16:
-; CHECK: ptrue p0.d
-; CHECK-NEXT: frintp z0.h, p0/m, z0.h
-; CHECK-NEXT: ret
- %res = call <vscale x 2 x half> @llvm.ceil.nxv2f16(<vscale x 2 x half> %a)
- ret <vscale x 2 x half> %res
-}
-
-define <vscale x 4 x float> @frintp_nxv4f32(<vscale x 4 x float> %a) {
-; CHECK-LABEL: frintp_nxv4f32:
-; CHECK: ptrue p0.s
-; CHECK-NEXT: frintp z0.s, p0/m, z0.s
-; CHECK-NEXT: ret
- %res = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> %a)
- ret <vscale x 4 x float> %res
-}
-
-define <vscale x 2 x float> @frintp_nxv2f32(<vscale x 2 x float> %a) {
-; CHECK-LABEL: frintp_nxv2f32:
-; CHECK: ptrue p0.d
-; CHECK-NEXT: frintp z0.s, p0/m, z0.s
-; CHECK-NEXT: ret
- %res = call <vscale x 2 x float> @llvm.ceil.nxv2f32(<vscale x 2 x float> %a)
- ret <vscale x 2 x float> %res
-}
-
-define <vscale x 2 x double> @frintp_nxv2f64(<vscale x 2 x double> %a) {
-; CHECK-LABEL: frintp_nxv2f64:
-; CHECK: ptrue p0.d
-; CHECK-NEXT: frintp z0.d, p0/m, z0.d
-; CHECK-NEXT: ret
- %res = call <vscale x 2 x double> @llvm.ceil.nxv2f64(<vscale x 2 x double> %a)
- ret <vscale x 2 x double> %res
-}
-
declare <vscale x 8 x half> @llvm.aarch64.sve.frecps.x.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 4 x float> @llvm.aarch64.sve.frecps.x.nxv4f32(<vscale x 4 x float> , <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.frecps.x.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
@@ -551,12 +495,5 @@ declare <vscale x 8 x half> @llvm.fma.nxv8f16(<vscale x 8 x half>, <vscale x 8 x
declare <vscale x 4 x half> @llvm.fma.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>, <vscale x 4 x half>)
declare <vscale x 2 x half> @llvm.fma.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>, <vscale x 2 x half>)
-declare <vscale x 8 x half> @llvm.ceil.nxv8f16( <vscale x 8 x half>)
-declare <vscale x 4 x half> @llvm.ceil.nxv4f16( <vscale x 4 x half>)
-declare <vscale x 2 x half> @llvm.ceil.nxv2f16( <vscale x 2 x half>)
-declare <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float>)
-declare <vscale x 2 x float> @llvm.ceil.nxv2f32(<vscale x 2 x float>)
-declare <vscale x 2 x double> @llvm.ceil.nxv2f64(<vscale x 2 x double>)
-
; Function Attrs: nounwind readnone
declare double @llvm.aarch64.sve.faddv.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>) #2