[llvm] 4a649ad - [AArch64][SVE] Implement floating-point conversion intrinsics

Kerry McLaughlin via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 26 02:40:51 PST 2019


Author: Kerry McLaughlin
Date: 2019-11-26T10:31:47Z
New Revision: 4a649ad21aa282d08f90ae655369235c2aaf5ad5

URL: https://github.com/llvm/llvm-project/commit/4a649ad21aa282d08f90ae655369235c2aaf5ad5
DIFF: https://github.com/llvm/llvm-project/commit/4a649ad21aa282d08f90ae655369235c2aaf5ad5.diff

LOG: [AArch64][SVE] Implement floating-point conversion intrinsics

Summary:
Adds intrinsics for the following:
  - fcvt
  - fcvtzs & fcvtzu
  - scvtf & ucvtf
  - fcvtlt & fcvtnt
  - fcvtx & fcvtxnt
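
All of these use the merging convention of the existing SVE intrinsics
(the corresponding C builtins are the _m forms): the first operand
supplies the value of the inactive lanes, the second is the governing
predicate, and the third is the source vector. As a minimal IR-level
sketch mirroring the added tests (the function name is illustrative):

  define <vscale x 4 x float> @widen(<vscale x 4 x float> %a, <vscale x 16 x i1> %pg, <vscale x 8 x half> %b) {
    ; Active lanes are converted from %b; inactive lanes keep their value from %a.
    %out = call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f16(<vscale x 4 x float> %a,
                                                                   <vscale x 16 x i1> %pg,
                                                                   <vscale x 8 x half> %b)
    ret <vscale x 4 x float> %out
  }
  declare <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f16(<vscale x 4 x float>, <vscale x 16 x i1>, <vscale x 8 x half>)

With -mattr=+sve this selects to the single predicated instruction
fcvt z0.s, p0/m, z1.h.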

Reviewers: huntergr, sdesmalen, dancgr, mgudim, efriedma

Reviewed By: sdesmalen

Subscribers: tschuett, kristof.beyls, hiraditya, rkruppe, psnobl, cameron.mcinally, cfe-commits, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D70180

Added: 
    llvm/test/CodeGen/AArch64/sve-intrinsics-fp-converts.ll
    llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-converts.ll

Modified: 
    llvm/include/llvm/IR/IntrinsicsAArch64.td
    llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
    llvm/lib/Target/AArch64/SVEInstrFormats.td

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 836911128ec4..c74b17c98895 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -849,6 +849,20 @@ let TargetPrefix = "aarch64" in {  // All intrinsics start with "llvm.aarch64.".
                 [LLVMVectorOfBitcastsToInt<0>],
                 [IntrNoMem]>;
 
+  class AdvSIMD_SVE_FCVT_Intrinsic
+    : Intrinsic<[llvm_anyvector_ty],
+                [LLVMMatchType<0>,
+                 LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+                 llvm_anyvector_ty],
+                [IntrNoMem]>;
+
+  class AdvSIMD_SVE_FCVTZS_Intrinsic
+    : Intrinsic<[llvm_anyvector_ty],
+                [LLVMVectorOfBitcastsToInt<0>,
+                 LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+                 llvm_anyvector_ty],
+                [IntrNoMem]>;
+
   class AdvSIMD_SVE_PUNPKHI_Intrinsic
     : Intrinsic<[LLVMHalfElementsVectorType<0>],
                 [llvm_anyvector_ty],
@@ -861,6 +875,13 @@ let TargetPrefix = "aarch64" in {  // All intrinsics start with "llvm.aarch64.".
                  LLVMVectorOfBitcastsToInt<0>],
                 [IntrNoMem]>;
 
+  class AdvSIMD_SVE_SCVTF_Intrinsic
+    : Intrinsic<[llvm_anyvector_ty],
+                [LLVMMatchType<0>,
+                 LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+                 llvm_anyvector_ty],
+                [IntrNoMem]>;
+
   class AdvSIMD_SVE_TSMUL_Intrinsic
     : Intrinsic<[llvm_anyvector_ty],
                 [LLVMMatchType<0>,
@@ -1048,6 +1069,16 @@ def int_aarch64_sve_fmaxnmv : AdvSIMD_SVE_Reduce_Intrinsic;
 def int_aarch64_sve_fminv   : AdvSIMD_SVE_Reduce_Intrinsic;
 def int_aarch64_sve_fminnmv : AdvSIMD_SVE_Reduce_Intrinsic;
 
+//
+// Floating-point conversions
+//
+
+def int_aarch64_sve_fcvt   : AdvSIMD_SVE_FCVT_Intrinsic;
+def int_aarch64_sve_fcvtzs : AdvSIMD_SVE_FCVTZS_Intrinsic;
+def int_aarch64_sve_fcvtzu : AdvSIMD_SVE_FCVTZS_Intrinsic;
+def int_aarch64_sve_scvtf  : AdvSIMD_SVE_SCVTF_Intrinsic;
+def int_aarch64_sve_ucvtf  : AdvSIMD_SVE_SCVTF_Intrinsic;
+
 //
 // Floating-point comparisons
 //
@@ -1061,7 +1092,41 @@ def int_aarch64_sve_fcmpgt : AdvSIMD_SVE_Compare_Intrinsic;
 def int_aarch64_sve_fcmpne : AdvSIMD_SVE_Compare_Intrinsic;
 def int_aarch64_sve_fcmpuo : AdvSIMD_SVE_Compare_Intrinsic;
 
-def int_aarch64_sve_fcvtzs_i32f16 : Builtin_SVCVT<"svcvt_s32_f16_m", llvm_nxv4i32_ty, llvm_nxv8f16_ty>;
+def int_aarch64_sve_fcvtzs_i32f16   : Builtin_SVCVT<"svcvt_s32_f16_m", llvm_nxv4i32_ty, llvm_nxv8f16_ty>;
+def int_aarch64_sve_fcvtzs_i32f64   : Builtin_SVCVT<"svcvt_s32_f64_m", llvm_nxv4i32_ty, llvm_nxv2f64_ty>;
+def int_aarch64_sve_fcvtzs_i64f16   : Builtin_SVCVT<"svcvt_s64_f16_m", llvm_nxv2i64_ty, llvm_nxv8f16_ty>;
+def int_aarch64_sve_fcvtzs_i64f32   : Builtin_SVCVT<"svcvt_s64_f32_m", llvm_nxv2i64_ty, llvm_nxv4f32_ty>;
+
+def int_aarch64_sve_fcvtzu_i32f16   : Builtin_SVCVT<"svcvt_u32_f16_m", llvm_nxv4i32_ty, llvm_nxv8f16_ty>;
+def int_aarch64_sve_fcvtzu_i32f64   : Builtin_SVCVT<"svcvt_u32_f64_m", llvm_nxv4i32_ty, llvm_nxv2f64_ty>;
+def int_aarch64_sve_fcvtzu_i64f16   : Builtin_SVCVT<"svcvt_u64_f16_m", llvm_nxv2i64_ty, llvm_nxv8f16_ty>;
+def int_aarch64_sve_fcvtzu_i64f32   : Builtin_SVCVT<"svcvt_u64_f32_m", llvm_nxv2i64_ty, llvm_nxv4f32_ty>;
+
+def int_aarch64_sve_fcvt_f16f32     : Builtin_SVCVT<"svcvt_f16_f32_m", llvm_nxv8f16_ty, llvm_nxv4f32_ty>;
+def int_aarch64_sve_fcvt_f16f64     : Builtin_SVCVT<"svcvt_f16_f64_m", llvm_nxv8f16_ty, llvm_nxv2f64_ty>;
+def int_aarch64_sve_fcvt_f32f64     : Builtin_SVCVT<"svcvt_f32_f64_m", llvm_nxv4f32_ty, llvm_nxv2f64_ty>;
+
+def int_aarch64_sve_fcvt_f32f16     : Builtin_SVCVT<"svcvt_f32_f16_m", llvm_nxv4f32_ty, llvm_nxv8f16_ty>;
+def int_aarch64_sve_fcvt_f64f16     : Builtin_SVCVT<"svcvt_f64_f16_m", llvm_nxv2f64_ty, llvm_nxv8f16_ty>;
+def int_aarch64_sve_fcvt_f64f32     : Builtin_SVCVT<"svcvt_f64_f32_m", llvm_nxv2f64_ty, llvm_nxv4f32_ty>;
+
+def int_aarch64_sve_fcvtlt_f32f16   : Builtin_SVCVT<"svcvtlt_f32_f16_m", llvm_nxv4f32_ty, llvm_nxv8f16_ty>;
+def int_aarch64_sve_fcvtlt_f64f32   : Builtin_SVCVT<"svcvtlt_f64_f32_m", llvm_nxv2f64_ty, llvm_nxv4f32_ty>;
+def int_aarch64_sve_fcvtnt_f16f32   : Builtin_SVCVT<"svcvtnt_f16_f32_m", llvm_nxv8f16_ty, llvm_nxv4f32_ty>;
+def int_aarch64_sve_fcvtnt_f32f64   : Builtin_SVCVT<"svcvtnt_f32_f64_m", llvm_nxv4f32_ty, llvm_nxv2f64_ty>;
+
+def int_aarch64_sve_fcvtx_f32f64    : Builtin_SVCVT<"svcvtx_f32_f64_m", llvm_nxv4f32_ty, llvm_nxv2f64_ty>;
+def int_aarch64_sve_fcvtxnt_f32f64  : Builtin_SVCVT<"svcvtxnt_f32_f64_m", llvm_nxv4f32_ty, llvm_nxv2f64_ty>;
+
+def int_aarch64_sve_scvtf_f16i32    : Builtin_SVCVT<"svcvt_f16_s32_m", llvm_nxv8f16_ty, llvm_nxv4i32_ty>;
+def int_aarch64_sve_scvtf_f16i64    : Builtin_SVCVT<"svcvt_f16_s64_m", llvm_nxv8f16_ty, llvm_nxv2i64_ty>;
+def int_aarch64_sve_scvtf_f32i64    : Builtin_SVCVT<"svcvt_f32_s64_m", llvm_nxv4f32_ty, llvm_nxv2i64_ty>;
+def int_aarch64_sve_scvtf_f64i32    : Builtin_SVCVT<"svcvt_f64_s32_m", llvm_nxv2f64_ty, llvm_nxv4i32_ty>;
+
+def int_aarch64_sve_ucvtf_f16i32    : Builtin_SVCVT<"svcvt_f16_u32_m", llvm_nxv8f16_ty, llvm_nxv4i32_ty>;
+def int_aarch64_sve_ucvtf_f16i64    : Builtin_SVCVT<"svcvt_f16_u64_m", llvm_nxv8f16_ty, llvm_nxv2i64_ty>;
+def int_aarch64_sve_ucvtf_f32i64    : Builtin_SVCVT<"svcvt_f32_u64_m", llvm_nxv4f32_ty, llvm_nxv2i64_ty>;
+def int_aarch64_sve_ucvtf_f64i32    : Builtin_SVCVT<"svcvt_f64_u32_m", llvm_nxv2f64_ty, llvm_nxv4i32_ty>;
 
 //
 // Predicate operations

diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index a4ea2cab13eb..3b4e97ed844a 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -889,40 +889,40 @@ let Predicates = [HasSVE] in {
   defm LSR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b001, "lsr">;
   defm LSL_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b011, "lsl">;
 
-  def FCVT_ZPmZ_StoH   : sve_fp_2op_p_zd<0b1001000, "fcvt",   ZPR32, ZPR16, ElementSizeS>;
-  def FCVT_ZPmZ_HtoS   : sve_fp_2op_p_zd<0b1001001, "fcvt",   ZPR16, ZPR32, ElementSizeS>;
-  def SCVTF_ZPmZ_HtoH  : sve_fp_2op_p_zd<0b0110010, "scvtf",  ZPR16, ZPR16, ElementSizeH>;
-  def SCVTF_ZPmZ_StoS  : sve_fp_2op_p_zd<0b1010100, "scvtf",  ZPR32, ZPR32, ElementSizeS>;
-  def UCVTF_ZPmZ_StoS  : sve_fp_2op_p_zd<0b1010101, "ucvtf",  ZPR32, ZPR32, ElementSizeS>;
-  def UCVTF_ZPmZ_HtoH  : sve_fp_2op_p_zd<0b0110011, "ucvtf",  ZPR16, ZPR16, ElementSizeH>;
-  def FCVTZS_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111010, "fcvtzs", ZPR16, ZPR16, ElementSizeH>;
-  def FCVTZS_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011100, "fcvtzs", ZPR32, ZPR32, ElementSizeS>;
-  def FCVTZU_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111011, "fcvtzu", ZPR16, ZPR16, ElementSizeH>;
-  def FCVTZU_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011101, "fcvtzu", ZPR32, ZPR32, ElementSizeS>;
-  def FCVT_ZPmZ_DtoH   : sve_fp_2op_p_zd<0b1101000, "fcvt",   ZPR64, ZPR16, ElementSizeD>;
-  def FCVT_ZPmZ_HtoD   : sve_fp_2op_p_zd<0b1101001, "fcvt",   ZPR16, ZPR64, ElementSizeD>;
-  def FCVT_ZPmZ_DtoS   : sve_fp_2op_p_zd<0b1101010, "fcvt",   ZPR64, ZPR32, ElementSizeD>;
-  def FCVT_ZPmZ_StoD   : sve_fp_2op_p_zd<0b1101011, "fcvt",   ZPR32, ZPR64, ElementSizeD>;
-  def SCVTF_ZPmZ_StoD  : sve_fp_2op_p_zd<0b1110000, "scvtf",  ZPR32, ZPR64, ElementSizeD>;
-  def UCVTF_ZPmZ_StoD  : sve_fp_2op_p_zd<0b1110001, "ucvtf",  ZPR32, ZPR64, ElementSizeD>;
-  def UCVTF_ZPmZ_StoH  : sve_fp_2op_p_zd<0b0110101, "ucvtf",  ZPR32, ZPR16, ElementSizeS>;
-  def SCVTF_ZPmZ_DtoS  : sve_fp_2op_p_zd<0b1110100, "scvtf",  ZPR64, ZPR32, ElementSizeD>;
-  def SCVTF_ZPmZ_StoH  : sve_fp_2op_p_zd<0b0110100, "scvtf",  ZPR32, ZPR16, ElementSizeS>;
-  def SCVTF_ZPmZ_DtoH  : sve_fp_2op_p_zd<0b0110110, "scvtf",  ZPR64, ZPR16, ElementSizeD>;
-  def UCVTF_ZPmZ_DtoS  : sve_fp_2op_p_zd<0b1110101, "ucvtf",  ZPR64, ZPR32, ElementSizeD>;
-  def UCVTF_ZPmZ_DtoH  : sve_fp_2op_p_zd<0b0110111, "ucvtf",  ZPR64, ZPR16, ElementSizeD>;
-  def SCVTF_ZPmZ_DtoD  : sve_fp_2op_p_zd<0b1110110, "scvtf",  ZPR64, ZPR64, ElementSizeD>;
-  def UCVTF_ZPmZ_DtoD  : sve_fp_2op_p_zd<0b1110111, "ucvtf",  ZPR64, ZPR64, ElementSizeD>;
-  def FCVTZS_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111000, "fcvtzs", ZPR64, ZPR32, ElementSizeD>;
-  def FCVTZU_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111001, "fcvtzu", ZPR64, ZPR32, ElementSizeD>;
-  def FCVTZS_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111100, "fcvtzs", ZPR32, ZPR64, ElementSizeD>;
-  def FCVTZS_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111100, "fcvtzs", ZPR16, ZPR32, ElementSizeS>;
-  def FCVTZS_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111110, "fcvtzs", ZPR16, ZPR64, ElementSizeD>;
-  def FCVTZU_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111101, "fcvtzu", ZPR16, ZPR32, ElementSizeS>;
-  def FCVTZU_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111111, "fcvtzu", ZPR16, ZPR64, ElementSizeD>;
-  def FCVTZU_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111101, "fcvtzu", ZPR32, ZPR64, ElementSizeD>;
-  def FCVTZS_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111110, "fcvtzs", ZPR64, ZPR64, ElementSizeD>;
-  def FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111111, "fcvtzu", ZPR64, ZPR64, ElementSizeD>;
+  defm FCVT_ZPmZ_StoH   : sve_fp_2op_p_zd<0b1001000, "fcvt",   ZPR32, ZPR16, int_aarch64_sve_fcvt_f16f32,    nxv8f16, nxv16i1, nxv4f32, ElementSizeS>;
+  defm FCVT_ZPmZ_HtoS   : sve_fp_2op_p_zd<0b1001001, "fcvt",   ZPR16, ZPR32, int_aarch64_sve_fcvt_f32f16,    nxv4f32, nxv16i1, nxv8f16, ElementSizeS>;
+  defm SCVTF_ZPmZ_HtoH  : sve_fp_2op_p_zd<0b0110010, "scvtf",  ZPR16, ZPR16, int_aarch64_sve_scvtf,          nxv8f16, nxv8i1,  nxv8i16, ElementSizeH>;
+  defm SCVTF_ZPmZ_StoS  : sve_fp_2op_p_zd<0b1010100, "scvtf",  ZPR32, ZPR32, int_aarch64_sve_scvtf,          nxv4f32, nxv4i1,  nxv4i32, ElementSizeS>;
+  defm UCVTF_ZPmZ_StoS  : sve_fp_2op_p_zd<0b1010101, "ucvtf",  ZPR32, ZPR32, int_aarch64_sve_ucvtf,          nxv4f32, nxv4i1,  nxv4i32, ElementSizeS>;
+  defm UCVTF_ZPmZ_HtoH  : sve_fp_2op_p_zd<0b0110011, "ucvtf",  ZPR16, ZPR16, int_aarch64_sve_ucvtf,          nxv8f16, nxv8i1,  nxv8i16, ElementSizeH>;
+  defm FCVTZS_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111010, "fcvtzs", ZPR16, ZPR16, int_aarch64_sve_fcvtzs,         nxv8i16, nxv8i1,  nxv8f16, ElementSizeH>;
+  defm FCVTZS_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011100, "fcvtzs", ZPR32, ZPR32, int_aarch64_sve_fcvtzs,         nxv4i32, nxv4i1,  nxv4f32, ElementSizeS>;
+  defm FCVTZU_ZPmZ_HtoH : sve_fp_2op_p_zd<0b0111011, "fcvtzu", ZPR16, ZPR16, int_aarch64_sve_fcvtzu,         nxv8i16, nxv8i1,  nxv8f16, ElementSizeH>;
+  defm FCVTZU_ZPmZ_StoS : sve_fp_2op_p_zd<0b1011101, "fcvtzu", ZPR32, ZPR32, int_aarch64_sve_fcvtzu,         nxv4i32, nxv4i1,  nxv4f32, ElementSizeS>;
+  defm FCVT_ZPmZ_DtoH   : sve_fp_2op_p_zd<0b1101000, "fcvt",   ZPR64, ZPR16, int_aarch64_sve_fcvt_f16f64,    nxv8f16, nxv16i1, nxv2f64, ElementSizeD>;
+  defm FCVT_ZPmZ_HtoD   : sve_fp_2op_p_zd<0b1101001, "fcvt",   ZPR16, ZPR64, int_aarch64_sve_fcvt_f64f16,    nxv2f64, nxv16i1, nxv8f16, ElementSizeD>;
+  defm FCVT_ZPmZ_DtoS   : sve_fp_2op_p_zd<0b1101010, "fcvt",   ZPR64, ZPR32, int_aarch64_sve_fcvt_f32f64,    nxv4f32, nxv16i1, nxv2f64, ElementSizeD>;
+  defm FCVT_ZPmZ_StoD   : sve_fp_2op_p_zd<0b1101011, "fcvt",   ZPR32, ZPR64, int_aarch64_sve_fcvt_f64f32,    nxv2f64, nxv16i1, nxv4f32, ElementSizeD>;
+  defm SCVTF_ZPmZ_StoD  : sve_fp_2op_p_zd<0b1110000, "scvtf",  ZPR32, ZPR64, int_aarch64_sve_scvtf_f64i32,   nxv2f64, nxv16i1, nxv4i32, ElementSizeD>;
+  defm UCVTF_ZPmZ_StoD  : sve_fp_2op_p_zd<0b1110001, "ucvtf",  ZPR32, ZPR64, int_aarch64_sve_ucvtf_f64i32,   nxv2f64, nxv16i1, nxv4i32, ElementSizeD>;
+  defm UCVTF_ZPmZ_StoH  : sve_fp_2op_p_zd<0b0110101, "ucvtf",  ZPR32, ZPR16, int_aarch64_sve_ucvtf_f16i32,   nxv8f16, nxv16i1, nxv4i32, ElementSizeS>;
+  defm SCVTF_ZPmZ_DtoS  : sve_fp_2op_p_zd<0b1110100, "scvtf",  ZPR64, ZPR32, int_aarch64_sve_scvtf_f32i64,   nxv4f32, nxv16i1, nxv2i64, ElementSizeD>;
+  defm SCVTF_ZPmZ_StoH  : sve_fp_2op_p_zd<0b0110100, "scvtf",  ZPR32, ZPR16, int_aarch64_sve_scvtf_f16i32,   nxv8f16, nxv16i1, nxv4i32, ElementSizeS>;
+  defm SCVTF_ZPmZ_DtoH  : sve_fp_2op_p_zd<0b0110110, "scvtf",  ZPR64, ZPR16, int_aarch64_sve_scvtf_f16i64,   nxv8f16, nxv16i1, nxv2i64, ElementSizeD>;
+  defm UCVTF_ZPmZ_DtoS  : sve_fp_2op_p_zd<0b1110101, "ucvtf",  ZPR64, ZPR32, int_aarch64_sve_ucvtf_f32i64,   nxv4f32, nxv16i1, nxv2i64, ElementSizeD>;
+  defm UCVTF_ZPmZ_DtoH  : sve_fp_2op_p_zd<0b0110111, "ucvtf",  ZPR64, ZPR16, int_aarch64_sve_ucvtf_f16i64,   nxv8f16, nxv16i1, nxv2i64, ElementSizeD>;
+  defm SCVTF_ZPmZ_DtoD  : sve_fp_2op_p_zd<0b1110110, "scvtf",  ZPR64, ZPR64, int_aarch64_sve_scvtf,          nxv2f64, nxv2i1,  nxv2i64, ElementSizeD>;
+  defm UCVTF_ZPmZ_DtoD  : sve_fp_2op_p_zd<0b1110111, "ucvtf",  ZPR64, ZPR64, int_aarch64_sve_ucvtf,          nxv2f64, nxv2i1,  nxv2i64, ElementSizeD>;
+  defm FCVTZS_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111000, "fcvtzs", ZPR64, ZPR32, int_aarch64_sve_fcvtzs_i32f64,  nxv4i32, nxv16i1, nxv2f64, ElementSizeD>;
+  defm FCVTZU_ZPmZ_DtoS : sve_fp_2op_p_zd<0b1111001, "fcvtzu", ZPR64, ZPR32, int_aarch64_sve_fcvtzu_i32f64,  nxv4i32, nxv16i1, nxv2f64, ElementSizeD>;
+  defm FCVTZS_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111100, "fcvtzs", ZPR32, ZPR64, int_aarch64_sve_fcvtzs_i64f32,  nxv2i64, nxv16i1, nxv4f32, ElementSizeD>;
+  defm FCVTZS_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111100, "fcvtzs", ZPR16, ZPR32, int_aarch64_sve_fcvtzs_i32f16,  nxv4i32, nxv16i1, nxv8f16, ElementSizeS>;
+  defm FCVTZS_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111110, "fcvtzs", ZPR16, ZPR64, int_aarch64_sve_fcvtzs_i64f16,  nxv2i64, nxv16i1, nxv8f16, ElementSizeD>;
+  defm FCVTZU_ZPmZ_HtoS : sve_fp_2op_p_zd<0b0111101, "fcvtzu", ZPR16, ZPR32, int_aarch64_sve_fcvtzu_i32f16,  nxv4i32, nxv16i1, nxv8f16, ElementSizeS>;
+  defm FCVTZU_ZPmZ_HtoD : sve_fp_2op_p_zd<0b0111111, "fcvtzu", ZPR16, ZPR64, int_aarch64_sve_fcvtzu_i64f16,  nxv2i64, nxv16i1, nxv8f16, ElementSizeD>;
+  defm FCVTZU_ZPmZ_StoD : sve_fp_2op_p_zd<0b1111101, "fcvtzu", ZPR32, ZPR64, int_aarch64_sve_fcvtzu_i64f32,  nxv2i64, nxv16i1, nxv4f32, ElementSizeD>;
+  defm FCVTZS_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111110, "fcvtzs", ZPR64, ZPR64, int_aarch64_sve_fcvtzs,         nxv2i64, nxv2i1,  nxv2f64, ElementSizeD>;
+  defm FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111111, "fcvtzu", ZPR64, ZPR64, int_aarch64_sve_fcvtzu,         nxv2i64, nxv2i1,  nxv2f64, ElementSizeD>;
 
   defm FRINTN_ZPmZ : sve_fp_2op_p_zd_HSD<0b00000, "frintn", int_aarch64_sve_frintn>;
   defm FRINTP_ZPmZ : sve_fp_2op_p_zd_HSD<0b00001, "frintp", int_aarch64_sve_frintp>;
@@ -1422,10 +1422,10 @@ let Predicates = [HasSVE2] in {
   defm FLOGB_ZPmZ : sve2_fp_flogb<"flogb">;
 
   // SVE2 floating-point convert precision
-  defm FCVTXNT_ZPmZ : sve2_fp_convert_down_odd_rounding<"fcvtxnt">;
-  defm FCVTNT_ZPmZ  : sve2_fp_convert_down_narrow<"fcvtnt">;
-  defm FCVTLT_ZPmZ  : sve2_fp_convert_up_long<"fcvtlt">;
-  def FCVTX_ZPmZ_DtoS : sve_fp_2op_p_zd<0b0001010, "fcvtx", ZPR64, ZPR32, ElementSizeD>;
+  defm FCVTXNT_ZPmZ : sve2_fp_convert_down_odd_rounding_top<"fcvtxnt", "int_aarch64_sve_fcvtxnt">;
+  defm FCVTX_ZPmZ   : sve2_fp_convert_down_odd_rounding<"fcvtx",       "int_aarch64_sve_fcvtx">;
+  defm FCVTNT_ZPmZ  : sve2_fp_convert_down_narrow<"fcvtnt",            "int_aarch64_sve_fcvtnt">;
+  defm FCVTLT_ZPmZ  : sve2_fp_convert_up_long<"fcvtlt",                "int_aarch64_sve_fcvtlt">;
 
   // SVE2 floating-point pairwise operations
   defm FADDP_ZPmZZ   : sve2_fp_pairwise_pred<0b000, "faddp">;

diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 855510e7f556..38f318849b33 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -1633,18 +1633,26 @@ class sve2_fp_convert_precision<bits<4> opc, string asm,
   let Constraints = "$Zd = $_Zd";
 }
 
-multiclass sve2_fp_convert_down_narrow<string asm> {
+multiclass sve2_fp_convert_down_narrow<string asm, string op> {
   def _StoH : sve2_fp_convert_precision<0b1000, asm, ZPR16, ZPR32>;
   def _DtoS : sve2_fp_convert_precision<0b1110, asm, ZPR32, ZPR64>;
+
+  def : SVE_3_Op_Pat<nxv8f16, !cast<SDPatternOperator>(op # _f16f32), nxv8f16, nxv16i1, nxv4f32, !cast<Instruction>(NAME # _StoH)>;
+  def : SVE_3_Op_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f64), nxv4f32, nxv16i1, nxv2f64, !cast<Instruction>(NAME # _DtoS)>;
 }
 
-multiclass sve2_fp_convert_up_long<string asm> {
+multiclass sve2_fp_convert_up_long<string asm, string op> {
   def _HtoS : sve2_fp_convert_precision<0b1001, asm, ZPR32, ZPR16>;
   def _StoD : sve2_fp_convert_precision<0b1111, asm, ZPR64, ZPR32>;
+
+  def : SVE_3_Op_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f16), nxv4f32, nxv16i1, nxv8f16, !cast<Instruction>(NAME # _HtoS)>;
+  def : SVE_3_Op_Pat<nxv2f64, !cast<SDPatternOperator>(op # _f64f32), nxv2f64, nxv16i1, nxv4f32, !cast<Instruction>(NAME # _StoD)>;
 }
 
-multiclass sve2_fp_convert_down_odd_rounding<string asm> {
+multiclass sve2_fp_convert_down_odd_rounding_top<string asm, string op> {
   def _DtoS : sve2_fp_convert_precision<0b0010, asm, ZPR32, ZPR64>;
+
+  def : SVE_3_Op_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f64), nxv4f32, nxv16i1, nxv2f64, !cast<Instruction>(NAME # _DtoS)>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -1830,6 +1838,16 @@ class sve_fp_2op_p_zd<bits<7> opc, string asm, RegisterOperand i_zprtype,
   let ElementSize = size;
 }
 
+multiclass sve_fp_2op_p_zd<bits<7> opc, string asm,
+                           RegisterOperand i_zprtype,
+                           RegisterOperand o_zprtype,
+                           SDPatternOperator op, ValueType vt1,
+                           ValueType vt2, ValueType vt3, ElementSizeEnum Sz> {
+  def NAME : sve_fp_2op_p_zd<opc, asm, i_zprtype, o_zprtype, Sz>;
+
+  def : SVE_3_Op_Pat<vt1, op, vt1, vt2, vt3, !cast<Instruction>(NAME)>;
+}
+
 multiclass sve_fp_2op_p_zd_HSD<bits<5> opc, string asm, SDPatternOperator op> {
   def _H : sve_fp_2op_p_zd<{ 0b01, opc }, asm, ZPR16, ZPR16, ElementSizeH>;
   def _S : sve_fp_2op_p_zd<{ 0b10, opc }, asm, ZPR32, ZPR32, ElementSizeS>;
@@ -1846,6 +1864,11 @@ multiclass sve2_fp_flogb<string asm> {
   def _D : sve_fp_2op_p_zd<0b0011110, asm, ZPR64, ZPR64, ElementSizeD>;
 }
 
+multiclass sve2_fp_convert_down_odd_rounding<string asm, string op> {
+  def _DtoS : sve_fp_2op_p_zd<0b0001010, asm, ZPR64, ZPR32, ElementSizeD>;
+  def : SVE_3_Op_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f64), nxv4f32, nxv16i1, nxv2f64, !cast<Instruction>(NAME # _DtoS)>;
+}
+
 //===----------------------------------------------------------------------===//
 // SVE Floating Point Unary Operations - Unpredicated Group
 //===----------------------------------------------------------------------===//

diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-converts.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-converts.ll
new file mode 100644
index 000000000000..e777a2f3b8b0
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-converts.ll
@@ -0,0 +1,400 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; FCVT
+;
+
+define <vscale x 8 x half> @fcvt_f16_f32(<vscale x 8 x half> %a, <vscale x 16 x i1> %pg, <vscale x 4 x float> %b) {
+; CHECK-LABEL: fcvt_f16_f32:
+; CHECK: fcvt z0.h, p0/m, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f32(<vscale x 8 x half> %a,
+                                                                <vscale x 16 x i1> %pg,
+                                                                <vscale x 4 x float> %b)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @fcvt_f16_f64(<vscale x 8 x half> %a, <vscale x 16 x i1> %pg, <vscale x 2 x double> %b) {
+; CHECK-LABEL: fcvt_f16_f64:
+; CHECK: fcvt z0.h, p0/m, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f64(<vscale x 8 x half> %a,
+                                                                <vscale x 16 x i1> %pg,
+                                                                <vscale x 2 x double> %b)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @fcvt_f32_f16(<vscale x 4 x float> %a, <vscale x 16 x i1> %pg, <vscale x 8 x half> %b) {
+; CHECK-LABEL: fcvt_f32_f16:
+; CHECK: fcvt z0.s, p0/m, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f16(<vscale x 4 x float> %a,
+                                                                 <vscale x 16 x i1> %pg,
+                                                                 <vscale x 8 x half> %b)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @fcvt_f32_f64(<vscale x 4 x float> %a, <vscale x 16 x i1> %pg, <vscale x 2 x double> %b) {
+; CHECK-LABEL: fcvt_f32_f64:
+; CHECK: fcvt z0.s, p0/m, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f64(<vscale x 4 x float> %a,
+                                                                 <vscale x 16 x i1> %pg,
+                                                                 <vscale x 2 x double> %b)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @fcvt_f64_f16(<vscale x 2 x double> %a, <vscale x 16 x i1> %pg, <vscale x 8 x half> %b) {
+; CHECK-LABEL: fcvt_f64_f16:
+; CHECK: fcvt z0.d, p0/m, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f16(<vscale x 2 x double> %a,
+                                                                  <vscale x 16 x i1> %pg,
+                                                                  <vscale x 8 x half> %b)
+  ret <vscale x 2 x double> %out
+}
+
+define <vscale x 2 x double> @fcvt_f64_f32(<vscale x 2 x double> %a, <vscale x 16 x i1> %pg, <vscale x 4 x float> %b) {
+; CHECK-LABEL: fcvt_f64_f32:
+; CHECK: fcvt z0.d, p0/m, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f32(<vscale x 2 x double> %a,
+                                                                  <vscale x 16 x i1> %pg,
+                                                                  <vscale x 4 x float> %b)
+  ret <vscale x 2 x double> %out
+}
+
+;
+; FCVTZS
+;
+
+define <vscale x 8 x i16> @fcvtzs_i16_f16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x half> %b) {
+; CHECK-LABEL: fcvtzs_i16_f16:
+; CHECK: fcvtzs z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzs.nxv8i16.nxv8f16(<vscale x 8 x i16> %a,
+                                                                          <vscale x 8 x i1> %pg,
+                                                                          <vscale x 8 x half> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @fcvtzs_i32_f32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x float> %b) {
+; CHECK-LABEL: fcvtzs_i32_f32:
+; CHECK: fcvtzs z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.nxv4i32.nxv4f32(<vscale x 4 x i32> %a,
+                                                                          <vscale x 4 x i1> %pg,
+                                                                          <vscale x 4 x float> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @fcvtzs_i64_f64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b) {
+; CHECK-LABEL: fcvtzs_i64_f64:
+; CHECK: fcvtzs z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.nxv2i64.nxv2f64(<vscale x 2 x i64> %a,
+                                                                          <vscale x 2 x i1> %pg,
+                                                                          <vscale x 2 x double> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 4 x i32> @fcvtzs_i32_f16(<vscale x 4 x i32> %a, <vscale x 16 x i1> %pg, <vscale x 8 x half> %b) {
+; CHECK-LABEL: fcvtzs_i32_f16:
+; CHECK: fcvtzs z0.s, p0/m, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.i32f16(<vscale x 4 x i32> %a,
+                                                                 <vscale x 16 x i1> %pg,
+                                                                 <vscale x 8 x half> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 4 x i32> @fcvtzs_i32_f64(<vscale x 4 x i32> %a, <vscale x 16 x i1> %pg, <vscale x 2 x double> %b) {
+; CHECK-LABEL: fcvtzs_i32_f64:
+; CHECK: fcvtzs z0.s, p0/m, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.i32f64(<vscale x 4 x i32> %a,
+                                                                 <vscale x 16 x i1> %pg,
+                                                                 <vscale x 2 x double> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @fcvtzs_i64_f16(<vscale x 2 x i64> %a, <vscale x 16 x i1> %pg, <vscale x 8 x half> %b) {
+; CHECK-LABEL: fcvtzs_i64_f16:
+; CHECK: fcvtzs z0.d, p0/m, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.i64f16(<vscale x 2 x i64> %a,
+                                                                 <vscale x 16 x i1> %pg,
+                                                                 <vscale x 8 x half> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 2 x i64> @fcvtzs_i64_f32(<vscale x 2 x i64> %a, <vscale x 16 x i1> %pg, <vscale x 4 x float> %b) {
+; CHECK-LABEL: fcvtzs_i64_f32:
+; CHECK: fcvtzs z0.d, p0/m, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.i64f32(<vscale x 2 x i64> %a,
+                                                                 <vscale x 16 x i1> %pg,
+                                                                 <vscale x 4 x float> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+;
+; FCVTZU
+;
+
+define <vscale x 8 x i16> @fcvtzu_i16_f16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x half> %b) {
+; CHECK-LABEL: fcvtzu_i16_f16:
+; CHECK: fcvtzu z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzu.nxv8i16.nxv8f16(<vscale x 8 x i16> %a,
+                                                                          <vscale x 8 x i1> %pg,
+                                                                          <vscale x 8 x half> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @fcvtzu_i32_f32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x float> %b) {
+; CHECK-LABEL: fcvtzu_i32_f32:
+; CHECK: fcvtzu z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.nxv4i32.nxv4f32(<vscale x 4 x i32> %a,
+                                                                          <vscale x 4 x i1> %pg,
+                                                                          <vscale x 4 x float> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @fcvtzu_i64_f64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b) {
+; CHECK-LABEL: fcvtzu_i64_f64:
+; CHECK: fcvtzu z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.nxv2i64.nxv2f64(<vscale x 2 x i64> %a,
+                                                                          <vscale x 2 x i1> %pg,
+                                                                          <vscale x 2 x double> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 4 x i32> @fcvtzu_i32_f16(<vscale x 4 x i32> %a, <vscale x 16 x i1> %pg, <vscale x 8 x half> %b) {
+; CHECK-LABEL: fcvtzu_i32_f16:
+; CHECK: fcvtzu z0.s, p0/m, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.i32f16(<vscale x 4 x i32> %a,
+                                                                 <vscale x 16 x i1> %pg,
+                                                                 <vscale x 8 x half> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 4 x i32> @fcvtzu_i32_f64(<vscale x 4 x i32> %a, <vscale x 16 x i1> %pg, <vscale x 2 x double> %b) {
+; CHECK-LABEL: fcvtzu_i32_f64:
+; CHECK: fcvtzu z0.s, p0/m, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.i32f64(<vscale x 4 x i32> %a,
+                                                                 <vscale x 16 x i1> %pg,
+                                                                 <vscale x 2 x double> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @fcvtzu_i64_f16(<vscale x 2 x i64> %a, <vscale x 16 x i1> %pg, <vscale x 8 x half> %b) {
+; CHECK-LABEL: fcvtzu_i64_f16:
+; CHECK: fcvtzu z0.d, p0/m, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.i64f16(<vscale x 2 x i64> %a,
+                                                                 <vscale x 16 x i1> %pg,
+                                                                 <vscale x 8 x half> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 2 x i64> @fcvtzu_i64_f32(<vscale x 2 x i64> %a, <vscale x 16 x i1> %pg, <vscale x 4 x float> %b) {
+; CHECK-LABEL: fcvtzu_i64_f32:
+; CHECK: fcvtzu z0.d, p0/m, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.i64f32(<vscale x 2 x i64> %a,
+                                                                 <vscale x 16 x i1> %pg,
+                                                                 <vscale x 4 x float> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+;
+; SCVTF
+;
+
+define <vscale x 8 x half> @scvtf_f16_i16(<vscale x 8 x half> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: scvtf_f16_i16:
+; CHECK: scvtf z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.nxv8f16.nxv8i16(<vscale x 8 x half> %a,
+                                                                          <vscale x 8 x i1> %pg,
+                                                                          <vscale x 8 x i16> %b)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @scvtf_f32_i32(<vscale x 4 x float> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: scvtf_f32_i32:
+; CHECK: scvtf z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.scvtf.nxv4f32.nxv4i32(<vscale x 4 x float> %a,
+                                                                           <vscale x 4 x i1> %pg,
+                                                                           <vscale x 4 x i32> %b)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @scvtf_f64_i64(<vscale x 2 x double> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: scvtf_f64_i64:
+; CHECK: scvtf z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.scvtf.nxv2f64.nxv2i64(<vscale x 2 x double> %a,
+                                                                            <vscale x 2 x i1> %pg,
+                                                                            <vscale x 2 x i64> %b)
+  ret <vscale x 2 x double> %out
+}
+
+define <vscale x 8 x half> @scvtf_f16_i32(<vscale x 8 x half> %a, <vscale x 16 x i1> %pg, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: scvtf_f16_i32:
+; CHECK: scvtf z0.h, p0/m, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.f16i32(<vscale x 8 x half> %a,
+                                                                 <vscale x 16 x i1> %pg,
+                                                                 <vscale x 4 x i32> %b)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @scvtf_f16_i64(<vscale x 8 x half> %a, <vscale x 16 x i1> %pg, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: scvtf_f16_i64:
+; CHECK: scvtf z0.h, p0/m, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.f16i64(<vscale x 8 x half> %a,
+                                                                 <vscale x 16 x i1> %pg,
+                                                                 <vscale x 2 x i64> %b)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @scvtf_f32_i64(<vscale x 4 x float> %a, <vscale x 16 x i1> %pg, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: scvtf_f32_i64:
+; CHECK: scvtf z0.s, p0/m, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.scvtf.f32i64(<vscale x 4 x float> %a,
+                                                                  <vscale x 16 x i1> %pg,
+                                                                  <vscale x 2 x i64> %b)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @scvtf_f64_i32(<vscale x 2 x double> %a, <vscale x 16 x i1> %pg, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: scvtf_f64_i32:
+; CHECK: scvtf z0.d, p0/m, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.scvtf.f64i32(<vscale x 2 x double> %a,
+                                                                   <vscale x 16 x i1> %pg,
+                                                                   <vscale x 4 x i32> %b)
+  ret <vscale x 2 x double> %out
+}
+
+;
+; UCVTF
+;
+
+define <vscale x 8 x half> @ucvtf_f16_i16(<vscale x 8 x half> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: ucvtf_f16_i16:
+; CHECK: ucvtf z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.nxv8f16.nxv8i16(<vscale x 8 x half> %a,
+                                                                          <vscale x 8 x i1> %pg,
+                                                                          <vscale x 8 x i16> %b)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @ucvtf_f32_i32(<vscale x 4 x float> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: ucvtf_f32_i32:
+; CHECK: ucvtf z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.ucvtf.nxv4f32.nxv4i32(<vscale x 4 x float> %a,
+                                                                           <vscale x 4 x i1> %pg,
+                                                                           <vscale x 4 x i32> %b)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @ucvtf_f64_i64(<vscale x 2 x double> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: ucvtf_f64_i64:
+; CHECK: ucvtf z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.ucvtf.nxv2f64.nxv2i64(<vscale x 2 x double> %a,
+                                                                            <vscale x 2 x i1> %pg,
+                                                                            <vscale x 2 x i64> %b)
+  ret <vscale x 2 x double> %out
+}
+
+define <vscale x 8 x half> @ucvtf_f16_i32(<vscale x 8 x half> %a, <vscale x 16 x i1> %pg, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: ucvtf_f16_i32:
+; CHECK: ucvtf z0.h, p0/m, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.f16i32(<vscale x 8 x half> %a,
+                                                                 <vscale x 16 x i1> %pg,
+                                                                 <vscale x 4 x i32> %b)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @ucvtf_f16_i64(<vscale x 8 x half> %a, <vscale x 16 x i1> %pg, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: ucvtf_f16_i64:
+; CHECK: ucvtf z0.h, p0/m, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.f16i64(<vscale x 8 x half> %a,
+                                                                 <vscale x 16 x i1> %pg,
+                                                                 <vscale x 2 x i64> %b)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @ucvtf_f32_i64(<vscale x 4 x float> %a, <vscale x 16 x i1> %pg, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: ucvtf_f32_i64:
+; CHECK: ucvtf z0.s, p0/m, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.ucvtf.f32i64(<vscale x 4 x float> %a,
+                                                                  <vscale x 16 x i1> %pg,
+                                                                  <vscale x 2 x i64> %b)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @ucvtf_f64_i32(<vscale x 2 x double> %a, <vscale x 16 x i1> %pg, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: ucvtf_f64_i32:
+; CHECK: ucvtf z0.d, p0/m, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.ucvtf.f64i32(<vscale x 2 x double> %a,
+                                                                   <vscale x 16 x i1> %pg,
+                                                                   <vscale x 4 x i32> %b)
+  ret <vscale x 2 x double> %out
+}
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f32(<vscale x 8 x half>, <vscale x 16 x i1>, <vscale x 4 x float>)
+declare <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f64(<vscale x 8 x half>, <vscale x 16 x i1>, <vscale x 2 x double>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f16(<vscale x 4 x float>, <vscale x 16 x i1>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f64(<vscale x 4 x float>, <vscale x 16 x i1>, <vscale x 2 x double>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f16(<vscale x 2 x double>, <vscale x 16 x i1>, <vscale x 8 x half>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f32(<vscale x 2 x double>, <vscale x 16 x i1>, <vscale x 4 x float>)
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzs.nxv8i16.nxv8f16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x half>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.nxv4i32.nxv4f32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x float>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.nxv2i64.nxv2f64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x double>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.i32f16(<vscale x 4 x i32>, <vscale x 16 x i1>, <vscale x 8 x half>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.i32f64(<vscale x 4 x i32>, <vscale x 16 x i1>, <vscale x 2 x double>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.i64f16(<vscale x 2 x i64>, <vscale x 16 x i1>, <vscale x 8 x half>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.i64f32(<vscale x 2 x i64>, <vscale x 16 x i1>, <vscale x 4 x float>)
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzu.nxv8i16.nxv8f16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x half>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.nxv4i32.nxv4f32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x float>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.nxv2i64.nxv2f64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x double>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.i32f16(<vscale x 4 x i32>, <vscale x 16 x i1>, <vscale x 8 x half>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.i32f64(<vscale x 4 x i32>, <vscale x 16 x i1>, <vscale x 2 x double>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.i64f16(<vscale x 2 x i64>, <vscale x 16 x i1>, <vscale x 8 x half>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.i64f32(<vscale x 2 x i64>, <vscale x 16 x i1>, <vscale x 4 x float>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.scvtf.nxv8f16.nxv8i16(<vscale x 8 x half>, <vscale x 8 x i1>, <vscale x 8 x i16>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.scvtf.nxv4f32.nxv4i32(<vscale x 4 x float>, <vscale x 4 x i1>, <vscale x 4 x i32>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.scvtf.nxv2f64.nxv2i64(<vscale x 2 x double>, <vscale x 2 x i1>, <vscale x 2 x i64>)
+declare <vscale x 8 x half> @llvm.aarch64.sve.scvtf.f16i32(<vscale x 8 x half>, <vscale x 16 x i1>, <vscale x 4 x i32>)
+declare <vscale x 8 x half> @llvm.aarch64.sve.scvtf.f16i64(<vscale x 8 x half>, <vscale x 16 x i1>, <vscale x 2 x i64>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.scvtf.f32i64(<vscale x 4 x float>, <vscale x 16 x i1>, <vscale x 2 x i64>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.scvtf.f64i32(<vscale x 2 x double>, <vscale x 16 x i1>, <vscale x 4 x i32>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.nxv8f16.nxv8i16(<vscale x 8 x half>, <vscale x 8 x i1>, <vscale x 8 x i16>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.ucvtf.nxv4f32.nxv4i32(<vscale x 4 x float>, <vscale x 4 x i1>, <vscale x 4 x i32>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.ucvtf.nxv2f64.nxv2i64(<vscale x 2 x double>, <vscale x 2 x i1>, <vscale x 2 x i64>)
+declare <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.f16i32(<vscale x 8 x half>, <vscale x 16 x i1>, <vscale x 4 x i32>)
+declare <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.f16i64(<vscale x 8 x half>, <vscale x 16 x i1>, <vscale x 2 x i64>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.ucvtf.f32i64(<vscale x 4 x float>, <vscale x 16 x i1>, <vscale x 2 x i64>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.ucvtf.f64i32(<vscale x 2 x double>, <vscale x 16 x i1>, <vscale x 4 x i32>)

diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-converts.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-converts.ll
new file mode 100644
index 000000000000..4d110fee41c9
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-converts.ll
@@ -0,0 +1,84 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
+
+;
+; FCVTLT
+;
+
+define <vscale x 4 x float> @fcvtlt_f32_f16(<vscale x 4 x float> %a, <vscale x 16 x i1> %pg, <vscale x 8 x half> %b) {
+; CHECK-LABEL: fcvtlt_f32_f16:
+; CHECK: fcvtlt z0.s, p0/m, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtlt.f32f16(<vscale x 4 x float> %a,
+                                                                   <vscale x 16 x i1> %pg,
+                                                                   <vscale x 8 x half> %b)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @fcvtlt_f64_f32(<vscale x 2 x double> %a, <vscale x 16 x i1> %pg, <vscale x 4 x float> %b) {
+; CHECK-LABEL: fcvtlt_f64_f32:
+; CHECK: fcvtlt z0.d, p0/m, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fcvtlt.f64f32(<vscale x 2 x double> %a,
+                                                                    <vscale x 16 x i1> %pg,
+                                                                    <vscale x 4 x float> %b)
+  ret <vscale x 2 x double> %out
+}
+
+;
+; FCVTNT
+;
+
+define <vscale x 8 x half> @fcvtnt_f16_f32(<vscale x 8 x half> %a, <vscale x 16 x i1> %pg, <vscale x 4 x float> %b) {
+; CHECK-LABEL: fcvtnt_f16_f32:
+; CHECK: fcvtnt z0.h, p0/m, z1.s
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fcvtnt.f16f32(<vscale x 8 x half> %a,
+                                                                  <vscale x 16 x i1> %pg,
+                                                                  <vscale x 4 x float> %b)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @fcvtnt_f32_f64(<vscale x 4 x float> %a, <vscale x 16 x i1> %pg, <vscale x 2 x double> %b) {
+; CHECK-LABEL: fcvtnt_f32_f64:
+; CHECK: fcvtnt z0.s, p0/m, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtnt.f32f64(<vscale x 4 x float> %a,
+                                                                   <vscale x 16 x i1> %pg,
+                                                                   <vscale x 2 x double> %b)
+  ret <vscale x 4 x float> %out
+}
+
+;
+; FCVTX
+;
+
+define <vscale x 4 x float> @fcvtx_f32_f64(<vscale x 4 x float> %a, <vscale x 16 x i1> %pg, <vscale x 2 x double> %b) {
+; CHECK-LABEL: fcvtx_f32_f64:
+; CHECK: fcvtx z0.s, p0/m, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtx.f32f64(<vscale x 4 x float> %a,
+                                                                  <vscale x 16 x i1> %pg,
+                                                                  <vscale x 2 x double> %b)
+  ret <vscale x 4 x float> %out
+}
+
+;
+; FCVTXNT
+;
+
+define <vscale x 4 x float> @fcvtxnt_f32_f64(<vscale x 4 x float> %a, <vscale x 16 x i1> %pg, <vscale x 2 x double> %b) {
+; CHECK-LABEL: fcvtxnt_f32_f64:
+; CHECK: fcvtxnt z0.s, p0/m, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtxnt.f32f64(<vscale x 4 x float> %a,
+                                                                    <vscale x 16 x i1> %pg,
+                                                                    <vscale x 2 x double> %b)
+  ret <vscale x 4 x float> %out
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fcvtlt.f32f16(<vscale x 4 x float>, <vscale x 16 x i1>, <vscale x 8 x half>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fcvtlt.f64f32(<vscale x 2 x double>, <vscale x 16 x i1>, <vscale x 4 x float>)
+declare <vscale x 8 x half> @llvm.aarch64.sve.fcvtnt.f16f32(<vscale x 8 x half>, <vscale x 16 x i1>, <vscale x 4 x float>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fcvtnt.f32f64(<vscale x 4 x float>, <vscale x 16 x i1>, <vscale x 2 x double>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fcvtx.f32f64(<vscale x 4 x float>, <vscale x 16 x i1>, <vscale x 2 x double>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fcvtxnt.f32f64(<vscale x 4 x float>, <vscale x 16 x i1>, <vscale x 2 x double>)
