[llvm] 5f6a971 - [AArch64][SME2] Add multi-vector convert to/from floating-point intrinsics

Caroline Concatto via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 23 09:09:25 PST 2023


Author: Caroline Concatto
Date: 2023-01-23T17:09:04Z
New Revision: 5f6a97115759625fb3cbf61457cb8b4c6c3f8399

URL: https://github.com/llvm/llvm-project/commit/5f6a97115759625fb3cbf61457cb8b4c6c3f8399
DIFF: https://github.com/llvm/llvm-project/commit/5f6a97115759625fb3cbf61457cb8b4c6c3f8399.diff

LOG: [AArch64][SME2] Add multi-vector convert to/from floating-point intrinsics

Add the following intrinsics:

  FCVT
  BFCVT
  FCVTZS
  FCVTZU
  SCVTF
  UCVTF

This patch also adds SelectCVTIntrinsic to handle the cases where an
intrinsic returns multiple (two or four) results.
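
For example, a two-result conversion is a single call returning a pair of
scalable vectors (taken from the new test below):

  %res = call {<vscale x 4 x float>, <vscale x 4 x float>}
             @llvm.aarch64.sve.fcvts.x2.nxv4f32(<vscale x 4 x i32> %zn0,
                                                <vscale x 4 x i32> %zn1)

SelectCVTIntrinsic selects such a call to a single machine node and rewrites
each result as an extract of the corresponding Z subregister.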

NOTE: These intrinsics are still in development and are subject to future changes.

Reviewed By: kmclaughlin

Differential Revision: https://reviews.llvm.org/D142032

Added: 
    llvm/test/CodeGen/AArch64/sme2-intrinsics-cvt.ll

Modified: 
    llvm/include/llvm/IR/IntrinsicsAArch64.td
    llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
    llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
    llvm/lib/Target/AArch64/SMEInstrFormats.td

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 05859cc7f0f9a..6e0459b8b8cff 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -2780,6 +2780,27 @@ let TargetPrefix = "aarch64" in {
                             [llvm_nxv4f32_ty, llvm_nxv4f32_ty],
                             [IntrNoMem]>;
 
+  class SME2_CVT_FtoI_VG2_Intrinsic
+    : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
+                            [LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>],
+                            [IntrNoMem]>;
+
+  class SME2_CVT_ItoF_VG2_Intrinsic
+    : DefaultAttrsIntrinsic<[LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>],
+                            [llvm_anyvector_ty, LLVMMatchType<0>],
+                            [IntrNoMem]>;
+
+  class SME2_CVT_FtoI_VG4_Intrinsic
+    : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
+                            [LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>,
+                             LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>],
+                            [IntrNoMem]>;
+
+  class SME2_CVT_ItoF_VG4_Intrinsic
+    : DefaultAttrsIntrinsic<[LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>,
+                             LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>],
+                            [llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
+                            [IntrNoMem]>;
 
   //
   // Multi-vector fused multiply-add/subtract
@@ -2839,4 +2860,18 @@ let TargetPrefix = "aarch64" in {
   //
   def int_aarch64_sve_fcvtn_x2  : SME2_CVT_VG2_SINGLE_Intrinsic;
   def int_aarch64_sve_bfcvtn_x2 : SME2_CVT_VG2_SINGLE_BF16_Intrinsic;
+
+  //
+  // Multi-vector convert to/from floating-point.
+  //
+  def int_aarch64_sve_fcvt_x2  : SME2_CVT_VG2_SINGLE_Intrinsic;
+  def int_aarch64_sve_bfcvt_x2 : SME2_CVT_VG2_SINGLE_BF16_Intrinsic;
+  def int_aarch64_sve_fcvts_x2 : SME2_CVT_FtoI_VG2_Intrinsic;
+  def int_aarch64_sve_fcvtu_x2 : SME2_CVT_FtoI_VG2_Intrinsic;
+  def int_aarch64_sve_scvtf_x2 : SME2_CVT_ItoF_VG2_Intrinsic;
+  def int_aarch64_sve_ucvtf_x2 : SME2_CVT_ItoF_VG2_Intrinsic;
+  def int_aarch64_sve_fcvts_x4 : SME2_CVT_FtoI_VG4_Intrinsic;
+  def int_aarch64_sve_fcvtu_x4 : SME2_CVT_FtoI_VG4_Intrinsic;
+  def int_aarch64_sve_scvtf_x4 : SME2_CVT_ItoF_VG4_Intrinsic;
+  def int_aarch64_sve_ucvtf_x4 : SME2_CVT_ItoF_VG4_Intrinsic;
 }

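The VG2/VG4 classes use LLVMVectorOfBitcastsToInt<0> to tie the integer half
of each signature to a vector with the same shape as the floating-point
overload type, so the x2 variants read, for example (see the new test below):

  declare {<vscale x 4 x float>, <vscale x 4 x float>}
      @llvm.aarch64.sve.fcvts.x2.nxv4f32(<vscale x 4 x i32>, <vscale x 4 x i32>)
  declare {<vscale x 4 x i32>, <vscale x 4 x i32>}
      @llvm.aarch64.sve.scvtf.x2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
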
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 39a398ba943fe..0397e894ef4cb 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -357,6 +357,7 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
                             unsigned Opc_rr, unsigned Opc_ri,
                             bool IsIntr = false);
   void SelectWhilePair(SDNode *N, unsigned Opc);
+  void SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, unsigned Opcode);
 
   bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
   /// SVE Reg+Imm addressing mode.
@@ -1747,6 +1748,22 @@ void AArch64DAGToDAGISel::SelectWhilePair(SDNode *N, unsigned Opc) {
   CurDAG->RemoveDeadNode(N);
 }
 
+void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs,
+                                             unsigned Opcode) {
+  EVT VT = N->getValueType(0);
+  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
+  SDValue Ops = createZTuple(Regs);
+  SDLoc DL(N);
+  SDNode *Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops);
+  SDValue SuperReg = SDValue(Intrinsic, 0);
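+  // Replace each result of the node with the corresponding zsub subregister.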
+  for (unsigned i = 0; i < NumVecs; ++i)
+    ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
+                                   AArch64::zsub0 + i, DL, VT, SuperReg));
+
+  CurDAG->RemoveDeadNode(N);
+}
+
 void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
                                                unsigned Scale, unsigned Opc_ri,
                                                unsigned Opc_rr, bool IsIntr) {
@@ -4732,6 +4749,30 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
                AArch64::WHILELT_2PXX_S, AArch64::WHILELT_2PXX_D}))
         SelectWhilePair(Node, Op);
       return;
+    case Intrinsic::aarch64_sve_fcvts_x2:
+      SelectCVTIntrinsic(Node, 2, AArch64::FCVTZS_2Z2Z_StoS);
+      return;
+    case Intrinsic::aarch64_sve_scvtf_x2:
+      SelectCVTIntrinsic(Node, 2, AArch64::SCVTF_2Z2Z_StoS);
+      return;
+    case Intrinsic::aarch64_sve_fcvtu_x2:
+      SelectCVTIntrinsic(Node, 2, AArch64::FCVTZU_2Z2Z_StoS);
+      return;
+    case Intrinsic::aarch64_sve_ucvtf_x2:
+      SelectCVTIntrinsic(Node, 2, AArch64::UCVTF_2Z2Z_StoS);
+      return;
+    case Intrinsic::aarch64_sve_fcvts_x4:
+      SelectCVTIntrinsic(Node, 4, AArch64::FCVTZS_4Z4Z_StoS);
+      return;
+    case Intrinsic::aarch64_sve_scvtf_x4:
+      SelectCVTIntrinsic(Node, 4, AArch64::SCVTF_4Z4Z_StoS);
+      return;
+    case Intrinsic::aarch64_sve_fcvtu_x4:
+      SelectCVTIntrinsic(Node, 4, AArch64::FCVTZU_4Z4Z_StoS);
+      return;
+    case Intrinsic::aarch64_sve_ucvtf_x4:
+      SelectCVTIntrinsic(Node, 4, AArch64::UCVTF_4Z4Z_StoS);
+      return;
     }
     break;
   }
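
All of these go through SelectCVTIntrinsic: the source vectors are gathered
into a Z-register tuple, a single strided machine node is created, and each
result is rewritten to a subregister extract. For example, the four-result
FCVTZS call from the new test,

  %res = call {<vscale x 4 x float>, <vscale x 4 x float>,
               <vscale x 4 x float>, <vscale x 4 x float>}
             @llvm.aarch64.sve.fcvts.x4.nxv4f32(<vscale x 4 x i32> %zn0,
                                                <vscale x 4 x i32> %zn1,
                                                <vscale x 4 x i32> %zn2,
                                                <vscale x 4 x i32> %zn3)

selects to one FCVTZS_4Z4Z_StoS node whose results are the zsub0-zsub3
subregisters.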

diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
index 42a44c62363bb..657228490efed 100644
--- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
@@ -363,9 +363,9 @@ defm UMLSL_VG4_M4ZZ  : sme2_int_mla_long_array_vg4_single<"umlsl", 0b11, int_aar
 defm UMLSL_VG2_M2Z2Z : sme2_int_mla_long_array_vg2_multi<"umlsl",  0b11, int_aarch64_sme_umlsl_vg2x2>;
 defm UMLSL_VG4_M4Z4Z : sme2_int_mla_long_array_vg4_multi<"umlsl",  0b11, int_aarch64_sme_umlsl_vg2x4>;
 
-defm FCVT_Z2Z_StoH   : sme2_cvt_vg2_single<"fcvt",   0b0000, nxv8f16, nxv4f32, null_frag>;
+defm FCVT_Z2Z_StoH   : sme2_cvt_vg2_single<"fcvt",   0b0000, nxv8f16, nxv4f32, int_aarch64_sve_fcvt_x2>;
 defm FCVTN_Z2Z_StoH  : sme2_cvt_vg2_single<"fcvtn",  0b0001, nxv8f16, nxv4f32, int_aarch64_sve_fcvtn_x2>;
-defm BFCVT_Z2Z_StoH  : sme2_cvt_vg2_single<"bfcvt",  0b1000, nxv8bf16, nxv4f32, null_frag>;
+defm BFCVT_Z2Z_StoH  : sme2_cvt_vg2_single<"bfcvt",  0b1000, nxv8bf16, nxv4f32, int_aarch64_sve_bfcvt_x2>;
 defm BFCVTN_Z2Z_StoH : sme2_cvt_vg2_single<"bfcvtn", 0b1001, nxv8bf16, nxv4f32, int_aarch64_sve_bfcvtn_x2>;
 
 defm SQCVT_Z2Z_StoH  : sme2_cvt_vg2_single<"sqcvt",  0b0110, nxv8i16, nxv4i32, null_frag>;

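With the null_frag placeholders replaced, the single-result FCVT and BFCVT
narrowing converts can also be selected from IR, e.g. (from the new test):

  %res = call <vscale x 8 x half>
             @llvm.aarch64.sve.fcvt.x2.nxv4f32(<vscale x 4 x float> %zn1,
                                               <vscale x 4 x float> %zn2)
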
diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td
index 8378fb64eb8b3..c1decff1f4a2b 100644
--- a/llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -2036,7 +2036,7 @@ class sme2_frint_zip_cvt_vg4_multi<bits<2>sz, bits<7>op, RegisterOperand first_t
 // SME2 multi-vec FP to int convert four registers
 // SME2 multi-vec int to FP four registers
 multiclass sme2_fp_cvt_vg4_multi<string mnemonic, bits<7> op> {
-  def _S : sme2_frint_zip_cvt_vg4_multi<0b00, op, ZZZZ_s_mul_r, ZZZZ_s_mul_r, mnemonic>;
+  def NAME : sme2_frint_zip_cvt_vg4_multi<0b00, op, ZZZZ_s_mul_r, ZZZZ_s_mul_r, mnemonic>;
 }
 
 // SME2 multi-vec quadwords ZIP four registers

diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvt.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvt.ll
new file mode 100644
index 0000000000000..8e2824122383f
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvt.ll
@@ -0,0 +1,151 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -verify-machineinstrs < %s | FileCheck %s
+
+;
+; FCVT
+;
+
+define <vscale x 8 x half> @multi_vector_cvt_x2_f16(<vscale x 4 x float> %unused, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) {
+; CHECK-LABEL: multi_vector_cvt_x2_f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z3.d, z2.d
+; CHECK-NEXT:    mov z2.d, z1.d
+; CHECK-NEXT:    fcvt z0.h, { z2.s, z3.s }
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.x2.nxv4f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2)
+  ret <vscale x 8 x half> %res
+}
+
+;
+; BFCVT
+;
+
+define <vscale x 8 x bfloat> @multi_vector_cvt_x2_bf16(<vscale x 4 x float> %unused, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) {
+; CHECK-LABEL: multi_vector_cvt_x2_bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z3.d, z2.d
+; CHECK-NEXT:    mov z2.d, z1.d
+; CHECK-NEXT:    bfcvt z0.h, { z2.s, z3.s }
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.bfcvt.x2(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2)
+  ret <vscale x 8 x bfloat> %res
+}
+
+;
+; FCVTZS
+;
+define {<vscale x 4 x float>, <vscale x 4 x float>} @multi_vector_cvt_x2_f32_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1) {
+; CHECK-LABEL: multi_vector_cvt_x2_f32_s32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z3.d, z2.d
+; CHECK-NEXT:    mov z2.d, z1.d
+; CHECK-NEXT:    fcvtzs { z0.s, z1.s }, { z2.s, z3.s }
+; CHECK-NEXT:    ret
+  %res = call {<vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sve.fcvts.x2.nxv4f32(<vscale x 4 x i32>%zn0, <vscale x 4 x i32>%zn1)
+  ret {<vscale x 4 x float>, <vscale x 4 x float>} %res
+}
+
+define {<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>} @multi_vector_cvt_x4_f32_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3) {
+; CHECK-LABEL: multi_vector_cvt_x4_f32_s32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    fcvtzs { z0.s - z3.s }, { z4.s - z7.s }
+; CHECK-NEXT:    ret
+  %res = call {<vscale x 4 x float>, <vscale x 4 x float>,<vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sve.fcvts.x4.nxv4f32(<vscale x 4 x i32>%zn0, <vscale x 4 x i32>%zn1, <vscale x 4 x i32>%zn2, <vscale x 4 x i32>%zn3)
+  ret {<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>} %res
+}
+
+;
+; FCVTZU
+;
+define {<vscale x 4 x float>, <vscale x 4 x float>} @multi_vector_cvt_x2_f32_u32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1) {
+; CHECK-LABEL: multi_vector_cvt_x2_f32_u32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z3.d, z2.d
+; CHECK-NEXT:    mov z2.d, z1.d
+; CHECK-NEXT:    fcvtzu { z0.s, z1.s }, { z2.s, z3.s }
+; CHECK-NEXT:    ret
+  %res = call {<vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sve.fcvtu.x2.nxv4f32(<vscale x 4 x i32>%zn0, <vscale x 4 x i32>%zn1)
+  ret {<vscale x 4 x float>, <vscale x 4 x float>} %res
+}
+
+define {<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>} @multi_vector_cvt_x4_f32_u32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3) {
+; CHECK-LABEL: multi_vector_cvt_x4_f32_u32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    fcvtzu { z0.s - z3.s }, { z4.s - z7.s }
+; CHECK-NEXT:    ret
+  %res = call {<vscale x 4 x float>, <vscale x 4 x float>,<vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sve.fcvtu.x4.nxv4f32(<vscale x 4 x i32>%zn0, <vscale x 4 x i32>%zn1, <vscale x 4 x i32>%zn2, <vscale x 4 x i32>%zn3)
+  ret {<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>} %res
+}
+
+;
+; SCVTF
+;
+define {<vscale x 4 x i32>, <vscale x 4 x i32>} @multi_vector_cvt_x2_s32_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1) {
+; CHECK-LABEL: multi_vector_cvt_x2_s32_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z3.d, z2.d
+; CHECK-NEXT:    mov z2.d, z1.d
+; CHECK-NEXT:    scvtf { z0.s, z1.s }, { z2.s, z3.s }
+; CHECK-NEXT:    ret
+  %res = call {<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sve.scvtf.x2.nxv4f32(<vscale x 4 x float>%zn0, <vscale x 4 x float>%zn1)
+  ret {<vscale x 4 x i32>, <vscale x 4 x i32>} %res
+}
+
+define {<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>} @multi_vector_cvt_x4_s32_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3) {
+; CHECK-LABEL: multi_vector_cvt_x4_s32_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    scvtf { z0.s - z3.s }, { z4.s - z7.s }
+; CHECK-NEXT:    ret
+  %res = call {<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sve.scvtf.x4.nxv4f32(<vscale x 4 x float>%zn0, <vscale x 4 x float>%zn1, <vscale x 4 x float>%zn2, <vscale x 4 x float>%zn3)
+  ret {<vscale x 4 x i32>, <vscale x 4 x i32>,<vscale x 4 x i32>, <vscale x 4 x i32>} %res
+}
+
+;
+; UCVTF
+;
+define {<vscale x 4 x i32>, <vscale x 4 x i32>} @multi_vector_cvt_x2_u32_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1) {
+; CHECK-LABEL: multi_vector_cvt_x2_u32_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z3.d, z2.d
+; CHECK-NEXT:    mov z2.d, z1.d
+; CHECK-NEXT:    ucvtf { z0.s, z1.s }, { z2.s, z3.s }
+; CHECK-NEXT:    ret
+  %res = call {<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sve.ucvtf.x2.nxv4f32(<vscale x 4 x float>%zn0, <vscale x 4 x float>%zn1)
+  ret {<vscale x 4 x i32>, <vscale x 4 x i32>} %res
+}
+
+define {<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>} @multi_vector_cvt_x4_u32_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3) {
+; CHECK-LABEL: multi_vector_cvt_x4_u32_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    ucvtf { z0.s - z3.s }, { z4.s - z7.s }
+; CHECK-NEXT:    ret
+  %res = call {<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sve.ucvtf.x4.nxv4f32(<vscale x 4 x float>%zn0, <vscale x 4 x float>%zn1, <vscale x 4 x float>%zn2, <vscale x 4 x float>%zn3)
+  ret {<vscale x 4 x i32>, <vscale x 4 x i32>,<vscale x 4 x i32>, <vscale x 4 x i32>} %res
+}
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fcvt.x2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 8 x bfloat> @llvm.aarch64.sve.bfcvt.x2(<vscale x 4 x float>, <vscale x 4 x float>)
+declare {<vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sve.fcvts.x2.nxv4f32(<vscale x 4 x i32>,<vscale x 4 x i32>)
+declare {<vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sve.fcvtu.x2.nxv4f32(<vscale x 4 x i32>,<vscale x 4 x i32>)
+declare {<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sve.scvtf.x2.nxv4f32(<vscale x 4 x float>,<vscale x 4 x float>)
+declare {<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sve.ucvtf.x2.nxv4f32(<vscale x 4 x float>,<vscale x 4 x float>)
+declare {<vscale x 4 x float>, <vscale x 4 x float>,<vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sve.fcvts.x4.nxv4f32(<vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>)
+declare {<vscale x 4 x float>, <vscale x 4 x float>,<vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sve.fcvtu.x4.nxv4f32(<vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>)
+declare {<vscale x 4 x i32>, <vscale x 4 x i32>,<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sve.scvtf.x4.nxv4f32(<vscale x 4 x float>,<vscale x 4 x float>,<vscale x 4 x float>,<vscale x 4 x float>)
+declare {<vscale x 4 x i32>, <vscale x 4 x i32>,<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sve.ucvtf.x4.nxv4f32(<vscale x 4 x float>,<vscale x 4 x float>,<vscale x 4 x float>,<vscale x 4 x float>)
