[llvm] [AArch64] optimise SVE cvt intrinsics with no active lanes (PR #104809)

Mon Aug 19 09:01:56 PDT 2024

https://github.com/Lukacma created https://github.com/llvm/llvm-project/pull/104809

This patch extends https://github.com/llvm/llvm-project/pull/73964 and optimises SVE cvt intrinsics: when the predicate has no active lanes, the call is replaced by its first (inactive-lanes) operand.

>From f77215aebf7592a3bc958c56a64b448bfb482136 Mon Sep 17 00:00:00 2001
From: Marian Lukac <Marian.Lukac at arm.com>
Date: Mon, 19 Aug 2024 14:43:04 +0000
Subject: [PATCH] [AArch64] optimise SVE cvt intrinsics with no active lanes

---
 .../AArch64/AArch64TargetTransformInfo.cpp    |  62 +++-
 .../sve-intrinsic-comb-no-active-lanes-cvt.ll | 309 ++++++++++++++++++
 2 files changed, 370 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-cvt.ll

diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index a782c9c4351237..c546dfb1453492 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1073,6 +1073,32 @@ static bool isAllActivePredicate(Value *Pred) {
                          m_ConstantInt<AArch64SVEPredPattern::all>()));
 }
 
+// When the predicate (operand 1 if hasInactiveVector, else operand 0) is zero,
+// forward II's uses to the other of those two operands and erase II.
+static std::optional<Instruction *>
+instCombineSVENoActiveUnaryReplace(InstCombiner &IC, IntrinsicInst &II,
+                                   bool hasInactiveVector) {
+  int PredOperand = hasInactiveVector ? 1 : 0;
+  int ReplaceOperand = hasInactiveVector ? 0 : 1;
+  if (match(II.getOperand(PredOperand), m_ZeroInt())) {
+    IC.replaceInstUsesWith(II, II.getOperand(ReplaceOperand));
+    return IC.eraseInstFromFunction(II);
+  }
+  return std::nullopt;
+}
+
+// With an all-active predicate, set operand 0 (the inactive-lane value) to
+// undef, giving the _x form; otherwise fold away calls with no active lanes.
+static std::optional<Instruction *>
+instCombineSVEAllOrNoActiveUnary(InstCombiner &IC, IntrinsicInst &II) {
+  if (isAllActivePredicate(II.getOperand(1)) &&
+      !isa<llvm::UndefValue>(II.getOperand(0))) {
+    Value *Undef = llvm::UndefValue::get(II.getType());
+    return IC.replaceOperand(II, 0, Undef);
+  }
+  return instCombineSVENoActiveUnaryReplace(IC, II, true);
+}
+
 // Erase unary operation where predicate has all inactive lanes
 static std::optional<Instruction *>
 instCombineSVENoActiveUnaryErase(InstCombiner &IC, IntrinsicInst &II,
@@ -2104,7 +2130,41 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
   switch (IID) {
   default:
     break;
-
+  case Intrinsic::aarch64_sve_fcvt_bf16f32:
+  case Intrinsic::aarch64_sve_fcvt_f16f32:
+  case Intrinsic::aarch64_sve_fcvt_f16f64:
+  case Intrinsic::aarch64_sve_fcvt_f32f16:
+  case Intrinsic::aarch64_sve_fcvt_f32f64:
+  case Intrinsic::aarch64_sve_fcvt_f64f16:
+  case Intrinsic::aarch64_sve_fcvt_f64f32:
+  case Intrinsic::aarch64_sve_fcvtlt_f32f16:
+  case Intrinsic::aarch64_sve_fcvtlt_f64f32:
+  case Intrinsic::aarch64_sve_fcvtnt_bf16f32:
+  case Intrinsic::aarch64_sve_fcvtnt_f16f32:
+  case Intrinsic::aarch64_sve_fcvtnt_f32f64:
+  case Intrinsic::aarch64_sve_fcvtx_f32f64:
+  case Intrinsic::aarch64_sve_fcvtxnt_f32f64:
+  case Intrinsic::aarch64_sve_fcvtzs:
+  case Intrinsic::aarch64_sve_fcvtzs_i32f16:
+  case Intrinsic::aarch64_sve_fcvtzs_i32f64:
+  case Intrinsic::aarch64_sve_fcvtzs_i64f16:
+  case Intrinsic::aarch64_sve_fcvtzs_i64f32:
+  case Intrinsic::aarch64_sve_fcvtzu:
+  case Intrinsic::aarch64_sve_fcvtzu_i32f16:
+  case Intrinsic::aarch64_sve_fcvtzu_i32f64:
+  case Intrinsic::aarch64_sve_fcvtzu_i64f16:
+  case Intrinsic::aarch64_sve_fcvtzu_i64f32:
+  case Intrinsic::aarch64_sve_scvtf:
+  case Intrinsic::aarch64_sve_scvtf_f16i32:
+  case Intrinsic::aarch64_sve_scvtf_f16i64:
+  case Intrinsic::aarch64_sve_scvtf_f32i64:
+  case Intrinsic::aarch64_sve_scvtf_f64i32:
+  case Intrinsic::aarch64_sve_ucvtf:
+  case Intrinsic::aarch64_sve_ucvtf_f16i32:
+  case Intrinsic::aarch64_sve_ucvtf_f16i64:
+  case Intrinsic::aarch64_sve_ucvtf_f32i64:
+  case Intrinsic::aarch64_sve_ucvtf_f64i32:
+    return instCombineSVEAllOrNoActiveUnary(IC, II);
   case Intrinsic::aarch64_sve_st1_scatter:
   case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
   case Intrinsic::aarch64_sve_st1_scatter_sxtw:
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-cvt.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-cvt.ll
new file mode 100644
index 00000000000000..eb01ff5dacab56
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-cvt.ll
@@ -0,0 +1,309 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+target triple = "aarch64-unknown-linux-gnu"
+
+define <vscale x 8 x bfloat> @test_fcvt_bf16_f32(<vscale x 8 x bfloat> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: define <vscale x 8 x bfloat> @test_fcvt_bf16_f32(
+; CHECK-SAME: <vscale x 8 x bfloat> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
+; CHECK-NEXT:    ret <vscale x 8 x bfloat> [[A]]
+;
+  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32(<vscale x 8 x bfloat> %a, <vscale x 8 x i1> zeroinitializer, <vscale x 4 x float> %b)
+  ret <vscale x 8 x bfloat> %out
+}
+
+define <vscale x 8 x half> @test_fcvt_f16_f32(<vscale x 8 x half> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: define <vscale x 8 x half> @test_fcvt_f16_f32(
+; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
+; CHECK-NEXT:    ret <vscale x 8 x half> [[A]]
+;
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f32(<vscale x 8 x half> %a, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %b)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @test_fcvt_f16_f64(<vscale x 8 x half> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: define <vscale x 8 x half> @test_fcvt_f16_f64(
+; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) {
+; CHECK-NEXT:    ret <vscale x 8 x half> [[A]]
+;
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f64(<vscale x 8 x half> %a, <vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %b)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @test_fcvt_f32_f16(<vscale x 4 x float> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: define <vscale x 4 x float> @test_fcvt_f32_f16(
+; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
+; CHECK-NEXT:    ret <vscale x 4 x float> [[A]]
+;
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f16(<vscale x 4 x float> %a, <vscale x 4 x i1> zeroinitializer, <vscale x 8 x half> %b)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @test_fcvt_f32_f64(<vscale x 4 x float> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: define <vscale x 4 x float> @test_fcvt_f32_f64(
+; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) {
+; CHECK-NEXT:    ret <vscale x 4 x float> [[A]]
+;
+  %out = call<vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f64(<vscale x 4 x float> %a, <vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %b)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @test_fcvt_f64_f16(<vscale x 2 x double> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: define <vscale x 2 x double> @test_fcvt_f64_f16(
+; CHECK-SAME: <vscale x 2 x double> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
+; CHECK-NEXT:    ret <vscale x 2 x double> [[A]]
+;
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f16(<vscale x 2 x double> %a, <vscale x 2 x i1> zeroinitializer, <vscale x 8 x half> %b)
+  ret <vscale x 2 x double> %out
+}
+
+define <vscale x 2 x double> @test_fcvt_f64_f32(<vscale x 2 x double> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: define <vscale x 2 x double> @test_fcvt_f64_f32(
+; CHECK-SAME: <vscale x 2 x double> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
+; CHECK-NEXT:    ret <vscale x 2 x double> [[A]]
+;
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f32(<vscale x 2 x double> %a, <vscale x 2 x i1> zeroinitializer, <vscale x 4 x float> %b)
+  ret <vscale x 2 x double> %out
+}
+
+define <vscale x 4 x float> @test_fcvtlt_f32_f16(<vscale x 4 x float> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: define <vscale x 4 x float> @test_fcvtlt_f32_f16(
+; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
+; CHECK-NEXT:    ret <vscale x 4 x float> [[A]]
+;
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtlt.f32f16(<vscale x 4 x float> %a, <vscale x 4 x i1> zeroinitializer, <vscale x 8 x half> %b)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @test_fcvtlt_f64_f32(<vscale x 2 x double> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: define <vscale x 2 x double> @test_fcvtlt_f64_f32(
+; CHECK-SAME: <vscale x 2 x double> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
+; CHECK-NEXT:    ret <vscale x 2 x double> [[A]]
+;
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fcvtlt.f64f32(<vscale x 2 x double> %a, <vscale x 2 x i1> zeroinitializer, <vscale x 4 x float> %b)
+  ret <vscale x 2 x double> %out
+}
+
+define <vscale x 8 x bfloat> @test_fcvtnt_bf16_f32(<vscale x 8 x bfloat> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: define <vscale x 8 x bfloat> @test_fcvtnt_bf16_f32(
+; CHECK-SAME: <vscale x 8 x bfloat> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
+; CHECK-NEXT:    ret <vscale x 8 x bfloat> [[A]]
+;
+  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32(<vscale x 8 x bfloat> %a, <vscale x 8 x i1> zeroinitializer, <vscale x 4 x float> %b)
+  ret <vscale x 8 x bfloat> %out
+}
+
+define <vscale x 8 x half> @test_fcvtnt_f16_f32(<vscale x 8 x half> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: define <vscale x 8 x half> @test_fcvtnt_f16_f32(
+; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
+; CHECK-NEXT:    ret <vscale x 8 x half> [[A]]
+;
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fcvtnt.f16f32(<vscale x 8 x half> %a, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %b)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @test_fcvtnt_f32_f64(<vscale x 4 x float> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: define <vscale x 4 x float> @test_fcvtnt_f32_f64(
+; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) {
+; CHECK-NEXT:    ret <vscale x 4 x float> [[A]]
+;
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtnt.f32f64(<vscale x 4 x float> %a, <vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %b)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @test_fcvtx_f32_f64(<vscale x 4 x float> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: define <vscale x 4 x float> @test_fcvtx_f32_f64(
+; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) {
+; CHECK-NEXT:    ret <vscale x 4 x float> [[A]]
+;
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtx.f32f64(<vscale x 4 x float> %a, <vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %b)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @test_fcvtxnt_f32_f64(<vscale x 4 x float> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: define <vscale x 4 x float> @test_fcvtxnt_f32_f64(
+; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) {
+; CHECK-NEXT:    ret <vscale x 4 x float> [[A]]
+;
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtxnt.f32f64(<vscale x 4 x float> %a, <vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %b)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 8 x i16> @test_fcvtzs(<vscale x 8 x i16> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: define <vscale x 8 x i16> @test_fcvtzs(
+; CHECK-SAME: <vscale x 8 x i16> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[A]]
+;
+  %out = call <vscale x 8 x i16>  @llvm.aarch64.sve.fcvtzs.nxv4i32.nxv4f32(<vscale x 8 x i16> %a, <vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @test_fcvtzs_i32_f16(<vscale x 4 x i32> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: define <vscale x 4 x i32> @test_fcvtzs_i32_f16(
+; CHECK-SAME: <vscale x 4 x i32> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
+;
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.i32f16(<vscale x 4 x i32> %a, <vscale x 4 x i1> zeroinitializer,<vscale x 8 x half> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 4 x i32> @test_fcvtzs_i32_f64(<vscale x 4 x i32> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: define <vscale x 4 x i32> @test_fcvtzs_i32_f64(
+; CHECK-SAME: <vscale x 4 x i32> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) {
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
+;
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.i32f64(<vscale x 4 x i32> %a, <vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @test_fcvtzs_i64_f16(<vscale x 2 x i64> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: define <vscale x 2 x i64> @test_fcvtzs_i64_f16(
+; CHECK-SAME: <vscale x 2 x i64> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[A]]
+;
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.i64f16(<vscale x 2 x i64> %a, <vscale x 2 x i1> zeroinitializer,<vscale x 8 x half> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 2 x i64> @test_fcvtzs_i64_f32(<vscale x 2 x i64> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: define <vscale x 2 x i64> @test_fcvtzs_i64_f32(
+; CHECK-SAME: <vscale x 2 x i64> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[A]]
+;
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.i64f32(<vscale x 2 x i64> %a, <vscale x 2 x i1> zeroinitializer, <vscale x 4 x float> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 8 x i16> @test_fcvtzu(<vscale x 8 x i16> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: define <vscale x 8 x i16> @test_fcvtzu(
+; CHECK-SAME: <vscale x 8 x i16> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[A]]
+;
+  %out = call <vscale x 8 x i16>  @llvm.aarch64.sve.fcvtzu.nxv4i32.nxv4f32(<vscale x 8 x i16> %a, <vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @test_fcvtzu_i32_f16(<vscale x 4 x i32> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: define <vscale x 4 x i32> @test_fcvtzu_i32_f16(
+; CHECK-SAME: <vscale x 4 x i32> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
+;
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.i32f16(<vscale x 4 x i32> %a, <vscale x 4 x i1> zeroinitializer,<vscale x 8 x half> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 4 x i32> @test_fcvtzu_i32_f64(<vscale x 4 x i32> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: define <vscale x 4 x i32> @test_fcvtzu_i32_f64(
+; CHECK-SAME: <vscale x 4 x i32> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) {
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
+;
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.i32f64(<vscale x 4 x i32> %a, <vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @test_fcvtzu_i64_f16(<vscale x 2 x i64> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: define <vscale x 2 x i64> @test_fcvtzu_i64_f16(
+; CHECK-SAME: <vscale x 2 x i64> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[A]]
+;
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.i64f16(<vscale x 2 x i64> %a, <vscale x 2 x i1> zeroinitializer,<vscale x 8 x half> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 2 x i64> @test_fcvtzu_i64_f32(<vscale x 2 x i64> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: define <vscale x 2 x i64> @test_fcvtzu_i64_f32(
+; CHECK-SAME: <vscale x 2 x i64> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[A]]
+;
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.i64f32(<vscale x 2 x i64> %a, <vscale x 2 x i1> zeroinitializer, <vscale x 4 x float> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 8 x half> @test_scvtf(<vscale x 8 x half> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: define <vscale x 8 x half> @test_scvtf(
+; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) {
+; CHECK-NEXT:    ret <vscale x 8 x half> [[A]]
+;
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.nxv8f16.nxv8i16(<vscale x 8 x half> %a, <vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @test_scvtf_f16_i32(<vscale x 8 x half> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: define <vscale x 8 x half> @test_scvtf_f16_i32(
+; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) {
+; CHECK-NEXT:    ret <vscale x 8 x half> [[A]]
+;
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.f16i32(<vscale x 8 x half> %a, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %b)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @test_scvtf_f16_i64(<vscale x 8 x half> %a,<vscale x 2 x i64> %b) {
+; CHECK-LABEL: define <vscale x 8 x half> @test_scvtf_f16_i64(
+; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) {
+; CHECK-NEXT:    ret <vscale x 8 x half> [[A]]
+;
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.f16i64(<vscale x 8 x half> %a, <vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %b)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @test_scvtf_f32_i64(<vscale x 4 x float> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: define <vscale x 4 x float> @test_scvtf_f32_i64(
+; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) {
+; CHECK-NEXT:    ret <vscale x 4 x float> [[A]]
+;
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.scvtf.f32i64(<vscale x 4 x float> %a, <vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %b)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double>  @test_scvtf_f64_i32(<vscale x 2 x double>  %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: define <vscale x 2 x double> @test_scvtf_f64_i32(
+; CHECK-SAME: <vscale x 2 x double> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) {
+; CHECK-NEXT:    ret <vscale x 2 x double> [[A]]
+;
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.scvtf.f64i32(<vscale x 2 x double> %a, <vscale x 2 x i1> zeroinitializer, <vscale x 4 x i32> %b)
+  ret <vscale x 2 x double>  %out
+}
+
+define <vscale x 8 x half> @test_ucvtf(<vscale x 8 x half> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: define <vscale x 8 x half> @test_ucvtf(
+; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) {
+; CHECK-NEXT:    ret <vscale x 8 x half> [[A]]
+;
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.nxv8f16.nxv8i16(<vscale x 8 x half> %a, <vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @test_ucvtf_f16_i32(<vscale x 8 x half> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: define <vscale x 8 x half> @test_ucvtf_f16_i32(
+; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) {
+; CHECK-NEXT:    ret <vscale x 8 x half> [[A]]
+;
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.f16i32(<vscale x 8 x half> %a, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %b)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @test_ucvtf_f16_i64(<vscale x 8 x half> %a,<vscale x 2 x i64> %b) {
+; CHECK-LABEL: define <vscale x 8 x half> @test_ucvtf_f16_i64(
+; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) {
+; CHECK-NEXT:    ret <vscale x 8 x half> [[A]]
+;
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.f16i64(<vscale x 8 x half> %a, <vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %b)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @test_ucvtf_f32_i64(<vscale x 4 x float> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: define <vscale x 4 x float> @test_ucvtf_f32_i64(
+; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) {
+; CHECK-NEXT:    ret <vscale x 4 x float> [[A]]
+;
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.ucvtf.f32i64(<vscale x 4 x float> %a, <vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %b)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double>  @test_ucvtf_f64_i32(<vscale x 2 x double>  %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: define <vscale x 2 x double> @test_ucvtf_f64_i32(
+; CHECK-SAME: <vscale x 2 x double> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) {
+; CHECK-NEXT:    ret <vscale x 2 x double> [[A]]
+;
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.ucvtf.f64i32(<vscale x 2 x double> %a, <vscale x 2 x i1> zeroinitializer, <vscale x 4 x i32> %b)
+  ret <vscale x 2 x double>  %out
+}