[llvm] [AArch64] optimise SVE cvt intrinsics with no active lanes (PR #104809)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 29 02:16:51 PDT 2024
https://github.com/Lukacma updated https://github.com/llvm/llvm-project/pull/104809
>From f77215aebf7592a3bc958c56a64b448bfb482136 Mon Sep 17 00:00:00 2001
From: Marian Lukac <Marian.Lukac at arm.com>
Date: Mon, 19 Aug 2024 14:43:04 +0000
Subject: [PATCH 1/4] [AArch64] optimise SVE cvt intrinsics with no active
lanes
---
.../AArch64/AArch64TargetTransformInfo.cpp | 62 +++-
.../sve-intrinsic-comb-no-active-lanes-cvt.ll | 309 ++++++++++++++++++
2 files changed, 370 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-cvt.ll
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index a782c9c4351237..c546dfb1453492 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1073,6 +1073,32 @@ static bool isAllActivePredicate(Value *Pred) {
m_ConstantInt<AArch64SVEPredPattern::all>()));
}
+// Simplify unary operation where predicate has all inactive lanes by replacing
+// instruction with its operand
+static std::optional<Instruction *>
+instCombineSVENoActiveUnaryReplace(InstCombiner &IC, IntrinsicInst &II,
+ bool hasInactiveVector) {
+ int PredOperand = hasInactiveVector ? 1 : 0;
+ int ReplaceOperand = hasInactiveVector ? 0 : 1;
+ if (match(II.getOperand(PredOperand), m_ZeroInt())) {
+ IC.replaceInstUsesWith(II, II.getOperand(ReplaceOperand));
+ return IC.eraseInstFromFunction(II);
+ }
+ return std::nullopt;
+}
+
+// Simplify unary operation where predicate has all inactive lanes or try to
+// replace with _x form when all lanes are active
+static std::optional<Instruction *>
+instCombineSVEAllOrNoActiveUnary(InstCombiner &IC, IntrinsicInst &II) {
+ if (isAllActivePredicate(II.getOperand(1)) &&
+ !isa<llvm::UndefValue>(II.getOperand(0))) {
+ Value *Undef = llvm::UndefValue::get(II.getType());
+ return IC.replaceOperand(II, 0, Undef);
+ }
+ return instCombineSVENoActiveUnaryReplace(IC, II, true);
+}
+
// Erase unary operation where predicate has all inactive lanes
static std::optional<Instruction *>
instCombineSVENoActiveUnaryErase(InstCombiner &IC, IntrinsicInst &II,
@@ -2104,7 +2130,41 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
switch (IID) {
default:
break;
-
+ case Intrinsic::aarch64_sve_fcvt_bf16f32:
+ case Intrinsic::aarch64_sve_fcvt_f16f32:
+ case Intrinsic::aarch64_sve_fcvt_f16f64:
+ case Intrinsic::aarch64_sve_fcvt_f32f16:
+ case Intrinsic::aarch64_sve_fcvt_f32f64:
+ case Intrinsic::aarch64_sve_fcvt_f64f16:
+ case Intrinsic::aarch64_sve_fcvt_f64f32:
+ case Intrinsic::aarch64_sve_fcvtlt_f32f16:
+ case Intrinsic::aarch64_sve_fcvtlt_f64f32:
+ case Intrinsic::aarch64_sve_fcvtnt_bf16f32:
+ case Intrinsic::aarch64_sve_fcvtnt_f16f32:
+ case Intrinsic::aarch64_sve_fcvtnt_f32f64:
+ case Intrinsic::aarch64_sve_fcvtx_f32f64:
+ case Intrinsic::aarch64_sve_fcvtxnt_f32f64:
+ case Intrinsic::aarch64_sve_fcvtzs:
+ case Intrinsic::aarch64_sve_fcvtzs_i32f16:
+ case Intrinsic::aarch64_sve_fcvtzs_i32f64:
+ case Intrinsic::aarch64_sve_fcvtzs_i64f16:
+ case Intrinsic::aarch64_sve_fcvtzs_i64f32:
+ case Intrinsic::aarch64_sve_fcvtzu:
+ case Intrinsic::aarch64_sve_fcvtzu_i32f16:
+ case Intrinsic::aarch64_sve_fcvtzu_i32f64:
+ case Intrinsic::aarch64_sve_fcvtzu_i64f16:
+ case Intrinsic::aarch64_sve_fcvtzu_i64f32:
+ case Intrinsic::aarch64_sve_scvtf:
+ case Intrinsic::aarch64_sve_scvtf_f16i32:
+ case Intrinsic::aarch64_sve_scvtf_f16i64:
+ case Intrinsic::aarch64_sve_scvtf_f32i64:
+ case Intrinsic::aarch64_sve_scvtf_f64i32:
+ case Intrinsic::aarch64_sve_ucvtf:
+ case Intrinsic::aarch64_sve_ucvtf_f16i32:
+ case Intrinsic::aarch64_sve_ucvtf_f16i64:
+ case Intrinsic::aarch64_sve_ucvtf_f32i64:
+ case Intrinsic::aarch64_sve_ucvtf_f64i32:
+ return instCombineSVEAllOrNoActiveUnary(IC, II);
case Intrinsic::aarch64_sve_st1_scatter:
case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
case Intrinsic::aarch64_sve_st1_scatter_sxtw:
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-cvt.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-cvt.ll
new file mode 100644
index 00000000000000..eb01ff5dacab56
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-cvt.ll
@@ -0,0 +1,309 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+target triple = "aarch64-unknown-linux-gnu"
+
+define <vscale x 8 x bfloat> @test_fcvt_bf16_f32(<vscale x 8 x bfloat> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: define <vscale x 8 x bfloat> @test_fcvt_bf16_f32(
+; CHECK-SAME: <vscale x 8 x bfloat> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 8 x bfloat> [[A]]
+;
+ %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32(<vscale x 8 x bfloat> %a, <vscale x 8 x i1> zeroinitializer, <vscale x 4 x float> %b)
+ ret <vscale x 8 x bfloat> %out
+}
+
+define <vscale x 8 x half> @test_fcvt_f16_f32(<vscale x 8 x half> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: define <vscale x 8 x half> @test_fcvt_f16_f32(
+; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 8 x half> [[A]]
+;
+ %out = call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f32(<vscale x 8 x half> %a, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %b)
+ ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @test_fcvt_f16_f64(<vscale x 8 x half> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: define <vscale x 8 x half> @test_fcvt_f16_f64(
+; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 8 x half> [[A]]
+;
+ %out = call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f64(<vscale x 8 x half> %a, <vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %b)
+ ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @test_fcvt_f32_f16(<vscale x 4 x float> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: define <vscale x 4 x float> @test_fcvt_f32_f16(
+; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 4 x float> [[A]]
+;
+ %out = call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f16(<vscale x 4 x float> %a, <vscale x 4 x i1> zeroinitializer, <vscale x 8 x half> %b)
+ ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @test_fcvt_f32_f64(<vscale x 4 x float> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: define <vscale x 4 x float> @test_fcvt_f32_f64(
+; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 4 x float> [[A]]
+;
+ %out = call<vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f64(<vscale x 4 x float> %a, <vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %b)
+ ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @test_fcvt_f64_f16(<vscale x 2 x double> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: define <vscale x 2 x double> @test_fcvt_f64_f16(
+; CHECK-SAME: <vscale x 2 x double> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 2 x double> [[A]]
+;
+ %out = call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f16(<vscale x 2 x double> %a, <vscale x 2 x i1> zeroinitializer, <vscale x 8 x half> %b)
+ ret <vscale x 2 x double> %out
+}
+
+define <vscale x 2 x double> @test_fcvt_f64_f32(<vscale x 2 x double> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: define <vscale x 2 x double> @test_fcvt_f64_f32(
+; CHECK-SAME: <vscale x 2 x double> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 2 x double> [[A]]
+;
+ %out = call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f32(<vscale x 2 x double> %a, <vscale x 2 x i1> zeroinitializer, <vscale x 4 x float> %b)
+ ret <vscale x 2 x double> %out
+}
+
+define <vscale x 4 x float> @test_fcvtlt_f32_f16(<vscale x 4 x float> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: define <vscale x 4 x float> @test_fcvtlt_f32_f16(
+; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 4 x float> [[A]]
+;
+ %out = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtlt.f32f16(<vscale x 4 x float> %a, <vscale x 4 x i1> zeroinitializer, <vscale x 8 x half> %b)
+ ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @test_fcvtlt_f64_f32(<vscale x 2 x double> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: define <vscale x 2 x double> @test_fcvtlt_f64_f32(
+; CHECK-SAME: <vscale x 2 x double> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 2 x double> [[A]]
+;
+ %out = call <vscale x 2 x double> @llvm.aarch64.sve.fcvtlt.f64f32(<vscale x 2 x double> %a, <vscale x 2 x i1> zeroinitializer, <vscale x 4 x float> %b)
+ ret <vscale x 2 x double> %out
+}
+
+define <vscale x 8 x bfloat> @test_fcvtnt_bf16_f32(<vscale x 8 x bfloat> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: define <vscale x 8 x bfloat> @test_fcvtnt_bf16_f32(
+; CHECK-SAME: <vscale x 8 x bfloat> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 8 x bfloat> [[A]]
+;
+ %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32(<vscale x 8 x bfloat> %a, <vscale x 8 x i1> zeroinitializer, <vscale x 4 x float> %b)
+ ret <vscale x 8 x bfloat> %out
+}
+
+define <vscale x 8 x half> @test_fcvtnt_f16_f32(<vscale x 8 x half> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: define <vscale x 8 x half> @test_fcvtnt_f16_f32(
+; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 8 x half> [[A]]
+;
+ %out = call <vscale x 8 x half> @llvm.aarch64.sve.fcvtnt.f16f32(<vscale x 8 x half> %a, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %b)
+ ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @test_fcvtnt_f32_f64(<vscale x 4 x float> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: define <vscale x 4 x float> @test_fcvtnt_f32_f64(
+; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 4 x float> [[A]]
+;
+ %out = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtnt.f32f64(<vscale x 4 x float> %a, <vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %b)
+ ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @test_fcvtx_f32_f64(<vscale x 4 x float> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: define <vscale x 4 x float> @test_fcvtx_f32_f64(
+; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 4 x float> [[A]]
+;
+ %out = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtx.f32f64(<vscale x 4 x float> %a, <vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %b)
+ ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @test_fcvtxnt_f32_f64(<vscale x 4 x float> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: define <vscale x 4 x float> @test_fcvtxnt_f32_f64(
+; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 4 x float> [[A]]
+;
+ %out = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtxnt.f32f64(<vscale x 4 x float> %a, <vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %b)
+ ret <vscale x 4 x float> %out
+}
+
+define <vscale x 8 x i16> @test_fcvtzs(<vscale x 8 x i16> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: define <vscale x 8 x i16> @test_fcvtzs(
+; CHECK-SAME: <vscale x 8 x i16> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 8 x i16> [[A]]
+;
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzs.nxv4i32.nxv4f32(<vscale x 8 x i16> %a, <vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %b)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @test_fcvtzs_i32_f16(<vscale x 4 x i32> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: define <vscale x 4 x i32> @test_fcvtzs_i32_f16(
+; CHECK-SAME: <vscale x 4 x i32> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 4 x i32> [[A]]
+;
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.i32f16(<vscale x 4 x i32> %a, <vscale x 4 x i1> zeroinitializer,<vscale x 8 x half> %b)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 4 x i32> @test_fcvtzs_i32_f64(<vscale x 4 x i32> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: define <vscale x 4 x i32> @test_fcvtzs_i32_f64(
+; CHECK-SAME: <vscale x 4 x i32> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 4 x i32> [[A]]
+;
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.i32f64(<vscale x 4 x i32> %a, <vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %b)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @test_fcvtzs_i64_f16(<vscale x 2 x i64> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: define <vscale x 2 x i64> @test_fcvtzs_i64_f16(
+; CHECK-SAME: <vscale x 2 x i64> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 2 x i64> [[A]]
+;
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.i64f16(<vscale x 2 x i64> %a, <vscale x 2 x i1> zeroinitializer,<vscale x 8 x half> %b)
+ ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 2 x i64> @test_fcvtzs_i64_f32(<vscale x 2 x i64> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: define <vscale x 2 x i64> @test_fcvtzs_i64_f32(
+; CHECK-SAME: <vscale x 2 x i64> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 2 x i64> [[A]]
+;
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.i64f32(<vscale x 2 x i64> %a, <vscale x 2 x i1> zeroinitializer, <vscale x 4 x float> %b)
+ ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 8 x i16> @test_fcvtzu(<vscale x 8 x i16> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: define <vscale x 8 x i16> @test_fcvtzu(
+; CHECK-SAME: <vscale x 8 x i16> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 8 x i16> [[A]]
+;
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzu.nxv4i32.nxv4f32(<vscale x 8 x i16> %a, <vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %b)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @test_fcvtzu_i32_f16(<vscale x 4 x i32> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: define <vscale x 4 x i32> @test_fcvtzu_i32_f16(
+; CHECK-SAME: <vscale x 4 x i32> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 4 x i32> [[A]]
+;
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.i32f16(<vscale x 4 x i32> %a, <vscale x 4 x i1> zeroinitializer,<vscale x 8 x half> %b)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 4 x i32> @test_fcvtzu_i32_f64(<vscale x 4 x i32> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: define <vscale x 4 x i32> @test_fcvtzu_i32_f64(
+; CHECK-SAME: <vscale x 4 x i32> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 4 x i32> [[A]]
+;
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.i32f64(<vscale x 4 x i32> %a, <vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %b)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @test_fcvtzu_i64_f16(<vscale x 2 x i64> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: define <vscale x 2 x i64> @test_fcvtzu_i64_f16(
+; CHECK-SAME: <vscale x 2 x i64> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 2 x i64> [[A]]
+;
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.i64f16(<vscale x 2 x i64> %a, <vscale x 2 x i1> zeroinitializer,<vscale x 8 x half> %b)
+ ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 2 x i64> @test_fcvtzu_i64_f32(<vscale x 2 x i64> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: define <vscale x 2 x i64> @test_fcvtzu_i64_f32(
+; CHECK-SAME: <vscale x 2 x i64> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 2 x i64> [[A]]
+;
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.i64f32(<vscale x 2 x i64> %a, <vscale x 2 x i1> zeroinitializer, <vscale x 4 x float> %b)
+ ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 8 x half> @test_scvtf(<vscale x 8 x half> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: define <vscale x 8 x half> @test_scvtf(
+; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 8 x half> [[A]]
+;
+ %out = call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.nxv8f16.nxv8i16(<vscale x 8 x half> %a, <vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %b)
+ ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @test_scvtf_f16_i32(<vscale x 8 x half> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: define <vscale x 8 x half> @test_scvtf_f16_i32(
+; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 8 x half> [[A]]
+;
+ %out = call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.f16i32(<vscale x 8 x half> %a, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %b)
+ ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @test_scvtf_f16_i64(<vscale x 8 x half> %a,<vscale x 2 x i64> %b) {
+; CHECK-LABEL: define <vscale x 8 x half> @test_scvtf_f16_i64(
+; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 8 x half> [[A]]
+;
+ %out = call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.f16i64(<vscale x 8 x half> %a, <vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %b)
+ ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @test_scvtf_f32_i64(<vscale x 4 x float> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: define <vscale x 4 x float> @test_scvtf_f32_i64(
+; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 4 x float> [[A]]
+;
+ %out = call <vscale x 4 x float> @llvm.aarch64.sve.scvtf.f32i64(<vscale x 4 x float> %a, <vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %b)
+ ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @test_scvtf_f64_i32(<vscale x 2 x double> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: define <vscale x 2 x double> @test_scvtf_f64_i32(
+; CHECK-SAME: <vscale x 2 x double> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 2 x double> [[A]]
+;
+ %out = call <vscale x 2 x double> @llvm.aarch64.sve.scvtf.f64i32(<vscale x 2 x double> %a, <vscale x 2 x i1> zeroinitializer, <vscale x 4 x i32> %b)
+ ret <vscale x 2 x double> %out
+}
+
+define <vscale x 8 x half> @test_ucvtf(<vscale x 8 x half> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: define <vscale x 8 x half> @test_ucvtf(
+; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 8 x half> [[A]]
+;
+ %out = call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.nxv8f16.nxv8i16(<vscale x 8 x half> %a, <vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %b)
+ ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @test_ucvtf_f16_i32(<vscale x 8 x half> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: define <vscale x 8 x half> @test_ucvtf_f16_i32(
+; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 8 x half> [[A]]
+;
+ %out = call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.f16i32(<vscale x 8 x half> %a, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %b)
+ ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @test_ucvtf_f16_i64(<vscale x 8 x half> %a,<vscale x 2 x i64> %b) {
+; CHECK-LABEL: define <vscale x 8 x half> @test_ucvtf_f16_i64(
+; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 8 x half> [[A]]
+;
+ %out = call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.f16i64(<vscale x 8 x half> %a, <vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %b)
+ ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @test_ucvtf_f32_i64(<vscale x 4 x float> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: define <vscale x 4 x float> @test_ucvtf_f32_i64(
+; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 4 x float> [[A]]
+;
+ %out = call <vscale x 4 x float> @llvm.aarch64.sve.ucvtf.f32i64(<vscale x 4 x float> %a, <vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %b)
+ ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @test_ucvtf_f64_i32(<vscale x 2 x double> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: define <vscale x 2 x double> @test_ucvtf_f64_i32(
+; CHECK-SAME: <vscale x 2 x double> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) {
+; CHECK-NEXT: ret <vscale x 2 x double> [[A]]
+;
+ %out = call <vscale x 2 x double> @llvm.aarch64.sve.ucvtf.f64i32(<vscale x 2 x double> %a, <vscale x 2 x i1> zeroinitializer, <vscale x 4 x i32> %b)
+ ret <vscale x 2 x double> %out
+}
>From 82cc1e31368711b3c527dfc3399f5516c57d70aa Mon Sep 17 00:00:00 2001
From: Marian Lukac <Marian.Lukac at arm.com>
Date: Thu, 22 Aug 2024 15:37:47 +0000
Subject: [PATCH 2/4] Fix function naming and tests and added test for all
active lanes
---
.../AArch64/AArch64TargetTransformInfo.cpp | 6 +-
...sve-intrinsic-comb-all-active-lanes-cvt.ll | 411 ++++++++++++++++++
.../sve-intrinsic-comb-no-active-lanes-cvt.ll | 4 +-
3 files changed, 416 insertions(+), 5 deletions(-)
create mode 100644 llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-all-active-lanes-cvt.ll
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index c546dfb1453492..52ec5d4c0a8c4c 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1076,8 +1076,8 @@ static bool isAllActivePredicate(Value *Pred) {
// Simplify unary operation where predicate has all inactive lanes by replacing
// instruction with its operand
static std::optional<Instruction *>
-instCombineSVENoActiveUnaryReplace(InstCombiner &IC, IntrinsicInst &II,
- bool hasInactiveVector) {
+instCombineSVENoActiveReplace(InstCombiner &IC, IntrinsicInst &II,
+ bool hasInactiveVector) {
int PredOperand = hasInactiveVector ? 1 : 0;
int ReplaceOperand = hasInactiveVector ? 0 : 1;
if (match(II.getOperand(PredOperand), m_ZeroInt())) {
@@ -1096,7 +1096,7 @@ instCombineSVEAllOrNoActiveUnary(InstCombiner &IC, IntrinsicInst &II) {
Value *Undef = llvm::UndefValue::get(II.getType());
return IC.replaceOperand(II, 0, Undef);
}
- return instCombineSVENoActiveUnaryReplace(IC, II, true);
+ return instCombineSVENoActiveReplace(IC, II, true);
}
// Erase unary operation where predicate has all inactive lanes
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-all-active-lanes-cvt.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-all-active-lanes-cvt.ll
new file mode 100644
index 00000000000000..40da5c57553351
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-all-active-lanes-cvt.ll
@@ -0,0 +1,411 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+target triple = "aarch64-unknown-linux-gnu"
+
+define <vscale x 8 x bfloat> @test_fcvt_bf16_f32(<vscale x 8 x bfloat> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: define <vscale x 8 x bfloat> @test_fcvt_bf16_f32(
+; CHECK-SAME: <vscale x 8 x bfloat> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
+; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> [[PG]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT: ret <vscale x 8 x bfloat> [[OUT]]
+;
+ %pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32(<vscale x 8 x bfloat> %a, <vscale x 8 x i1> %pg, <vscale x 4 x float> %b)
+ ret <vscale x 8 x bfloat> %out
+}
+
+define <vscale x 8 x half> @test_fcvt_f16_f32(<vscale x 8 x half> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: define <vscale x 8 x half> @test_fcvt_f16_f32(
+; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
+; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f32(<vscale x 8 x half> undef, <vscale x 4 x i1> [[PG]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT: ret <vscale x 8 x half> [[OUT]]
+;
+ %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %out = call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f32(<vscale x 8 x half> %a, <vscale x 4 x i1> %pg, <vscale x 4 x float> %b)
+ ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @test_fcvt_f16_f64(<vscale x 8 x half> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: define <vscale x 8 x half> @test_fcvt_f16_f64(
+; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) {
+; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f64(<vscale x 8 x half> undef, <vscale x 2 x i1> [[PG]], <vscale x 2 x double> [[B]])
+; CHECK-NEXT: ret <vscale x 8 x half> [[OUT]]
+;
+ %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %out = call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f64(<vscale x 8 x half> %a, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b)
+ ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @test_fcvt_f32_f16(<vscale x 4 x float> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: define <vscale x 4 x float> @test_fcvt_f32_f16(
+; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
+; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f16(<vscale x 4 x float> undef, <vscale x 4 x i1> [[PG]], <vscale x 8 x half> [[B]])
+; CHECK-NEXT: ret <vscale x 4 x float> [[OUT]]
+;
+ %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %out = call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f16(<vscale x 4 x float> %a, <vscale x 4 x i1> %pg, <vscale x 8 x half> %b)
+ ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @test_fcvt_f32_f64(<vscale x 4 x float> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: define <vscale x 4 x float> @test_fcvt_f32_f64(
+; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) {
+; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f64(<vscale x 4 x float> undef, <vscale x 2 x i1> [[PG]], <vscale x 2 x double> [[B]])
+; CHECK-NEXT: ret <vscale x 4 x float> [[OUT]]
+;
+ %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %out = call<vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f64(<vscale x 4 x float> %a, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b)
+ ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @test_fcvt_f64_f16(<vscale x 2 x double> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: define <vscale x 2 x double> @test_fcvt_f64_f16(
+; CHECK-SAME: <vscale x 2 x double> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
+; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f16(<vscale x 2 x double> undef, <vscale x 2 x i1> [[PG]], <vscale x 8 x half> [[B]])
+; CHECK-NEXT: ret <vscale x 2 x double> [[OUT]]
+;
+ %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %out = call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f16(<vscale x 2 x double> %a, <vscale x 2 x i1> %pg, <vscale x 8 x half> %b)
+ ret <vscale x 2 x double> %out
+}
+
+define <vscale x 2 x double> @test_fcvt_f64_f32(<vscale x 2 x double> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: define <vscale x 2 x double> @test_fcvt_f64_f32(
+; CHECK-SAME: <vscale x 2 x double> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
+; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f32(<vscale x 2 x double> undef, <vscale x 2 x i1> [[PG]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT: ret <vscale x 2 x double> [[OUT]]
+;
+ %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %out = call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f32(<vscale x 2 x double> %a, <vscale x 2 x i1> %pg, <vscale x 4 x float> %b)
+ ret <vscale x 2 x double> %out
+}
+
+define <vscale x 4 x float> @test_fcvtlt_f32_f16(<vscale x 4 x float> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: define <vscale x 4 x float> @test_fcvtlt_f32_f16(
+; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
+; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtlt.f32f16(<vscale x 4 x float> undef, <vscale x 4 x i1> [[PG]], <vscale x 8 x half> [[B]])
+; CHECK-NEXT: ret <vscale x 4 x float> [[OUT]]
+;
+ %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %out = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtlt.f32f16(<vscale x 4 x float> %a, <vscale x 4 x i1> %pg, <vscale x 8 x half> %b)
+ ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @test_fcvtlt_f64_f32(<vscale x 2 x double> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: define <vscale x 2 x double> @test_fcvtlt_f64_f32(
+; CHECK-SAME: <vscale x 2 x double> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
+; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fcvtlt.f64f32(<vscale x 2 x double> undef, <vscale x 2 x i1> [[PG]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT: ret <vscale x 2 x double> [[OUT]]
+;
+ %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %out = call <vscale x 2 x double> @llvm.aarch64.sve.fcvtlt.f64f32(<vscale x 2 x double> %a, <vscale x 2 x i1> %pg, <vscale x 4 x float> %b)
+ ret <vscale x 2 x double> %out
+}
+
+define <vscale x 8 x bfloat> @test_fcvtnt_bf16_f32(<vscale x 8 x bfloat> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: define <vscale x 8 x bfloat> @test_fcvtnt_bf16_f32(
+; CHECK-SAME: <vscale x 8 x bfloat> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
+; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> [[PG]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT: ret <vscale x 8 x bfloat> [[OUT]]
+;
+ %pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32(<vscale x 8 x bfloat> %a, <vscale x 8 x i1> %pg, <vscale x 4 x float> %b)
+ ret <vscale x 8 x bfloat> %out
+}
+
+define <vscale x 8 x half> @test_fcvtnt_f16_f32(<vscale x 8 x half> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: define <vscale x 8 x half> @test_fcvtnt_f16_f32(
+; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
+; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fcvtnt.f16f32(<vscale x 8 x half> undef, <vscale x 4 x i1> [[PG]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT: ret <vscale x 8 x half> [[OUT]]
+;
+ %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %out = call <vscale x 8 x half> @llvm.aarch64.sve.fcvtnt.f16f32(<vscale x 8 x half> %a, <vscale x 4 x i1> %pg, <vscale x 4 x float> %b)
+ ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @test_fcvtnt_f32_f64(<vscale x 4 x float> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: define <vscale x 4 x float> @test_fcvtnt_f32_f64(
+; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) {
+; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtnt.f32f64(<vscale x 4 x float> undef, <vscale x 2 x i1> [[PG]], <vscale x 2 x double> [[B]])
+; CHECK-NEXT: ret <vscale x 4 x float> [[OUT]]
+;
+ %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %out = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtnt.f32f64(<vscale x 4 x float> %a, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b)
+ ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @test_fcvtx_f32_f64(<vscale x 4 x float> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: define <vscale x 4 x float> @test_fcvtx_f32_f64(
+; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) {
+; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtx.f32f64(<vscale x 4 x float> undef, <vscale x 2 x i1> [[PG]], <vscale x 2 x double> [[B]])
+; CHECK-NEXT: ret <vscale x 4 x float> [[OUT]]
+;
+ %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %out = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtx.f32f64(<vscale x 4 x float> %a, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b)
+ ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @test_fcvtxnt_f32_f64(<vscale x 4 x float> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: define <vscale x 4 x float> @test_fcvtxnt_f32_f64(
+; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) {
+; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtxnt.f32f64(<vscale x 4 x float> undef, <vscale x 2 x i1> [[PG]], <vscale x 2 x double> [[B]])
+; CHECK-NEXT: ret <vscale x 4 x float> [[OUT]]
+;
+ %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %out = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtxnt.f32f64(<vscale x 4 x float> %a, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b)
+ ret <vscale x 4 x float> %out
+}
+
+define <vscale x 8 x i16> @test_fcvtzs(<vscale x 8 x i16> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: define <vscale x 8 x i16> @test_fcvtzs(
+; CHECK-SAME: <vscale x 8 x i16> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
+; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzs.nxv8i16.nxv8f16(<vscale x 8 x i16> undef, <vscale x 8 x i1> [[PG]], <vscale x 8 x half> [[B]])
+; CHECK-NEXT: ret <vscale x 8 x i16> [[OUT]]
+;
+ %pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzs.nxv4i32.nxv4f32(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x half> %b)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @test_fcvtzs_i32_f16(<vscale x 4 x i32> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: define <vscale x 4 x i32> @test_fcvtzs_i32_f16(
+; CHECK-SAME: <vscale x 4 x i32> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
+; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.i32f16(<vscale x 4 x i32> undef, <vscale x 4 x i1> [[PG]], <vscale x 8 x half> [[B]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[OUT]]
+;
+ %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.i32f16(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg,<vscale x 8 x half> %b)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 4 x i32> @test_fcvtzs_i32_f64(<vscale x 4 x i32> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: define <vscale x 4 x i32> @test_fcvtzs_i32_f64(
+; CHECK-SAME: <vscale x 4 x i32> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) {
+; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.i32f64(<vscale x 4 x i32> undef, <vscale x 2 x i1> [[PG]], <vscale x 2 x double> [[B]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[OUT]]
+;
+ %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.i32f64(<vscale x 4 x i32> %a, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @test_fcvtzs_i64_f16(<vscale x 2 x i64> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: define <vscale x 2 x i64> @test_fcvtzs_i64_f16(
+; CHECK-SAME: <vscale x 2 x i64> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
+; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.i64f16(<vscale x 2 x i64> undef, <vscale x 2 x i1> [[PG]], <vscale x 8 x half> [[B]])
+; CHECK-NEXT: ret <vscale x 2 x i64> [[OUT]]
+;
+ %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.i64f16(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg,<vscale x 8 x half> %b)
+ ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 2 x i64> @test_fcvtzs_i64_f32(<vscale x 2 x i64> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: define <vscale x 2 x i64> @test_fcvtzs_i64_f32(
+; CHECK-SAME: <vscale x 2 x i64> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
+; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.i64f32(<vscale x 2 x i64> undef, <vscale x 2 x i1> [[PG]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT: ret <vscale x 2 x i64> [[OUT]]
+;
+ %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.i64f32(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 4 x float> %b)
+ ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 8 x i16> @test_fcvtzu(<vscale x 8 x i16> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: define <vscale x 8 x i16> @test_fcvtzu(
+; CHECK-SAME: <vscale x 8 x i16> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
+; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzu.nxv8i16.nxv8f16(<vscale x 8 x i16> undef, <vscale x 8 x i1> [[PG]], <vscale x 8 x half> [[B]])
+; CHECK-NEXT: ret <vscale x 8 x i16> [[OUT]]
+;
+ %pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzu.nxv4i32.nxv4f32(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x half> %b)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @test_fcvtzu_i32_f16(<vscale x 4 x i32> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: define <vscale x 4 x i32> @test_fcvtzu_i32_f16(
+; CHECK-SAME: <vscale x 4 x i32> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
+; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.i32f16(<vscale x 4 x i32> undef, <vscale x 4 x i1> [[PG]], <vscale x 8 x half> [[B]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[OUT]]
+;
+ %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.i32f16(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg,<vscale x 8 x half> %b)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 4 x i32> @test_fcvtzu_i32_f64(<vscale x 4 x i32> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: define <vscale x 4 x i32> @test_fcvtzu_i32_f64(
+; CHECK-SAME: <vscale x 4 x i32> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) {
+; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.i32f64(<vscale x 4 x i32> undef, <vscale x 2 x i1> [[PG]], <vscale x 2 x double> [[B]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[OUT]]
+;
+ %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.i32f64(<vscale x 4 x i32> %a, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @test_fcvtzu_i64_f16(<vscale x 2 x i64> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: define <vscale x 2 x i64> @test_fcvtzu_i64_f16(
+; CHECK-SAME: <vscale x 2 x i64> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
+; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.i64f16(<vscale x 2 x i64> undef, <vscale x 2 x i1> [[PG]], <vscale x 8 x half> [[B]])
+; CHECK-NEXT: ret <vscale x 2 x i64> [[OUT]]
+;
+ %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.i64f16(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg,<vscale x 8 x half> %b)
+ ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 2 x i64> @test_fcvtzu_i64_f32(<vscale x 2 x i64> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: define <vscale x 2 x i64> @test_fcvtzu_i64_f32(
+; CHECK-SAME: <vscale x 2 x i64> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
+; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.i64f32(<vscale x 2 x i64> undef, <vscale x 2 x i1> [[PG]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT: ret <vscale x 2 x i64> [[OUT]]
+;
+ %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.i64f32(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 4 x float> %b)
+ ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 8 x half> @test_scvtf(<vscale x 8 x half> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: define <vscale x 8 x half> @test_scvtf(
+; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) {
+; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.nxv8f16.nxv8i16(<vscale x 8 x half> undef, <vscale x 8 x i1> [[PG]], <vscale x 8 x i16> [[B]])
+; CHECK-NEXT: ret <vscale x 8 x half> [[OUT]]
+;
+ %pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %out = call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.nxv8f16.nxv8i16(<vscale x 8 x half> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b)
+ ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @test_scvtf_f16_i32(<vscale x 8 x half> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: define <vscale x 8 x half> @test_scvtf_f16_i32(
+; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) {
+; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.f16i32(<vscale x 8 x half> undef, <vscale x 4 x i1> [[PG]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT: ret <vscale x 8 x half> [[OUT]]
+;
+ %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %out = call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.f16i32(<vscale x 8 x half> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
+ ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @test_scvtf_f16_i64(<vscale x 8 x half> %a,<vscale x 2 x i64> %b) {
+; CHECK-LABEL: define <vscale x 8 x half> @test_scvtf_f16_i64(
+; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) {
+; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.f16i64(<vscale x 8 x half> undef, <vscale x 2 x i1> [[PG]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT: ret <vscale x 8 x half> [[OUT]]
+;
+ %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %out = call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.f16i64(<vscale x 8 x half> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
+ ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @test_scvtf_f32_i64(<vscale x 4 x float> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: define <vscale x 4 x float> @test_scvtf_f32_i64(
+; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) {
+; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.scvtf.f32i64(<vscale x 4 x float> undef, <vscale x 2 x i1> [[PG]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT: ret <vscale x 4 x float> [[OUT]]
+;
+ %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %out = call <vscale x 4 x float> @llvm.aarch64.sve.scvtf.f32i64(<vscale x 4 x float> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
+ ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @test_scvtf_f64_i32(<vscale x 2 x double> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: define <vscale x 2 x double> @test_scvtf_f64_i32(
+; CHECK-SAME: <vscale x 2 x double> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) {
+; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.scvtf.f64i32(<vscale x 2 x double> undef, <vscale x 2 x i1> [[PG]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT: ret <vscale x 2 x double> [[OUT]]
+;
+ %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %out = call <vscale x 2 x double> @llvm.aarch64.sve.scvtf.f64i32(<vscale x 2 x double> %a, <vscale x 2 x i1> %pg, <vscale x 4 x i32> %b)
+ ret <vscale x 2 x double> %out
+}
+
+define <vscale x 8 x half> @test_ucvtf(<vscale x 8 x half> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: define <vscale x 8 x half> @test_ucvtf(
+; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) {
+; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.nxv8f16.nxv8i16(<vscale x 8 x half> undef, <vscale x 8 x i1> [[PG]], <vscale x 8 x i16> [[B]])
+; CHECK-NEXT: ret <vscale x 8 x half> [[OUT]]
+;
+ %pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %out = call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.nxv8f16.nxv8i16(<vscale x 8 x half> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b)
+ ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @test_ucvtf_f16_i32(<vscale x 8 x half> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: define <vscale x 8 x half> @test_ucvtf_f16_i32(
+; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) {
+; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.f16i32(<vscale x 8 x half> undef, <vscale x 4 x i1> [[PG]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT: ret <vscale x 8 x half> [[OUT]]
+;
+ %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %out = call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.f16i32(<vscale x 8 x half> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
+ ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @test_ucvtf_f16_i64(<vscale x 8 x half> %a,<vscale x 2 x i64> %b) {
+; CHECK-LABEL: define <vscale x 8 x half> @test_ucvtf_f16_i64(
+; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) {
+; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.f16i64(<vscale x 8 x half> undef, <vscale x 2 x i1> [[PG]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT: ret <vscale x 8 x half> [[OUT]]
+;
+ %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %out = call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.f16i64(<vscale x 8 x half> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
+ ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @test_ucvtf_f32_i64(<vscale x 4 x float> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: define <vscale x 4 x float> @test_ucvtf_f32_i64(
+; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) {
+; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.ucvtf.f32i64(<vscale x 4 x float> undef, <vscale x 2 x i1> [[PG]], <vscale x 2 x i64> [[B]])
+; CHECK-NEXT: ret <vscale x 4 x float> [[OUT]]
+;
+ %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %out = call <vscale x 4 x float> @llvm.aarch64.sve.ucvtf.f32i64(<vscale x 4 x float> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
+ ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @test_ucvtf_f64_i32(<vscale x 2 x double> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: define <vscale x 2 x double> @test_ucvtf_f64_i32(
+; CHECK-SAME: <vscale x 2 x double> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) {
+; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.ucvtf.f64i32(<vscale x 2 x double> undef, <vscale x 2 x i1> [[PG]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT: ret <vscale x 2 x double> [[OUT]]
+;
+ %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %out = call <vscale x 2 x double> @llvm.aarch64.sve.ucvtf.f64i32(<vscale x 2 x double> %a, <vscale x 2 x i1> %pg, <vscale x 4 x i32> %b)
+ ret <vscale x 2 x double> %out
+}
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-cvt.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-cvt.ll
index eb01ff5dacab56..9b1528eda8ffd4 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-cvt.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-cvt.ll
@@ -133,7 +133,7 @@ define <vscale x 8 x i16> @test_fcvtzs(<vscale x 8 x i16> %a, <vscale x 8 x half
; CHECK-SAME: <vscale x 8 x i16> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
; CHECK-NEXT: ret <vscale x 8 x i16> [[A]]
;
- %out = call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzs.nxv4i32.nxv4f32(<vscale x 8 x i16> %a, <vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %b)
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzs.nxv8i16.nxv8f16(<vscale x 8 x i16> %a, <vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %b)
ret <vscale x 8 x i16> %out
}
@@ -178,7 +178,7 @@ define <vscale x 8 x i16> @test_fcvtzu(<vscale x 8 x i16> %a, <vscale x 8 x half
; CHECK-SAME: <vscale x 8 x i16> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
; CHECK-NEXT: ret <vscale x 8 x i16> [[A]]
;
- %out = call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzu.nxv4i32.nxv4f32(<vscale x 8 x i16> %a, <vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %b)
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzu.nxv8i16.nxv8f16(<vscale x 8 x i16> %a, <vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %b)
ret <vscale x 8 x i16> %out
}
>From c6b530e0ce857ce4706643f0554423efa43665d0 Mon Sep 17 00:00:00 2001
From: Marian Lukac <Marian.Lukac at arm.com>
Date: Tue, 27 Aug 2024 11:37:50 +0000
Subject: [PATCH 3/4] Fix comment and add poison value check
---
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 52ec5d4c0a8c4c..4f8e60b9e94ad1 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1087,12 +1087,13 @@ instCombineSVENoActiveReplace(InstCombiner &IC, IntrinsicInst &II,
return std::nullopt;
}
-// Simplify unary operation where predicate has all inactive lanes or try to
-// replace with _x form when all lanes are active
+// Simplify unary operation where predicate has all inactive lanes or
+// replace unused first operand with undef when all lanes are active
static std::optional<Instruction *>
instCombineSVEAllOrNoActiveUnary(InstCombiner &IC, IntrinsicInst &II) {
if (isAllActivePredicate(II.getOperand(1)) &&
- !isa<llvm::UndefValue>(II.getOperand(0))) {
+ !isa<llvm::UndefValue>(II.getOperand(0)) &&
+ !isa<llvm::PoisonValue>(II.getOperand(0)) {
Value *Undef = llvm::UndefValue::get(II.getType());
return IC.replaceOperand(II, 0, Undef);
}
>From e5340ce53edf1da5b51f1a4c2df963ae726a4db0 Mon Sep 17 00:00:00 2001
From: Marian Lukac <Marian.Lukac at arm.com>
Date: Wed, 28 Aug 2024 14:03:11 +0000
Subject: [PATCH 4/4] add tests for poison/undef and fix intrinsics name
---
.../AArch64/AArch64TargetTransformInfo.cpp | 2 +-
...sve-intrinsic-comb-all-active-lanes-cvt.ll | 26 ++++++++++++++++++-
2 files changed, 26 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 4f8e60b9e94ad1..e447440713530f 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1093,7 +1093,7 @@ static std::optional<Instruction *>
instCombineSVEAllOrNoActiveUnary(InstCombiner &IC, IntrinsicInst &II) {
if (isAllActivePredicate(II.getOperand(1)) &&
!isa<llvm::UndefValue>(II.getOperand(0)) &&
- !isa<llvm::PoisonValue>(II.getOperand(0)) {
+ !isa<llvm::PoisonValue>(II.getOperand(0))) {
Value *Undef = llvm::UndefValue::get(II.getType());
return IC.replaceOperand(II, 0, Undef);
}
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-all-active-lanes-cvt.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-all-active-lanes-cvt.ll
index 40da5c57553351..374a9851917685 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-all-active-lanes-cvt.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-all-active-lanes-cvt.ll
@@ -2,6 +2,30 @@
; RUN: opt -S -passes=instcombine < %s | FileCheck %s
target triple = "aarch64-unknown-linux-gnu"
+define <vscale x 8 x bfloat> @test_fcvt_bf16_f32_undef(<vscale x 8 x bfloat> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: define <vscale x 8 x bfloat> @test_fcvt_bf16_f32_undef(
+; CHECK-SAME: <vscale x 8 x bfloat> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
+; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> [[PG]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT: ret <vscale x 8 x bfloat> [[OUT]]
+;
+ %pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> %pg, <vscale x 4 x float> %b)
+ ret <vscale x 8 x bfloat> %out
+}
+
+define <vscale x 8 x bfloat> @test_fcvt_bf16_f32_poison(<vscale x 8 x bfloat> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: define <vscale x 8 x bfloat> @test_fcvt_bf16_f32_poison(
+; CHECK-SAME: <vscale x 8 x bfloat> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
+; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32(<vscale x 8 x bfloat> poison, <vscale x 8 x i1> [[PG]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT: ret <vscale x 8 x bfloat> [[OUT]]
+;
+ %pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32(<vscale x 8 x bfloat> poison, <vscale x 8 x i1> %pg, <vscale x 4 x float> %b)
+ ret <vscale x 8 x bfloat> %out
+}
+
define <vscale x 8 x bfloat> @test_fcvt_bf16_f32(<vscale x 8 x bfloat> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: define <vscale x 8 x bfloat> @test_fcvt_bf16_f32(
; CHECK-SAME: <vscale x 8 x bfloat> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
@@ -178,7 +202,7 @@ define <vscale x 8 x i16> @test_fcvtzs(<vscale x 8 x i16> %a, <vscale x 8 x half
; CHECK-NEXT: ret <vscale x 8 x i16> [[OUT]]
;
%pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
- %out = call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzs.nxv4i32.nxv4f32(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x half> %b)
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzs.nxv4i16.nxv4f16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x half> %b)
ret <vscale x 8 x i16> %out
}
More information about the llvm-commits
mailing list