[llvm] [AArch64] Generate zeroing forms of certain SVE2.2 instructions (7/11) (PR #116833)
Momchil Velikov via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 20 10:17:50 PST 2024
https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/116833
>From 482b828db88326bc2e356f53fe11f03443a7989e Mon Sep 17 00:00:00 2001
From: Momchil Velikov <momchil.velikov at arm.com>
Date: Thu, 19 Dec 2024 15:43:05 +0000
Subject: [PATCH 1/8] [AArch64] Prefer SVE2.2 zeroing forms of certain
instructions with an all-true predicate
When the predicate of a destructive operation is known to be all-true,
for example
fabs z0.s, p0/m, z1.s
then the entire output register is written and we can use a zeroing
(instead of a merging) form of the instruction, for example
fabs z0.s, p0/z, z1.s
thus eliminate the dependency on the input-output destination register
without the need to insert a `movprfx`.
This patch complements (and in the case of 2b3266c1701f, fixes a regression)
the following:
7f4414b2a1a4 [AArch64] Generate zeroing forms of certain SVE2.2 instructions (4/11) (#116830)
2474cf7ad123 [AArch64] Generate zeroing forms of certain SVE2.2 instructions (3/11) (#116829)
6f285d311595 [AArch64] Generate zeroing forms of certain SVE2.2 instructions (2/11) (#116828)
2b3266c1701f [AArch64] Generate zeroing forms of certain SVE2.2 instructions (1/11) (#116259)
---
.../Target/AArch64/AArch64ISelDAGToDAG.cpp | 2 +
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 3 -
.../lib/Target/AArch64/AArch64SVEInstrInfo.td | 8 -
llvm/lib/Target/AArch64/SVEInstrFormats.td | 102 ++-
.../CodeGen/AArch64/zeroing-forms-abs-neg.ll | 600 +++++++++++++++++-
.../AArch64/zeroing-forms-fcvt-bfcvt.ll | 287 ++++++++-
.../AArch64/zeroing-forms-fcvtlt-fcvtx.ll | 123 +++-
.../CodeGen/AArch64/zeroing-forms-fcvtzsu.ll | 580 ++++++++++++++++-
8 files changed, 1577 insertions(+), 128 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index ff3ca8a24fc04a..6aa8cd4f0232ac 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -228,6 +228,8 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
return false;
}
+ bool SelectAny(SDValue) { return true; }
+
bool SelectDupZero(SDValue N) {
switch(N->getOpcode()) {
case AArch64ISD::DUP:
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index d015cc15581ad0..b1c61c8f84ef45 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -381,9 +381,6 @@ def NoUseScalarIncVL : Predicate<"!Subtarget->useScalarIncVL()">;
def UseSVEFPLD1R : Predicate<"!Subtarget->noSVEFPLD1R()">;
-def UseUnaryUndefPseudos
- : Predicate<"!(Subtarget->isSVEorStreamingSVEAvailable() && (Subtarget->hasSVE2p2() || Subtarget->hasSME2p2()))">;
-
def AArch64LocalRecover : SDNode<"ISD::LOCAL_RECOVER",
SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
SDTCisInt<1>]>>;
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 4a4412f9df6a1a..1e297eb5feb266 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -675,14 +675,6 @@ let Predicates = [HasSVEorSME] in {
defm FABS_ZPmZ : sve_int_un_pred_arit_bitwise_fp<0b100, "fabs", AArch64fabs_mt>;
defm FNEG_ZPmZ : sve_int_un_pred_arit_bitwise_fp<0b101, "fneg", AArch64fneg_mt>;
- let Predicates = [HasSVEorSME, UseUnaryUndefPseudos] in {
- defm FABS_ZPmZ : sve_fp_un_pred_arit_hsd<AArch64fabs_mt>;
- defm FNEG_ZPmZ : sve_fp_un_pred_arit_hsd<AArch64fneg_mt>;
-
- defm ABS_ZPmZ : sve_int_un_pred_arit_bhsd<AArch64abs_mt>;
- defm NEG_ZPmZ : sve_int_un_pred_arit_bhsd<AArch64neg_mt>;
- }
-
foreach VT = [nxv2bf16, nxv4bf16, nxv8bf16] in {
// No dedicated instruction, so just clear the sign bit.
def : Pat<(VT (fabs VT:$op)),
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index f8a633872cb966..a1bce3601aab60 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -484,6 +484,7 @@ let Predicates = [HasSVEorSME] in {
//===----------------------------------------------------------------------===//
def SVEDup0 : ComplexPattern<vAny, 0, "SelectDupZero", []>;
def SVEDup0Undef : ComplexPattern<vAny, 0, "SelectDupZeroOrUndef", []>;
+def SVEAny : ComplexPattern<vAny, 0, "SelectAny", []>;
class SVE_1_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
Instruction inst>
@@ -504,10 +505,15 @@ multiclass SVE_1_Op_PassthruUndef_Pat<ValueType vtd, SDPatternOperator op, Value
(inst $Op3, $Op1, $Op2)>;
}
-class SVE_1_Op_PassthruUndefZero_Pat<ValueType vtd, SDPatternOperator op, ValueType pg,
- ValueType vts, Instruction inst>
- : Pat<(vtd (op pg:$Op1, vts:$Op2, (vtd (SVEDup0Undef)))),
- (inst $Op1, $Op2)>;
+multiclass SVE_1_Op_PassthruUndefZero_Pat<ValueType vtd, SDPatternOperator op, ValueType pg,
+ ValueType vts, Instruction inst> {
+ let AddedComplexity = 1 in {
+ def : Pat<(vtd (op pg:$Op1, vts:$Op2, (vtd (SVEDup0Undef)))),
+ (inst $Op1, $Op2)>;
+ def : Pat<(vtd (op (pg (SVEAllActive:$Op1)), vts:$Op2, (vtd (SVEAny)))),
+ (inst $Op1, $Op2)>;
+ }
+}
// Used to match FP_ROUND_MERGE_PASSTHRU, which has an additional flag for the
// type of rounding. This is matched by timm0_1 in pattern below and ignored.
@@ -576,10 +582,15 @@ multiclass SVE_3_Op_Undef_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1
(inst $Op1, $Op2, $Op3)>;
}
-class SVE_3_Op_UndefZero_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
- ValueType vt2, ValueType vt3, Instruction inst>
- : Pat<(vtd (op (vt1 (SVEDup0Undef)), vt2:$Op1, vt3:$Op2)),
- (inst $Op1, $Op2)>;
+multiclass SVE_3_Op_UndefZero_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
+ ValueType vt2, ValueType vt3, Instruction inst> {
+ let AddedComplexity = 1 in {
+ def : Pat<(vtd (op (vt1 (SVEDup0Undef)), vt2:$Op1, vt3:$Op2)),
+ (inst $Op1, $Op2)>;
+ def : Pat<(vtd (op (vt1 (SVEAny)), (vt2 (SVEAllActive:$Op2)), vt3:$Op3)),
+ (inst $Op2, $Op3)>;
+ }
+}
class SVE_4_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
ValueType vt2, ValueType vt3, ValueType vt4,
@@ -2862,8 +2873,8 @@ multiclass sve_fp_fcvtltz<string asm, string op> {
def _HtoS : sve_fp_fcvt2z<0b1001, asm, ZPR32, ZPR16>;
def _StoD : sve_fp_fcvt2z<0b1111, asm, ZPR64, ZPR32>;
- def : SVE_3_Op_UndefZero_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f16), nxv4f32, nxv4i1, nxv8f16, !cast<Instruction>(NAME # _HtoS)>;
- def : SVE_3_Op_UndefZero_Pat<nxv2f64, !cast<SDPatternOperator>(op # _f64f32), nxv2f64, nxv2i1, nxv4f32, !cast<Instruction>(NAME # _StoD)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f16), nxv4f32, nxv4i1, nxv8f16, !cast<Instruction>(NAME # _HtoS)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv2f64, !cast<SDPatternOperator>(op # _f64f32), nxv2f64, nxv2i1, nxv4f32, !cast<Instruction>(NAME # _StoD)>;
}
//===----------------------------------------------------------------------===//
@@ -3273,7 +3284,7 @@ class sve_fp_z2op_p_zd<bits<7> opc,string asm, RegisterOperand i_zprtype,
multiclass sve_fp_z2op_p_zd<string asm, SDPatternOperator op> {
def _DtoS : sve_fp_z2op_p_zd<0b0001010, asm, ZPR64, ZPR32>;
- def : SVE_3_Op_UndefZero_Pat<nxv4f32, op, nxv4f32, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoS)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv4f32, op, nxv4f32, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoS)>;
}
multiclass sve_fp_z2op_p_zd_hsd<bits<5> opc, string asm> {
@@ -3290,7 +3301,7 @@ multiclass sve_fp_z2op_p_zd_frint<bits<2> opc, string asm> {
multiclass sve_fp_z2op_p_zd_bfcvt<string asm, SDPatternOperator op> {
def NAME : sve_fp_z2op_p_zd<0b1001010, asm, ZPR32, ZPR16>;
- def : SVE_3_Op_UndefZero_Pat<nxv8bf16, op, nxv8bf16, nxv4i1, nxv4f32, !cast<Instruction>(NAME)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv8bf16, op, nxv8bf16, nxv4i1, nxv4f32, !cast<Instruction>(NAME)>;
}
multiclass sve_fp_z2op_p_zd_d<bit U, string asm, string int_op, SDPatternOperator ir_op> {
@@ -3302,14 +3313,14 @@ multiclass sve_fp_z2op_p_zd_d<bit U, string asm, string int_op, SDPatternOperato
def _DtoS : sve_fp_z2op_p_zd<{ 0b111100, U }, asm, ZPR64, ZPR32>;
def _DtoD : sve_fp_z2op_p_zd<{ 0b111111, U }, asm, ZPR64, ZPR64>;
- def : SVE_3_Op_UndefZero_Pat<nxv4i32, !cast<SDPatternOperator>(int_op # _i32f64), nxv4i32, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoS)>;
- def : SVE_3_Op_UndefZero_Pat<nxv2i64, !cast<SDPatternOperator>(int_op # _i64f32), nxv2i64, nxv2i1, nxv4f32, !cast<Instruction>(NAME # _StoD)>;
- def : SVE_3_Op_UndefZero_Pat<nxv4i32, !cast<SDPatternOperator>(int_op # _i32f16), nxv4i32, nxv4i1, nxv8f16, !cast<Instruction>(NAME # _HtoS)>;
- def : SVE_3_Op_UndefZero_Pat<nxv2i64, !cast<SDPatternOperator>(int_op # _i64f16), nxv2i64, nxv2i1, nxv8f16, !cast<Instruction>(NAME # _HtoD)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv4i32, !cast<SDPatternOperator>(int_op # _i32f64), nxv4i32, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoS)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv2i64, !cast<SDPatternOperator>(int_op # _i64f32), nxv2i64, nxv2i1, nxv4f32, !cast<Instruction>(NAME # _StoD)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv4i32, !cast<SDPatternOperator>(int_op # _i32f16), nxv4i32, nxv4i1, nxv8f16, !cast<Instruction>(NAME # _HtoS)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv2i64, !cast<SDPatternOperator>(int_op # _i64f16), nxv2i64, nxv2i1, nxv8f16, !cast<Instruction>(NAME # _HtoD)>;
- def : SVE_1_Op_PassthruUndefZero_Pat<nxv8i16, ir_op, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _HtoH)>;
- def : SVE_1_Op_PassthruUndefZero_Pat<nxv4i32, ir_op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _StoS)>;
- def : SVE_1_Op_PassthruUndefZero_Pat<nxv2i64, ir_op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoD)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv8i16, ir_op, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _HtoH)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv4i32, ir_op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _StoS)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv2i64, ir_op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoD)>;
}
multiclass sve_fp_z2op_p_zd_c<bit U, string asm> {
@@ -3336,12 +3347,12 @@ multiclass sve_fp_z2op_p_zd_b_0<string asm, string op> {
def _DtoS : sve_fp_z2op_p_zd<0b1101010, asm, ZPR64, ZPR32>;
def _StoD : sve_fp_z2op_p_zd<0b1101011, asm, ZPR32, ZPR64>;
- def : SVE_3_Op_UndefZero_Pat<nxv8f16, !cast<SDPatternOperator>(op # _f16f32), nxv8f16, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _StoH)>;
- def : SVE_3_Op_UndefZero_Pat<nxv8f16, !cast<SDPatternOperator>(op # _f16f64), nxv8f16, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoH)>;
- def : SVE_3_Op_UndefZero_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f64), nxv4f32, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoS)>;
- def : SVE_3_Op_UndefZero_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f16), nxv4f32, nxv4i1, nxv8f16, !cast<Instruction>(NAME # _HtoS)>;
- def : SVE_3_Op_UndefZero_Pat<nxv2f64, !cast<SDPatternOperator>(op # _f64f16), nxv2f64, nxv2i1, nxv8f16, !cast<Instruction>(NAME # _HtoD)>;
- def : SVE_3_Op_UndefZero_Pat<nxv2f64, !cast<SDPatternOperator>(op # _f64f32), nxv2f64, nxv2i1, nxv4f32, !cast<Instruction>(NAME # _StoD)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv8f16, !cast<SDPatternOperator>(op # _f16f32), nxv8f16, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _StoH)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv8f16, !cast<SDPatternOperator>(op # _f16f64), nxv8f16, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoH)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f64), nxv4f32, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoS)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f16), nxv4f32, nxv4i1, nxv8f16, !cast<Instruction>(NAME # _HtoS)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv2f64, !cast<SDPatternOperator>(op # _f64f16), nxv2f64, nxv2i1, nxv8f16, !cast<Instruction>(NAME # _HtoD)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv2f64, !cast<SDPatternOperator>(op # _f64f32), nxv2f64, nxv2i1, nxv4f32, !cast<Instruction>(NAME # _StoD)>;
}
//===----------------------------------------------------------------------===//
@@ -4859,6 +4870,16 @@ multiclass sve_int_un_pred_arit<bits<3> opc, string asm,
def : SVE_1_Op_Passthru_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
def : SVE_1_Op_Passthru_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
def : SVE_1_Op_Passthru_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
+
+ def _B_UNDEF : PredOneOpPassthruPseudo<NAME # _B, ZPR8>;
+ def _H_UNDEF : PredOneOpPassthruPseudo<NAME # _H, ZPR16>;
+ def _S_UNDEF : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
+ def _D_UNDEF : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
+
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Pseudo>(NAME # _B_UNDEF)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Pseudo>(NAME # _H_UNDEF)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Pseudo>(NAME # _S_UNDEF)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Pseudo>(NAME # _D_UNDEF)>;
}
multiclass sve_int_un_pred_arit_z<bits<3> opc, string asm, SDPatternOperator op> {
@@ -4867,10 +4888,10 @@ multiclass sve_int_un_pred_arit_z<bits<3> opc, string asm, SDPatternOperator op>
def _S : sve_int_un_pred_arit_z<0b10, { opc, 0b0 }, asm, ZPR32>;
def _D : sve_int_un_pred_arit_z<0b11, { opc, 0b0 }, asm, ZPR64>;
- def : SVE_1_Op_PassthruUndefZero_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
- def : SVE_1_Op_PassthruUndefZero_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
- def : SVE_1_Op_PassthruUndefZero_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
- def : SVE_1_Op_PassthruUndefZero_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
multiclass sve_int_un_pred_arit_h<bits<3> opc, string asm,
@@ -4984,6 +5005,17 @@ multiclass sve_int_un_pred_arit_bitwise_fp<bits<3> opc, string asm,
def : SVE_1_Op_Passthru_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
def : SVE_1_Op_Passthru_Pat<nxv2f32, op, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>;
def : SVE_1_Op_Passthru_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
+
+ def _H_UNDEF : PredOneOpPassthruPseudo<NAME # _H, ZPR16>;
+ def _S_UNDEF : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
+ def _D_UNDEF : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
+
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv8f16, op, nxv8i1, nxv8f16, !cast<Pseudo>(NAME # _H_UNDEF)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv4f16, op, nxv4i1, nxv4f16, !cast<Pseudo>(NAME # _H_UNDEF)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv2f16, op, nxv2i1, nxv2f16, !cast<Pseudo>(NAME # _H_UNDEF)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Pseudo>(NAME # _S_UNDEF)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv2f32, op, nxv2i1, nxv2f32, !cast<Pseudo>(NAME # _S_UNDEF)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Pseudo>(NAME # _D_UNDEF)>;
}
multiclass sve_int_un_pred_arit_bitwise_fp_z<bits<3> opc, string asm, SDPatternOperator op> {
@@ -4991,12 +5023,12 @@ multiclass sve_int_un_pred_arit_bitwise_fp_z<bits<3> opc, string asm, SDPatternO
def _S : sve_int_un_pred_arit_z<0b10, { opc, 0b1 }, asm, ZPR32>;
def _D : sve_int_un_pred_arit_z<0b11, { opc, 0b1 }, asm, ZPR64>;
- def : SVE_1_Op_PassthruUndefZero_Pat<nxv8f16, op, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
- def : SVE_1_Op_PassthruUndefZero_Pat<nxv4f16, op, nxv4i1, nxv4f16, !cast<Instruction>(NAME # _H)>;
- def : SVE_1_Op_PassthruUndefZero_Pat<nxv2f16, op, nxv2i1, nxv2f16, !cast<Instruction>(NAME # _H)>;
- def : SVE_1_Op_PassthruUndefZero_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
- def : SVE_1_Op_PassthruUndefZero_Pat<nxv2f32, op, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>;
- def : SVE_1_Op_PassthruUndefZero_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv8f16, op, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv4f16, op, nxv4i1, nxv4f16, !cast<Instruction>(NAME # _H)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv2f16, op, nxv2i1, nxv2f16, !cast<Instruction>(NAME # _H)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv2f32, op, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
}
multiclass sve_fp_un_pred_arit_hsd<SDPatternOperator op> {
diff --git a/llvm/test/CodeGen/AArch64/zeroing-forms-abs-neg.ll b/llvm/test/CodeGen/AArch64/zeroing-forms-abs-neg.ll
index 1caee994220f05..510d4576646f12 100644
--- a/llvm/test/CodeGen/AArch64/zeroing-forms-abs-neg.ll
+++ b/llvm/test/CodeGen/AArch64/zeroing-forms-abs-neg.ll
@@ -18,7 +18,7 @@ define <vscale x 2 x double> @test_svabs_f64_x_1(<vscale x 2 x i1> %pg, <vscale
; CHECK-2p2-NEXT: fabs z0.d, p0/z, z0.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fabs.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fabs.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
ret <vscale x 2 x double> %0
}
@@ -34,7 +34,7 @@ define <vscale x 2 x double> @test_svabs_f64_x_2(<vscale x 2 x i1> %pg, double %
; CHECK-2p2-NEXT: fabs z0.d, p0/z, z1.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fabs.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fabs.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
ret <vscale x 2 x double> %0
}
@@ -65,7 +65,7 @@ define <vscale x 4 x float> @test_svabs_f32_x_1(<vscale x 4 x i1> %pg, <vscale x
; CHECK-2p2-NEXT: fabs z0.s, p0/z, z0.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fabs.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fabs.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
ret <vscale x 4 x float> %0
}
@@ -81,7 +81,7 @@ define <vscale x 4 x float> @test_svabs_f32_x_2(<vscale x 4 x i1> %pg, double %z
; CHECK-2p2-NEXT: fabs z0.s, p0/z, z1.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fabs.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fabs.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
ret <vscale x 4 x float> %0
}
@@ -112,7 +112,7 @@ define <vscale x 8 x half> @test_svabs_f16_x_1(<vscale x 8 x i1> %pg, <vscale x
; CHECK-2p2-NEXT: fabs z0.h, p0/z, z0.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fabs.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fabs.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
ret <vscale x 8 x half> %0
}
@@ -128,7 +128,7 @@ define <vscale x 8 x half> @test_svabs_f16_x_2(<vscale x 8 x i1> %pg, double %z0
; CHECK-2p2-NEXT: fabs z0.h, p0/z, z1.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fabs.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fabs.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
ret <vscale x 8 x half> %0
}
@@ -159,7 +159,7 @@ define <vscale x 16 x i8> @test_svabs_s8_x_1(<vscale x 16 x i1> %pg, <vscale x 1
; CHECK-2p2-NEXT: abs z0.b, p0/z, z0.b
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.abs.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.abs.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
ret <vscale x 16 x i8> %0
}
@@ -175,8 +175,8 @@ define <vscale x 16 x i8> @test_svabs_s8_x_2(<vscale x 16 x i1> %pg, double %z0,
; CHECK-2p2-NEXT: abs z0.b, p0/z, z1.b
; CHECK-2p2-NEXT: ret
entry:
- %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.abs.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
- ret <vscale x 16 x i8> %1
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.abs.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+ ret <vscale x 16 x i8> %0
}
define <vscale x 16 x i8> @test_svabs_s8_z(<vscale x 16 x i1> %pg, double %z0, <vscale x 16 x i8> %x) {
@@ -191,8 +191,8 @@ define <vscale x 16 x i8> @test_svabs_s8_z(<vscale x 16 x i1> %pg, double %z0, <
; CHECK-2p2-NEXT: abs z0.b, p0/z, z1.b
; CHECK-2p2-NEXT: ret
entry:
- %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.abs.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
- ret <vscale x 16 x i8> %1
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.abs.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+ ret <vscale x 16 x i8> %0
}
define <vscale x 8 x i16> @test_svabs_s16_x_1(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %x) {
@@ -206,7 +206,7 @@ define <vscale x 8 x i16> @test_svabs_s16_x_1(<vscale x 8 x i1> %pg, <vscale x 8
; CHECK-2p2-NEXT: abs z0.h, p0/z, z0.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
ret <vscale x 8 x i16> %0
}
@@ -222,7 +222,7 @@ define <vscale x 8 x i16> @test_svabs_s16_x_2(<vscale x 8 x i1> %pg, double %z0,
; CHECK-2p2-NEXT: abs z0.h, p0/z, z1.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
ret <vscale x 8 x i16> %0
}
@@ -253,7 +253,7 @@ define <vscale x 4 x i32> @test_svabs_s32_x_1(<vscale x 4 x i1> %pg, <vscale x 4
; CHECK-2p2-NEXT: abs z0.s, p0/z, z0.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.abs.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.abs.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
ret <vscale x 4 x i32> %0
}
@@ -269,7 +269,7 @@ define <vscale x 4 x i32> @test_svabs_s32_x_2(<vscale x 4 x i1> %pg, double %z0,
; CHECK-2p2-NEXT: abs z0.s, p0/z, z1.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.abs.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.abs.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
ret <vscale x 4 x i32> %0
}
@@ -300,7 +300,7 @@ define <vscale x 2 x i64> @test_svabs_s64_x_1(<vscale x 2 x i1> %pg, <vscale x 2
; CHECK-2p2-NEXT: abs z0.d, p0/z, z0.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.abs.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.abs.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
ret <vscale x 2 x i64> %0
}
@@ -316,7 +316,7 @@ define <vscale x 2 x i64> @test_svabs_s64_x_2(<vscale x 2 x i1> %pg, double %z0,
; CHECK-2p2-NEXT: abs z0.d, p0/z, z1.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.abs.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.abs.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
ret <vscale x 2 x i64> %0
}
@@ -347,7 +347,7 @@ define <vscale x 2 x double> @test_svneg_f64_x_1(<vscale x 2 x i1> %pg, <vscale
; CHECK-2p2-NEXT: fneg z0.d, p0/z, z0.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fneg.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fneg.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
ret <vscale x 2 x double> %0
}
@@ -363,7 +363,7 @@ define <vscale x 2 x double> @test_svneg_f64_x_2(<vscale x 2 x i1> %pg, double %
; CHECK-2p2-NEXT: fneg z0.d, p0/z, z1.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fneg.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fneg.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
ret <vscale x 2 x double> %0
}
@@ -394,7 +394,7 @@ define <vscale x 4 x float> @test_svneg_f32_x_1(<vscale x 4 x i1> %pg, <vscale x
; CHECK-2p2-NEXT: fneg z0.s, p0/z, z0.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fneg.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fneg.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
ret <vscale x 4 x float> %0
}
@@ -410,7 +410,7 @@ define <vscale x 4 x float> @test_svneg_f32_x_2(<vscale x 4 x i1> %pg, double %z
; CHECK-2p2-NEXT: fneg z0.s, p0/z, z1.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fneg.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fneg.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
ret <vscale x 4 x float> %0
}
@@ -441,7 +441,7 @@ define <vscale x 8 x half> @test_svneg_f16_x_1(<vscale x 8 x i1> %pg, <vscale x
; CHECK-2p2-NEXT: fneg z0.h, p0/z, z0.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fneg.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fneg.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
ret <vscale x 8 x half> %0
}
@@ -457,7 +457,7 @@ define <vscale x 8 x half> @test_svneg_f16_x_2(<vscale x 8 x i1> %pg, double %z0
; CHECK-2p2-NEXT: fneg z0.h, p0/z, z1.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fneg.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fneg.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
ret <vscale x 8 x half> %0
}
@@ -488,7 +488,7 @@ define <vscale x 16 x i8> @test_svneg_s8_x_1(<vscale x 16 x i1> %pg, <vscale x 1
; CHECK-2p2-NEXT: neg z0.b, p0/z, z0.b
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.neg.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.neg.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
ret <vscale x 16 x i8> %0
}
@@ -504,8 +504,8 @@ define <vscale x 16 x i8> @test_svneg_s8_x_2(<vscale x 16 x i1> %pg, double %z0,
; CHECK-2p2-NEXT: neg z0.b, p0/z, z1.b
; CHECK-2p2-NEXT: ret
entry:
- %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.neg.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
- ret <vscale x 16 x i8> %1
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.neg.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+ ret <vscale x 16 x i8> %0
}
define <vscale x 16 x i8> @test_svneg_s8_z(<vscale x 16 x i1> %pg, double %z0, <vscale x 16 x i8> %x) {
@@ -520,8 +520,8 @@ define <vscale x 16 x i8> @test_svneg_s8_z(<vscale x 16 x i1> %pg, double %z0, <
; CHECK-2p2-NEXT: neg z0.b, p0/z, z1.b
; CHECK-2p2-NEXT: ret
entry:
- %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.neg.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
- ret <vscale x 16 x i8> %1
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.neg.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+ ret <vscale x 16 x i8> %0
}
define <vscale x 8 x i16> @test_svneg_s16_x_1(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %x) {
@@ -535,7 +535,7 @@ define <vscale x 8 x i16> @test_svneg_s16_x_1(<vscale x 8 x i1> %pg, <vscale x 8
; CHECK-2p2-NEXT: neg z0.h, p0/z, z0.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.neg.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.neg.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
ret <vscale x 8 x i16> %0
}
@@ -551,7 +551,7 @@ define <vscale x 8 x i16> @test_svneg_s16_x_2(<vscale x 8 x i1> %pg, double %z0,
; CHECK-2p2-NEXT: neg z0.h, p0/z, z1.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.neg.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.neg.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
ret <vscale x 8 x i16> %0
}
@@ -582,7 +582,7 @@ define <vscale x 4 x i32> @test_svneg_s32_x_1(<vscale x 4 x i1> %pg, <vscale x 4
; CHECK-2p2-NEXT: neg z0.s, p0/z, z0.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.neg.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.neg.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
ret <vscale x 4 x i32> %0
}
@@ -598,7 +598,7 @@ define <vscale x 4 x i32> @test_svneg_s32_x_2(<vscale x 4 x i1> %pg, double %z0,
; CHECK-2p2-NEXT: neg z0.s, p0/z, z1.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.neg.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.neg.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
ret <vscale x 4 x i32> %0
}
@@ -629,7 +629,7 @@ define <vscale x 2 x i64> @test_svneg_s64_x_1(<vscale x 2 x i1> %pg, <vscale x 2
; CHECK-2p2-NEXT: neg z0.d, p0/z, z0.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.neg.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.neg.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
ret <vscale x 2 x i64> %0
}
@@ -645,7 +645,7 @@ define <vscale x 2 x i64> @test_svneg_s64_x_2(<vscale x 2 x i1> %pg, double %z0,
; CHECK-2p2-NEXT: neg z0.d, p0/z, z1.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.neg.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.neg.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
ret <vscale x 2 x i64> %0
}
@@ -664,3 +664,535 @@ entry:
%0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.neg.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
ret <vscale x 2 x i64> %0
}
+
+define <vscale x 2 x double> @test_svfabs_f64_ptrue_u(double %z0, <vscale x 2 x double> %x) {
+; CHECK-LABEL: test_svfabs_f64_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: fabs z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svfabs_f64_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fabs z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fabs.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ ret <vscale x 2 x double> %0
+}
+
+define <vscale x 2 x double> @test_svfabs_f64_ptrue(double %z0, <vscale x 2 x double> %x, <vscale x 2 x double> %y) {
+; CHECK-LABEL: test_svfabs_f64_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: fabs z0.d, p0/m, z2.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svfabs_f64_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fabs z0.d, p0/z, z2.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fabs.nxv2f64(<vscale x 2 x double> %x, <vscale x 2 x i1> %pg, <vscale x 2 x double> %y)
+ ret <vscale x 2 x double> %0
+}
+
+define <vscale x 4 x float> @test_svfabs_f32_ptrue_u(double %z0, <vscale x 4 x float> %x) {
+; CHECK-LABEL: test_svfabs_f32_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: fabs z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svfabs_f32_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: fabs z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fabs.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+ ret <vscale x 4 x float> %0
+}
+
+define <vscale x 4 x float> @test_svfabs_f32_ptrue(double %z0, <vscale x 4 x float> %x, <vscale x 4 x float> %y) {
+; CHECK-LABEL: test_svfabs_f32_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: fabs z0.s, p0/m, z2.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svfabs_f32_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: fabs z0.s, p0/z, z2.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fabs.nxv4f32(<vscale x 4 x float> %x, <vscale x 4 x i1> %pg, <vscale x 4 x float> %y)
+ ret <vscale x 4 x float> %0
+}
+
+define <vscale x 8 x half> @test_svfabs_f16_ptrue_u(double %z0, <vscale x 8 x half> %x) {
+; CHECK-LABEL: test_svfabs_f16_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: fabs z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svfabs_f16_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.h
+; CHECK-2p2-NEXT: fabs z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fabs.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
+ ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_svfabs_f16_ptrue(double %z0, <vscale x 8 x half> %x, <vscale x 8 x half> %y) {
+; CHECK-LABEL: test_svfabs_f16_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: fabs z0.h, p0/m, z2.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svfabs_f16_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.h
+; CHECK-2p2-NEXT: fabs z0.h, p0/z, z2.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fabs.nxv8f16(<vscale x 8 x half> %x, <vscale x 8 x i1> %pg, <vscale x 8 x half> %y)
+ ret <vscale x 8 x half> %0
+}
+
+define <vscale x 16 x i8> @test_svabs_s8_ptrue_u(double %z0, <vscale x 16 x i8> %x) {
+; CHECK-LABEL: test_svabs_s8_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: abs z0.b, p0/m, z1.b
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svabs_s8_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.b
+; CHECK-2p2-NEXT: abs z0.b, p0/z, z1.b
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.abs.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+ ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 16 x i8> @test_svabs_s8_ptrue(double %z0, <vscale x 16 x i8> %x, <vscale x 16 x i8> %y) {
+; CHECK-LABEL: test_svabs_s8_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: abs z0.b, p0/m, z2.b
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svabs_s8_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.b
+; CHECK-2p2-NEXT: abs z0.b, p0/z, z2.b
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.abs.nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %y)
+ ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 8 x i16> @test_svabs_s16_ptrue_u(double %z0, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svabs_s16_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: abs z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svabs_s16_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.h
+; CHECK-2p2-NEXT: abs z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nx84i1(i32 31)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svabs_s16_ptrue(double %z0, <vscale x 8 x i16> %x, <vscale x 8 x i16> %y) {
+; CHECK-LABEL: test_svabs_s16_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: abs z0.h, p0/m, z2.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svabs_s16_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.h
+; CHECK-2p2-NEXT: abs z0.h, p0/z, z2.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %y)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 4 x i32> @test_svabs_s32_ptrue_u(double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svabs_s32_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: abs z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svabs_s32_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: abs z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.abs.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svabs_s32_ptrue(double %z0, <vscale x 4 x i32> %x, <vscale x 4 x i32> %y) {
+; CHECK-LABEL: test_svabs_s32_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: abs z0.s, p0/m, z2.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svabs_s32_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: abs z0.s, p0/z, z2.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.abs.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %y)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 2 x i64> @test_svabs_s64_ptrue_u(double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svabs_s64_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: abs z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svabs_s64_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: abs z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.abs.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_svabs_s64_ptrue(double %z0, <vscale x 2 x i64> %x, <vscale x 2 x i64> %y) {
+; CHECK-LABEL: test_svabs_s64_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: abs z0.d, p0/m, z2.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svabs_s64_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: abs z0.d, p0/z, z2.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.abs.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %y)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x double> @test_svfneg_f64_ptrue_u(double %z0, <vscale x 2 x double> %x) {
+; CHECK-LABEL: test_svfneg_f64_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: fneg z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svfneg_f64_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fneg z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fneg.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ ret <vscale x 2 x double> %0
+}
+
+define <vscale x 2 x double> @test_svfneg_f64_ptrue(double %z0, <vscale x 2 x double> %x, <vscale x 2 x double> %y) {
+; CHECK-LABEL: test_svfneg_f64_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: fneg z0.d, p0/m, z2.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svfneg_f64_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fneg z0.d, p0/z, z2.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fneg.nxv2f64(<vscale x 2 x double> %x, <vscale x 2 x i1> %pg, <vscale x 2 x double> %y)
+ ret <vscale x 2 x double> %0
+}
+
+define <vscale x 4 x float> @test_svfneg_f32_ptrue_u(double %z0, <vscale x 4 x float> %x) {
+; CHECK-LABEL: test_svfneg_f32_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: fneg z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svfneg_f32_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: fneg z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fneg.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+ ret <vscale x 4 x float> %0
+}
+
+define <vscale x 4 x float> @test_svfneg_f32_ptrue(double %z0, <vscale x 4 x float> %x, <vscale x 4 x float> %y) {
+; CHECK-LABEL: test_svfneg_f32_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: fneg z0.s, p0/m, z2.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svfneg_f32_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: fneg z0.s, p0/z, z2.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fneg.nxv4f32(<vscale x 4 x float> %x, <vscale x 4 x i1> %pg, <vscale x 4 x float> %y)
+ ret <vscale x 4 x float> %0
+}
+
+define <vscale x 8 x half> @test_svfneg_f16_ptrue_u(double %z0, <vscale x 8 x half> %x) {
+; CHECK-LABEL: test_svfneg_f16_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: fneg z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svfneg_f16_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.h
+; CHECK-2p2-NEXT: fneg z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fneg.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
+ ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_svfneg_f16_ptrue(double %z0, <vscale x 8 x half> %x, <vscale x 8 x half> %y) {
+; CHECK-LABEL: test_svfneg_f16_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: fneg z0.h, p0/m, z2.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svfneg_f16_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.h
+; CHECK-2p2-NEXT: fneg z0.h, p0/z, z2.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fneg.nxv8f16(<vscale x 8 x half> %x, <vscale x 8 x i1> %pg, <vscale x 8 x half> %y)
+ ret <vscale x 8 x half> %0
+}
+
+define <vscale x 16 x i8> @test_svneg_s8_ptrue_u(double %z0, <vscale x 16 x i8> %x) {
+; CHECK-LABEL: test_svneg_s8_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: neg z0.b, p0/m, z1.b
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svneg_s8_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.b
+; CHECK-2p2-NEXT: neg z0.b, p0/z, z1.b
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.neg.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+ ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 16 x i8> @test_svneg_s8_ptrue(double %z0, <vscale x 16 x i8> %x, <vscale x 16 x i8> %y) {
+; CHECK-LABEL: test_svneg_s8_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: neg z0.b, p0/m, z2.b
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svneg_s8_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.b
+; CHECK-2p2-NEXT: neg z0.b, p0/z, z2.b
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.neg.nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %y)
+ ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 8 x i16> @test_svneg_s16_ptrue_u(double %z0, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svneg_s16_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: neg z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svneg_s16_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.h
+; CHECK-2p2-NEXT: neg z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.neg.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svneg_s16_ptrue(double %z0, <vscale x 8 x i16> %x, <vscale x 8 x i16> %y) {
+; CHECK-LABEL: test_svneg_s16_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: neg z0.h, p0/m, z2.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svneg_s16_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.h
+; CHECK-2p2-NEXT: neg z0.h, p0/z, z2.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.neg.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %y)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 4 x i32> @test_svneg_s32_ptrue_u(double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svneg_s32_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: neg z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svneg_s32_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: neg z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.neg.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svneg_s32_ptrue(double %z0, <vscale x 4 x i32> %x, <vscale x 4 x i32> %y) {
+; CHECK-LABEL: test_svneg_s32_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: neg z0.s, p0/m, z2.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svneg_s32_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: neg z0.s, p0/z, z2.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.neg.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %y)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 2 x i64> @test_svneg_s64_ptrue_u(double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svneg_s64_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: neg z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svneg_s64_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: neg z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.neg.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_svneg_s64_ptrue(double %z0, <vscale x 2 x i64> %x, <vscale x 2 x i64> %y) {
+; CHECK-LABEL: test_svneg_s64_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: neg z0.d, p0/m, z2.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svneg_s64_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: neg z0.d, p0/z, z2.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.neg.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %y)
+ ret <vscale x 2 x i64> %0
+}
diff --git a/llvm/test/CodeGen/AArch64/zeroing-forms-fcvt-bfcvt.ll b/llvm/test/CodeGen/AArch64/zeroing-forms-fcvt-bfcvt.ll
index cf9ac49ca7b236..855bf9a3b3c491 100644
--- a/llvm/test/CodeGen/AArch64/zeroing-forms-fcvt-bfcvt.ll
+++ b/llvm/test/CodeGen/AArch64/zeroing-forms-fcvt-bfcvt.ll
@@ -18,7 +18,7 @@ define <vscale x 8 x half> @test_svcvt_f16_f32_x_1(<vscale x 4 x i1> %pg, <vscal
; CHECK-2p2-NEXT: fcvt z0.h, p0/z, z0.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f32(<vscale x 8 x half> undef, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f32(<vscale x 8 x half> poison, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
ret <vscale x 8 x half> %0
}
@@ -33,7 +33,7 @@ define <vscale x 8 x half> @test_svcvt_f16_f32_x_2(<vscale x 4 x i1> %pg, double
; CHECK-2p2-NEXT: fcvt z0.h, p0/z, z1.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f32(<vscale x 8 x half> undef, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f32(<vscale x 8 x half> poison, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
ret <vscale x 8 x half> %0
}
@@ -64,7 +64,7 @@ define <vscale x 8 x bfloat> @test_svcvt_bf16_f32_x_1(<vscale x 4 x i1> %pg, <vs
; CHECK-2p2-NEXT: bfcvt z0.h, p0/z, z0.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> undef, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+ %0 = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> poison, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
ret <vscale x 8 x bfloat> %0
}
@@ -79,7 +79,7 @@ define <vscale x 8 x bfloat> @test_svcvt_bf16_f32_x_2(<vscale x 4 x i1> %pg, dou
; CHECK-2p2-NEXT: bfcvt z0.h, p0/z, z1.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> undef, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+ %0 = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> poison, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
ret <vscale x 8 x bfloat> %0
}
@@ -110,7 +110,7 @@ define <vscale x 8 x half> @test_svcvt_f16_f64_x_1(<vscale x 2 x i1> %pg, <vscal
; CHECK-2p2-NEXT: fcvt z0.h, p0/z, z0.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f64(<vscale x 8 x half> undef, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f64(<vscale x 8 x half> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
ret <vscale x 8 x half> %0
}
@@ -125,7 +125,7 @@ define <vscale x 8 x half> @test_svcvt_f16_f64_x_2(<vscale x 2 x i1> %pg, double
; CHECK-2p2-NEXT: fcvt z0.h, p0/z, z1.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f64(<vscale x 8 x half> undef, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f64(<vscale x 8 x half> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
ret <vscale x 8 x half> %0
}
@@ -156,7 +156,7 @@ define <vscale x 4 x float> @test_svcvt_f32_f64_x_1(<vscale x 2 x i1> %pg, <vsca
; CHECK-2p2-NEXT: fcvt z0.s, p0/z, z0.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f64(<vscale x 4 x float> undef, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f64(<vscale x 4 x float> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
ret <vscale x 4 x float> %0
}
@@ -171,7 +171,7 @@ define <vscale x 4 x float> @test_svcvt_f32_f64_x_2(<vscale x 2 x i1> %pg, doubl
; CHECK-2p2-NEXT: fcvt z0.s, p0/z, z1.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f64(<vscale x 4 x float> undef, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f64(<vscale x 4 x float> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
ret <vscale x 4 x float> %0
}
@@ -202,7 +202,7 @@ define <vscale x 4 x float> @test_svcvt_f32_f16_x_1(<vscale x 4 x i1> %pg, <vsca
; CHECK-2p2-NEXT: fcvt z0.s, p0/z, z0.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f16(<vscale x 4 x float> undef, <vscale x 4 x i1> %pg, <vscale x 8 x half> %x)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f16(<vscale x 4 x float> poison, <vscale x 4 x i1> %pg, <vscale x 8 x half> %x)
ret <vscale x 4 x float> %0
}
@@ -217,7 +217,7 @@ define <vscale x 4 x float> @test_svcvt_f32_f16_x_2(<vscale x 4 x i1> %pg, doubl
; CHECK-2p2-NEXT: fcvt z0.s, p0/z, z1.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f16(<vscale x 4 x float> undef, <vscale x 4 x i1> %pg, <vscale x 8 x half> %x)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f16(<vscale x 4 x float> poison, <vscale x 4 x i1> %pg, <vscale x 8 x half> %x)
ret <vscale x 4 x float> %0
}
@@ -248,7 +248,7 @@ define <vscale x 2 x double> @test_svcvt_f64_f16_x_1(<vscale x 2 x i1> %pg, <vsc
; CHECK-2p2-NEXT: fcvt z0.d, p0/z, z0.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f16(<vscale x 2 x double> undef, <vscale x 2 x i1> %pg, <vscale x 8 x half> %x)
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f16(<vscale x 2 x double> poison, <vscale x 2 x i1> %pg, <vscale x 8 x half> %x)
ret <vscale x 2 x double> %0
}
@@ -263,7 +263,7 @@ define <vscale x 2 x double> @test_svcvt_f64_f16_x_2(<vscale x 2 x i1> %pg, doub
; CHECK-2p2-NEXT: fcvt z0.d, p0/z, z1.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f16(<vscale x 2 x double> undef, <vscale x 2 x i1> %pg, <vscale x 8 x half> %x)
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f16(<vscale x 2 x double> poison, <vscale x 2 x i1> %pg, <vscale x 8 x half> %x)
ret <vscale x 2 x double> %0
}
@@ -294,7 +294,7 @@ define <vscale x 2 x double> @test_svcvt_f64_f32_x_1(<vscale x 2 x i1> %pg, <vsc
; CHECK-2p2-NEXT: fcvt z0.d, p0/z, z0.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f32(<vscale x 2 x double> undef, <vscale x 2 x i1> %pg, <vscale x 4 x float> %x)
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f32(<vscale x 2 x double> poison, <vscale x 2 x i1> %pg, <vscale x 4 x float> %x)
ret <vscale x 2 x double> %0
}
@@ -309,7 +309,7 @@ define <vscale x 2 x double> @test_svcvt_f64_f32_x_2(<vscale x 2 x i1> %pg, doub
; CHECK-2p2-NEXT: fcvt z0.d, p0/z, z1.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f32(<vscale x 2 x double> undef, <vscale x 2 x i1> %pg, <vscale x 4 x float> %x)
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f32(<vscale x 2 x double> poison, <vscale x 2 x i1> %pg, <vscale x 4 x float> %x)
ret <vscale x 2 x double> %0
}
@@ -328,3 +328,262 @@ entry:
%0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f32(<vscale x 2 x double> zeroinitializer, <vscale x 2 x i1> %pg, <vscale x 4 x float> %x)
ret <vscale x 2 x double> %0
}
+
+define <vscale x 8 x half> @test_svcvt_f16_f32_ptrue_u(double %z0, <vscale x 4 x float> %x) {
+; CHECK-LABEL: test_svcvt_f16_f32_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fcvt z0.h, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f16_f32_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: fcvt z0.h, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f32(<vscale x 8 x half> poison, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+ ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_svcvt_f16_f32_ptrue(double %z0, <vscale x 8 x half> %x, <vscale x 4 x float> %y ) {
+; CHECK-LABEL: test_svcvt_f16_f32_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fcvt z0.h, p0/m, z2.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f16_f32_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: fcvt z0.h, p0/z, z2.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f32(<vscale x 8 x half> %x, <vscale x 4 x i1> %pg, <vscale x 4 x float> %y)
+ ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x bfloat> @test_svcvt_bf16_f32_ptrue_u(double %z0, <vscale x 4 x float> %x) {
+; CHECK-LABEL: test_svcvt_bf16_f32_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: bfcvt z0.h, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcvt_bf16_f32_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: bfcvt z0.h, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> poison, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+ ret <vscale x 8 x bfloat> %0
+}
+
+define <vscale x 8 x bfloat> @test_svcvt_bf16_f32_ptrue(double %z0, <vscale x 8 x bfloat> %x, <vscale x 4 x float> %y ) {
+; CHECK-LABEL: test_svcvt_bf16_f32_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: bfcvt z0.h, p0/m, z2.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcvt_bf16_f32_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: bfcvt z0.h, p0/z, z2.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> %x, <vscale x 4 x i1> %pg, <vscale x 4 x float> %y)
+ ret <vscale x 8 x bfloat> %0
+}
+
+define <vscale x 8 x half> @test_svcvt_f16_f64_ptrue_u(double %z0, <vscale x 2 x double> %x) {
+; CHECK-LABEL: test_svcvt_f16_f64_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvt z0.h, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f16_f64_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvt z0.h, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f64(<vscale x 8 x half> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_svcvt_f16_f64_ptrue(double %z0, <vscale x 8 x half> %x, <vscale x 2 x double> %y ) {
+; CHECK-LABEL: test_svcvt_f16_f64_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvt z0.h, p0/m, z2.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f16_f64_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvt z0.h, p0/z, z2.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f64(<vscale x 8 x half> %x, <vscale x 2 x i1> %pg, <vscale x 2 x double> %y)
+ ret <vscale x 8 x half> %0
+}
+
+define <vscale x 4 x float> @test_svcvt_f32_f64_ptrue_u(double %z0, <vscale x 2 x double> %x) {
+; CHECK-LABEL: test_svcvt_f32_f64_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvt z0.s, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f32_f64_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvt z0.s, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f64(<vscale x 4 x float> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ ret <vscale x 4 x float> %0
+}
+
+define <vscale x 4 x float> @test_svcvt_f32_f64_ptrue(double %z0, <vscale x 4 x float> %x, <vscale x 2 x double> %y ) {
+; CHECK-LABEL: test_svcvt_f32_f64_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvt z0.s, p0/m, z2.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f32_f64_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvt z0.s, p0/z, z2.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f64(<vscale x 4 x float> %x, <vscale x 2 x i1> %pg, <vscale x 2 x double> %y)
+ ret <vscale x 4 x float> %0
+}
+
+define <vscale x 4 x float> @test_svcvt_f32_f16_ptrue_u(double %z0, <vscale x 8 x half> %x) {
+; CHECK-LABEL: test_svcvt_f32_f16_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fcvt z0.s, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f32_f16_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: fcvt z0.s, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f16(<vscale x 4 x float> poison, <vscale x 4 x i1> %pg, <vscale x 8 x half> %x)
+ ret <vscale x 4 x float> %0
+}
+
+define <vscale x 4 x float> @test_svcvt_f32_f16_ptrue(double %z0, <vscale x 4 x float> %x, <vscale x 8 x half> %y ) {
+; CHECK-LABEL: test_svcvt_f32_f16_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fcvt z0.s, p0/m, z2.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f32_f16_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: fcvt z0.s, p0/z, z2.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f16(<vscale x 4 x float> %x, <vscale x 4 x i1> %pg, <vscale x 8 x half> %y)
+ ret <vscale x 4 x float> %0
+}
+
+define <vscale x 2 x double> @test_svcvt_f64_f16_ptrue_u(double %z0, <vscale x 8 x half> %x) {
+; CHECK-LABEL: test_svcvt_f64_f16_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvt z0.d, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f64_f16_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvt z0.d, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f16(<vscale x 2 x double> poison, <vscale x 2 x i1> %pg, <vscale x 8 x half> %x)
+ ret <vscale x 2 x double> %0
+}
+
+define <vscale x 2 x double> @test_svcvt_f64_f16_ptrue(double %z0, <vscale x 2 x double> %x, <vscale x 8 x half> %y ) {
+; CHECK-LABEL: test_svcvt_f64_f16_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvt z0.d, p0/m, z2.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f64_f16_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvt z0.d, p0/z, z2.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f16(<vscale x 2 x double> %x, <vscale x 2 x i1> %pg, <vscale x 8 x half> %y)
+ ret <vscale x 2 x double> %0
+}
+
+define <vscale x 2 x double> @test_svcvt_f64_f32_ptrue_u(double %z0, <vscale x 4 x float> %x) {
+; CHECK-LABEL: test_svcvt_f64_f32_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvt z0.d, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f64_f32_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvt z0.d, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f32(<vscale x 2 x double> poison, <vscale x 2 x i1> %pg, <vscale x 4 x float> %x)
+ ret <vscale x 2 x double> %0
+}
+
+define <vscale x 2 x double> @test_svcvt_f64_f32_ptrue(double %z0, <vscale x 2 x double> %x, <vscale x 4 x float> %y ) {
+; CHECK-LABEL: test_svcvt_f64_f32_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvt z0.d, p0/m, z2.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f64_f32_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvt z0.d, p0/z, z2.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f32(<vscale x 2 x double> %x, <vscale x 2 x i1> %pg, <vscale x 4 x float> %y)
+ ret <vscale x 2 x double> %0
+}
diff --git a/llvm/test/CodeGen/AArch64/zeroing-forms-fcvtlt-fcvtx.ll b/llvm/test/CodeGen/AArch64/zeroing-forms-fcvtlt-fcvtx.ll
index 60879b1529230f..c7431e11c21ca3 100644
--- a/llvm/test/CodeGen/AArch64/zeroing-forms-fcvtlt-fcvtx.ll
+++ b/llvm/test/CodeGen/AArch64/zeroing-forms-fcvtlt-fcvtx.ll
@@ -18,7 +18,7 @@ define <vscale x 4 x float> @test_svcvtlt_f32_f16_x_1(<vscale x 4 x i1> %pg, <vs
; CHECK-2p2-NEXT: fcvtlt z0.s, p0/z, z0.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvtlt.f32f16(<vscale x 4 x float> undef, <vscale x 4 x i1> %pg, <vscale x 8 x half> %x)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvtlt.f32f16(<vscale x 4 x float> poison, <vscale x 4 x i1> %pg, <vscale x 8 x half> %x)
ret <vscale x 4 x float> %0
}
@@ -33,7 +33,7 @@ define <vscale x 4 x float> @test_svcvtlt_f32_f16_x_2(<vscale x 4 x i1> %pg, dou
; CHECK-2p2-NEXT: fcvtlt z0.s, p0/z, z1.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvtlt.f32f16(<vscale x 4 x float> undef, <vscale x 4 x i1> %pg, <vscale x 8 x half> %x)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvtlt.f32f16(<vscale x 4 x float> poison, <vscale x 4 x i1> %pg, <vscale x 8 x half> %x)
ret <vscale x 4 x float> %0
}
@@ -64,7 +64,7 @@ define <vscale x 2 x double> @test_svcvtlt_f64_f32_x_1(<vscale x 2 x i1> %pg, <v
; CHECK-2p2-NEXT: fcvtlt z0.d, p0/z, z0.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvtlt.f64f32(<vscale x 2 x double> undef, <vscale x 2 x i1> %pg, <vscale x 4 x float> %x)
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvtlt.f64f32(<vscale x 2 x double> poison, <vscale x 2 x i1> %pg, <vscale x 4 x float> %x)
ret <vscale x 2 x double> %0
}
@@ -79,7 +79,7 @@ define <vscale x 2 x double> @test_svcvtlt_f64_f32_x_2(<vscale x 2 x i1> %pg, do
; CHECK-2p2-NEXT: fcvtlt z0.d, p0/z, z1.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvtlt.f64f32(<vscale x 2 x double> undef, <vscale x 2 x i1> %pg, <vscale x 4 x float> %x)
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvtlt.f64f32(<vscale x 2 x double> poison, <vscale x 2 x i1> %pg, <vscale x 4 x float> %x)
ret <vscale x 2 x double> %0
}
@@ -110,7 +110,7 @@ define <vscale x 4 x float> @test_svcvtx_f32_f64_x_1(<vscale x 2 x i1> %pg, <vsc
; CHECK-2p2-NEXT: fcvtx z0.s, p0/z, z0.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvtx.f32f64(<vscale x 4 x float> undef, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvtx.f32f64(<vscale x 4 x float> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
ret <vscale x 4 x float> %0
}
@@ -125,7 +125,7 @@ define <vscale x 4 x float> @test_svcvtx_f32_f64_x_2(<vscale x 2 x i1> %pg, doub
; CHECK-2p2-NEXT: fcvtx z0.s, p0/z, z1.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvtx.f32f64(<vscale x 4 x float> undef, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvtx.f32f64(<vscale x 4 x float> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
ret <vscale x 4 x float> %0
}
@@ -144,3 +144,114 @@ entry:
%0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvtx.f32f64(<vscale x 4 x float> zeroinitializer, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
ret <vscale x 4 x float> %0
}
+
+define <vscale x 4 x float> @test_svcvtlt_f32_f16_ptrue_u(double %z0, <vscale x 8 x half> %x) {
+; CHECK-LABEL: test_svcvtlt_f32_f16_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fcvtlt z0.s, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcvtlt_f32_f16_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: fcvtlt z0.s, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvtlt.f32f16(<vscale x 4 x float> poison, <vscale x 4 x i1> %pg, <vscale x 8 x half> %x)
+ ret <vscale x 4 x float> %0
+}
+
+define <vscale x 4 x float> @test_svcvtlt_f32_f16_ptrue(double %z0, <vscale x 4 x float> %x, <vscale x 8 x half> %y) {
+; CHECK-LABEL: test_svcvtlt_f32_f16_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fcvtlt z0.s, p0/m, z2.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcvtlt_f32_f16_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: fcvtlt z0.s, p0/z, z2.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvtlt.f32f16(<vscale x 4 x float> %x, <vscale x 4 x i1> %pg, <vscale x 8 x half> %y)
+ ret <vscale x 4 x float> %0
+}
+
+define <vscale x 2 x double> @test_svcvtlt_f64_f32_ptrue_u(double %z0, <vscale x 4 x float> %x) {
+; CHECK-LABEL: test_svcvtlt_f64_f32_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtlt z0.d, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcvtlt_f64_f32_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvtlt z0.d, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvtlt.f64f32(<vscale x 2 x double> poison, <vscale x 2 x i1> %pg, <vscale x 4 x float> %x)
+ ret <vscale x 2 x double> %0
+}
+
+define <vscale x 2 x double> @test_svcvtlt_f64_f32_ptrue(double %z0, <vscale x 2 x double> %x, <vscale x 4 x float> %y) {
+; CHECK-LABEL: test_svcvtlt_f64_f32_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtlt z0.d, p0/m, z2.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcvtlt_f64_f32_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvtlt z0.d, p0/z, z2.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvtlt.f64f32(<vscale x 2 x double> %x, <vscale x 2 x i1> %pg, <vscale x 4 x float> %y)
+ ret <vscale x 2 x double> %0
+}
+
+define <vscale x 4 x float> @test_svcvtx_f32_f64_ptrue_u(double %z0, <vscale x 2 x double> %x) {
+; CHECK-LABEL: test_svcvtx_f32_f64_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtx z0.s, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcvtx_f32_f64_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvtx z0.s, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvtx.f32f64(<vscale x 4 x float> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ ret <vscale x 4 x float> %0
+}
+
+define <vscale x 4 x float> @test_svcvtx_f32_f64_ptrue(double %z0, <vscale x 4 x float> %x, <vscale x 2 x double> %y) {
+; CHECK-LABEL: test_svcvtx_f32_f64_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtx z0.s, p0/m, z2.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcvtx_f32_f64_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvtx z0.s, p0/z, z2.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvtx.f32f64(<vscale x 4 x float> %x, <vscale x 2 x i1> %pg, <vscale x 2 x double> %y)
+ ret <vscale x 4 x float> %0
+}
diff --git a/llvm/test/CodeGen/AArch64/zeroing-forms-fcvtzsu.ll b/llvm/test/CodeGen/AArch64/zeroing-forms-fcvtzsu.ll
index b8b36d390330af..7259502bf44002 100644
--- a/llvm/test/CodeGen/AArch64/zeroing-forms-fcvtzsu.ll
+++ b/llvm/test/CodeGen/AArch64/zeroing-forms-fcvtzsu.ll
@@ -18,7 +18,7 @@ define <vscale x 4 x i32> @test_fcvtzs_s32_f64_x_1(<vscale x 2 x i1> %pg, <vscal
; CHECK-2p2-NEXT: fcvtzs z0.s, p0/z, z0.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.i32f64(<vscale x 4 x i32> undef, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.i32f64(<vscale x 4 x i32> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
ret <vscale x 4 x i32> %0
}
@@ -33,7 +33,7 @@ define <vscale x 4 x i32> @test_fcvtzs_s32_f64_x_2(<vscale x 2 x i1> %pg, double
; CHECK-2p2-NEXT: fcvtzs z0.s, p0/z, z1.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.i32f64(<vscale x 4 x i32> undef, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.i32f64(<vscale x 4 x i32> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
ret <vscale x 4 x i32> %0
}
@@ -64,7 +64,7 @@ define <vscale x 2 x i64> @test_fcvtzs_s64_f32_x_1(<vscale x 2 x i1> %pg, <vscal
; CHECK-2p2-NEXT: fcvtzs z0.d, p0/z, z0.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.i64f32(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 4 x float> %x)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.i64f32(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 4 x float> %x)
ret <vscale x 2 x i64> %0
}
@@ -79,7 +79,7 @@ define <vscale x 2 x i64> @test_fcvtzs_s64_f32_x_2(<vscale x 2 x i1> %pg, double
; CHECK-2p2-NEXT: fcvtzs z0.d, p0/z, z1.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.i64f32(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 4 x float> %x)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.i64f32(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 4 x float> %x)
ret <vscale x 2 x i64> %0
}
@@ -110,7 +110,7 @@ define <vscale x 4 x i32> @test_fcvtzs_s32_f16_x_1(<vscale x 4 x i1> %pg, <vscal
; CHECK-2p2-NEXT: fcvtzs z0.s, p0/z, z0.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.i32f16(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 8 x half> %x)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.i32f16(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 8 x half> %x)
ret <vscale x 4 x i32> %0
}
@@ -125,7 +125,7 @@ define <vscale x 4 x i32> @test_fcvtzs_s32_f16_x_2(<vscale x 4 x i1> %pg, double
; CHECK-2p2-NEXT: fcvtzs z0.s, p0/z, z1.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.i32f16(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 8 x half> %x)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.i32f16(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 8 x half> %x)
ret <vscale x 4 x i32> %0
}
@@ -156,7 +156,7 @@ define <vscale x 2 x i64> @test_fcvtzs_s64_f16_x_1(<vscale x 2 x i1> %pg, <vscal
; CHECK-2p2-NEXT: fcvtzs z0.d, p0/z, z0.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.i64f16(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 8 x half> %x)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.i64f16(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 8 x half> %x)
ret <vscale x 2 x i64> %0
}
@@ -171,7 +171,7 @@ define <vscale x 2 x i64> @test_fcvtzs_s64_f16_x_2(<vscale x 2 x i1> %pg, double
; CHECK-2p2-NEXT: fcvtzs z0.d, p0/z, z1.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.i64f16(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 8 x half> %x)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.i64f16(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 8 x half> %x)
ret <vscale x 2 x i64> %0
}
@@ -202,7 +202,7 @@ define <vscale x 4 x i32> @test_fcvtzu_u32_f64_x_1(<vscale x 2 x i1> %pg, <vscal
; CHECK-2p2-NEXT: fcvtzu z0.s, p0/z, z0.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.i32f64(<vscale x 4 x i32> undef, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.i32f64(<vscale x 4 x i32> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
ret <vscale x 4 x i32> %0
}
@@ -217,7 +217,7 @@ define <vscale x 4 x i32> @test_fcvtzu_u32_f64_x_2(<vscale x 2 x i1> %pg, double
; CHECK-2p2-NEXT: fcvtzu z0.s, p0/z, z1.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.i32f64(<vscale x 4 x i32> undef, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.i32f64(<vscale x 4 x i32> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
ret <vscale x 4 x i32> %0
}
@@ -248,7 +248,7 @@ define <vscale x 2 x i64> @test_fcvtzu_u64_f32_x_1(<vscale x 2 x i1> %pg, <vscal
; CHECK-2p2-NEXT: fcvtzu z0.d, p0/z, z0.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.i64f32(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 4 x float> %x)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.i64f32(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 4 x float> %x)
ret <vscale x 2 x i64> %0
}
@@ -263,7 +263,7 @@ define <vscale x 2 x i64> @test_fcvtzu_u64_f32_x_2(<vscale x 2 x i1> %pg, double
; CHECK-2p2-NEXT: fcvtzu z0.d, p0/z, z1.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.i64f32(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 4 x float> %x)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.i64f32(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 4 x float> %x)
ret <vscale x 2 x i64> %0
}
@@ -294,7 +294,7 @@ define <vscale x 4 x i32> @test_fcvtzu_u32_f16_x_1(<vscale x 4 x i1> %pg, <vscal
; CHECK-2p2-NEXT: fcvtzu z0.s, p0/z, z0.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.i32f16(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 8 x half> %x)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.i32f16(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 8 x half> %x)
ret <vscale x 4 x i32> %0
}
@@ -309,7 +309,7 @@ define <vscale x 4 x i32> @test_fcvtzu_u32_f16_x_2(<vscale x 4 x i1> %pg, double
; CHECK-2p2-NEXT: fcvtzu z0.s, p0/z, z1.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.i32f16(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 8 x half> %x)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.i32f16(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 8 x half> %x)
ret <vscale x 4 x i32> %0
}
@@ -340,7 +340,7 @@ define <vscale x 2 x i64> @test_fcvtzu_u64_f16_x_1(<vscale x 2 x i1> %pg, <vscal
; CHECK-2p2-NEXT: fcvtzu z0.d, p0/z, z0.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.i64f16(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 8 x half> %x)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.i64f16(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 8 x half> %x)
ret <vscale x 2 x i64> %0
}
@@ -355,7 +355,7 @@ define <vscale x 2 x i64> @test_fcvtzu_u64_f16_x_2(<vscale x 2 x i1> %pg, double
; CHECK-2p2-NEXT: fcvtzu z0.d, p0/z, z1.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.i64f16(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 8 x half> %x)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.i64f16(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 8 x half> %x)
ret <vscale x 2 x i64> %0
}
@@ -387,7 +387,7 @@ define <vscale x 8 x i16> @test_svcvt_s16_f16_x_1(<vscale x 8 x i1> %pg, <vscale
; CHECK-2p2-NEXT: fcvtzs z0.h, p0/z, z0.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzs.nxv8i16.nxv8f16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzs.nxv8i16.nxv8f16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
ret <vscale x 8 x i16> %0
}
@@ -403,7 +403,7 @@ define <vscale x 8 x i16> @test_svcvt_s16_f16_x_2(<vscale x 8 x i1> %pg, double
; CHECK-2p2-NEXT: fcvtzs z0.h, p0/z, z1.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzs.nxv8i16.nxv8f16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzs.nxv8i16.nxv8f16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
ret <vscale x 8 x i16> %0
}
@@ -434,7 +434,7 @@ define <vscale x 8 x i16> @test_svcvt_u16_f16_x_1(<vscale x 8 x i1> %pg, <vscale
; CHECK-2p2-NEXT: fcvtzu z0.h, p0/z, z0.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzu.nxv8i16.nxv8f16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzu.nxv8i16.nxv8f16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
ret <vscale x 8 x i16> %0
}
@@ -450,7 +450,7 @@ define <vscale x 8 x i16> @test_svcvt_u16_f16_x_2(<vscale x 8 x i1> %pg, double
; CHECK-2p2-NEXT: fcvtzu z0.h, p0/z, z1.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzu.nxv8i16.nxv8f16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzu.nxv8i16.nxv8f16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
ret <vscale x 8 x i16> %0
}
@@ -481,7 +481,7 @@ define <vscale x 4 x i32> @test_svcvt_s32_f32_x_1(<vscale x 4 x i1> %pg, <vscale
; CHECK-2p2-NEXT: fcvtzs z0.s, p0/z, z0.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.nxv4i32.nxv4f32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.nxv4i32.nxv4f32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
ret <vscale x 4 x i32> %0
}
@@ -497,7 +497,7 @@ define <vscale x 4 x i32> @test_svcvt_s32_f32_x_2(<vscale x 4 x i1> %pg, double
; CHECK-2p2-NEXT: fcvtzs z0.s, p0/z, z1.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.nxv4i32.nxv4f32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.nxv4i32.nxv4f32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
ret <vscale x 4 x i32> %0
}
@@ -528,7 +528,7 @@ define <vscale x 4 x i32> @test_svcvt_u32_f32_x_1(<vscale x 4 x i1> %pg, <vscale
; CHECK-2p2-NEXT: fcvtzu z0.s, p0/z, z0.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.nxv4i32.nxv4f32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.nxv4i32.nxv4f32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
ret <vscale x 4 x i32> %0
}
@@ -544,7 +544,7 @@ define <vscale x 4 x i32> @test_svcvt_u32_f32_x_2(<vscale x 4 x i1> %pg, double
; CHECK-2p2-NEXT: fcvtzu z0.s, p0/z, z1.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.nxv4i32.nxv4f32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.nxv4i32.nxv4f32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
ret <vscale x 4 x i32> %0
}
@@ -575,7 +575,7 @@ define <vscale x 2 x i64> @test_svcvt_s64_f64_x_1(<vscale x 2 x i1> %pg, <vscale
; CHECK-2p2-NEXT: fcvtzs z0.d, p0/z, z0.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.nxv2i64.nxv2f64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.nxv2i64.nxv2f64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
ret <vscale x 2 x i64> %0
}
@@ -591,7 +591,7 @@ define <vscale x 2 x i64> @test_svcvt_s64_f64_x_2(<vscale x 2 x i1> %pg, double
; CHECK-2p2-NEXT: fcvtzs z0.d, p0/z, z1.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.nxv2i64.nxv2f64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.nxv2i64.nxv2f64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
ret <vscale x 2 x i64> %0
}
@@ -622,7 +622,7 @@ define <vscale x 2 x i64> @test_svcvt_u64_f64_x_1(<vscale x 2 x i1> %pg, <vscale
; CHECK-2p2-NEXT: fcvtzu z0.d, p0/z, z0.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.nxv2i64.nxv2f64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.nxv2i64.nxv2f64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
ret <vscale x 2 x i64> %0
}
@@ -638,7 +638,7 @@ define <vscale x 2 x i64> @test_svcvt_u64_f64_x_2(<vscale x 2 x i1> %pg, double
; CHECK-2p2-NEXT: fcvtzu z0.d, p0/z, z1.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.nxv2i64.nxv2f64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.nxv2i64.nxv2f64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
ret <vscale x 2 x i64> %0
}
@@ -657,3 +657,527 @@ entry:
%0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.nxv2i64.nxv2f64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
ret <vscale x 2 x i64> %0
}
+
+define <vscale x 4 x i32> @test_fcvtzs_i32_f64_ptrue_u(double %z0, <vscale x 2 x double> %x) {
+; CHECK-LABEL: test_fcvtzs_i32_f64_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzs z0.s, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzs_i32_f64_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvtzs z0.s, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.i32f64(<vscale x 4 x i32> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_fcvtzs_i32_f64_ptrue(double %z0, <vscale x 4 x i32> %x, <vscale x 2 x double> %y) {
+; CHECK-LABEL: test_fcvtzs_i32_f64_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzs z0.s, p0/m, z2.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzs_i32_f64_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvtzs z0.s, p0/z, z2.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.i32f64(<vscale x 4 x i32> %x, <vscale x 2 x i1> %pg, <vscale x 2 x double> %y)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_fcvtzu_i32_f64_ptrue_u(double %z0, <vscale x 2 x double> %x) {
+; CHECK-LABEL: test_fcvtzu_i32_f64_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzu z0.s, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzu_i32_f64_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvtzu z0.s, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.i32f64(<vscale x 4 x i32> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_fcvtzu_i32_f64_ptrue(double %z0, <vscale x 4 x i32> %x, <vscale x 2 x double> %y) {
+; CHECK-LABEL: test_fcvtzu_i32_f64_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzu z0.s, p0/m, z2.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzu_i32_f64_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvtzu z0.s, p0/z, z2.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.i32f64(<vscale x 4 x i32> %x, <vscale x 2 x i1> %pg, <vscale x 2 x double> %y)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 2 x i64> @test_fcvtzs_i64_f32_ptrue_u(double %z0, <vscale x 4 x float> %x) {
+; CHECK-LABEL: test_fcvtzs_i64_f32_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzs_i64_f32_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvtzs z0.d, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.i64f32(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 4 x float> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_fcvtzs_i64_f32_ptrue(double %z0, <vscale x 2 x i64> %x, <vscale x 4 x float> %y) {
+; CHECK-LABEL: test_fcvtzs_i64_f32_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z2.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzs_i64_f32_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvtzs z0.d, p0/z, z2.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.i64f32(<vscale x 2 x i64> %x, <vscale x 2 x i1> %pg, <vscale x 4 x float> %y)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_fcvtzu_i64_f32_ptrue_u(double %z0, <vscale x 4 x float> %x) {
+; CHECK-LABEL: test_fcvtzu_i64_f32_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzu z0.d, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzu_i64_f32_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvtzu z0.d, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.i64f32(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 4 x float> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_fcvtzu_i64_f32_ptrue(double %z0, <vscale x 2 x i64> %x, <vscale x 4 x float> %y) {
+; CHECK-LABEL: test_fcvtzu_i64_f32_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzu z0.d, p0/m, z2.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzu_i64_f32_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvtzu z0.d, p0/z, z2.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.i64f32(<vscale x 2 x i64> %x, <vscale x 2 x i1> %pg, <vscale x 4 x float> %y)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 4 x i32> @test_fcvtzs_i32_f16_ptrue_u(double %z0, <vscale x 8 x half> %x) {
+; CHECK-LABEL: test_fcvtzs_i32_f16_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fcvtzs z0.s, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzs_i32_f16_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: fcvtzs z0.s, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.i32f16(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 8 x half> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_fcvtzs_i32_f16_ptrue(double %z0, <vscale x 4 x i32> %x, <vscale x 8 x half> %y) {
+; CHECK-LABEL: test_fcvtzs_i32_f16_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fcvtzs z0.s, p0/m, z2.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzs_i32_f16_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: fcvtzs z0.s, p0/z, z2.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.i32f16(<vscale x 4 x i32> %x, <vscale x 4 x i1> %pg, <vscale x 8 x half> %y)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_fcvtzu_i32_f16_ptrue_u(double %z0, <vscale x 8 x half> %x) {
+; CHECK-LABEL: test_fcvtzu_i32_f16_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fcvtzu z0.s, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzu_i32_f16_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: fcvtzu z0.s, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.i32f16(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 8 x half> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_fcvtzu_i32_f16_ptrue(double %z0, <vscale x 4 x i32> %x, <vscale x 8 x half> %y) {
+; CHECK-LABEL: test_fcvtzu_i32_f16_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fcvtzu z0.s, p0/m, z2.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzu_i32_f16_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: fcvtzu z0.s, p0/z, z2.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.i32f16(<vscale x 4 x i32> %x, <vscale x 4 x i1> %pg, <vscale x 8 x half> %y)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 2 x i64> @test_fcvtzs_i64_f16_ptrue_u(double %z0, <vscale x 8 x half> %x) {
+; CHECK-LABEL: test_fcvtzs_i64_f16_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzs_i64_f16_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvtzs z0.d, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.i64f16(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 8 x half> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_fcvtzs_i64_f16_ptrue(double %z0, <vscale x 2 x i64> %x, <vscale x 8 x half> %y) {
+; CHECK-LABEL: test_fcvtzs_i64_f16_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z2.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzs_i64_f16_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvtzs z0.d, p0/z, z2.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.i64f16(<vscale x 2 x i64> %x, <vscale x 2 x i1> %pg, <vscale x 8 x half> %y)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_fcvtzu_i64_f16_ptrue_u(double %z0, <vscale x 8 x half> %x) {
+; CHECK-LABEL: test_fcvtzu_i64_f16_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzu z0.d, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzu_i64_f16_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvtzu z0.d, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.i64f16(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 8 x half> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_fcvtzu_i64_f16_ptrue(double %z0, <vscale x 2 x i64> %x, <vscale x 8 x half> %y) {
+; CHECK-LABEL: test_fcvtzu_i64_f16_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzu z0.d, p0/m, z2.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzu_i64_f16_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvtzu z0.d, p0/z, z2.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.i64f16(<vscale x 2 x i64> %x, <vscale x 2 x i1> %pg, <vscale x 8 x half> %y)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 8 x i16> @test_fcvtzs_i16_f16_ptrue_u(double %z0, <vscale x 8 x half> %x) {
+; CHECK-LABEL: test_fcvtzs_i16_f16_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: fcvtzs z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzs_i16_f16_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.h
+; CHECK-2p2-NEXT: fcvtzs z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzs.nxv8i16.nxv8f16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_fcvtzs_i16_f16_ptrue(double %z0, <vscale x 8 x i16> %x, <vscale x 8 x half> %y) {
+; CHECK-LABEL: test_fcvtzs_i16_f16_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: fcvtzs z0.h, p0/m, z2.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzs_i16_f16_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.h
+; CHECK-2p2-NEXT: fcvtzs z0.h, p0/z, z2.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzs.nxv8i16.nxv8f16(<vscale x 8 x i16> %x, <vscale x 8 x i1> %pg, <vscale x 8 x half> %y)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_fcvtzu_i16_f16_ptrue_u(double %z0, <vscale x 8 x half> %x) {
+; CHECK-LABEL: test_fcvtzu_i16_f16_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: fcvtzu z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzu_i16_f16_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.h
+; CHECK-2p2-NEXT: fcvtzu z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzu.nxv8i16.nxv8f16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_fcvtzu_i16_f16_ptrue(double %z0, <vscale x 8 x i16> %x, <vscale x 8 x half> %y) {
+; CHECK-LABEL: test_fcvtzu_i16_f16_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: fcvtzu z0.h, p0/m, z2.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzu_i16_f16_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.h
+; CHECK-2p2-NEXT: fcvtzu z0.h, p0/z, z2.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzu.nxv8i16.nxv8f16(<vscale x 8 x i16> %x, <vscale x 8 x i1> %pg, <vscale x 8 x half> %y)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 4 x i32> @test_fcvtzs_i32_f32_ptrue_u(double %z0, <vscale x 4 x float> %x) {
+; CHECK-LABEL: test_fcvtzs_i32_f32_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: fcvtzs z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzs_i32_f32_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: fcvtzs z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.nxv4i32.nxv4f32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_fcvtzs_i32_f32_ptrue(double %z0, <vscale x 4 x i32> %x, <vscale x 4 x float> %y) {
+; CHECK-LABEL: test_fcvtzs_i32_f32_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: fcvtzs z0.s, p0/m, z2.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzs_i32_f32_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: fcvtzs z0.s, p0/z, z2.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.nxv4i32.nxv4f32(<vscale x 4 x i32> %x, <vscale x 4 x i1> %pg, <vscale x 4 x float> %y)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_fcvtzu_i32_f32_ptrue_u(double %z0, <vscale x 4 x float> %x) {
+; CHECK-LABEL: test_fcvtzu_i32_f32_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: fcvtzu z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzu_i32_f32_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: fcvtzu z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.nxv4i32.nxv4f32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_fcvtzu_i32_f32_ptrue(double %z0, <vscale x 4 x i32> %x, <vscale x 4 x float> %y) {
+; CHECK-LABEL: test_fcvtzu_i32_f32_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: fcvtzu z0.s, p0/m, z2.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzu_i32_f32_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: fcvtzu z0.s, p0/z, z2.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.nxv4i32.nxv4f32(<vscale x 4 x i32> %x, <vscale x 4 x i1> %pg, <vscale x 4 x float> %y)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 2 x i64> @test_fcvtzs_i64_f64_ptrue_u(double %z0, <vscale x 2 x double> %x) {
+; CHECK-LABEL: test_fcvtzs_i64_f64_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzs_i64_f64_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvtzs z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.nxv2i64.nxv2f64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_fcvtzs_i64_f64_ptrue(double %z0, <vscale x 2 x i64> %x, <vscale x 2 x double> %y) {
+; CHECK-LABEL: test_fcvtzs_i64_f64_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z2.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzs_i64_f64_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvtzs z0.d, p0/z, z2.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.nxv2i64.nxv2f64(<vscale x 2 x i64> %x, <vscale x 2 x i1> %pg, <vscale x 2 x double> %y)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_fcvtzu_i64_f64_ptrue_u(double %z0, <vscale x 2 x double> %x) {
+; CHECK-LABEL: test_fcvtzu_i64_f64_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: fcvtzu z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzu_i64_f64_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvtzu z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.nxv2i64.nxv2f64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_fcvtzu_i64_f64_ptrue(double %z0, <vscale x 2 x i64> %x, <vscale x 2 x double> %y) {
+; CHECK-LABEL: test_fcvtzu_i64_f64_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: fcvtzu z0.d, p0/m, z2.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzu_i64_f64_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvtzu z0.d, p0/z, z2.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.nxv2i64.nxv2f64(<vscale x 2 x i64> %x, <vscale x 2 x i1> %pg, <vscale x 2 x double> %y)
+ ret <vscale x 2 x i64> %0
+}
>From 335fe0bfa26145b4723dfff475d5c6c33e090e28 Mon Sep 17 00:00:00 2001
From: Momchil Velikov <momchil.velikov at arm.com>
Date: Tue, 19 Nov 2024 11:53:56 +0000
Subject: [PATCH 2/8] [AArch64] Generate zeroing forms of certain SVE2.2
instructions (5/11)
---
.../lib/Target/AArch64/AArch64SVEInstrInfo.td | 4 +-
llvm/lib/Target/AArch64/SVEInstrFormats.td | 11 +-
.../CodeGen/AArch64/zeroing-forms-uscvtf.ll | 658 ++++++++++++++++++
3 files changed, 670 insertions(+), 3 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/zeroing-forms-uscvtf.ll
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 1e297eb5feb266..23d626fe786630 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -4277,8 +4277,8 @@ let Predicates = [HasSVE2p2orSME2p2] in {
defm FCVTZS_ZPzZ : sve_fp_z2op_p_zd_d<0b0, "fcvtzs", "int_aarch64_sve_fcvtzs", AArch64fcvtzs_mt>;
defm FCVTZU_ZPzZ : sve_fp_z2op_p_zd_d<0b1, "fcvtzu", "int_aarch64_sve_fcvtzu", AArch64fcvtzu_mt>;
// Integer convert to floating-point, zeroing predicate
- defm SCVTF_ZPzZ : sve_fp_z2op_p_zd_c<0b0, "scvtf">;
- defm UCVTF_ZPzZ : sve_fp_z2op_p_zd_c<0b1, "ucvtf">;
+ defm SCVTF_ZPzZ : sve_fp_z2op_p_zd_c<0b0, "scvtf", "int_aarch64_sve_scvtf", AArch64scvtf_mt>;
+ defm UCVTF_ZPzZ : sve_fp_z2op_p_zd_c<0b1, "ucvtf", "int_aarch64_sve_ucvtf", AArch64ucvtf_mt>;
// Signed integer base 2 logarithm of fp value, zeroing predicate
defm FLOGB_ZPzZ : sve_fp_z2op_p_zd_d_flogb<"flogb">;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index a1bce3601aab60..027f83a6a50442 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -3323,7 +3323,7 @@ multiclass sve_fp_z2op_p_zd_d<bit U, string asm, string int_op, SDPatternOperato
defm : SVE_1_Op_PassthruUndefZero_Pat<nxv2i64, ir_op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoD)>;
}
-multiclass sve_fp_z2op_p_zd_c<bit U, string asm> {
+multiclass sve_fp_z2op_p_zd_c<bit U, string asm, string int_op, SDPatternOperator ir_op> {
def _HtoH : sve_fp_z2op_p_zd<{ 0b011001, U }, asm, ZPR16, ZPR16>;
def _StoH : sve_fp_z2op_p_zd<{ 0b011010, U }, asm, ZPR32, ZPR16>;
def _StoS : sve_fp_z2op_p_zd<{ 0b101010, U }, asm, ZPR32, ZPR32>;
@@ -3331,6 +3331,15 @@ multiclass sve_fp_z2op_p_zd_c<bit U, string asm> {
def _DtoS : sve_fp_z2op_p_zd<{ 0b111010, U }, asm, ZPR64, ZPR32>;
def _DtoH : sve_fp_z2op_p_zd<{ 0b011011, U }, asm, ZPR64, ZPR16>;
def _DtoD : sve_fp_z2op_p_zd<{ 0b111011, U }, asm, ZPR64, ZPR64>;
+
+ def : SVE_3_Op_UndefZero_Pat<nxv4f32, !cast<SDPatternOperator>(int_op # _f32i64), nxv4f32, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _DtoS)>;
+ def : SVE_3_Op_UndefZero_Pat<nxv2f64, !cast<SDPatternOperator>(int_op # _f64i32), nxv2f64, nxv2i1, nxv4i32, !cast<Instruction>(NAME # _StoD)>;
+ def : SVE_3_Op_UndefZero_Pat<nxv8f16, !cast<SDPatternOperator>(int_op # _f16i32), nxv8f16, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _StoH)>;
+ def : SVE_3_Op_UndefZero_Pat<nxv8f16, !cast<SDPatternOperator>(int_op # _f16i64), nxv8f16, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _DtoH)>;
+
+ def : SVE_1_Op_PassthruUndefZero_Pat<nxv8f16, ir_op, nxv8i1,nxv8i16, !cast<Instruction>(NAME # _HtoH)>;
+ def : SVE_1_Op_PassthruUndefZero_Pat<nxv4f32, ir_op, nxv4i1,nxv4i32, !cast<Instruction>(NAME # _StoS)>;
+ def : SVE_1_Op_PassthruUndefZero_Pat<nxv2f64, ir_op, nxv2i1,nxv2i64, !cast<Instruction>(NAME # _DtoD)>;
}
multiclass sve_fp_z2op_p_zd_d_flogb<string asm> {
diff --git a/llvm/test/CodeGen/AArch64/zeroing-forms-uscvtf.ll b/llvm/test/CodeGen/AArch64/zeroing-forms-uscvtf.ll
new file mode 100644
index 00000000000000..1ca3b1a6e31cb9
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/zeroing-forms-uscvtf.ll
@@ -0,0 +1,658 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mattr=+sve < %s | FileCheck %s
+; RUN: llc -mattr=+sve2p2 < %s | FileCheck %s -check-prefix CHECK-2p2
+
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
+; RUN: llc -mattr=+sme2p2 -force-streaming < %s | FileCheck %s -check-prefix CHECK-2p2
+
+target triple = "aarch64-linux"
+
+define <vscale x 4 x float> @test_scvtf_f32_s64_x_1(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_scvtf_f32_s64_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf z0.s, p0/m, z0.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_scvtf_f32_s64_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: scvtf z0.s, p0/z, z0.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtf.f32i64(<vscale x 4 x float> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 4 x float> %0
+}
+
+define <vscale x 4 x float> @test_scvtf_f32_s64_x_2(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_scvtf_f32_s64_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf z0.s, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_scvtf_f32_s64_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: scvtf z0.s, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtf.f32i64(<vscale x 4 x float> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 4 x float> %0
+}
+
+define <vscale x 4 x float> @test_scvtf_f32_s64_z(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_scvtf_f32_s64_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.s, #0 // =0x0
+; CHECK-NEXT: scvtf z0.s, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_scvtf_f32_s64_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: scvtf z0.s, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtf.f32i64(<vscale x 4 x float> zeroinitializer, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 4 x float> %0
+}
+
+define <vscale x 2 x double> @test_scvtf_f64_s32_x_1(<vscale x 2 x i1> %pg, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_scvtf_f64_s32_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf z0.d, p0/m, z0.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_scvtf_f64_s32_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: scvtf z0.d, p0/z, z0.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtf.f64i32(<vscale x 2 x double> undef, <vscale x 2 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 2 x double> %0
+}
+
+define <vscale x 2 x double> @test_scvtf_f64_s32_x_2(<vscale x 2 x i1> %pg, double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_scvtf_f64_s32_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf z0.d, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_scvtf_f64_s32_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: scvtf z0.d, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtf.f64i32(<vscale x 2 x double> undef, <vscale x 2 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 2 x double> %0
+}
+
+define <vscale x 2 x double> @test_scvtf_f64_s32_z(<vscale x 2 x i1> %pg, double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_scvtf_f64_s32_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, #0 // =0x0
+; CHECK-NEXT: scvtf z0.d, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_scvtf_f64_s32_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: scvtf z0.d, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtf.f64i32(<vscale x 2 x double> zeroinitializer, <vscale x 2 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 2 x double> %0
+}
+
+define <vscale x 8 x half> @test_scvtf_f16_s32_x_1(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_scvtf_f16_s32_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf z0.h, p0/m, z0.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_scvtf_f16_s32_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: scvtf z0.h, p0/z, z0.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.f16i32(<vscale x 8 x half> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_scvtf_f16_s32_x_2(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_scvtf_f16_s32_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf z0.h, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_scvtf_f16_s32_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: scvtf z0.h, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.f16i32(<vscale x 8 x half> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_scvtf_f16_s32_z(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_scvtf_f16_s32_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.h, #0 // =0x0
+; CHECK-NEXT: scvtf z0.h, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_scvtf_f16_s32_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: scvtf z0.h, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.f16i32(<vscale x 8 x half> zeroinitializer, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_scvtf_f16_s64_x_1(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_scvtf_f16_s64_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf z0.h, p0/m, z0.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_scvtf_f16_s64_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: scvtf z0.h, p0/z, z0.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.f16i64(<vscale x 8 x half> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_scvtf_f16_s64_x_2(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_scvtf_f16_s64_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf z0.h, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_scvtf_f16_s64_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: scvtf z0.h, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.f16i64(<vscale x 8 x half> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_scvtf_f16_s64_z(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_scvtf_f16_s64_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.h, #0 // =0x0
+; CHECK-NEXT: scvtf z0.h, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_scvtf_f16_s64_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: scvtf z0.h, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.f16i64(<vscale x 8 x half> zeroinitializer, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 8 x half> %0
+}
+
+define <vscale x 4 x float> @test_ucvtf_f32_u64_x_1(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_ucvtf_f32_u64_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ucvtf z0.s, p0/m, z0.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_ucvtf_f32_u64_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ucvtf z0.s, p0/z, z0.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtf.f32i64(<vscale x 4 x float> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 4 x float> %0
+}
+
+define <vscale x 4 x float> @test_ucvtf_f32_u64_x_2(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_ucvtf_f32_u64_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ucvtf z0.s, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_ucvtf_f32_u64_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ucvtf z0.s, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtf.f32i64(<vscale x 4 x float> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 4 x float> %0
+}
+
+define <vscale x 4 x float> @test_ucvtf_f32_u64_z(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_ucvtf_f32_u64_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.s, #0 // =0x0
+; CHECK-NEXT: ucvtf z0.s, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_ucvtf_f32_u64_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ucvtf z0.s, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtf.f32i64(<vscale x 4 x float> zeroinitializer, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 4 x float> %0
+}
+
+define <vscale x 2 x double> @test_ucvtf_f64_u32_x_1(<vscale x 2 x i1> %pg, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_ucvtf_f64_u32_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ucvtf z0.d, p0/m, z0.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_ucvtf_f64_u32_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ucvtf z0.d, p0/z, z0.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtf.f64i32(<vscale x 2 x double> undef, <vscale x 2 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 2 x double> %0
+}
+
+define <vscale x 2 x double> @test_ucvtf_f64_u32_x_2(<vscale x 2 x i1> %pg, double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_ucvtf_f64_u32_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ucvtf z0.d, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_ucvtf_f64_u32_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ucvtf z0.d, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtf.f64i32(<vscale x 2 x double> undef, <vscale x 2 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 2 x double> %0
+}
+
+define <vscale x 2 x double> @test_ucvtf_f64_u32_z(<vscale x 2 x i1> %pg, double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_ucvtf_f64_u32_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, #0 // =0x0
+; CHECK-NEXT: ucvtf z0.d, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_ucvtf_f64_u32_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ucvtf z0.d, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtf.f64i32(<vscale x 2 x double> zeroinitializer, <vscale x 2 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 2 x double> %0
+}
+
+define <vscale x 8 x half> @test_ucvtf_f16_u32_x_1(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_ucvtf_f16_u32_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ucvtf z0.h, p0/m, z0.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_ucvtf_f16_u32_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ucvtf z0.h, p0/z, z0.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.f16i32(<vscale x 8 x half> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_ucvtf_f16_u32_x_2(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_ucvtf_f16_u32_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ucvtf z0.h, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_ucvtf_f16_u32_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ucvtf z0.h, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.f16i32(<vscale x 8 x half> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_ucvtf_f16_u32_z(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_ucvtf_f16_u32_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.h, #0 // =0x0
+; CHECK-NEXT: ucvtf z0.h, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_ucvtf_f16_u32_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ucvtf z0.h, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.f16i32(<vscale x 8 x half> zeroinitializer, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_ucvtf_f16_u64_x_1(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_ucvtf_f16_u64_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ucvtf z0.h, p0/m, z0.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_ucvtf_f16_u64_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ucvtf z0.h, p0/z, z0.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.f16i64(<vscale x 8 x half> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_ucvtf_f16_u64_x_2(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_ucvtf_f16_u64_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ucvtf z0.h, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_ucvtf_f16_u64_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ucvtf z0.h, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.f16i64(<vscale x 8 x half> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_ucvtf_f16_u64_z(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_ucvtf_f16_u64_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.h, #0 // =0x0
+; CHECK-NEXT: ucvtf z0.h, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_ucvtf_f16_u64_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ucvtf z0.h, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.f16i64(<vscale x 8 x half> zeroinitializer, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_svcvt_f16_s16_x_1(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svcvt_f16_s16_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf z0.h, p0/m, z0.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f16_s16_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: scvtf z0.h, p0/z, z0.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.nxv8f16.nxv8i16(<vscale x 8 x half> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_svcvt_f16_s16_x_2(<vscale x 8 x i1> %pg, double %z0, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svcvt_f16_s16_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: scvtf z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f16_s16_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: scvtf z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.nxv8f16.nxv8i16(<vscale x 8 x half> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_svcvt_f16_s16_z(<vscale x 8 x i1> %pg, double %z0, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svcvt_f16_s16_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.h, #0 // =0x0
+; CHECK-NEXT: scvtf z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f16_s16_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: scvtf z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.nxv8f16.nxv8i16(<vscale x 8 x half> zeroinitializer, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_svcvt_f16_u16_x_1(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svcvt_f16_u16_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ucvtf z0.h, p0/m, z0.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f16_u16_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ucvtf z0.h, p0/z, z0.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.nxv8f16.nxv8i16(<vscale x 8 x half> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_svcvt_f16_u16_x_2(<vscale x 8 x i1> %pg, double %z0, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svcvt_f16_u16_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: ucvtf z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f16_u16_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ucvtf z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.nxv8f16.nxv8i16(<vscale x 8 x half> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_svcvt_f16_u16_z(<vscale x 8 x i1> %pg, double %z0, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svcvt_f16_u16_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.h, #0 // =0x0
+; CHECK-NEXT: ucvtf z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f16_u16_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ucvtf z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.nxv8f16.nxv8i16(<vscale x 8 x half> zeroinitializer, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ ret <vscale x 8 x half> %0
+}
+
+define <vscale x 4 x float> @test_svcvt_f32_s32_x_1(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svcvt_f32_s32_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf z0.s, p0/m, z0.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f32_s32_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: scvtf z0.s, p0/z, z0.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtf.nxv4f32.nxv4i32(<vscale x 4 x float> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x float> %0
+}
+
+define <vscale x 4 x float> @test_svcvt_f32_s32_x_2(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svcvt_f32_s32_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: scvtf z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f32_s32_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: scvtf z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtf.nxv4f32.nxv4i32(<vscale x 4 x float> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x float> %0
+}
+
+define <vscale x 4 x float> @test_svcvt_f32_s32_z(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svcvt_f32_s32_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.s, #0 // =0x0
+; CHECK-NEXT: scvtf z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f32_s32_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: scvtf z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtf.nxv4f32.nxv4i32(<vscale x 4 x float> zeroinitializer, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x float> %0
+}
+
+define <vscale x 4 x float> @test_svcvt_f32_u32_x_1(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svcvt_f32_u32_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f32_u32_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ucvtf z0.s, p0/z, z0.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtf.nxv4f32.nxv4i32(<vscale x 4 x float> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x float> %0
+}
+
+define <vscale x 4 x float> @test_svcvt_f32_u32_x_2(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svcvt_f32_u32_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: ucvtf z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f32_u32_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ucvtf z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtf.nxv4f32.nxv4i32(<vscale x 4 x float> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x float> %0
+}
+
+define <vscale x 4 x float> @test_svcvt_f32_u32_z(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svcvt_f32_u32_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.s, #0 // =0x0
+; CHECK-NEXT: ucvtf z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f32_u32_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ucvtf z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtf.nxv4f32.nxv4i32(<vscale x 4 x float> zeroinitializer, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x float> %0
+}
+
+define <vscale x 2 x double> @test_svcvt_f64_s64_x_1(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svcvt_f64_s64_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: scvtf z0.d, p0/m, z0.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f64_s64_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: scvtf z0.d, p0/z, z0.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtf.nxv2f64.nxv2i64(<vscale x 2 x double> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 2 x double> %0
+}
+
+define <vscale x 2 x double> @test_svcvt_f64_s64_x_2(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svcvt_f64_s64_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: scvtf z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f64_s64_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: scvtf z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtf.nxv2f64.nxv2i64(<vscale x 2 x double> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 2 x double> %0
+}
+
+define <vscale x 2 x double> @test_svcvt_f64_s64_z(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svcvt_f64_s64_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, #0 // =0x0
+; CHECK-NEXT: scvtf z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f64_s64_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: scvtf z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtf.nxv2f64.nxv2i64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 2 x double> %0
+}
+
+define <vscale x 2 x double> @test_svcvt_f64_u64_x_1(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svcvt_f64_u64_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f64_u64_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ucvtf z0.d, p0/z, z0.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtf.nxv2f64.nxv2i64(<vscale x 2 x double> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 2 x double> %0
+}
+
+define <vscale x 2 x double> @test_svcvt_f64_u64_x_2(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svcvt_f64_u64_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: ucvtf z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f64_u64_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ucvtf z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtf.nxv2f64.nxv2i64(<vscale x 2 x double> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 2 x double> %0
+}
+
+define <vscale x 2 x double> @test_svcvt_f64_u64_z(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svcvt_f64_u64_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, #0 // =0x0
+; CHECK-NEXT: ucvtf z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f64_u64_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ucvtf z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtf.nxv2f64.nxv2i64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 2 x double> %0
+}
>From 983d8f12cbb6577bebaeea904b7270753966c68f Mon Sep 17 00:00:00 2001
From: Momchil Velikov <momchil.velikov at arm.com>
Date: Fri, 20 Dec 2024 14:41:57 +0000
Subject: [PATCH 3/8] [fixup] Rebase and add tests for the new all-true
patterns
---
llvm/lib/Target/AArch64/SVEInstrFormats.td | 16 +-
.../CodeGen/AArch64/zeroing-forms-uscvtf.ll | 525 ++++++++++++++++++
2 files changed, 533 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 027f83a6a50442..64d0d3749efdc4 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -3332,14 +3332,14 @@ multiclass sve_fp_z2op_p_zd_c<bit U, string asm, string int_op, SDPatternOperato
def _DtoH : sve_fp_z2op_p_zd<{ 0b011011, U }, asm, ZPR64, ZPR16>;
def _DtoD : sve_fp_z2op_p_zd<{ 0b111011, U }, asm, ZPR64, ZPR64>;
- def : SVE_3_Op_UndefZero_Pat<nxv4f32, !cast<SDPatternOperator>(int_op # _f32i64), nxv4f32, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _DtoS)>;
- def : SVE_3_Op_UndefZero_Pat<nxv2f64, !cast<SDPatternOperator>(int_op # _f64i32), nxv2f64, nxv2i1, nxv4i32, !cast<Instruction>(NAME # _StoD)>;
- def : SVE_3_Op_UndefZero_Pat<nxv8f16, !cast<SDPatternOperator>(int_op # _f16i32), nxv8f16, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _StoH)>;
- def : SVE_3_Op_UndefZero_Pat<nxv8f16, !cast<SDPatternOperator>(int_op # _f16i64), nxv8f16, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _DtoH)>;
-
- def : SVE_1_Op_PassthruUndefZero_Pat<nxv8f16, ir_op, nxv8i1,nxv8i16, !cast<Instruction>(NAME # _HtoH)>;
- def : SVE_1_Op_PassthruUndefZero_Pat<nxv4f32, ir_op, nxv4i1,nxv4i32, !cast<Instruction>(NAME # _StoS)>;
- def : SVE_1_Op_PassthruUndefZero_Pat<nxv2f64, ir_op, nxv2i1,nxv2i64, !cast<Instruction>(NAME # _DtoD)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv4f32, !cast<SDPatternOperator>(int_op # _f32i64), nxv4f32, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _DtoS)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv2f64, !cast<SDPatternOperator>(int_op # _f64i32), nxv2f64, nxv2i1, nxv4i32, !cast<Instruction>(NAME # _StoD)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv8f16, !cast<SDPatternOperator>(int_op # _f16i32), nxv8f16, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _StoH)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv8f16, !cast<SDPatternOperator>(int_op # _f16i64), nxv8f16, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _DtoH)>;
+
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv8f16, ir_op, nxv8i1,nxv8i16, !cast<Instruction>(NAME # _HtoH)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv4f32, ir_op, nxv4i1,nxv4i32, !cast<Instruction>(NAME # _StoS)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv2f64, ir_op, nxv2i1,nxv2i64, !cast<Instruction>(NAME # _DtoD)>;
}
multiclass sve_fp_z2op_p_zd_d_flogb<string asm> {
diff --git a/llvm/test/CodeGen/AArch64/zeroing-forms-uscvtf.ll b/llvm/test/CodeGen/AArch64/zeroing-forms-uscvtf.ll
index 1ca3b1a6e31cb9..06c87a70c19192 100644
--- a/llvm/test/CodeGen/AArch64/zeroing-forms-uscvtf.ll
+++ b/llvm/test/CodeGen/AArch64/zeroing-forms-uscvtf.ll
@@ -656,3 +656,528 @@ entry:
%0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtf.nxv2f64.nxv2i64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
ret <vscale x 2 x double> %0
}
+
+define <vscale x 4 x float> @test_scvtf_f32_i64_ptrue_u(double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_scvtf_f32_i64_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: scvtf z0.s, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_scvtf_f32_i64_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: scvtf z0.s, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtf.f32i64(<vscale x 4 x float> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 4 x float> %0
+}
+
+define <vscale x 4 x float> @test_scvtf_f32_i64_ptrue(double %z0,<vscale x 4 x float> %x, <vscale x 2 x i64> %y) {
+; CHECK-LABEL: test_scvtf_f32_i64_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: scvtf z0.s, p0/m, z2.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_scvtf_f32_i64_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: scvtf z0.s, p0/z, z2.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtf.f32i64(<vscale x 4 x float> %x, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %y)
+ ret <vscale x 4 x float> %0
+}
+
+define <vscale x 2 x double> @test_scvtf_f64_i32_ptrue_u(double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_scvtf_f64_i32_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: scvtf z0.d, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_scvtf_f64_i32_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: scvtf z0.d, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtf.f64i32(<vscale x 2 x double> poison, <vscale x 2 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 2 x double> %0
+}
+
+define <vscale x 2 x double> @test_scvtf_f64_i32_ptrue(double %z0,<vscale x 2 x double> %x, <vscale x 4 x i32> %y) {
+; CHECK-LABEL: test_scvtf_f64_i32_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: scvtf z0.d, p0/m, z2.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_scvtf_f64_i32_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: scvtf z0.d, p0/z, z2.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtf.f64i32(<vscale x 2 x double> %x, <vscale x 2 x i1> %pg, <vscale x 4 x i32> %y)
+ ret <vscale x 2 x double> %0
+}
+
+define <vscale x 8 x half> @test_scvtf_f16_i32_ptrue_u(double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_scvtf_f16_i32_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: scvtf z0.h, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_scvtf_f16_i32_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: scvtf z0.h, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.f16i32(<vscale x 8 x half> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_scvtf_f16_i32_ptrue(double %z0,<vscale x 8 x half> %x, <vscale x 4 x i32> %y) {
+; CHECK-LABEL: test_scvtf_f16_i32_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: scvtf z0.h, p0/m, z2.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_scvtf_f16_i32_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: scvtf z0.h, p0/z, z2.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.f16i32(<vscale x 8 x half> %x, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %y)
+ ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_scvtf_f16_i64_ptrue_u(double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_scvtf_f16_i64_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: scvtf z0.h, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_scvtf_f16_i64_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: scvtf z0.h, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.f16i64(<vscale x 8 x half> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_scvtf_f16_i64_ptrue(double %z0,<vscale x 8 x half> %x, <vscale x 2 x i64> %y) {
+; CHECK-LABEL: test_scvtf_f16_i64_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: scvtf z0.h, p0/m, z2.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_scvtf_f16_i64_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: scvtf z0.h, p0/z, z2.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.f16i64(<vscale x 8 x half> %x, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %y)
+ ret <vscale x 8 x half> %0
+}
+
+define <vscale x 4 x float> @test_ucvtf_f32_i64_ptrue_u(double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_ucvtf_f32_i64_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ucvtf z0.s, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_ucvtf_f32_i64_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: ucvtf z0.s, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtf.f32i64(<vscale x 4 x float> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 4 x float> %0
+}
+
+define <vscale x 4 x float> @test_ucvtf_f32_i64_ptrue(double %z0,<vscale x 4 x float> %x, <vscale x 2 x i64> %y) {
+; CHECK-LABEL: test_ucvtf_f32_i64_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ucvtf z0.s, p0/m, z2.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_ucvtf_f32_i64_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: ucvtf z0.s, p0/z, z2.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtf.f32i64(<vscale x 4 x float> %x, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %y)
+ ret <vscale x 4 x float> %0
+}
+
+define <vscale x 2 x double> @test_ucvtf_f64_i32_ptrue_u(double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_ucvtf_f64_i32_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ucvtf z0.d, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_ucvtf_f64_i32_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: ucvtf z0.d, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtf.f64i32(<vscale x 2 x double> poison, <vscale x 2 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 2 x double> %0
+}
+
+define <vscale x 2 x double> @test_ucvtf_f64_i32_ptrue(double %z0,<vscale x 2 x double> %x, <vscale x 4 x i32> %y) {
+; CHECK-LABEL: test_ucvtf_f64_i32_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ucvtf z0.d, p0/m, z2.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_ucvtf_f64_i32_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: ucvtf z0.d, p0/z, z2.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtf.f64i32(<vscale x 2 x double> %x, <vscale x 2 x i1> %pg, <vscale x 4 x i32> %y)
+ ret <vscale x 2 x double> %0
+}
+
+define <vscale x 8 x half> @test_ucvtf_f16_i32_ptrue_u(double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_ucvtf_f16_i32_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ucvtf z0.h, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_ucvtf_f16_i32_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: ucvtf z0.h, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.f16i32(<vscale x 8 x half> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_ucvtf_f16_i32_ptrue(double %z0,<vscale x 8 x half> %x, <vscale x 4 x i32> %y) {
+; CHECK-LABEL: test_ucvtf_f16_i32_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ucvtf z0.h, p0/m, z2.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_ucvtf_f16_i32_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: ucvtf z0.h, p0/z, z2.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.f16i32(<vscale x 8 x half> %x, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %y)
+ ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_ucvtf_f16_i64_ptrue_u(double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_ucvtf_f16_i64_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ucvtf z0.h, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_ucvtf_f16_i64_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: ucvtf z0.h, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.f16i64(<vscale x 8 x half> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_ucvtf_f16_i64_ptrue(double %z0,<vscale x 8 x half> %x, <vscale x 2 x i64> %y) {
+; CHECK-LABEL: test_ucvtf_f16_i64_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ucvtf z0.h, p0/m, z2.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_ucvtf_f16_i64_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: ucvtf z0.h, p0/z, z2.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.f16i64(<vscale x 8 x half> %x, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %y)
+ ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_scvtf_nxv8f16_nxv8i16_ptrue_u(double %z0, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_scvtf_nxv8f16_nxv8i16_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: scvtf z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_scvtf_nxv8f16_nxv8i16_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.h
+; CHECK-2p2-NEXT: scvtf z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.nxv8f16.nxv8i16(<vscale x 8 x half> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_scvtf_nxv8f16_nxv8i16_ptrue(double %z0,<vscale x 8 x half> %x, <vscale x 8 x i16> %y) {
+; CHECK-LABEL: test_scvtf_nxv8f16_nxv8i16_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: scvtf z0.h, p0/m, z2.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_scvtf_nxv8f16_nxv8i16_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.h
+; CHECK-2p2-NEXT: scvtf z0.h, p0/z, z2.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.nxv8f16.nxv8i16(<vscale x 8 x half> %x, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %y)
+ ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_ucvtf_nxv8f16_nxv8i16_ptrue_u(double %z0, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_ucvtf_nxv8f16_nxv8i16_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: ucvtf z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_ucvtf_nxv8f16_nxv8i16_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.h
+; CHECK-2p2-NEXT: ucvtf z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.nxv8f16.nxv8i16(<vscale x 8 x half> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_ucvtf_nxv8f16_nxv8i16_ptrue(double %z0,<vscale x 8 x half> %x, <vscale x 8 x i16> %y) {
+; CHECK-LABEL: test_ucvtf_nxv8f16_nxv8i16_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: ucvtf z0.h, p0/m, z2.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_ucvtf_nxv8f16_nxv8i16_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.h
+; CHECK-2p2-NEXT: ucvtf z0.h, p0/z, z2.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.nxv8f16.nxv8i16(<vscale x 8 x half> %x, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %y)
+ ret <vscale x 8 x half> %0
+}
+
+define <vscale x 4 x float> @test_scvtf_nxv4f32_nxv4i32_ptrue_u(double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_scvtf_nxv4f32_nxv4i32_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: scvtf z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_scvtf_nxv4f32_nxv4i32_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: scvtf z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtf.nxv4f32.nxv4i32(<vscale x 4 x float> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x float> %0
+}
+
+define <vscale x 4 x float> @test_scvtf_nxv4f32_nxv4i32_ptrue(double %z0,<vscale x 4 x float> %x, <vscale x 4 x i32> %y) {
+; CHECK-LABEL: test_scvtf_nxv4f32_nxv4i32_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: scvtf z0.s, p0/m, z2.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_scvtf_nxv4f32_nxv4i32_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: scvtf z0.s, p0/z, z2.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtf.nxv4f32.nxv4i32(<vscale x 4 x float> %x, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %y)
+ ret <vscale x 4 x float> %0
+}
+
+define <vscale x 4 x float> @test_ucvtf_nxv4f32_nxv4i32_ptrue_u(double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_ucvtf_nxv4f32_nxv4i32_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: ucvtf z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_ucvtf_nxv4f32_nxv4i32_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: ucvtf z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtf.nxv4f32.nxv4i32(<vscale x 4 x float> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x float> %0
+}
+
+define <vscale x 4 x float> @test_ucvtf_nxv4f32_nxv4i32_ptrue(double %z0,<vscale x 4 x float> %x, <vscale x 4 x i32> %y) {
+; CHECK-LABEL: test_ucvtf_nxv4f32_nxv4i32_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: ucvtf z0.s, p0/m, z2.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_ucvtf_nxv4f32_nxv4i32_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: ucvtf z0.s, p0/z, z2.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtf.nxv4f32.nxv4i32(<vscale x 4 x float> %x, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %y)
+ ret <vscale x 4 x float> %0
+}
+
+define <vscale x 2 x double> @test_scvtf_nxvf64_nxv2i64_ptrue_u(double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_scvtf_nxvf64_nxv2i64_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: scvtf z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_scvtf_nxvf64_nxv2i64_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: scvtf z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtf.nxvf64.nxv2i64(<vscale x 2 x double> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 2 x double> %0
+}
+
+define <vscale x 2 x double> @test_scvtf_nxvf64_nxv2i64_ptrue(double %z0,<vscale x 2 x double> %x, <vscale x 2 x i64> %y) {
+; CHECK-LABEL: test_scvtf_nxvf64_nxv2i64_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: scvtf z0.d, p0/m, z2.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_scvtf_nxvf64_nxv2i64_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: scvtf z0.d, p0/z, z2.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtf.nxvf64.nxv2i64(<vscale x 2 x double> %x, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %y)
+ ret <vscale x 2 x double> %0
+}
+
+define <vscale x 2 x double> @test_ucvtf_nxvf64_nxv2i64_ptrue_u(double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_ucvtf_nxvf64_nxv2i64_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: ucvtf z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_ucvtf_nxvf64_nxv2i64_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: ucvtf z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtf.nxvf64.nxv2i64(<vscale x 2 x double> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 2 x double> %0
+}
+
+define <vscale x 2 x double> @test_ucvtf_nxvf64_nxv2i64_ptrue(double %z0,<vscale x 2 x double> %x, <vscale x 2 x i64> %y) {
+; CHECK-LABEL: test_ucvtf_nxvf64_nxv2i64_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: ucvtf z0.d, p0/m, z2.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_ucvtf_nxvf64_nxv2i64_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: ucvtf z0.d, p0/z, z2.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtf.nxvf64.nxv2i64(<vscale x 2 x double> %x, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %y)
+ ret <vscale x 2 x double> %0
+}
+
>From 8b5dba238b6d447557f425e6da01cec3fdb5f0c8 Mon Sep 17 00:00:00 2001
From: Momchil Velikov <momchil.velikov at arm.com>
Date: Fri, 20 Dec 2024 15:08:21 +0000
Subject: [PATCH 4/8] [fixup] Update tests, change undef to poison
---
.../CodeGen/AArch64/zeroing-forms-uscvtf.ll | 112 +++++++++---------
1 file changed, 56 insertions(+), 56 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/zeroing-forms-uscvtf.ll b/llvm/test/CodeGen/AArch64/zeroing-forms-uscvtf.ll
index 06c87a70c19192..fd0126f3166dd3 100644
--- a/llvm/test/CodeGen/AArch64/zeroing-forms-uscvtf.ll
+++ b/llvm/test/CodeGen/AArch64/zeroing-forms-uscvtf.ll
@@ -18,7 +18,7 @@ define <vscale x 4 x float> @test_scvtf_f32_s64_x_1(<vscale x 2 x i1> %pg, <vsca
; CHECK-2p2-NEXT: scvtf z0.s, p0/z, z0.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtf.f32i64(<vscale x 4 x float> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtf.f32i64(<vscale x 4 x float> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
ret <vscale x 4 x float> %0
}
@@ -33,7 +33,7 @@ define <vscale x 4 x float> @test_scvtf_f32_s64_x_2(<vscale x 2 x i1> %pg, doubl
; CHECK-2p2-NEXT: scvtf z0.s, p0/z, z1.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtf.f32i64(<vscale x 4 x float> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtf.f32i64(<vscale x 4 x float> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
ret <vscale x 4 x float> %0
}
@@ -64,7 +64,7 @@ define <vscale x 2 x double> @test_scvtf_f64_s32_x_1(<vscale x 2 x i1> %pg, <vsc
; CHECK-2p2-NEXT: scvtf z0.d, p0/z, z0.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtf.f64i32(<vscale x 2 x double> undef, <vscale x 2 x i1> %pg, <vscale x 4 x i32> %x)
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtf.f64i32(<vscale x 2 x double> poison, <vscale x 2 x i1> %pg, <vscale x 4 x i32> %x)
ret <vscale x 2 x double> %0
}
@@ -79,7 +79,7 @@ define <vscale x 2 x double> @test_scvtf_f64_s32_x_2(<vscale x 2 x i1> %pg, doub
; CHECK-2p2-NEXT: scvtf z0.d, p0/z, z1.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtf.f64i32(<vscale x 2 x double> undef, <vscale x 2 x i1> %pg, <vscale x 4 x i32> %x)
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtf.f64i32(<vscale x 2 x double> poison, <vscale x 2 x i1> %pg, <vscale x 4 x i32> %x)
ret <vscale x 2 x double> %0
}
@@ -110,7 +110,7 @@ define <vscale x 8 x half> @test_scvtf_f16_s32_x_1(<vscale x 4 x i1> %pg, <vscal
; CHECK-2p2-NEXT: scvtf z0.h, p0/z, z0.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.f16i32(<vscale x 8 x half> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.f16i32(<vscale x 8 x half> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
ret <vscale x 8 x half> %0
}
@@ -125,7 +125,7 @@ define <vscale x 8 x half> @test_scvtf_f16_s32_x_2(<vscale x 4 x i1> %pg, double
; CHECK-2p2-NEXT: scvtf z0.h, p0/z, z1.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.f16i32(<vscale x 8 x half> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.f16i32(<vscale x 8 x half> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
ret <vscale x 8 x half> %0
}
@@ -156,7 +156,7 @@ define <vscale x 8 x half> @test_scvtf_f16_s64_x_1(<vscale x 2 x i1> %pg, <vscal
; CHECK-2p2-NEXT: scvtf z0.h, p0/z, z0.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.f16i64(<vscale x 8 x half> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.f16i64(<vscale x 8 x half> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
ret <vscale x 8 x half> %0
}
@@ -171,7 +171,7 @@ define <vscale x 8 x half> @test_scvtf_f16_s64_x_2(<vscale x 2 x i1> %pg, double
; CHECK-2p2-NEXT: scvtf z0.h, p0/z, z1.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.f16i64(<vscale x 8 x half> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.f16i64(<vscale x 8 x half> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
ret <vscale x 8 x half> %0
}
@@ -202,7 +202,7 @@ define <vscale x 4 x float> @test_ucvtf_f32_u64_x_1(<vscale x 2 x i1> %pg, <vsca
; CHECK-2p2-NEXT: ucvtf z0.s, p0/z, z0.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtf.f32i64(<vscale x 4 x float> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtf.f32i64(<vscale x 4 x float> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
ret <vscale x 4 x float> %0
}
@@ -217,7 +217,7 @@ define <vscale x 4 x float> @test_ucvtf_f32_u64_x_2(<vscale x 2 x i1> %pg, doubl
; CHECK-2p2-NEXT: ucvtf z0.s, p0/z, z1.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtf.f32i64(<vscale x 4 x float> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtf.f32i64(<vscale x 4 x float> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
ret <vscale x 4 x float> %0
}
@@ -248,7 +248,7 @@ define <vscale x 2 x double> @test_ucvtf_f64_u32_x_1(<vscale x 2 x i1> %pg, <vsc
; CHECK-2p2-NEXT: ucvtf z0.d, p0/z, z0.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtf.f64i32(<vscale x 2 x double> undef, <vscale x 2 x i1> %pg, <vscale x 4 x i32> %x)
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtf.f64i32(<vscale x 2 x double> poison, <vscale x 2 x i1> %pg, <vscale x 4 x i32> %x)
ret <vscale x 2 x double> %0
}
@@ -263,7 +263,7 @@ define <vscale x 2 x double> @test_ucvtf_f64_u32_x_2(<vscale x 2 x i1> %pg, doub
; CHECK-2p2-NEXT: ucvtf z0.d, p0/z, z1.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtf.f64i32(<vscale x 2 x double> undef, <vscale x 2 x i1> %pg, <vscale x 4 x i32> %x)
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtf.f64i32(<vscale x 2 x double> poison, <vscale x 2 x i1> %pg, <vscale x 4 x i32> %x)
ret <vscale x 2 x double> %0
}
@@ -294,7 +294,7 @@ define <vscale x 8 x half> @test_ucvtf_f16_u32_x_1(<vscale x 4 x i1> %pg, <vscal
; CHECK-2p2-NEXT: ucvtf z0.h, p0/z, z0.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.f16i32(<vscale x 8 x half> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.f16i32(<vscale x 8 x half> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
ret <vscale x 8 x half> %0
}
@@ -309,7 +309,7 @@ define <vscale x 8 x half> @test_ucvtf_f16_u32_x_2(<vscale x 4 x i1> %pg, double
; CHECK-2p2-NEXT: ucvtf z0.h, p0/z, z1.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.f16i32(<vscale x 8 x half> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.f16i32(<vscale x 8 x half> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
ret <vscale x 8 x half> %0
}
@@ -340,7 +340,7 @@ define <vscale x 8 x half> @test_ucvtf_f16_u64_x_1(<vscale x 2 x i1> %pg, <vscal
; CHECK-2p2-NEXT: ucvtf z0.h, p0/z, z0.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.f16i64(<vscale x 8 x half> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.f16i64(<vscale x 8 x half> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
ret <vscale x 8 x half> %0
}
@@ -355,7 +355,7 @@ define <vscale x 8 x half> @test_ucvtf_f16_u64_x_2(<vscale x 2 x i1> %pg, double
; CHECK-2p2-NEXT: ucvtf z0.h, p0/z, z1.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.f16i64(<vscale x 8 x half> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.f16i64(<vscale x 8 x half> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
ret <vscale x 8 x half> %0
}
@@ -386,7 +386,7 @@ define <vscale x 8 x half> @test_svcvt_f16_s16_x_1(<vscale x 8 x i1> %pg, <vscal
; CHECK-2p2-NEXT: scvtf z0.h, p0/z, z0.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.nxv8f16.nxv8i16(<vscale x 8 x half> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.nxv8f16.nxv8i16(<vscale x 8 x half> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
ret <vscale x 8 x half> %0
}
@@ -402,7 +402,7 @@ define <vscale x 8 x half> @test_svcvt_f16_s16_x_2(<vscale x 8 x i1> %pg, double
; CHECK-2p2-NEXT: scvtf z0.h, p0/z, z1.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.nxv8f16.nxv8i16(<vscale x 8 x half> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.nxv8f16.nxv8i16(<vscale x 8 x half> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
ret <vscale x 8 x half> %0
}
@@ -433,7 +433,7 @@ define <vscale x 8 x half> @test_svcvt_f16_u16_x_1(<vscale x 8 x i1> %pg, <vscal
; CHECK-2p2-NEXT: ucvtf z0.h, p0/z, z0.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.nxv8f16.nxv8i16(<vscale x 8 x half> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.nxv8f16.nxv8i16(<vscale x 8 x half> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
ret <vscale x 8 x half> %0
}
@@ -449,7 +449,7 @@ define <vscale x 8 x half> @test_svcvt_f16_u16_x_2(<vscale x 8 x i1> %pg, double
; CHECK-2p2-NEXT: ucvtf z0.h, p0/z, z1.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.nxv8f16.nxv8i16(<vscale x 8 x half> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.nxv8f16.nxv8i16(<vscale x 8 x half> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
ret <vscale x 8 x half> %0
}
@@ -480,7 +480,7 @@ define <vscale x 4 x float> @test_svcvt_f32_s32_x_1(<vscale x 4 x i1> %pg, <vsca
; CHECK-2p2-NEXT: scvtf z0.s, p0/z, z0.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtf.nxv4f32.nxv4i32(<vscale x 4 x float> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtf.nxv4f32.nxv4i32(<vscale x 4 x float> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
ret <vscale x 4 x float> %0
}
@@ -496,7 +496,7 @@ define <vscale x 4 x float> @test_svcvt_f32_s32_x_2(<vscale x 4 x i1> %pg, doubl
; CHECK-2p2-NEXT: scvtf z0.s, p0/z, z1.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtf.nxv4f32.nxv4i32(<vscale x 4 x float> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtf.nxv4f32.nxv4i32(<vscale x 4 x float> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
ret <vscale x 4 x float> %0
}
@@ -527,7 +527,7 @@ define <vscale x 4 x float> @test_svcvt_f32_u32_x_1(<vscale x 4 x i1> %pg, <vsca
; CHECK-2p2-NEXT: ucvtf z0.s, p0/z, z0.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtf.nxv4f32.nxv4i32(<vscale x 4 x float> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtf.nxv4f32.nxv4i32(<vscale x 4 x float> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
ret <vscale x 4 x float> %0
}
@@ -543,7 +543,7 @@ define <vscale x 4 x float> @test_svcvt_f32_u32_x_2(<vscale x 4 x i1> %pg, doubl
; CHECK-2p2-NEXT: ucvtf z0.s, p0/z, z1.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtf.nxv4f32.nxv4i32(<vscale x 4 x float> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtf.nxv4f32.nxv4i32(<vscale x 4 x float> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
ret <vscale x 4 x float> %0
}
@@ -574,7 +574,7 @@ define <vscale x 2 x double> @test_svcvt_f64_s64_x_1(<vscale x 2 x i1> %pg, <vsc
; CHECK-2p2-NEXT: scvtf z0.d, p0/z, z0.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtf.nxv2f64.nxv2i64(<vscale x 2 x double> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtf.nxv2f64.nxv2i64(<vscale x 2 x double> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
ret <vscale x 2 x double> %0
}
@@ -590,7 +590,7 @@ define <vscale x 2 x double> @test_svcvt_f64_s64_x_2(<vscale x 2 x i1> %pg, doub
; CHECK-2p2-NEXT: scvtf z0.d, p0/z, z1.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtf.nxv2f64.nxv2i64(<vscale x 2 x double> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtf.nxv2f64.nxv2i64(<vscale x 2 x double> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
ret <vscale x 2 x double> %0
}
@@ -621,7 +621,7 @@ define <vscale x 2 x double> @test_svcvt_f64_u64_x_1(<vscale x 2 x i1> %pg, <vsc
; CHECK-2p2-NEXT: ucvtf z0.d, p0/z, z0.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtf.nxv2f64.nxv2i64(<vscale x 2 x double> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtf.nxv2f64.nxv2i64(<vscale x 2 x double> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
ret <vscale x 2 x double> %0
}
@@ -637,7 +637,7 @@ define <vscale x 2 x double> @test_svcvt_f64_u64_x_2(<vscale x 2 x i1> %pg, doub
; CHECK-2p2-NEXT: ucvtf z0.d, p0/z, z1.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtf.nxv2f64.nxv2i64(<vscale x 2 x double> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtf.nxv2f64.nxv2i64(<vscale x 2 x double> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
ret <vscale x 2 x double> %0
}
@@ -744,7 +744,7 @@ define <vscale x 8 x half> @test_scvtf_f16_i32_ptrue_u(double %z0, <vscale x 4 x
; CHECK-2p2-NEXT: scvtf z0.h, p0/z, z1.s
; CHECK-2p2-NEXT: ret
entry:
- %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.f16i32(<vscale x 8 x half> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
ret <vscale x 8 x half> %0
}
@@ -763,7 +763,7 @@ define <vscale x 8 x half> @test_scvtf_f16_i32_ptrue(double %z0,<vscale x 8 x ha
; CHECK-2p2-NEXT: scvtf z0.h, p0/z, z2.s
; CHECK-2p2-NEXT: ret
entry:
- %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.f16i32(<vscale x 8 x half> %x, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %y)
ret <vscale x 8 x half> %0
}
@@ -892,7 +892,7 @@ define <vscale x 8 x half> @test_ucvtf_f16_i32_ptrue_u(double %z0, <vscale x 4 x
; CHECK-2p2-NEXT: ucvtf z0.h, p0/z, z1.s
; CHECK-2p2-NEXT: ret
entry:
- %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.f16i32(<vscale x 8 x half> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
ret <vscale x 8 x half> %0
}
@@ -911,7 +911,7 @@ define <vscale x 8 x half> @test_ucvtf_f16_i32_ptrue(double %z0,<vscale x 8 x ha
; CHECK-2p2-NEXT: ucvtf z0.h, p0/z, z2.s
; CHECK-2p2-NEXT: ret
entry:
- %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.f16i32(<vscale x 8 x half> %x, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %y)
ret <vscale x 8 x half> %0
}
@@ -967,7 +967,7 @@ define <vscale x 8 x half> @test_scvtf_nxv8f16_nxv8i16_ptrue_u(double %z0, <vsca
; CHECK-2p2-NEXT: scvtf z0.h, p0/z, z1.h
; CHECK-2p2-NEXT: ret
entry:
- %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.nxv8f16.nxv8i16(<vscale x 8 x half> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
ret <vscale x 8 x half> %0
}
@@ -986,7 +986,7 @@ define <vscale x 8 x half> @test_scvtf_nxv8f16_nxv8i16_ptrue(double %z0,<vscale
; CHECK-2p2-NEXT: scvtf z0.h, p0/z, z2.h
; CHECK-2p2-NEXT: ret
entry:
- %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.nxv8f16.nxv8i16(<vscale x 8 x half> %x, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %y)
ret <vscale x 8 x half> %0
}
@@ -1005,7 +1005,7 @@ define <vscale x 8 x half> @test_ucvtf_nxv8f16_nxv8i16_ptrue_u(double %z0, <vsca
; CHECK-2p2-NEXT: ucvtf z0.h, p0/z, z1.h
; CHECK-2p2-NEXT: ret
entry:
- %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.nxv8f16.nxv8i16(<vscale x 8 x half> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
ret <vscale x 8 x half> %0
}
@@ -1024,7 +1024,7 @@ define <vscale x 8 x half> @test_ucvtf_nxv8f16_nxv8i16_ptrue(double %z0,<vscale
; CHECK-2p2-NEXT: ucvtf z0.h, p0/z, z2.h
; CHECK-2p2-NEXT: ret
entry:
- %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.nxv8f16.nxv8i16(<vscale x 8 x half> %x, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %y)
ret <vscale x 8 x half> %0
}
@@ -1043,7 +1043,7 @@ define <vscale x 4 x float> @test_scvtf_nxv4f32_nxv4i32_ptrue_u(double %z0, <vsc
; CHECK-2p2-NEXT: scvtf z0.s, p0/z, z1.s
; CHECK-2p2-NEXT: ret
entry:
- %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtf.nxv4f32.nxv4i32(<vscale x 4 x float> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
ret <vscale x 4 x float> %0
}
@@ -1062,7 +1062,7 @@ define <vscale x 4 x float> @test_scvtf_nxv4f32_nxv4i32_ptrue(double %z0,<vscale
; CHECK-2p2-NEXT: scvtf z0.s, p0/z, z2.s
; CHECK-2p2-NEXT: ret
entry:
- %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtf.nxv4f32.nxv4i32(<vscale x 4 x float> %x, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %y)
ret <vscale x 4 x float> %0
}
@@ -1081,7 +1081,7 @@ define <vscale x 4 x float> @test_ucvtf_nxv4f32_nxv4i32_ptrue_u(double %z0, <vsc
; CHECK-2p2-NEXT: ucvtf z0.s, p0/z, z1.s
; CHECK-2p2-NEXT: ret
entry:
- %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtf.nxv4f32.nxv4i32(<vscale x 4 x float> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
ret <vscale x 4 x float> %0
}
@@ -1100,84 +1100,84 @@ define <vscale x 4 x float> @test_ucvtf_nxv4f32_nxv4i32_ptrue(double %z0,<vscale
; CHECK-2p2-NEXT: ucvtf z0.s, p0/z, z2.s
; CHECK-2p2-NEXT: ret
entry:
- %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtf.nxv4f32.nxv4i32(<vscale x 4 x float> %x, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %y)
ret <vscale x 4 x float> %0
}
-define <vscale x 2 x double> @test_scvtf_nxvf64_nxv2i64_ptrue_u(double %z0, <vscale x 2 x i64> %x) {
-; CHECK-LABEL: test_scvtf_nxvf64_nxv2i64_ptrue_u:
+define <vscale x 2 x double> @test_scvtf_nxv2f64_nxv2i64_ptrue_u(double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_scvtf_nxv2f64_nxv2i64_ptrue_u:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: movprfx z0, z1
; CHECK-NEXT: scvtf z0.d, p0/m, z1.d
; CHECK-NEXT: ret
;
-; CHECK-2p2-LABEL: test_scvtf_nxvf64_nxv2i64_ptrue_u:
+; CHECK-2p2-LABEL: test_scvtf_nxv2f64_nxv2i64_ptrue_u:
; CHECK-2p2: // %bb.0: // %entry
; CHECK-2p2-NEXT: ptrue p0.d
; CHECK-2p2-NEXT: scvtf z0.d, p0/z, z1.d
; CHECK-2p2-NEXT: ret
entry:
%pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
- %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtf.nxvf64.nxv2i64(<vscale x 2 x double> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtf.nxv2f64.nxv2i64(<vscale x 2 x double> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
ret <vscale x 2 x double> %0
}
-define <vscale x 2 x double> @test_scvtf_nxvf64_nxv2i64_ptrue(double %z0,<vscale x 2 x double> %x, <vscale x 2 x i64> %y) {
-; CHECK-LABEL: test_scvtf_nxvf64_nxv2i64_ptrue:
+define <vscale x 2 x double> @test_scvtf_nxv2f64_nxv2i64_ptrue(double %z0,<vscale x 2 x double> %x, <vscale x 2 x i64> %y) {
+; CHECK-LABEL: test_scvtf_nxv2f64_nxv2i64_ptrue:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: movprfx z0, z2
; CHECK-NEXT: scvtf z0.d, p0/m, z2.d
; CHECK-NEXT: ret
;
-; CHECK-2p2-LABEL: test_scvtf_nxvf64_nxv2i64_ptrue:
+; CHECK-2p2-LABEL: test_scvtf_nxv2f64_nxv2i64_ptrue:
; CHECK-2p2: // %bb.0: // %entry
; CHECK-2p2-NEXT: ptrue p0.d
; CHECK-2p2-NEXT: scvtf z0.d, p0/z, z2.d
; CHECK-2p2-NEXT: ret
entry:
%pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
- %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtf.nxvf64.nxv2i64(<vscale x 2 x double> %x, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %y)
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtf.nxv2f64.nxv2i64(<vscale x 2 x double> %x, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %y)
ret <vscale x 2 x double> %0
}
-define <vscale x 2 x double> @test_ucvtf_nxvf64_nxv2i64_ptrue_u(double %z0, <vscale x 2 x i64> %x) {
-; CHECK-LABEL: test_ucvtf_nxvf64_nxv2i64_ptrue_u:
+define <vscale x 2 x double> @test_ucvtf_nxv2f64_nxv2i64_ptrue_u(double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_ucvtf_nxv2f64_nxv2i64_ptrue_u:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: movprfx z0, z1
; CHECK-NEXT: ucvtf z0.d, p0/m, z1.d
; CHECK-NEXT: ret
;
-; CHECK-2p2-LABEL: test_ucvtf_nxvf64_nxv2i64_ptrue_u:
+; CHECK-2p2-LABEL: test_ucvtf_nxv2f64_nxv2i64_ptrue_u:
; CHECK-2p2: // %bb.0: // %entry
; CHECK-2p2-NEXT: ptrue p0.d
; CHECK-2p2-NEXT: ucvtf z0.d, p0/z, z1.d
; CHECK-2p2-NEXT: ret
entry:
%pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
- %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtf.nxvf64.nxv2i64(<vscale x 2 x double> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtf.nxv2f64.nxv2i64(<vscale x 2 x double> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
ret <vscale x 2 x double> %0
}
-define <vscale x 2 x double> @test_ucvtf_nxvf64_nxv2i64_ptrue(double %z0,<vscale x 2 x double> %x, <vscale x 2 x i64> %y) {
-; CHECK-LABEL: test_ucvtf_nxvf64_nxv2i64_ptrue:
+define <vscale x 2 x double> @test_ucvtf_nxv2f64_nxv2i64_ptrue(double %z0,<vscale x 2 x double> %x, <vscale x 2 x i64> %y) {
+; CHECK-LABEL: test_ucvtf_nxv2f64_nxv2i64_ptrue:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: movprfx z0, z2
; CHECK-NEXT: ucvtf z0.d, p0/m, z2.d
; CHECK-NEXT: ret
;
-; CHECK-2p2-LABEL: test_ucvtf_nxvf64_nxv2i64_ptrue:
+; CHECK-2p2-LABEL: test_ucvtf_nxv2f64_nxv2i64_ptrue:
; CHECK-2p2: // %bb.0: // %entry
; CHECK-2p2-NEXT: ptrue p0.d
; CHECK-2p2-NEXT: ucvtf z0.d, p0/z, z2.d
; CHECK-2p2-NEXT: ret
entry:
%pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
- %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtf.nxvf64.nxv2i64(<vscale x 2 x double> %x, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %y)
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtf.nxv2f64.nxv2i64(<vscale x 2 x double> %x, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %y)
ret <vscale x 2 x double> %0
}
>From ac5b0926bf850fe4c23bb8f61dec3cc066f11b67 Mon Sep 17 00:00:00 2001
From: Momchil Velikov <momchil.velikov at arm.com>
Date: Tue, 19 Nov 2024 12:17:13 +0000
Subject: [PATCH 5/8] [AArch64] Generate zeroing forms of certain SVE2.2
instructions (6/11)
---
.../lib/Target/AArch64/AArch64SVEInstrInfo.td | 10 +-
llvm/lib/Target/AArch64/SVEInstrFormats.td | 7 +-
.../AArch64/zeroing-forms-counts-not.ll | 1152 +++++++++++++++++
3 files changed, 1163 insertions(+), 6 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/zeroing-forms-counts-not.ll
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 23d626fe786630..5c257c436fbeac 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -4314,11 +4314,11 @@ let Predicates = [HasSVE2p2orSME2p2] in {
defm FSQRT_ZPZz : sve_fp_z2op_p_zd_hsd<0b01101, "fsqrt">;
// SVE2p2 integer unary arithmetic (bitwise), zeroing predicate
- defm CLS_ZPzZ : sve_int_un_pred_arit_bitwise_z<0b000, "cls">;
- defm CLZ_ZPzZ : sve_int_un_pred_arit_bitwise_z<0b001, "clz">;
- defm CNT_ZPzZ : sve_int_un_pred_arit_bitwise_z<0b010, "cnt">;
- defm CNOT_ZPzZ : sve_int_un_pred_arit_bitwise_z<0b011, "cnot">;
- defm NOT_ZPzZ : sve_int_un_pred_arit_bitwise_z<0b110, "not">;
+ defm CLS_ZPzZ : sve_int_un_pred_arit_bitwise_z<0b000, "cls", AArch64cls_mt>;
+ defm CLZ_ZPzZ : sve_int_un_pred_arit_bitwise_z<0b001, "clz", AArch64clz_mt>;
+ defm CNT_ZPzZ : sve_int_un_pred_arit_bitwise_z<0b010, "cnt", AArch64cnt_mt>;
+ defm CNOT_ZPzZ : sve_int_un_pred_arit_bitwise_z<0b011, "cnot", AArch64cnot_mt>;
+ defm NOT_ZPzZ : sve_int_un_pred_arit_bitwise_z<0b110, " not", AArch64not_mt>;
// floating point
defm FABS_ZPzZ : sve_int_un_pred_arit_bitwise_fp_z<0b100, "fabs", AArch64fabs_mt>;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 64d0d3749efdc4..a9a23574de5d1d 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -4992,11 +4992,16 @@ multiclass sve_int_un_pred_arit_bitwise<bits<3> opc, string asm,
defm : SVE_1_Op_PassthruUndef_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Pseudo>(NAME # _D_UNDEF)>;
}
-multiclass sve_int_un_pred_arit_bitwise_z<bits<3> opc, string asm> {
+multiclass sve_int_un_pred_arit_bitwise_z<bits<3> opc, string asm, SDPatternOperator op> {
def _B : sve_int_un_pred_arit_z<0b00, { opc, 0b1 }, asm, ZPR8>;
def _H : sve_int_un_pred_arit_z<0b01, { opc, 0b1 }, asm, ZPR16>;
def _S : sve_int_un_pred_arit_z<0b10, { opc, 0b1 }, asm, ZPR32>;
def _D : sve_int_un_pred_arit_z<0b11, { opc, 0b1 }, asm, ZPR64>;
+
+ def : SVE_1_Op_PassthruUndefZero_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_1_Op_PassthruUndefZero_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_1_Op_PassthruUndefZero_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_1_Op_PassthruUndefZero_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
multiclass sve_int_un_pred_arit_bitwise_fp<bits<3> opc, string asm,
diff --git a/llvm/test/CodeGen/AArch64/zeroing-forms-counts-not.ll b/llvm/test/CodeGen/AArch64/zeroing-forms-counts-not.ll
new file mode 100644
index 00000000000000..efdf4bd6a5c64b
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/zeroing-forms-counts-not.ll
@@ -0,0 +1,1152 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mattr=+bf16,+sve < %s | FileCheck %s
+; RUN: llc -mattr=+bf16,+sve2p2 < %s | FileCheck %s -check-prefix CHECK-2p2
+
+; RUN: llc -mattr=+bf16,+sme -force-streaming < %s | FileCheck %s
+; RUN: llc -mattr=+bf16,+sme2p2 -force-streaming < %s | FileCheck %s -check-prefix CHECK-2p2
+
+target triple = "aarch64-linux"
+
+define <vscale x 16 x i8> @test_svcls_s8_x_1(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %x) {
+; CHECK-LABEL: test_svcls_s8_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cls z0.b, p0/m, z0.b
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcls_s8_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: cls z0.b, p0/z, z0.b
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.cls.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+ ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 16 x i8> @test_svcls_s8_x_2(<vscale x 16 x i1> %pg, double %z0, <vscale x 16 x i8> %x) {
+; CHECK-LABEL: test_svcls_s8_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: cls z0.b, p0/m, z1.b
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcls_s8_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: cls z0.b, p0/z, z1.b
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.cls.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+ ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 16 x i8> @test_svcls_s8_z(<vscale x 16 x i1> %pg, double %z0, <vscale x 16 x i8> %x) {
+; CHECK-LABEL: test_svcls_s8_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.b, #0 // =0x0
+; CHECK-NEXT: cls z0.b, p0/m, z1.b
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcls_s8_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: cls z0.b, p0/z, z1.b
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.cls.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+ ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 8 x i16> @test_svcls_s16_x_1(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svcls_s16_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cls z0.h, p0/m, z0.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcls_s16_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: cls z0.h, p0/z, z0.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cls.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svcls_s16_x_2(<vscale x 8 x i1> %pg, double %z0, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svcls_s16_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: cls z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcls_s16_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: cls z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cls.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svcls_s16_z(<vscale x 8 x i1> %pg, double %z0, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svcls_s16_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.h, #0 // =0x0
+; CHECK-NEXT: cls z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcls_s16_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: cls z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cls.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 4 x i32> @test_svcls_s32_x_1(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svcls_s32_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cls z0.s, p0/m, z0.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcls_s32_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: cls z0.s, p0/z, z0.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cls.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svcls_s32_x_2(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svcls_s32_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: cls z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcls_s32_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: cls z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cls.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svcls_s32_z(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svcls_s32_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.s, #0 // =0x0
+; CHECK-NEXT: cls z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcls_s32_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: cls z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cls.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 2 x i64> @test_svcls_s64_x_1(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svcls_s64_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cls z0.d, p0/m, z0.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcls_s64_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: cls z0.d, p0/z, z0.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cls.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_svcls_s64_x_2(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svcls_s64_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: cls z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcls_s64_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: cls z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cls.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_svcls_s64_z(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svcls_s64_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, #0 // =0x0
+; CHECK-NEXT: cls z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcls_s64_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: cls z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cls.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 16 x i8> @test_svclz_s8_x_1(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %x) {
+; CHECK-LABEL: test_svclz_s8_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: clz z0.b, p0/m, z0.b
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svclz_s8_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: clz z0.b, p0/z, z0.b
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.clz.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+ ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 16 x i8> @test_svclz_s8_x_2(<vscale x 16 x i1> %pg, double %z0, <vscale x 16 x i8> %x) {
+; CHECK-LABEL: test_svclz_s8_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: clz z0.b, p0/m, z1.b
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svclz_s8_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: clz z0.b, p0/z, z1.b
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.clz.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+ ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 16 x i8> @test_svclz_s8_z(<vscale x 16 x i1> %pg, double %z0, <vscale x 16 x i8> %x) {
+; CHECK-LABEL: test_svclz_s8_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.b, #0 // =0x0
+; CHECK-NEXT: clz z0.b, p0/m, z1.b
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svclz_s8_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: clz z0.b, p0/z, z1.b
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.clz.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+ ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 8 x i16> @test_svclz_s16_x_1(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svclz_s16_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: clz z0.h, p0/m, z0.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svclz_s16_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: clz z0.h, p0/z, z0.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.clz.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svclz_s16_x_2(<vscale x 8 x i1> %pg, double %z0, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svclz_s16_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: clz z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svclz_s16_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: clz z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.clz.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svclz_s16_z(<vscale x 8 x i1> %pg, double %z0, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svclz_s16_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.h, #0 // =0x0
+; CHECK-NEXT: clz z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svclz_s16_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: clz z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.clz.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 4 x i32> @test_svclz_s32_x_1(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svclz_s32_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: clz z0.s, p0/m, z0.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svclz_s32_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: clz z0.s, p0/z, z0.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.clz.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svclz_s32_x_2(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svclz_s32_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: clz z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svclz_s32_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: clz z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.clz.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svclz_s32_z(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svclz_s32_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.s, #0 // =0x0
+; CHECK-NEXT: clz z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svclz_s32_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: clz z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.clz.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 2 x i64> @test_svclz_s64_x_1(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svclz_s64_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: clz z0.d, p0/m, z0.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svclz_s64_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: clz z0.d, p0/z, z0.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.clz.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_svclz_s64_x_2(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svclz_s64_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: clz z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svclz_s64_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: clz z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.clz.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_svclz_s64_z(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svclz_s64_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, #0 // =0x0
+; CHECK-NEXT: clz z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svclz_s64_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: clz z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.clz.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 16 x i8> @test_svcnt_s8_x_1(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %x) {
+; CHECK-LABEL: test_svcnt_s8_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cnt z0.b, p0/m, z0.b
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcnt_s8_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: cnt z0.b, p0/z, z0.b
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.cnt.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+ ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 16 x i8> @test_svcnt_s8_x_2(<vscale x 16 x i1> %pg, double %z0, <vscale x 16 x i8> %x) {
+; CHECK-LABEL: test_svcnt_s8_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: cnt z0.b, p0/m, z1.b
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcnt_s8_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: cnt z0.b, p0/z, z1.b
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.cnt.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+ ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 16 x i8> @test_svcnt_s8_z(<vscale x 16 x i1> %pg, double %z0, <vscale x 16 x i8> %x) {
+; CHECK-LABEL: test_svcnt_s8_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.b, #0 // =0x0
+; CHECK-NEXT: cnt z0.b, p0/m, z1.b
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcnt_s8_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: cnt z0.b, p0/z, z1.b
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.cnt.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+ ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 8 x i16> @test_svcnt_s16_x_1(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svcnt_s16_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cnt z0.h, p0/m, z0.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcnt_s16_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: cnt z0.h, p0/z, z0.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svcnt_s16_x_2(<vscale x 8 x i1> %pg, double %z0, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svcnt_s16_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: cnt z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcnt_s16_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: cnt z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svcnt_s16_z(<vscale x 8 x i1> %pg, double %z0, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svcnt_s16_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.h, #0 // =0x0
+; CHECK-NEXT: cnt z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcnt_s16_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: cnt z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 4 x i32> @test_svcnt_s32_x_1(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svcnt_s32_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cnt z0.s, p0/m, z0.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcnt_s32_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: cnt z0.s, p0/z, z0.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cnt.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svcnt_s32_x_2(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svcnt_s32_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: cnt z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcnt_s32_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: cnt z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cnt.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svcnt_s32_z(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svcnt_s32_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.s, #0 // =0x0
+; CHECK-NEXT: cnt z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcnt_s32_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: cnt z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cnt.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 2 x i64> @test_svcnt_s64_x_1(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svcnt_s64_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cnt z0.d, p0/m, z0.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcnt_s64_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: cnt z0.d, p0/z, z0.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cnt.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_svcnt_s64_x_2(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svcnt_s64_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: cnt z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcnt_s64_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: cnt z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cnt.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_svcnt_s64_z(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svcnt_s64_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, #0 // =0x0
+; CHECK-NEXT: cnt z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcnt_s64_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: cnt z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cnt.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 8 x i16> @test_svcnt_f16_x_1(<vscale x 8 x i1> %pg, <vscale x 8 x half> %x) {
+; CHECK-LABEL: test_svcnt_f16_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cnt z0.h, p0/m, z0.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcnt_f16_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: cnt z0.h, p0/z, z0.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8f16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svcnt_f16_x_2(<vscale x 8 x i1> %pg, double %z0, <vscale x 8 x half> %x) {
+; CHECK-LABEL: test_svcnt_f16_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: cnt z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcnt_f16_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: cnt z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8f16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svcnt_f16_z(<vscale x 8 x i1> %pg, double %z0, <vscale x 8 x half> %x) {
+; CHECK-LABEL: test_svcnt_f16_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.h, #0 // =0x0
+; CHECK-NEXT: cnt z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcnt_f16_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: cnt z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8f16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svcnt_bf16_x_1(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %x) {
+; CHECK-LABEL: test_svcnt_bf16_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cnt z0.h, p0/m, z0.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcnt_bf16_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: cnt z0.h, p0/z, z0.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8bf16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %x)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svcnt_bf16_x_2(<vscale x 8 x i1> %pg, double %z0, <vscale x 8 x bfloat> %x) {
+; CHECK-LABEL: test_svcnt_bf16_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: cnt z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcnt_bf16_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: cnt z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8bf16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %x)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svcnt_bf16_z(<vscale x 8 x i1> %pg, double %z0, <vscale x 8 x bfloat> %x) {
+; CHECK-LABEL: test_svcnt_bf16_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.h, #0 // =0x0
+; CHECK-NEXT: cnt z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcnt_bf16_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: cnt z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8bf16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %x)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 4 x i32> @test_svcnt_f32_x_1(<vscale x 4 x i1> %pg, <vscale x 4 x float> %x) {
+; CHECK-LABEL: test_svcnt_f32_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cnt z0.s, p0/m, z0.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcnt_f32_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: cnt z0.s, p0/z, z0.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cnt.nxv4f32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svcnt_f32_x_2(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x float> %x) {
+; CHECK-LABEL: test_svcnt_f32_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: cnt z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcnt_f32_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: cnt z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cnt.nxv4f32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svcnt_f32_z(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x float> %x) {
+; CHECK-LABEL: test_svcnt_f32_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.s, #0 // =0x0
+; CHECK-NEXT: cnt z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcnt_f32_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: cnt z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cnt.nxv4f32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 2 x i64> @test_svcnt_f64_x_1(<vscale x 2 x i1> %pg, <vscale x 2 x double> %x) {
+; CHECK-LABEL: test_svcnt_f64_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cnt z0.d, p0/m, z0.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcnt_f64_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: cnt z0.d, p0/z, z0.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cnt.nxv2f64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_svcnt_f64_x_2(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x double> %x) {
+; CHECK-LABEL: test_svcnt_f64_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: cnt z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcnt_f64_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: cnt z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cnt.nxv2f64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_svcnt_f64_z(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x double> %x) {
+; CHECK-LABEL: test_svcnt_f64_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, #0 // =0x0
+; CHECK-NEXT: cnt z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcnt_f64_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: cnt z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cnt.nxv2f64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 16 x i8> @test_svcnot_s8_x_1(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %x) {
+; CHECK-LABEL: test_svcnot_s8_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cnot z0.b, p0/m, z0.b
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcnot_s8_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: cnot z0.b, p0/z, z0.b
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.cnot.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+ ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 16 x i8> @test_svcnot_s8_x_2(<vscale x 16 x i1> %pg, double %z0, <vscale x 16 x i8> %x) {
+; CHECK-LABEL: test_svcnot_s8_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: cnot z0.b, p0/m, z1.b
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcnot_s8_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: cnot z0.b, p0/z, z1.b
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.cnot.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+ ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 16 x i8> @test_svcnot_s8_z(<vscale x 16 x i1> %pg, double %z0, <vscale x 16 x i8> %x) {
+; CHECK-LABEL: test_svcnot_s8_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.b, #0 // =0x0
+; CHECK-NEXT: cnot z0.b, p0/m, z1.b
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcnot_s8_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: cnot z0.b, p0/z, z1.b
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.cnot.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+ ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 8 x i16> @test_svcnot_s16_x_1(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svcnot_s16_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cnot z0.h, p0/m, z0.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcnot_s16_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: cnot z0.h, p0/z, z0.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnot.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svcnot_s16_x_2(<vscale x 8 x i1> %pg, double %z0, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svcnot_s16_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: cnot z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcnot_s16_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: cnot z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnot.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svcnot_s16_z(<vscale x 8 x i1> %pg, double %z0, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svcnot_s16_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.h, #0 // =0x0
+; CHECK-NEXT: cnot z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcnot_s16_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: cnot z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnot.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 4 x i32> @test_svcnot_s32_x_1(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svcnot_s32_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cnot z0.s, p0/m, z0.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcnot_s32_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: cnot z0.s, p0/z, z0.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cnot.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svcnot_s32_x_2(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svcnot_s32_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: cnot z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcnot_s32_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: cnot z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cnot.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svcnot_s32_z(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svcnot_s32_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.s, #0 // =0x0
+; CHECK-NEXT: cnot z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcnot_s32_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: cnot z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cnot.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 2 x i64> @test_svcnot_s64_x_1(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svcnot_s64_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cnot z0.d, p0/m, z0.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcnot_s64_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: cnot z0.d, p0/z, z0.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cnot.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_svcnot_s64_x_2(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svcnot_s64_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: cnot z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcnot_s64_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: cnot z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cnot.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_svcnot_s64_z(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svcnot_s64_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, #0 // =0x0
+; CHECK-NEXT: cnot z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcnot_s64_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: cnot z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cnot.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 16 x i8> @test_svnot_s8_x_1(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %x) {
+; CHECK-LABEL: test_svnot_s8_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: not z0.b, p0/m, z0.b
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svnot_s8_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: not z0.b, p0/z, z0.b
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.not.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+ ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 16 x i8> @test_svnot_s8_x_2(<vscale x 16 x i1> %pg, double %z0, <vscale x 16 x i8> %x) {
+; CHECK-LABEL: test_svnot_s8_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: not z0.b, p0/m, z1.b
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svnot_s8_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: not z0.b, p0/z, z1.b
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.not.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+ ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 16 x i8> @test_svnot_s8_z(<vscale x 16 x i1> %pg, double %z0, <vscale x 16 x i8> %x) {
+; CHECK-LABEL: test_svnot_s8_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.b, #0 // =0x0
+; CHECK-NEXT: not z0.b, p0/m, z1.b
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svnot_s8_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: not z0.b, p0/z, z1.b
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.not.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+ ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 8 x i16> @test_svnot_s16_x_1(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svnot_s16_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: not z0.h, p0/m, z0.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svnot_s16_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: not z0.h, p0/z, z0.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.not.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svnot_s16_x_2(<vscale x 8 x i1> %pg, double %z0, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svnot_s16_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: not z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svnot_s16_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: not z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.not.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svnot_s16_z(<vscale x 8 x i1> %pg, double %z0, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svnot_s16_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.h, #0 // =0x0
+; CHECK-NEXT: not z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svnot_s16_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: not z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.not.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 4 x i32> @test_svnot_s32_x_1(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svnot_s32_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: not z0.s, p0/m, z0.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svnot_s32_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: not z0.s, p0/z, z0.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.not.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svnot_s32_x_2(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svnot_s32_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: not z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svnot_s32_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: not z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.not.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svnot_s32_z(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svnot_s32_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.s, #0 // =0x0
+; CHECK-NEXT: not z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svnot_s32_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: not z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.not.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 2 x i64> @test_svnot_s64_x_1(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svnot_s64_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: not z0.d, p0/m, z0.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svnot_s64_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: not z0.d, p0/z, z0.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.not.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_svnot_s64_x_2(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svnot_s64_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: not z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svnot_s64_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: not z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.not.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_svnot_s64_z(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svnot_s64_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, #0 // =0x0
+; CHECK-NEXT: not z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svnot_s64_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: not z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.not.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_svnot_u64_z(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svnot_u64_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, #0 // =0x0
+; CHECK-NEXT: not z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svnot_u64_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: not z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.not.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 2 x i64> %0
+}
>From 34142d3b71dd58a235918471436e2155732cccaf Mon Sep 17 00:00:00 2001
From: Momchil Velikov <momchil.velikov at arm.com>
Date: Fri, 20 Dec 2024 16:34:33 +0000
Subject: [PATCH 6/8] [fixup] Rebase, update tests, add tests for the new
all-true patterns, change undef to poison
---
.../lib/Target/AArch64/AArch64SVEInstrInfo.td | 2 +-
llvm/lib/Target/AArch64/SVEInstrFormats.td | 8 +-
.../AArch64/zeroing-forms-counts-not.ll | 1002 ++++++++++++++++-
3 files changed, 954 insertions(+), 58 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 5c257c436fbeac..c43e9e4a46f36e 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -4318,7 +4318,7 @@ let Predicates = [HasSVE2p2orSME2p2] in {
defm CLZ_ZPzZ : sve_int_un_pred_arit_bitwise_z<0b001, "clz", AArch64clz_mt>;
defm CNT_ZPzZ : sve_int_un_pred_arit_bitwise_z<0b010, "cnt", AArch64cnt_mt>;
defm CNOT_ZPzZ : sve_int_un_pred_arit_bitwise_z<0b011, "cnot", AArch64cnot_mt>;
- defm NOT_ZPzZ : sve_int_un_pred_arit_bitwise_z<0b110, " not", AArch64not_mt>;
+ defm NOT_ZPzZ : sve_int_un_pred_arit_bitwise_z<0b110, "not", AArch64not_mt>;
// floating point
defm FABS_ZPzZ : sve_int_un_pred_arit_bitwise_fp_z<0b100, "fabs", AArch64fabs_mt>;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index a9a23574de5d1d..959f98555466be 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -4998,10 +4998,10 @@ multiclass sve_int_un_pred_arit_bitwise_z<bits<3> opc, string asm, SDPatternOper
def _S : sve_int_un_pred_arit_z<0b10, { opc, 0b1 }, asm, ZPR32>;
def _D : sve_int_un_pred_arit_z<0b11, { opc, 0b1 }, asm, ZPR64>;
- def : SVE_1_Op_PassthruUndefZero_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
- def : SVE_1_Op_PassthruUndefZero_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
- def : SVE_1_Op_PassthruUndefZero_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
- def : SVE_1_Op_PassthruUndefZero_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
multiclass sve_int_un_pred_arit_bitwise_fp<bits<3> opc, string asm,
diff --git a/llvm/test/CodeGen/AArch64/zeroing-forms-counts-not.ll b/llvm/test/CodeGen/AArch64/zeroing-forms-counts-not.ll
index efdf4bd6a5c64b..f7970ca81f6084 100644
--- a/llvm/test/CodeGen/AArch64/zeroing-forms-counts-not.ll
+++ b/llvm/test/CodeGen/AArch64/zeroing-forms-counts-not.ll
@@ -18,7 +18,7 @@ define <vscale x 16 x i8> @test_svcls_s8_x_1(<vscale x 16 x i1> %pg, <vscale x 1
; CHECK-2p2-NEXT: cls z0.b, p0/z, z0.b
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.cls.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.cls.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
ret <vscale x 16 x i8> %0
}
@@ -34,7 +34,7 @@ define <vscale x 16 x i8> @test_svcls_s8_x_2(<vscale x 16 x i1> %pg, double %z0,
; CHECK-2p2-NEXT: cls z0.b, p0/z, z1.b
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.cls.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.cls.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
ret <vscale x 16 x i8> %0
}
@@ -65,7 +65,7 @@ define <vscale x 8 x i16> @test_svcls_s16_x_1(<vscale x 8 x i1> %pg, <vscale x 8
; CHECK-2p2-NEXT: cls z0.h, p0/z, z0.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cls.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cls.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
ret <vscale x 8 x i16> %0
}
@@ -81,7 +81,7 @@ define <vscale x 8 x i16> @test_svcls_s16_x_2(<vscale x 8 x i1> %pg, double %z0,
; CHECK-2p2-NEXT: cls z0.h, p0/z, z1.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cls.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cls.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
ret <vscale x 8 x i16> %0
}
@@ -112,7 +112,7 @@ define <vscale x 4 x i32> @test_svcls_s32_x_1(<vscale x 4 x i1> %pg, <vscale x 4
; CHECK-2p2-NEXT: cls z0.s, p0/z, z0.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cls.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cls.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
ret <vscale x 4 x i32> %0
}
@@ -128,7 +128,7 @@ define <vscale x 4 x i32> @test_svcls_s32_x_2(<vscale x 4 x i1> %pg, double %z0,
; CHECK-2p2-NEXT: cls z0.s, p0/z, z1.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cls.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cls.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
ret <vscale x 4 x i32> %0
}
@@ -159,7 +159,7 @@ define <vscale x 2 x i64> @test_svcls_s64_x_1(<vscale x 2 x i1> %pg, <vscale x 2
; CHECK-2p2-NEXT: cls z0.d, p0/z, z0.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cls.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cls.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
ret <vscale x 2 x i64> %0
}
@@ -175,7 +175,7 @@ define <vscale x 2 x i64> @test_svcls_s64_x_2(<vscale x 2 x i1> %pg, double %z0,
; CHECK-2p2-NEXT: cls z0.d, p0/z, z1.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cls.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cls.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
ret <vscale x 2 x i64> %0
}
@@ -206,7 +206,7 @@ define <vscale x 16 x i8> @test_svclz_s8_x_1(<vscale x 16 x i1> %pg, <vscale x 1
; CHECK-2p2-NEXT: clz z0.b, p0/z, z0.b
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.clz.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.clz.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
ret <vscale x 16 x i8> %0
}
@@ -222,7 +222,7 @@ define <vscale x 16 x i8> @test_svclz_s8_x_2(<vscale x 16 x i1> %pg, double %z0,
; CHECK-2p2-NEXT: clz z0.b, p0/z, z1.b
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.clz.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.clz.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
ret <vscale x 16 x i8> %0
}
@@ -253,7 +253,7 @@ define <vscale x 8 x i16> @test_svclz_s16_x_1(<vscale x 8 x i1> %pg, <vscale x 8
; CHECK-2p2-NEXT: clz z0.h, p0/z, z0.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.clz.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.clz.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
ret <vscale x 8 x i16> %0
}
@@ -269,7 +269,7 @@ define <vscale x 8 x i16> @test_svclz_s16_x_2(<vscale x 8 x i1> %pg, double %z0,
; CHECK-2p2-NEXT: clz z0.h, p0/z, z1.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.clz.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.clz.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
ret <vscale x 8 x i16> %0
}
@@ -300,7 +300,7 @@ define <vscale x 4 x i32> @test_svclz_s32_x_1(<vscale x 4 x i1> %pg, <vscale x 4
; CHECK-2p2-NEXT: clz z0.s, p0/z, z0.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.clz.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.clz.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
ret <vscale x 4 x i32> %0
}
@@ -316,7 +316,7 @@ define <vscale x 4 x i32> @test_svclz_s32_x_2(<vscale x 4 x i1> %pg, double %z0,
; CHECK-2p2-NEXT: clz z0.s, p0/z, z1.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.clz.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.clz.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
ret <vscale x 4 x i32> %0
}
@@ -347,7 +347,7 @@ define <vscale x 2 x i64> @test_svclz_s64_x_1(<vscale x 2 x i1> %pg, <vscale x 2
; CHECK-2p2-NEXT: clz z0.d, p0/z, z0.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.clz.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.clz.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
ret <vscale x 2 x i64> %0
}
@@ -363,7 +363,7 @@ define <vscale x 2 x i64> @test_svclz_s64_x_2(<vscale x 2 x i1> %pg, double %z0,
; CHECK-2p2-NEXT: clz z0.d, p0/z, z1.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.clz.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.clz.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
ret <vscale x 2 x i64> %0
}
@@ -394,7 +394,7 @@ define <vscale x 16 x i8> @test_svcnt_s8_x_1(<vscale x 16 x i1> %pg, <vscale x 1
; CHECK-2p2-NEXT: cnt z0.b, p0/z, z0.b
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.cnt.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.cnt.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
ret <vscale x 16 x i8> %0
}
@@ -410,7 +410,7 @@ define <vscale x 16 x i8> @test_svcnt_s8_x_2(<vscale x 16 x i1> %pg, double %z0,
; CHECK-2p2-NEXT: cnt z0.b, p0/z, z1.b
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.cnt.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.cnt.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
ret <vscale x 16 x i8> %0
}
@@ -441,7 +441,7 @@ define <vscale x 8 x i16> @test_svcnt_s16_x_1(<vscale x 8 x i1> %pg, <vscale x 8
; CHECK-2p2-NEXT: cnt z0.h, p0/z, z0.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
ret <vscale x 8 x i16> %0
}
@@ -457,7 +457,7 @@ define <vscale x 8 x i16> @test_svcnt_s16_x_2(<vscale x 8 x i1> %pg, double %z0,
; CHECK-2p2-NEXT: cnt z0.h, p0/z, z1.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
ret <vscale x 8 x i16> %0
}
@@ -488,7 +488,7 @@ define <vscale x 4 x i32> @test_svcnt_s32_x_1(<vscale x 4 x i1> %pg, <vscale x 4
; CHECK-2p2-NEXT: cnt z0.s, p0/z, z0.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cnt.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cnt.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
ret <vscale x 4 x i32> %0
}
@@ -504,7 +504,7 @@ define <vscale x 4 x i32> @test_svcnt_s32_x_2(<vscale x 4 x i1> %pg, double %z0,
; CHECK-2p2-NEXT: cnt z0.s, p0/z, z1.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cnt.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cnt.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
ret <vscale x 4 x i32> %0
}
@@ -535,7 +535,7 @@ define <vscale x 2 x i64> @test_svcnt_s64_x_1(<vscale x 2 x i1> %pg, <vscale x 2
; CHECK-2p2-NEXT: cnt z0.d, p0/z, z0.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cnt.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cnt.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
ret <vscale x 2 x i64> %0
}
@@ -551,7 +551,7 @@ define <vscale x 2 x i64> @test_svcnt_s64_x_2(<vscale x 2 x i1> %pg, double %z0,
; CHECK-2p2-NEXT: cnt z0.d, p0/z, z1.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cnt.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cnt.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
ret <vscale x 2 x i64> %0
}
@@ -582,7 +582,7 @@ define <vscale x 8 x i16> @test_svcnt_f16_x_1(<vscale x 8 x i1> %pg, <vscale x 8
; CHECK-2p2-NEXT: cnt z0.h, p0/z, z0.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8f16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8f16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
ret <vscale x 8 x i16> %0
}
@@ -598,7 +598,7 @@ define <vscale x 8 x i16> @test_svcnt_f16_x_2(<vscale x 8 x i1> %pg, double %z0,
; CHECK-2p2-NEXT: cnt z0.h, p0/z, z1.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8f16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8f16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
ret <vscale x 8 x i16> %0
}
@@ -629,7 +629,7 @@ define <vscale x 8 x i16> @test_svcnt_bf16_x_1(<vscale x 8 x i1> %pg, <vscale x
; CHECK-2p2-NEXT: cnt z0.h, p0/z, z0.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8bf16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %x)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8bf16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %x)
ret <vscale x 8 x i16> %0
}
@@ -645,7 +645,7 @@ define <vscale x 8 x i16> @test_svcnt_bf16_x_2(<vscale x 8 x i1> %pg, double %z0
; CHECK-2p2-NEXT: cnt z0.h, p0/z, z1.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8bf16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %x)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8bf16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %x)
ret <vscale x 8 x i16> %0
}
@@ -676,7 +676,7 @@ define <vscale x 4 x i32> @test_svcnt_f32_x_1(<vscale x 4 x i1> %pg, <vscale x 4
; CHECK-2p2-NEXT: cnt z0.s, p0/z, z0.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cnt.nxv4f32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cnt.nxv4f32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
ret <vscale x 4 x i32> %0
}
@@ -692,7 +692,7 @@ define <vscale x 4 x i32> @test_svcnt_f32_x_2(<vscale x 4 x i1> %pg, double %z0,
; CHECK-2p2-NEXT: cnt z0.s, p0/z, z1.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cnt.nxv4f32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cnt.nxv4f32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
ret <vscale x 4 x i32> %0
}
@@ -723,7 +723,7 @@ define <vscale x 2 x i64> @test_svcnt_f64_x_1(<vscale x 2 x i1> %pg, <vscale x 2
; CHECK-2p2-NEXT: cnt z0.d, p0/z, z0.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cnt.nxv2f64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cnt.nxv2f64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
ret <vscale x 2 x i64> %0
}
@@ -739,7 +739,7 @@ define <vscale x 2 x i64> @test_svcnt_f64_x_2(<vscale x 2 x i1> %pg, double %z0,
; CHECK-2p2-NEXT: cnt z0.d, p0/z, z1.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cnt.nxv2f64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cnt.nxv2f64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
ret <vscale x 2 x i64> %0
}
@@ -770,7 +770,7 @@ define <vscale x 16 x i8> @test_svcnot_s8_x_1(<vscale x 16 x i1> %pg, <vscale x
; CHECK-2p2-NEXT: cnot z0.b, p0/z, z0.b
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.cnot.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.cnot.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
ret <vscale x 16 x i8> %0
}
@@ -786,7 +786,7 @@ define <vscale x 16 x i8> @test_svcnot_s8_x_2(<vscale x 16 x i1> %pg, double %z0
; CHECK-2p2-NEXT: cnot z0.b, p0/z, z1.b
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.cnot.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.cnot.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
ret <vscale x 16 x i8> %0
}
@@ -817,7 +817,7 @@ define <vscale x 8 x i16> @test_svcnot_s16_x_1(<vscale x 8 x i1> %pg, <vscale x
; CHECK-2p2-NEXT: cnot z0.h, p0/z, z0.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnot.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnot.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
ret <vscale x 8 x i16> %0
}
@@ -833,7 +833,7 @@ define <vscale x 8 x i16> @test_svcnot_s16_x_2(<vscale x 8 x i1> %pg, double %z0
; CHECK-2p2-NEXT: cnot z0.h, p0/z, z1.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnot.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnot.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
ret <vscale x 8 x i16> %0
}
@@ -864,7 +864,7 @@ define <vscale x 4 x i32> @test_svcnot_s32_x_1(<vscale x 4 x i1> %pg, <vscale x
; CHECK-2p2-NEXT: cnot z0.s, p0/z, z0.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cnot.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cnot.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
ret <vscale x 4 x i32> %0
}
@@ -880,7 +880,7 @@ define <vscale x 4 x i32> @test_svcnot_s32_x_2(<vscale x 4 x i1> %pg, double %z0
; CHECK-2p2-NEXT: cnot z0.s, p0/z, z1.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cnot.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cnot.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
ret <vscale x 4 x i32> %0
}
@@ -911,7 +911,7 @@ define <vscale x 2 x i64> @test_svcnot_s64_x_1(<vscale x 2 x i1> %pg, <vscale x
; CHECK-2p2-NEXT: cnot z0.d, p0/z, z0.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cnot.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cnot.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
ret <vscale x 2 x i64> %0
}
@@ -927,7 +927,7 @@ define <vscale x 2 x i64> @test_svcnot_s64_x_2(<vscale x 2 x i1> %pg, double %z0
; CHECK-2p2-NEXT: cnot z0.d, p0/z, z1.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cnot.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cnot.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
ret <vscale x 2 x i64> %0
}
@@ -958,7 +958,7 @@ define <vscale x 16 x i8> @test_svnot_s8_x_1(<vscale x 16 x i1> %pg, <vscale x 1
; CHECK-2p2-NEXT: not z0.b, p0/z, z0.b
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.not.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.not.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
ret <vscale x 16 x i8> %0
}
@@ -974,7 +974,7 @@ define <vscale x 16 x i8> @test_svnot_s8_x_2(<vscale x 16 x i1> %pg, double %z0,
; CHECK-2p2-NEXT: not z0.b, p0/z, z1.b
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.not.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.not.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
ret <vscale x 16 x i8> %0
}
@@ -1005,7 +1005,7 @@ define <vscale x 8 x i16> @test_svnot_s16_x_1(<vscale x 8 x i1> %pg, <vscale x 8
; CHECK-2p2-NEXT: not z0.h, p0/z, z0.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.not.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.not.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
ret <vscale x 8 x i16> %0
}
@@ -1021,7 +1021,7 @@ define <vscale x 8 x i16> @test_svnot_s16_x_2(<vscale x 8 x i1> %pg, double %z0,
; CHECK-2p2-NEXT: not z0.h, p0/z, z1.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.not.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.not.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
ret <vscale x 8 x i16> %0
}
@@ -1052,7 +1052,7 @@ define <vscale x 4 x i32> @test_svnot_s32_x_1(<vscale x 4 x i1> %pg, <vscale x 4
; CHECK-2p2-NEXT: not z0.s, p0/z, z0.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.not.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.not.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
ret <vscale x 4 x i32> %0
}
@@ -1068,7 +1068,7 @@ define <vscale x 4 x i32> @test_svnot_s32_x_2(<vscale x 4 x i1> %pg, double %z0,
; CHECK-2p2-NEXT: not z0.s, p0/z, z1.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.not.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.not.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
ret <vscale x 4 x i32> %0
}
@@ -1099,7 +1099,7 @@ define <vscale x 2 x i64> @test_svnot_s64_x_1(<vscale x 2 x i1> %pg, <vscale x 2
; CHECK-2p2-NEXT: not z0.d, p0/z, z0.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.not.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.not.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
ret <vscale x 2 x i64> %0
}
@@ -1115,7 +1115,7 @@ define <vscale x 2 x i64> @test_svnot_s64_x_2(<vscale x 2 x i1> %pg, double %z0,
; CHECK-2p2-NEXT: not z0.d, p0/z, z1.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.not.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.not.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
ret <vscale x 2 x i64> %0
}
@@ -1135,18 +1135,914 @@ entry:
ret <vscale x 2 x i64> %0
}
-define <vscale x 2 x i64> @test_svnot_u64_z(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x i64> %x) {
-; CHECK-LABEL: test_svnot_u64_z:
+define <vscale x 16 x i8> @test_svcls_nxv16i8_ptrue_u(double %z0, <vscale x 16 x i8> %x) {
+; CHECK-LABEL: test_svcls_nxv16i8_ptrue_u:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov z0.d, #0 // =0x0
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: cls z0.b, p0/m, z1.b
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcls_nxv16i8_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.b
+; CHECK-2p2-NEXT: cls z0.b, p0/z, z1.b
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.cls.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+ ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 16 x i8> @test_svcls_nxv16i8_ptrue(double %z0, <vscale x 16 x i8> %x, <vscale x 16 x i8> %y) {
+; CHECK-LABEL: test_svcls_nxv16i8_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: cls z0.b, p0/m, z2.b
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcls_nxv16i8_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.b
+; CHECK-2p2-NEXT: cls z0.b, p0/z, z2.b
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.cls.nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %y)
+ ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 8 x i16> @test_svcls_nxv8i16_ptrue_u(double %z0, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svcls_nxv8i16_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: cls z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcls_nxv8i16_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.h
+; CHECK-2p2-NEXT: cls z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cls.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svcls_nxv8i16_ptrue(double %z0, <vscale x 8 x i16> %x, <vscale x 8 x i16> %y) {
+; CHECK-LABEL: test_svcls_nxv8i16_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: cls z0.h, p0/m, z2.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcls_nxv8i16_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.h
+; CHECK-2p2-NEXT: cls z0.h, p0/z, z2.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cls.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %y)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 4 x i32> @test_svcls_nxv4i32_ptrue_u(double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svcls_nxv4i32_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: cls z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcls_nxv4i32_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: cls z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cls.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svcls_nxv4i32_ptrue(double %z0, <vscale x 4 x i32> %x, <vscale x 4 x i32> %y) {
+; CHECK-LABEL: test_svcls_nxv4i32_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: cls z0.s, p0/m, z2.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcls_nxv4i32_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: cls z0.s, p0/z, z2.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cls.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %y)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 2 x i64> @test_svcls_nxv2i64_ptrue_u(double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svcls_nxv2i64_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: cls z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcls_nxv2i64_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: cls z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cls.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_svcls_nxv2i64_ptrue(double %z0, <vscale x 2 x i64> %x, <vscale x 2 x i64> %y) {
+; CHECK-LABEL: test_svcls_nxv2i64_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: cls z0.d, p0/m, z2.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcls_nxv2i64_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: cls z0.d, p0/z, z2.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cls.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %y)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 16 x i8> @test_svclz_nxv16i8_ptrue_u(double %z0, <vscale x 16 x i8> %x) {
+; CHECK-LABEL: test_svclz_nxv16i8_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: clz z0.b, p0/m, z1.b
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svclz_nxv16i8_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.b
+; CHECK-2p2-NEXT: clz z0.b, p0/z, z1.b
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.clz.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+ ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 16 x i8> @test_svclz_nxv16i8_ptrue(double %z0, <vscale x 16 x i8> %x, <vscale x 16 x i8> %y) {
+; CHECK-LABEL: test_svclz_nxv16i8_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: clz z0.b, p0/m, z2.b
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svclz_nxv16i8_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.b
+; CHECK-2p2-NEXT: clz z0.b, p0/z, z2.b
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.clz.nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %y)
+ ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 8 x i16> @test_svclz_nxv8i16_ptrue_u(double %z0, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svclz_nxv8i16_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: clz z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svclz_nxv8i16_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.h
+; CHECK-2p2-NEXT: clz z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.clz.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svclz_nxv8i16_ptrue(double %z0, <vscale x 8 x i16> %x, <vscale x 8 x i16> %y) {
+; CHECK-LABEL: test_svclz_nxv8i16_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: clz z0.h, p0/m, z2.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svclz_nxv8i16_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.h
+; CHECK-2p2-NEXT: clz z0.h, p0/z, z2.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.clz.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %y)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 4 x i32> @test_svclz_nxv4i32_ptrue_u(double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svclz_nxv4i32_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: clz z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svclz_nxv4i32_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: clz z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.clz.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svclz_nxv4i32_ptrue(double %z0, <vscale x 4 x i32> %x, <vscale x 4 x i32> %y) {
+; CHECK-LABEL: test_svclz_nxv4i32_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: clz z0.s, p0/m, z2.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svclz_nxv4i32_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: clz z0.s, p0/z, z2.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.clz.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %y)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 2 x i64> @test_svclz_nxv2i64_ptrue_u(double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svclz_nxv2i64_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: clz z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svclz_nxv2i64_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: clz z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.clz.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_svclz_nxv2i64_ptrue(double %z0, <vscale x 2 x i64> %x, <vscale x 2 x i64> %y) {
+; CHECK-LABEL: test_svclz_nxv2i64_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: clz z0.d, p0/m, z2.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svclz_nxv2i64_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: clz z0.d, p0/z, z2.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.clz.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %y)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 16 x i8> @test_svcnt_nxv16i8_ptrue_u(double %z0, <vscale x 16 x i8> %x) {
+; CHECK-LABEL: test_svcnt_nxv16i8_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: cnt z0.b, p0/m, z1.b
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcnt_nxv16i8_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.b
+; CHECK-2p2-NEXT: cnt z0.b, p0/z, z1.b
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.cnt.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+ ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 16 x i8> @test_svcnt_nxv16i8_ptrue(double %z0, <vscale x 16 x i8> %x, <vscale x 16 x i8> %y) {
+; CHECK-LABEL: test_svcnt_nxv16i8_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: cnt z0.b, p0/m, z2.b
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcnt_nxv16i8_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.b
+; CHECK-2p2-NEXT: cnt z0.b, p0/z, z2.b
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.cnt.nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %y)
+ ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 8 x i16> @test_svcnt_nxv8i16_ptrue_u(double %z0, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svcnt_nxv8i16_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: cnt z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcnt_nxv8i16_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.h
+; CHECK-2p2-NEXT: cnt z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svcnt_nxv8i16_ptrue(double %z0, <vscale x 8 x i16> %x, <vscale x 8 x i16> %y) {
+; CHECK-LABEL: test_svcnt_nxv8i16_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: cnt z0.h, p0/m, z2.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcnt_nxv8i16_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.h
+; CHECK-2p2-NEXT: cnt z0.h, p0/z, z2.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %y)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 4 x i32> @test_svcnt_nxv4i32_ptrue_u(double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svcnt_nxv4i32_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: cnt z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcnt_nxv4i32_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: cnt z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cnt.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svcnt_nxv4i32_ptrue(double %z0, <vscale x 4 x i32> %x, <vscale x 4 x i32> %y) {
+; CHECK-LABEL: test_svcnt_nxv4i32_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: cnt z0.s, p0/m, z2.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcnt_nxv4i32_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: cnt z0.s, p0/z, z2.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cnt.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %y)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 2 x i64> @test_svcnt_nxv2i64_ptrue_u(double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svcnt_nxv2i64_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: cnt z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcnt_nxv2i64_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: cnt z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cnt.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_svcnt_nxv2i64_ptrue(double %z0, <vscale x 2 x i64> %x, <vscale x 2 x i64> %y) {
+; CHECK-LABEL: test_svcnt_nxv2i64_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: cnt z0.d, p0/m, z2.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcnt_nxv2i64_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: cnt z0.d, p0/z, z2.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cnt.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %y)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 8 x i16> @test_svcnt_nxv8f16_ptrue_u(double %z0, <vscale x 8 x half> %x) {
+; CHECK-LABEL: test_svcnt_nxv8f16_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: cnt z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcnt_nxv8f16_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.h
+; CHECK-2p2-NEXT: cnt z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8f16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svcnt_nxv8f16_ptrue(double %z0, <vscale x 8 x i16> %x, <vscale x 8 x half> %y) {
+; CHECK-LABEL: test_svcnt_nxv8f16_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: cnt z0.h, p0/m, z2.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcnt_nxv8f16_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.h
+; CHECK-2p2-NEXT: cnt z0.h, p0/z, z2.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8f16(<vscale x 8 x i16> %x, <vscale x 8 x i1> %pg, <vscale x 8 x half> %y)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svcnt_nxv8bf16_ptrue_u(double %z0, <vscale x 8 x bfloat> %x) {
+; CHECK-LABEL: test_svcnt_nxv8bf16_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: cnt z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcnt_nxv8bf16_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.h
+; CHECK-2p2-NEXT: cnt z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8bf16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %x)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svcnt_nxv8bf16_ptrue(double %z0, <vscale x 8 x i16> %x, <vscale x 8 x bfloat> %y) {
+; CHECK-LABEL: test_svcnt_nxv8bf16_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: cnt z0.h, p0/m, z2.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcnt_nxv8bf16_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.h
+; CHECK-2p2-NEXT: cnt z0.h, p0/z, z2.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8bf16(<vscale x 8 x i16> %x, <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %y)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 4 x i32> @test_svcnt_nxv4f32_ptrue_u(double %z0, <vscale x 4 x float> %x) {
+; CHECK-LABEL: test_svcnt_nxv4f32_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: cnt z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcnt_nxv4f32_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: cnt z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cnt.nxv4f32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svcnt_nxv4f32_ptrue(double %z0, <vscale x 4 x i32> %x, <vscale x 4 x float> %y) {
+; CHECK-LABEL: test_svcnt_nxv4f32_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: cnt z0.s, p0/m, z2.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcnt_nxv4f32_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: cnt z0.s, p0/z, z2.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cnt.nxv4f32(<vscale x 4 x i32> %x, <vscale x 4 x i1> %pg, <vscale x 4 x float> %y)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 2 x i64> @test_svcnt_nxv2f64_ptrue_u(double %z0, <vscale x 2 x double> %x) {
+; CHECK-LABEL: test_svcnt_nxv2f64_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: cnt z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcnt_nxv2f64_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: cnt z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cnt.nxv2f64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_svcnt_nxv2f64_ptrue(double %z0, <vscale x 2 x i64> %x, <vscale x 2 x double> %y) {
+; CHECK-LABEL: test_svcnt_nxv2f64_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: cnt z0.d, p0/m, z2.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcnt_nxv2f64_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: cnt z0.d, p0/z, z2.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cnt.nxv2f64(<vscale x 2 x i64> %x, <vscale x 2 x i1> %pg, <vscale x 2 x double> %y)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 16 x i8> @test_svcnot_nxv16i8_ptrue_u(double %z0, <vscale x 16 x i8> %x) {
+; CHECK-LABEL: test_svcnot_nxv16i8_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: cnot z0.b, p0/m, z1.b
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcnot_nxv16i8_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.b
+; CHECK-2p2-NEXT: cnot z0.b, p0/z, z1.b
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.cnot.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+ ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 16 x i8> @test_svcnot_nxv16i8_ptrue(double %z0, <vscale x 16 x i8> %x, <vscale x 16 x i8> %y) {
+; CHECK-LABEL: test_svcnot_nxv16i8_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: cnot z0.b, p0/m, z2.b
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcnot_nxv16i8_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.b
+; CHECK-2p2-NEXT: cnot z0.b, p0/z, z2.b
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.cnot.nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %y)
+ ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 8 x i16> @test_svcnot_nxv8i16_ptrue_u(double %z0, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svcnot_nxv8i16_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: cnot z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcnot_nxv8i16_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.h
+; CHECK-2p2-NEXT: cnot z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnot.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svcnot_nxv8i16_ptrue(double %z0, <vscale x 8 x i16> %x, <vscale x 8 x i16> %y) {
+; CHECK-LABEL: test_svcnot_nxv8i16_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: cnot z0.h, p0/m, z2.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcnot_nxv8i16_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.h
+; CHECK-2p2-NEXT: cnot z0.h, p0/z, z2.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnot.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %y)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 4 x i32> @test_svcnot_nxv4i32_ptrue_u(double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svcnot_nxv4i32_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: cnot z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcnot_nxv4i32_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: cnot z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cnot.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svcnot_nxv4i32_ptrue(double %z0, <vscale x 4 x i32> %x, <vscale x 4 x i32> %y) {
+; CHECK-LABEL: test_svcnot_nxv4i32_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: cnot z0.s, p0/m, z2.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcnot_nxv4i32_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: cnot z0.s, p0/z, z2.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cnot.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %y)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 2 x i64> @test_svcnot_nxv2i64_ptrue_u(double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svcnot_nxv2i64_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: cnot z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcnot_nxv2i64_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: cnot z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cnot.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_svcnot_nxv2i64_ptrue(double %z0, <vscale x 2 x i64> %x, <vscale x 2 x i64> %y) {
+; CHECK-LABEL: test_svcnot_nxv2i64_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: cnot z0.d, p0/m, z2.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcnot_nxv2i64_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: cnot z0.d, p0/z, z2.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cnot.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %y)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 16 x i8> @test_svnot_nxv16i8_ptrue_u(double %z0, <vscale x 16 x i8> %x) {
+; CHECK-LABEL: test_svnot_nxv16i8_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: not z0.b, p0/m, z1.b
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svnot_nxv16i8_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.b
+; CHECK-2p2-NEXT: not z0.b, p0/z, z1.b
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.not.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+ ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 16 x i8> @test_svnot_nxv16i8_ptrue(double %z0, <vscale x 16 x i8> %x, <vscale x 16 x i8> %y) {
+; CHECK-LABEL: test_svnot_nxv16i8_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: not z0.b, p0/m, z2.b
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svnot_nxv16i8_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.b
+; CHECK-2p2-NEXT: not z0.b, p0/z, z2.b
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.not.nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %y)
+ ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 8 x i16> @test_svnot_nxv8i16_ptrue_u(double %z0, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svnot_nxv8i16_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: not z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svnot_nxv8i16_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.h
+; CHECK-2p2-NEXT: not z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.not.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svnot_nxv8i16_ptrue(double %z0, <vscale x 8 x i16> %x, <vscale x 8 x i16> %y) {
+; CHECK-LABEL: test_svnot_nxv8i16_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: not z0.h, p0/m, z2.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svnot_nxv8i16_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.h
+; CHECK-2p2-NEXT: not z0.h, p0/z, z2.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.not.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %y)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 4 x i32> @test_svnot_nxv4i32_ptrue_u(double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svnot_nxv4i32_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: not z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svnot_nxv4i32_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: not z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.not.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svnot_nxv4i32_ptrue(double %z0, <vscale x 4 x i32> %x, <vscale x 4 x i32> %y) {
+; CHECK-LABEL: test_svnot_nxv4i32_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: not z0.s, p0/m, z2.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svnot_nxv4i32_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: not z0.s, p0/z, z2.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.not.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %y)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 2 x i64> @test_svnot_nxv2i64_ptrue_u(double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svnot_nxv2i64_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z1
; CHECK-NEXT: not z0.d, p0/m, z1.d
; CHECK-NEXT: ret
;
-; CHECK-2p2-LABEL: test_svnot_u64_z:
+; CHECK-2p2-LABEL: test_svnot_nxv2i64_ptrue_u:
; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
; CHECK-2p2-NEXT: not z0.d, p0/z, z1.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.not.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.not.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_svnot_nxv2i64_ptrue(double %z0, <vscale x 2 x i64> %x, <vscale x 2 x i64> %y) {
+; CHECK-LABEL: test_svnot_nxv2i64_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: not z0.d, p0/m, z2.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svnot_nxv2i64_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: not z0.d, p0/z, z2.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.not.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %y)
ret <vscale x 2 x i64> %0
}
>From 1965e53776672e5358213626decb90764fff8868 Mon Sep 17 00:00:00 2001
From: Momchil Velikov <momchil.velikov at arm.com>
Date: Tue, 19 Nov 2024 12:23:46 +0000
Subject: [PATCH 7/8] [AArch64] Generate zeroing forms of certain SVE2.2
instructions (7/11)
---
.../lib/Target/AArch64/AArch64SVEInstrInfo.td | 2 +-
llvm/lib/Target/AArch64/SVEInstrFormats.td | 6 +-
.../CodeGen/AArch64/zeroing-forms-flogb.ll | 146 ++++++++++++++++++
3 files changed, 152 insertions(+), 2 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/zeroing-forms-flogb.ll
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index c43e9e4a46f36e..7e57480f2e39bb 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -4280,7 +4280,7 @@ let Predicates = [HasSVE2p2orSME2p2] in {
defm SCVTF_ZPzZ : sve_fp_z2op_p_zd_c<0b0, "scvtf", "int_aarch64_sve_scvtf", AArch64scvtf_mt>;
defm UCVTF_ZPzZ : sve_fp_z2op_p_zd_c<0b1, "ucvtf", "int_aarch64_sve_ucvtf", AArch64ucvtf_mt>;
// Signed integer base 2 logarithm of fp value, zeroing predicate
- defm FLOGB_ZPzZ : sve_fp_z2op_p_zd_d_flogb<"flogb">;
+ defm FLOGB_ZPzZ : sve_fp_z2op_p_zd_d_flogb<"flogb", int_aarch64_sve_flogb>;
// SVE2 integer unary operations, zeroing predicate
def URECPE_ZPzZ : sve2_int_un_pred_arit_z<0b10, 0b00, "urecpe", ZPR32>;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 959f98555466be..0e501a8ca254e5 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -3342,10 +3342,14 @@ multiclass sve_fp_z2op_p_zd_c<bit U, string asm, string int_op, SDPatternOperato
defm : SVE_1_Op_PassthruUndefZero_Pat<nxv2f64, ir_op, nxv2i1,nxv2i64, !cast<Instruction>(NAME # _DtoD)>;
}
-multiclass sve_fp_z2op_p_zd_d_flogb<string asm> {
+multiclass sve_fp_z2op_p_zd_d_flogb<string asm, SDPatternOperator op> {
def _H : sve_fp_z2op_p_zd<0b0011001, asm, ZPR16, ZPR16>;
def _S : sve_fp_z2op_p_zd<0b0011010, asm, ZPR32, ZPR32>;
def _D : sve_fp_z2op_p_zd<0b0011011, asm, ZPR64, ZPR64>;
+
+ def : SVE_3_Op_UndefZero_Pat<nxv8i16, op, nxv8i16, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_UndefZero_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_UndefZero_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
}
multiclass sve_fp_z2op_p_zd_b_0<string asm, string op> {
diff --git a/llvm/test/CodeGen/AArch64/zeroing-forms-flogb.ll b/llvm/test/CodeGen/AArch64/zeroing-forms-flogb.ll
new file mode 100644
index 00000000000000..8d799cd3224212
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/zeroing-forms-flogb.ll
@@ -0,0 +1,146 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mattr=+sve2 < %s | FileCheck %s
+; RUN: llc -mattr=+sve2p2 < %s | FileCheck %s -check-prefix CHECK-2p2
+
+; RUN: llc -mattr=+sme2 -force-streaming < %s | FileCheck %s
+; RUN: llc -mattr=+sme2p2 -force-streaming < %s | FileCheck %s -check-prefix CHECK-2p2
+
+target triple = "aarch64-linux"
+
+define <vscale x 8 x i16> @test_svlogb_f16_x_1(<vscale x 8 x i1> %pg, <vscale x 8 x half> %x) {
+; CHECK-LABEL: test_svlogb_f16_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: flogb z0.h, p0/m, z0.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svlogb_f16_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: flogb z0.h, p0/z, z0.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.flogb.nxv8f16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svlogb_f16_x_2(<vscale x 8 x i1> %pg, double %z0, <vscale x 8 x half> %x) {
+; CHECK-LABEL: test_svlogb_f16_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: flogb z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svlogb_f16_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: flogb z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.flogb.nxv8f16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svlogb_f16_z(<vscale x 8 x i1> %pg, double %z0, <vscale x 8 x half> %x) {
+; CHECK-LABEL: test_svlogb_f16_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.h, #0 // =0x0
+; CHECK-NEXT: flogb z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svlogb_f16_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: flogb z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.flogb.nxv8f16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 4 x i32> @test_svlogb_f32_x_1(<vscale x 4 x i1> %pg, <vscale x 4 x float> %x) {
+; CHECK-LABEL: test_svlogb_f32_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: flogb z0.s, p0/m, z0.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svlogb_f32_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: flogb z0.s, p0/z, z0.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.flogb.nxv4f32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svlogb_f32_x_2(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x float> %x) {
+; CHECK-LABEL: test_svlogb_f32_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: flogb z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svlogb_f32_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: flogb z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.flogb.nxv4f32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svlogb_f32_z(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x float> %x) {
+; CHECK-LABEL: test_svlogb_f32_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.s, #0 // =0x0
+; CHECK-NEXT: flogb z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svlogb_f32_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: flogb z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.flogb.nxv4f32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 2 x i64> @test_svlogb_f64_x_1(<vscale x 2 x i1> %pg, <vscale x 2 x double> %x) {
+; CHECK-LABEL: test_svlogb_f64_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: flogb z0.d, p0/m, z0.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svlogb_f64_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: flogb z0.d, p0/z, z0.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.flogb.nxv2f64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_svlogb_f64_x_2(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x double> %x) {
+; CHECK-LABEL: test_svlogb_f64_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: flogb z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svlogb_f64_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: flogb z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.flogb.nxv2f64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_svlogb_f64_z(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x double> %x) {
+; CHECK-LABEL: test_svlogb_f64_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, #0 // =0x0
+; CHECK-NEXT: flogb z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svlogb_f64_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: flogb z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.flogb.nxv2f64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ ret <vscale x 2 x i64> %0
+}
>From e22e4caf7eb011ccf1c0aa98e1b2b9c43b477c84 Mon Sep 17 00:00:00 2001
From: Momchil Velikov <momchil.velikov at arm.com>
Date: Fri, 20 Dec 2024 17:10:54 +0000
Subject: [PATCH 8/8] [fixup] Rebase, update tests, add tests for the new
all-true patterns, change undef to poison
---
llvm/lib/Target/AArch64/SVEInstrFormats.td | 6 +-
.../CodeGen/AArch64/zeroing-forms-flogb.ll | 124 +++++++++++++++++-
2 files changed, 121 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 0e501a8ca254e5..a985d3a8b252c8 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -3347,9 +3347,9 @@ multiclass sve_fp_z2op_p_zd_d_flogb<string asm, SDPatternOperator op> {
def _S : sve_fp_z2op_p_zd<0b0011010, asm, ZPR32, ZPR32>;
def _D : sve_fp_z2op_p_zd<0b0011011, asm, ZPR64, ZPR64>;
- def : SVE_3_Op_UndefZero_Pat<nxv8i16, op, nxv8i16, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
- def : SVE_3_Op_UndefZero_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
- def : SVE_3_Op_UndefZero_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv8i16, op, nxv8i16, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
}
multiclass sve_fp_z2op_p_zd_b_0<string asm, string op> {
diff --git a/llvm/test/CodeGen/AArch64/zeroing-forms-flogb.ll b/llvm/test/CodeGen/AArch64/zeroing-forms-flogb.ll
index 8d799cd3224212..23620a3419b999 100644
--- a/llvm/test/CodeGen/AArch64/zeroing-forms-flogb.ll
+++ b/llvm/test/CodeGen/AArch64/zeroing-forms-flogb.ll
@@ -18,7 +18,7 @@ define <vscale x 8 x i16> @test_svlogb_f16_x_1(<vscale x 8 x i1> %pg, <vscale x
; CHECK-2p2-NEXT: flogb z0.h, p0/z, z0.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.flogb.nxv8f16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.flogb.nxv8f16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
ret <vscale x 8 x i16> %0
}
@@ -33,7 +33,7 @@ define <vscale x 8 x i16> @test_svlogb_f16_x_2(<vscale x 8 x i1> %pg, double %z0
; CHECK-2p2-NEXT: flogb z0.h, p0/z, z1.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.flogb.nxv8f16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.flogb.nxv8f16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
ret <vscale x 8 x i16> %0
}
@@ -64,7 +64,7 @@ define <vscale x 4 x i32> @test_svlogb_f32_x_1(<vscale x 4 x i1> %pg, <vscale x
; CHECK-2p2-NEXT: flogb z0.s, p0/z, z0.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.flogb.nxv4f32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.flogb.nxv4f32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
ret <vscale x 4 x i32> %0
}
@@ -79,7 +79,7 @@ define <vscale x 4 x i32> @test_svlogb_f32_x_2(<vscale x 4 x i1> %pg, double %z0
; CHECK-2p2-NEXT: flogb z0.s, p0/z, z1.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.flogb.nxv4f32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.flogb.nxv4f32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
ret <vscale x 4 x i32> %0
}
@@ -110,7 +110,7 @@ define <vscale x 2 x i64> @test_svlogb_f64_x_1(<vscale x 2 x i1> %pg, <vscale x
; CHECK-2p2-NEXT: flogb z0.d, p0/z, z0.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.flogb.nxv2f64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.flogb.nxv2f64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
ret <vscale x 2 x i64> %0
}
@@ -125,7 +125,7 @@ define <vscale x 2 x i64> @test_svlogb_f64_x_2(<vscale x 2 x i1> %pg, double %z0
; CHECK-2p2-NEXT: flogb z0.d, p0/z, z1.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.flogb.nxv2f64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.flogb.nxv2f64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
ret <vscale x 2 x i64> %0
}
@@ -144,3 +144,115 @@ entry:
%0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.flogb.nxv2f64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
ret <vscale x 2 x i64> %0
}
+
+define <vscale x 8 x i16> @test_svlogb_nxv8f16_ptrue_u(double %z0, <vscale x 8 x half> %x) {
+; CHECK-LABEL: test_svlogb_nxv8f16_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: flogb z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svlogb_nxv8f16_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.h
+; CHECK-2p2-NEXT: flogb z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.flogb.nxv8f16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svlogb_nxv8f16_ptrue(double %z0, <vscale x 8 x i16> %x, <vscale x 8 x half> %y) {
+; CHECK-LABEL: test_svlogb_nxv8f16_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: flogb z0.h, p0/m, z2.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svlogb_nxv8f16_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.h
+; CHECK-2p2-NEXT: flogb z0.h, p0/z, z2.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.flogb.nxv8f16(<vscale x 8 x i16> %x, <vscale x 8 x i1> %pg, <vscale x 8 x half> %y)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 4 x i32> @test_svlogb_nxv4f32_ptrue_u(double %z0, <vscale x 4 x float> %x) {
+; CHECK-LABEL: test_svlogb_nxv4f32_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: flogb z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svlogb_nxv4f32_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: flogb z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.flogb.nxv4f32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svlogb_nxv4f32_ptrue(double %z0, <vscale x 4 x i32> %x, <vscale x 4 x float> %y) {
+; CHECK-LABEL: test_svlogb_nxv4f32_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: flogb z0.s, p0/m, z2.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svlogb_nxv4f32_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: flogb z0.s, p0/z, z2.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.flogb.nxv4f32(<vscale x 4 x i32> %x, <vscale x 4 x i1> %pg, <vscale x 4 x float> %y)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 2 x i64> @test_svlogb_nxv2f64_ptrue_u(double %z0, <vscale x 2 x double> %x) {
+; CHECK-LABEL: test_svlogb_nxv2f64_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: flogb z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svlogb_nxv2f64_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: flogb z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.flogb.nxv2f64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_svlogb_nxv2f64_ptrue(double %z0, <vscale x 2 x i64> %x, <vscale x 2 x double> %y) {
+; CHECK-LABEL: test_svlogb_nxv2f64_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: flogb z0.d, p0/m, z2.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svlogb_nxv2f64_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: flogb z0.d, p0/z, z2.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.flogb.nxv2f64(<vscale x 2 x i64> %x, <vscale x 2 x i1> %pg, <vscale x 2 x double> %y)
+ ret <vscale x 2 x i64> %0
+}
+
More information about the llvm-commits
mailing list