[llvm] b2073fb - [AArch64] Prefer SVE2.2 zeroing forms of certain instructions with an all-true predicate (#120595)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 24 02:18:53 PST 2024
Author: Momchil Velikov
Date: 2024-12-24T10:18:48Z
New Revision: b2073fb9b9282c0f59861a137660f6a0782d7468
URL: https://github.com/llvm/llvm-project/commit/b2073fb9b9282c0f59861a137660f6a0782d7468
DIFF: https://github.com/llvm/llvm-project/commit/b2073fb9b9282c0f59861a137660f6a0782d7468.diff
LOG: [AArch64] Prefer SVE2.2 zeroing forms of certain instructions with an all-true predicate (#120595)
When the predicate of a destructive operation is known to be all-true,
for example
fabs z0.s, p0/m, z1.s
then the entire output register is written and we can use a zeroing
(instead of a merging) form of the instruction, for example
fabs z0.s, p0/z, z1.s
thus eliminating the dependency on the input/output destination register
without the need to insert a `movprfx`.
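Concretely, when the destination register differs from the source, the
merging form forces an extra copy. A minimal sketch of the codegen
difference, mirroring the CHECK/CHECK-2p2 lines in the updated tests
(register assignment is illustrative):
    // without SVE2.2 zeroing forms: the merging form needs a movprfx
    movprfx z0, z1
    fabs    z0.s, p0/m, z1.s
    // with SVE2.2 zeroing forms: no movprfx needed when p0 is all-true
    fabs    z0.s, p0/z, z1.s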
This patch complements the following commits (and, in the case of
https://github.com/llvm/llvm-project/commit/2b3266c1701f315d7e89c81977800001563afacb,
fixes a regression):
https://github.com/llvm/llvm-project/commit/7f4414b2a1a4d9f802a03f56894c406f0fe3e9a9
[AArch64] Generate zeroing forms of certain SVE2.2 instructions (4/11)
(https://github.com/llvm/llvm-project/pull/116830)
https://github.com/llvm/llvm-project/commit/2474cf7ad123ea14308293a2237e3552cddb1136
[AArch64] Generate zeroing forms of certain SVE2.2 instructions (3/11)
(https://github.com/llvm/llvm-project/pull/116829)
https://github.com/llvm/llvm-project/commit/6f285d31159501050de5563b1a844a3e1ac79a03
[AArch64] Generate zeroing forms of certain SVE2.2 instructions (2/11)
(https://github.com/llvm/llvm-project/pull/116828)
https://github.com/llvm/llvm-project/commit/2b3266c1701f315d7e89c81977800001563afacb
[AArch64] Generate zeroing forms of certain SVE2.2 instructions (1/11)
(https://github.com/llvm/llvm-project/pull/116259)
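For instance, an intrinsic call whose predicate comes from `ptrue` with the
SV_ALL pattern (i32 31) and whose passthru is a regular value (neither undef
nor zero) is now matched by the new SVEAllActive/SVEAny patterns. A reduced
sketch of the case the new tests cover (register choices illustrative):
    %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
    %r = call <vscale x 4 x float> @llvm.aarch64.sve.fabs.nxv4f32(<vscale x 4 x float> %x, <vscale x 4 x i1> %pg, <vscale x 4 x float> %y)
    ; with +sve2p2: fabs z0.s, p0/z, z1.s
    ; otherwise:    movprfx z0, z1
    ;               fabs z0.s, p0/m, z1.s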
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
llvm/lib/Target/AArch64/AArch64InstrInfo.td
llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
llvm/lib/Target/AArch64/SVEInstrFormats.td
llvm/test/CodeGen/AArch64/zeroing-forms-abs-neg.ll
llvm/test/CodeGen/AArch64/zeroing-forms-fcvt-bfcvt.ll
llvm/test/CodeGen/AArch64/zeroing-forms-fcvtlt-fcvtx.ll
llvm/test/CodeGen/AArch64/zeroing-forms-fcvtzsu.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index ff3ca8a24fc04a..6aa8cd4f0232ac 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -228,6 +228,8 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
return false;
}
+ bool SelectAny(SDValue) { return true; }
+
bool SelectDupZero(SDValue N) {
switch(N->getOpcode()) {
case AArch64ISD::DUP:
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index b37f4a08755c5f..629098cda0c4e7 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -381,9 +381,6 @@ def NoUseScalarIncVL : Predicate<"!Subtarget->useScalarIncVL()">;
def UseSVEFPLD1R : Predicate<"!Subtarget->noSVEFPLD1R()">;
-def UseUnaryUndefPseudos
- : Predicate<"!(Subtarget->isSVEorStreamingSVEAvailable() && (Subtarget->hasSVE2p2() || Subtarget->hasSME2p2()))">;
-
def AArch64LocalRecover : SDNode<"ISD::LOCAL_RECOVER",
SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
SDTCisInt<1>]>>;
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index c8892de6474375..7dd6d49bf20227 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -675,14 +675,6 @@ let Predicates = [HasSVEorSME] in {
defm FABS_ZPmZ : sve_int_un_pred_arit_bitwise_fp<0b100, "fabs", AArch64fabs_mt>;
defm FNEG_ZPmZ : sve_int_un_pred_arit_bitwise_fp<0b101, "fneg", AArch64fneg_mt>;
- let Predicates = [HasSVEorSME, UseUnaryUndefPseudos] in {
- defm FABS_ZPmZ : sve_fp_un_pred_arit_hsd<AArch64fabs_mt>;
- defm FNEG_ZPmZ : sve_fp_un_pred_arit_hsd<AArch64fneg_mt>;
-
- defm ABS_ZPmZ : sve_int_un_pred_arit_bhsd<AArch64abs_mt>;
- defm NEG_ZPmZ : sve_int_un_pred_arit_bhsd<AArch64neg_mt>;
- }
-
foreach VT = [nxv2bf16, nxv4bf16, nxv8bf16] in {
// No dedicated instruction, so just clear the sign bit.
def : Pat<(VT (fabs VT:$op)),
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index a831de878a9107..0ef862fc1a27cf 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -484,6 +484,7 @@ let Predicates = [HasSVEorSME] in {
//===----------------------------------------------------------------------===//
def SVEDup0 : ComplexPattern<vAny, 0, "SelectDupZero", []>;
def SVEDup0Undef : ComplexPattern<vAny, 0, "SelectDupZeroOrUndef", []>;
+def SVEAny : ComplexPattern<vAny, 0, "SelectAny", []>;
class SVE_1_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
Instruction inst>
@@ -504,10 +505,15 @@ multiclass SVE_1_Op_PassthruUndef_Pat<ValueType vtd, SDPatternOperator op, Value
(inst $Op3, $Op1, $Op2)>;
}
-class SVE_1_Op_PassthruUndefZero_Pat<ValueType vtd, SDPatternOperator op, ValueType pg,
- ValueType vts, Instruction inst>
- : Pat<(vtd (op pg:$Op1, vts:$Op2, (vtd (SVEDup0Undef)))),
- (inst $Op1, $Op2)>;
+multiclass SVE_1_Op_PassthruUndefZero_Pat<ValueType vtd, SDPatternOperator op, ValueType pg,
+ ValueType vts, Instruction inst> {
+ let AddedComplexity = 1 in {
+ def : Pat<(vtd (op pg:$Op1, vts:$Op2, (vtd (SVEDup0Undef)))),
+ (inst $Op1, $Op2)>;
+ def : Pat<(vtd (op (pg (SVEAllActive:$Op1)), vts:$Op2, (vtd (SVEAny)))),
+ (inst $Op1, $Op2)>;
+ }
+}
// Used to match FP_ROUND_MERGE_PASSTHRU, which has an additional flag for the
// type of rounding. This is matched by timm0_1 in pattern below and ignored.
@@ -576,10 +582,15 @@ multiclass SVE_3_Op_Undef_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1
(inst $Op1, $Op2, $Op3)>;
}
-class SVE_3_Op_UndefZero_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
- ValueType vt2, ValueType vt3, Instruction inst>
- : Pat<(vtd (op (vt1 (SVEDup0Undef)), vt2:$Op1, vt3:$Op2)),
- (inst $Op1, $Op2)>;
+multiclass SVE_3_Op_UndefZero_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
+ ValueType vt2, ValueType vt3, Instruction inst> {
+ let AddedComplexity = 1 in {
+ def : Pat<(vtd (op (vt1 (SVEDup0Undef)), vt2:$Op1, vt3:$Op2)),
+ (inst $Op1, $Op2)>;
+ def : Pat<(vtd (op (vt1 (SVEAny)), (vt2 (SVEAllActive:$Op2)), vt3:$Op3)),
+ (inst $Op2, $Op3)>;
+ }
+}
class SVE_4_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
ValueType vt2, ValueType vt3, ValueType vt4,
@@ -2840,8 +2851,8 @@ multiclass sve2_fp_convert_up_long_z<string asm, string op> {
def _HtoS : sve2_fp_convert_precision<0b1001, 0b0, asm, ZPR32, ZPR16>;
def _StoD : sve2_fp_convert_precision<0b1111, 0b0, asm, ZPR64, ZPR32>;
- def : SVE_3_Op_UndefZero_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f16), nxv4f32, nxv4i1, nxv8f16, !cast<Instruction>(NAME # _HtoS)>;
- def : SVE_3_Op_UndefZero_Pat<nxv2f64, !cast<SDPatternOperator>(op # _f64f32), nxv2f64, nxv2i1, nxv4f32, !cast<Instruction>(NAME # _StoD)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f16), nxv4f32, nxv4i1, nxv8f16, !cast<Instruction>(NAME # _HtoS)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv2f64, !cast<SDPatternOperator>(op # _f64f32), nxv2f64, nxv2i1, nxv4f32, !cast<Instruction>(NAME # _StoD)>;
}
multiclass sve2_fp_convert_down_narrow_z<string asm> {
@@ -3256,7 +3267,7 @@ class sve_fp_z2op_p_zd<bits<7> opc,string asm, RegisterOperand i_zprtype,
multiclass sve_fp_z2op_p_zd<string asm, SDPatternOperator op> {
def _DtoS : sve_fp_z2op_p_zd<0b0001010, asm, ZPR64, ZPR32>;
- def : SVE_3_Op_UndefZero_Pat<nxv4f32, op, nxv4f32, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoS)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv4f32, op, nxv4f32, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoS)>;
}
multiclass sve_fp_z2op_p_zd_hsd<bits<5> opc, string asm> {
@@ -3273,7 +3284,7 @@ multiclass sve_fp_z2op_p_zd_frint<bits<2> opc, string asm> {
multiclass sve_fp_z2op_p_zd_bfcvt<string asm, SDPatternOperator op> {
def NAME : sve_fp_z2op_p_zd<0b1001010, asm, ZPR32, ZPR16>;
- def : SVE_3_Op_UndefZero_Pat<nxv8bf16, op, nxv8bf16, nxv4i1, nxv4f32, !cast<Instruction>(NAME)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv8bf16, op, nxv8bf16, nxv4i1, nxv4f32, !cast<Instruction>(NAME)>;
}
multiclass sve_fp_z2op_p_zd_d<bit U, string asm, string int_op, SDPatternOperator ir_op> {
@@ -3285,14 +3296,14 @@ multiclass sve_fp_z2op_p_zd_d<bit U, string asm, string int_op, SDPatternOperato
def _DtoS : sve_fp_z2op_p_zd<{ 0b111100, U }, asm, ZPR64, ZPR32>;
def _DtoD : sve_fp_z2op_p_zd<{ 0b111111, U }, asm, ZPR64, ZPR64>;
- def : SVE_3_Op_UndefZero_Pat<nxv4i32, !cast<SDPatternOperator>(int_op # _i32f64), nxv4i32, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoS)>;
- def : SVE_3_Op_UndefZero_Pat<nxv2i64, !cast<SDPatternOperator>(int_op # _i64f32), nxv2i64, nxv2i1, nxv4f32, !cast<Instruction>(NAME # _StoD)>;
- def : SVE_3_Op_UndefZero_Pat<nxv4i32, !cast<SDPatternOperator>(int_op # _i32f16), nxv4i32, nxv4i1, nxv8f16, !cast<Instruction>(NAME # _HtoS)>;
- def : SVE_3_Op_UndefZero_Pat<nxv2i64, !cast<SDPatternOperator>(int_op # _i64f16), nxv2i64, nxv2i1, nxv8f16, !cast<Instruction>(NAME # _HtoD)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv4i32, !cast<SDPatternOperator>(int_op # _i32f64), nxv4i32, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoS)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv2i64, !cast<SDPatternOperator>(int_op # _i64f32), nxv2i64, nxv2i1, nxv4f32, !cast<Instruction>(NAME # _StoD)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv4i32, !cast<SDPatternOperator>(int_op # _i32f16), nxv4i32, nxv4i1, nxv8f16, !cast<Instruction>(NAME # _HtoS)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv2i64, !cast<SDPatternOperator>(int_op # _i64f16), nxv2i64, nxv2i1, nxv8f16, !cast<Instruction>(NAME # _HtoD)>;
- def : SVE_1_Op_PassthruUndefZero_Pat<nxv8i16, ir_op, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _HtoH)>;
- def : SVE_1_Op_PassthruUndefZero_Pat<nxv4i32, ir_op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _StoS)>;
- def : SVE_1_Op_PassthruUndefZero_Pat<nxv2i64, ir_op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoD)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv8i16, ir_op, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _HtoH)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv4i32, ir_op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _StoS)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv2i64, ir_op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoD)>;
}
multiclass sve_fp_z2op_p_zd_c<bit U, string asm> {
@@ -3319,12 +3330,12 @@ multiclass sve_fp_z2op_p_zd_b_0<string asm, string op> {
def _DtoS : sve_fp_z2op_p_zd<0b1101010, asm, ZPR64, ZPR32>;
def _StoD : sve_fp_z2op_p_zd<0b1101011, asm, ZPR32, ZPR64>;
- def : SVE_3_Op_UndefZero_Pat<nxv8f16, !cast<SDPatternOperator>(op # _f16f32), nxv8f16, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _StoH)>;
- def : SVE_3_Op_UndefZero_Pat<nxv8f16, !cast<SDPatternOperator>(op # _f16f64), nxv8f16, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoH)>;
- def : SVE_3_Op_UndefZero_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f64), nxv4f32, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoS)>;
- def : SVE_3_Op_UndefZero_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f16), nxv4f32, nxv4i1, nxv8f16, !cast<Instruction>(NAME # _HtoS)>;
- def : SVE_3_Op_UndefZero_Pat<nxv2f64, !cast<SDPatternOperator>(op # _f64f16), nxv2f64, nxv2i1, nxv8f16, !cast<Instruction>(NAME # _HtoD)>;
- def : SVE_3_Op_UndefZero_Pat<nxv2f64, !cast<SDPatternOperator>(op # _f64f32), nxv2f64, nxv2i1, nxv4f32, !cast<Instruction>(NAME # _StoD)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv8f16, !cast<SDPatternOperator>(op # _f16f32), nxv8f16, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _StoH)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv8f16, !cast<SDPatternOperator>(op # _f16f64), nxv8f16, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoH)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f64), nxv4f32, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoS)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f16), nxv4f32, nxv4i1, nxv8f16, !cast<Instruction>(NAME # _HtoS)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv2f64, !cast<SDPatternOperator>(op # _f64f16), nxv2f64, nxv2i1, nxv8f16, !cast<Instruction>(NAME # _HtoD)>;
+ defm : SVE_3_Op_UndefZero_Pat<nxv2f64, !cast<SDPatternOperator>(op # _f64f32), nxv2f64, nxv2i1, nxv4f32, !cast<Instruction>(NAME # _StoD)>;
}
//===----------------------------------------------------------------------===//
@@ -4842,6 +4853,16 @@ multiclass sve_int_un_pred_arit<bits<3> opc, string asm,
def : SVE_1_Op_Passthru_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
def : SVE_1_Op_Passthru_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
def : SVE_1_Op_Passthru_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
+
+ def _B_UNDEF : PredOneOpPassthruPseudo<NAME # _B, ZPR8>;
+ def _H_UNDEF : PredOneOpPassthruPseudo<NAME # _H, ZPR16>;
+ def _S_UNDEF : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
+ def _D_UNDEF : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
+
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Pseudo>(NAME # _B_UNDEF)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Pseudo>(NAME # _H_UNDEF)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Pseudo>(NAME # _S_UNDEF)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Pseudo>(NAME # _D_UNDEF)>;
}
multiclass sve_int_un_pred_arit_z<bits<3> opc, string asm, SDPatternOperator op> {
@@ -4850,10 +4871,10 @@ multiclass sve_int_un_pred_arit_z<bits<3> opc, string asm, SDPatternOperator op>
def _S : sve_int_un_pred_arit_z<0b10, { opc, 0b0 }, asm, ZPR32>;
def _D : sve_int_un_pred_arit_z<0b11, { opc, 0b0 }, asm, ZPR64>;
- def : SVE_1_Op_PassthruUndefZero_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
- def : SVE_1_Op_PassthruUndefZero_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
- def : SVE_1_Op_PassthruUndefZero_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
- def : SVE_1_Op_PassthruUndefZero_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
multiclass sve_int_un_pred_arit_h<bits<3> opc, string asm,
@@ -4967,6 +4988,17 @@ multiclass sve_int_un_pred_arit_bitwise_fp<bits<3> opc, string asm,
def : SVE_1_Op_Passthru_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
def : SVE_1_Op_Passthru_Pat<nxv2f32, op, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>;
def : SVE_1_Op_Passthru_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
+
+ def _H_UNDEF : PredOneOpPassthruPseudo<NAME # _H, ZPR16>;
+ def _S_UNDEF : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
+ def _D_UNDEF : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
+
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv8f16, op, nxv8i1, nxv8f16, !cast<Pseudo>(NAME # _H_UNDEF)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv4f16, op, nxv4i1, nxv4f16, !cast<Pseudo>(NAME # _H_UNDEF)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv2f16, op, nxv2i1, nxv2f16, !cast<Pseudo>(NAME # _H_UNDEF)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Pseudo>(NAME # _S_UNDEF)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv2f32, op, nxv2i1, nxv2f32, !cast<Pseudo>(NAME # _S_UNDEF)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Pseudo>(NAME # _D_UNDEF)>;
}
multiclass sve_int_un_pred_arit_bitwise_fp_z<bits<3> opc, string asm, SDPatternOperator op> {
@@ -4974,12 +5006,12 @@ multiclass sve_int_un_pred_arit_bitwise_fp_z<bits<3> opc, string asm, SDPatternO
def _S : sve_int_un_pred_arit_z<0b10, { opc, 0b1 }, asm, ZPR32>;
def _D : sve_int_un_pred_arit_z<0b11, { opc, 0b1 }, asm, ZPR64>;
- def : SVE_1_Op_PassthruUndefZero_Pat<nxv8f16, op, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
- def : SVE_1_Op_PassthruUndefZero_Pat<nxv4f16, op, nxv4i1, nxv4f16, !cast<Instruction>(NAME # _H)>;
- def : SVE_1_Op_PassthruUndefZero_Pat<nxv2f16, op, nxv2i1, nxv2f16, !cast<Instruction>(NAME # _H)>;
- def : SVE_1_Op_PassthruUndefZero_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
- def : SVE_1_Op_PassthruUndefZero_Pat<nxv2f32, op, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>;
- def : SVE_1_Op_PassthruUndefZero_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv8f16, op, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv4f16, op, nxv4i1, nxv4f16, !cast<Instruction>(NAME # _H)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv2f16, op, nxv2i1, nxv2f16, !cast<Instruction>(NAME # _H)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv2f32, op, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>;
+ defm : SVE_1_Op_PassthruUndefZero_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
}
multiclass sve_fp_un_pred_arit_hsd<SDPatternOperator op> {
diff --git a/llvm/test/CodeGen/AArch64/zeroing-forms-abs-neg.ll b/llvm/test/CodeGen/AArch64/zeroing-forms-abs-neg.ll
index 1caee994220f05..510d4576646f12 100644
--- a/llvm/test/CodeGen/AArch64/zeroing-forms-abs-neg.ll
+++ b/llvm/test/CodeGen/AArch64/zeroing-forms-abs-neg.ll
@@ -18,7 +18,7 @@ define <vscale x 2 x double> @test_svabs_f64_x_1(<vscale x 2 x i1> %pg, <vscale
; CHECK-2p2-NEXT: fabs z0.d, p0/z, z0.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fabs.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fabs.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
ret <vscale x 2 x double> %0
}
@@ -34,7 +34,7 @@ define <vscale x 2 x double> @test_svabs_f64_x_2(<vscale x 2 x i1> %pg, double %
; CHECK-2p2-NEXT: fabs z0.d, p0/z, z1.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fabs.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fabs.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
ret <vscale x 2 x double> %0
}
@@ -65,7 +65,7 @@ define <vscale x 4 x float> @test_svabs_f32_x_1(<vscale x 4 x i1> %pg, <vscale x
; CHECK-2p2-NEXT: fabs z0.s, p0/z, z0.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fabs.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fabs.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
ret <vscale x 4 x float> %0
}
@@ -81,7 +81,7 @@ define <vscale x 4 x float> @test_svabs_f32_x_2(<vscale x 4 x i1> %pg, double %z
; CHECK-2p2-NEXT: fabs z0.s, p0/z, z1.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fabs.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fabs.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
ret <vscale x 4 x float> %0
}
@@ -112,7 +112,7 @@ define <vscale x 8 x half> @test_svabs_f16_x_1(<vscale x 8 x i1> %pg, <vscale x
; CHECK-2p2-NEXT: fabs z0.h, p0/z, z0.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fabs.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fabs.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
ret <vscale x 8 x half> %0
}
@@ -128,7 +128,7 @@ define <vscale x 8 x half> @test_svabs_f16_x_2(<vscale x 8 x i1> %pg, double %z0
; CHECK-2p2-NEXT: fabs z0.h, p0/z, z1.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fabs.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fabs.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
ret <vscale x 8 x half> %0
}
@@ -159,7 +159,7 @@ define <vscale x 16 x i8> @test_svabs_s8_x_1(<vscale x 16 x i1> %pg, <vscale x 1
; CHECK-2p2-NEXT: abs z0.b, p0/z, z0.b
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.abs.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.abs.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
ret <vscale x 16 x i8> %0
}
@@ -175,8 +175,8 @@ define <vscale x 16 x i8> @test_svabs_s8_x_2(<vscale x 16 x i1> %pg, double %z0,
; CHECK-2p2-NEXT: abs z0.b, p0/z, z1.b
; CHECK-2p2-NEXT: ret
entry:
- %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.abs.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
- ret <vscale x 16 x i8> %1
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.abs.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+ ret <vscale x 16 x i8> %0
}
define <vscale x 16 x i8> @test_svabs_s8_z(<vscale x 16 x i1> %pg, double %z0, <vscale x 16 x i8> %x) {
@@ -191,8 +191,8 @@ define <vscale x 16 x i8> @test_svabs_s8_z(<vscale x 16 x i1> %pg, double %z0, <
; CHECK-2p2-NEXT: abs z0.b, p0/z, z1.b
; CHECK-2p2-NEXT: ret
entry:
- %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.abs.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
- ret <vscale x 16 x i8> %1
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.abs.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+ ret <vscale x 16 x i8> %0
}
define <vscale x 8 x i16> @test_svabs_s16_x_1(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %x) {
@@ -206,7 +206,7 @@ define <vscale x 8 x i16> @test_svabs_s16_x_1(<vscale x 8 x i1> %pg, <vscale x 8
; CHECK-2p2-NEXT: abs z0.h, p0/z, z0.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
ret <vscale x 8 x i16> %0
}
@@ -222,7 +222,7 @@ define <vscale x 8 x i16> @test_svabs_s16_x_2(<vscale x 8 x i1> %pg, double %z0,
; CHECK-2p2-NEXT: abs z0.h, p0/z, z1.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
ret <vscale x 8 x i16> %0
}
@@ -253,7 +253,7 @@ define <vscale x 4 x i32> @test_svabs_s32_x_1(<vscale x 4 x i1> %pg, <vscale x 4
; CHECK-2p2-NEXT: abs z0.s, p0/z, z0.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.abs.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.abs.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
ret <vscale x 4 x i32> %0
}
@@ -269,7 +269,7 @@ define <vscale x 4 x i32> @test_svabs_s32_x_2(<vscale x 4 x i1> %pg, double %z0,
; CHECK-2p2-NEXT: abs z0.s, p0/z, z1.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.abs.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.abs.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
ret <vscale x 4 x i32> %0
}
@@ -300,7 +300,7 @@ define <vscale x 2 x i64> @test_svabs_s64_x_1(<vscale x 2 x i1> %pg, <vscale x 2
; CHECK-2p2-NEXT: abs z0.d, p0/z, z0.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.abs.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.abs.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
ret <vscale x 2 x i64> %0
}
@@ -316,7 +316,7 @@ define <vscale x 2 x i64> @test_svabs_s64_x_2(<vscale x 2 x i1> %pg, double %z0,
; CHECK-2p2-NEXT: abs z0.d, p0/z, z1.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.abs.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.abs.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
ret <vscale x 2 x i64> %0
}
@@ -347,7 +347,7 @@ define <vscale x 2 x double> @test_svneg_f64_x_1(<vscale x 2 x i1> %pg, <vscale
; CHECK-2p2-NEXT: fneg z0.d, p0/z, z0.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fneg.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fneg.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
ret <vscale x 2 x double> %0
}
@@ -363,7 +363,7 @@ define <vscale x 2 x double> @test_svneg_f64_x_2(<vscale x 2 x i1> %pg, double %
; CHECK-2p2-NEXT: fneg z0.d, p0/z, z1.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fneg.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fneg.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
ret <vscale x 2 x double> %0
}
@@ -394,7 +394,7 @@ define <vscale x 4 x float> @test_svneg_f32_x_1(<vscale x 4 x i1> %pg, <vscale x
; CHECK-2p2-NEXT: fneg z0.s, p0/z, z0.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fneg.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fneg.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
ret <vscale x 4 x float> %0
}
@@ -410,7 +410,7 @@ define <vscale x 4 x float> @test_svneg_f32_x_2(<vscale x 4 x i1> %pg, double %z
; CHECK-2p2-NEXT: fneg z0.s, p0/z, z1.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fneg.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fneg.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
ret <vscale x 4 x float> %0
}
@@ -441,7 +441,7 @@ define <vscale x 8 x half> @test_svneg_f16_x_1(<vscale x 8 x i1> %pg, <vscale x
; CHECK-2p2-NEXT: fneg z0.h, p0/z, z0.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fneg.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fneg.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
ret <vscale x 8 x half> %0
}
@@ -457,7 +457,7 @@ define <vscale x 8 x half> @test_svneg_f16_x_2(<vscale x 8 x i1> %pg, double %z0
; CHECK-2p2-NEXT: fneg z0.h, p0/z, z1.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fneg.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fneg.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
ret <vscale x 8 x half> %0
}
@@ -488,7 +488,7 @@ define <vscale x 16 x i8> @test_svneg_s8_x_1(<vscale x 16 x i1> %pg, <vscale x 1
; CHECK-2p2-NEXT: neg z0.b, p0/z, z0.b
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.neg.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.neg.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
ret <vscale x 16 x i8> %0
}
@@ -504,8 +504,8 @@ define <vscale x 16 x i8> @test_svneg_s8_x_2(<vscale x 16 x i1> %pg, double %z0,
; CHECK-2p2-NEXT: neg z0.b, p0/z, z1.b
; CHECK-2p2-NEXT: ret
entry:
- %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.neg.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
- ret <vscale x 16 x i8> %1
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.neg.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+ ret <vscale x 16 x i8> %0
}
define <vscale x 16 x i8> @test_svneg_s8_z(<vscale x 16 x i1> %pg, double %z0, <vscale x 16 x i8> %x) {
@@ -520,8 +520,8 @@ define <vscale x 16 x i8> @test_svneg_s8_z(<vscale x 16 x i1> %pg, double %z0, <
; CHECK-2p2-NEXT: neg z0.b, p0/z, z1.b
; CHECK-2p2-NEXT: ret
entry:
- %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.neg.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
- ret <vscale x 16 x i8> %1
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.neg.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+ ret <vscale x 16 x i8> %0
}
define <vscale x 8 x i16> @test_svneg_s16_x_1(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %x) {
@@ -535,7 +535,7 @@ define <vscale x 8 x i16> @test_svneg_s16_x_1(<vscale x 8 x i1> %pg, <vscale x 8
; CHECK-2p2-NEXT: neg z0.h, p0/z, z0.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.neg.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.neg.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
ret <vscale x 8 x i16> %0
}
@@ -551,7 +551,7 @@ define <vscale x 8 x i16> @test_svneg_s16_x_2(<vscale x 8 x i1> %pg, double %z0,
; CHECK-2p2-NEXT: neg z0.h, p0/z, z1.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.neg.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.neg.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
ret <vscale x 8 x i16> %0
}
@@ -582,7 +582,7 @@ define <vscale x 4 x i32> @test_svneg_s32_x_1(<vscale x 4 x i1> %pg, <vscale x 4
; CHECK-2p2-NEXT: neg z0.s, p0/z, z0.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.neg.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.neg.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
ret <vscale x 4 x i32> %0
}
@@ -598,7 +598,7 @@ define <vscale x 4 x i32> @test_svneg_s32_x_2(<vscale x 4 x i1> %pg, double %z0,
; CHECK-2p2-NEXT: neg z0.s, p0/z, z1.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.neg.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.neg.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
ret <vscale x 4 x i32> %0
}
@@ -629,7 +629,7 @@ define <vscale x 2 x i64> @test_svneg_s64_x_1(<vscale x 2 x i1> %pg, <vscale x 2
; CHECK-2p2-NEXT: neg z0.d, p0/z, z0.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.neg.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.neg.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
ret <vscale x 2 x i64> %0
}
@@ -645,7 +645,7 @@ define <vscale x 2 x i64> @test_svneg_s64_x_2(<vscale x 2 x i1> %pg, double %z0,
; CHECK-2p2-NEXT: neg z0.d, p0/z, z1.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.neg.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.neg.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
ret <vscale x 2 x i64> %0
}
@@ -664,3 +664,535 @@ entry:
%0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.neg.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
ret <vscale x 2 x i64> %0
}
+
+define <vscale x 2 x double> @test_svfabs_f64_ptrue_u(double %z0, <vscale x 2 x double> %x) {
+; CHECK-LABEL: test_svfabs_f64_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: fabs z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svfabs_f64_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fabs z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fabs.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ ret <vscale x 2 x double> %0
+}
+
+define <vscale x 2 x double> @test_svfabs_f64_ptrue(double %z0, <vscale x 2 x double> %x, <vscale x 2 x double> %y) {
+; CHECK-LABEL: test_svfabs_f64_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: fabs z0.d, p0/m, z2.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svfabs_f64_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fabs z0.d, p0/z, z2.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fabs.nxv2f64(<vscale x 2 x double> %x, <vscale x 2 x i1> %pg, <vscale x 2 x double> %y)
+ ret <vscale x 2 x double> %0
+}
+
+define <vscale x 4 x float> @test_svfabs_f32_ptrue_u(double %z0, <vscale x 4 x float> %x) {
+; CHECK-LABEL: test_svfabs_f32_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: fabs z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svfabs_f32_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: fabs z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fabs.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+ ret <vscale x 4 x float> %0
+}
+
+define <vscale x 4 x float> @test_svfabs_f32_ptrue(double %z0, <vscale x 4 x float> %x, <vscale x 4 x float> %y) {
+; CHECK-LABEL: test_svfabs_f32_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: fabs z0.s, p0/m, z2.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svfabs_f32_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: fabs z0.s, p0/z, z2.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fabs.nxv4f32(<vscale x 4 x float> %x, <vscale x 4 x i1> %pg, <vscale x 4 x float> %y)
+ ret <vscale x 4 x float> %0
+}
+
+define <vscale x 8 x half> @test_svfabs_f16_ptrue_u(double %z0, <vscale x 8 x half> %x) {
+; CHECK-LABEL: test_svfabs_f16_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: fabs z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svfabs_f16_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.h
+; CHECK-2p2-NEXT: fabs z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fabs.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
+ ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_svfabs_f16_ptrue(double %z0, <vscale x 8 x half> %x, <vscale x 8 x half> %y) {
+; CHECK-LABEL: test_svfabs_f16_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: fabs z0.h, p0/m, z2.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svfabs_f16_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.h
+; CHECK-2p2-NEXT: fabs z0.h, p0/z, z2.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fabs.nxv8f16(<vscale x 8 x half> %x, <vscale x 8 x i1> %pg, <vscale x 8 x half> %y)
+ ret <vscale x 8 x half> %0
+}
+
+define <vscale x 16 x i8> @test_svabs_s8_ptrue_u(double %z0, <vscale x 16 x i8> %x) {
+; CHECK-LABEL: test_svabs_s8_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: abs z0.b, p0/m, z1.b
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svabs_s8_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.b
+; CHECK-2p2-NEXT: abs z0.b, p0/z, z1.b
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.abs.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+ ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 16 x i8> @test_svabs_s8_ptrue(double %z0, <vscale x 16 x i8> %x, <vscale x 16 x i8> %y) {
+; CHECK-LABEL: test_svabs_s8_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: abs z0.b, p0/m, z2.b
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svabs_s8_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.b
+; CHECK-2p2-NEXT: abs z0.b, p0/z, z2.b
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.abs.nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %y)
+ ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 8 x i16> @test_svabs_s16_ptrue_u(double %z0, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svabs_s16_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: abs z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svabs_s16_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.h
+; CHECK-2p2-NEXT: abs z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svabs_s16_ptrue(double %z0, <vscale x 8 x i16> %x, <vscale x 8 x i16> %y) {
+; CHECK-LABEL: test_svabs_s16_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: abs z0.h, p0/m, z2.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svabs_s16_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.h
+; CHECK-2p2-NEXT: abs z0.h, p0/z, z2.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %y)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 4 x i32> @test_svabs_s32_ptrue_u(double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svabs_s32_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: abs z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svabs_s32_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: abs z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.abs.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svabs_s32_ptrue(double %z0, <vscale x 4 x i32> %x, <vscale x 4 x i32> %y) {
+; CHECK-LABEL: test_svabs_s32_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: abs z0.s, p0/m, z2.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svabs_s32_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: abs z0.s, p0/z, z2.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.abs.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %y)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 2 x i64> @test_svabs_s64_ptrue_u(double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svabs_s64_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: abs z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svabs_s64_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: abs z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.abs.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_svabs_s64_ptrue(double %z0, <vscale x 2 x i64> %x, <vscale x 2 x i64> %y) {
+; CHECK-LABEL: test_svabs_s64_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: abs z0.d, p0/m, z2.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svabs_s64_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: abs z0.d, p0/z, z2.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.abs.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %y)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x double> @test_svfneg_f64_ptrue_u(double %z0, <vscale x 2 x double> %x) {
+; CHECK-LABEL: test_svfneg_f64_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: fneg z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svfneg_f64_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fneg z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fneg.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ ret <vscale x 2 x double> %0
+}
+
+define <vscale x 2 x double> @test_svfneg_f64_ptrue(double %z0, <vscale x 2 x double> %x, <vscale x 2 x double> %y) {
+; CHECK-LABEL: test_svfneg_f64_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: fneg z0.d, p0/m, z2.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svfneg_f64_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fneg z0.d, p0/z, z2.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fneg.nxv2f64(<vscale x 2 x double> %x, <vscale x 2 x i1> %pg, <vscale x 2 x double> %y)
+ ret <vscale x 2 x double> %0
+}
+
+define <vscale x 4 x float> @test_svfneg_f32_ptrue_u(double %z0, <vscale x 4 x float> %x) {
+; CHECK-LABEL: test_svfneg_f32_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: fneg z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svfneg_f32_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: fneg z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fneg.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+ ret <vscale x 4 x float> %0
+}
+
+define <vscale x 4 x float> @test_svfneg_f32_ptrue(double %z0, <vscale x 4 x float> %x, <vscale x 4 x float> %y) {
+; CHECK-LABEL: test_svfneg_f32_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: fneg z0.s, p0/m, z2.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svfneg_f32_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: fneg z0.s, p0/z, z2.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fneg.nxv4f32(<vscale x 4 x float> %x, <vscale x 4 x i1> %pg, <vscale x 4 x float> %y)
+ ret <vscale x 4 x float> %0
+}
+
+define <vscale x 8 x half> @test_svfneg_f16_ptrue_u(double %z0, <vscale x 8 x half> %x) {
+; CHECK-LABEL: test_svfneg_f16_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: fneg z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svfneg_f16_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.h
+; CHECK-2p2-NEXT: fneg z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fneg.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
+ ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_svfneg_f16_ptrue(double %z0, <vscale x 8 x half> %x, <vscale x 8 x half> %y) {
+; CHECK-LABEL: test_svfneg_f16_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: fneg z0.h, p0/m, z2.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svfneg_f16_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.h
+; CHECK-2p2-NEXT: fneg z0.h, p0/z, z2.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fneg.nxv8f16(<vscale x 8 x half> %x, <vscale x 8 x i1> %pg, <vscale x 8 x half> %y)
+ ret <vscale x 8 x half> %0
+}
+
+define <vscale x 16 x i8> @test_svneg_s8_ptrue_u(double %z0, <vscale x 16 x i8> %x) {
+; CHECK-LABEL: test_svneg_s8_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: neg z0.b, p0/m, z1.b
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svneg_s8_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.b
+; CHECK-2p2-NEXT: neg z0.b, p0/z, z1.b
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.neg.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+ ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 16 x i8> @test_svneg_s8_ptrue(double %z0, <vscale x 16 x i8> %x, <vscale x 16 x i8> %y) {
+; CHECK-LABEL: test_svneg_s8_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: neg z0.b, p0/m, z2.b
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svneg_s8_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.b
+; CHECK-2p2-NEXT: neg z0.b, p0/z, z2.b
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.neg.nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %y)
+ ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 8 x i16> @test_svneg_s16_ptrue_u(double %z0, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svneg_s16_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: neg z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svneg_s16_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.h
+; CHECK-2p2-NEXT: neg z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.neg.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svneg_s16_ptrue(double %z0, <vscale x 8 x i16> %x, <vscale x 8 x i16> %y) {
+; CHECK-LABEL: test_svneg_s16_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: neg z0.h, p0/m, z2.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svneg_s16_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.h
+; CHECK-2p2-NEXT: neg z0.h, p0/z, z2.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.neg.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %y)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 4 x i32> @test_svneg_s32_ptrue_u(double %z0, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svneg_s32_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: neg z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svneg_s32_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: neg z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.neg.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svneg_s32_ptrue(double %z0, <vscale x 4 x i32> %x, <vscale x 4 x i32> %y) {
+; CHECK-LABEL: test_svneg_s32_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: neg z0.s, p0/m, z2.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svneg_s32_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: neg z0.s, p0/z, z2.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.neg.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %y)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 2 x i64> @test_svneg_s64_ptrue_u(double %z0, <vscale x 2 x i64> %x) {
+; CHECK-LABEL: test_svneg_s64_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: neg z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svneg_s64_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: neg z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.neg.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_svneg_s64_ptrue(double %z0, <vscale x 2 x i64> %x, <vscale x 2 x i64> %y) {
+; CHECK-LABEL: test_svneg_s64_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: neg z0.d, p0/m, z2.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svneg_s64_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: neg z0.d, p0/z, z2.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.neg.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %y)
+ ret <vscale x 2 x i64> %0
+}
diff --git a/llvm/test/CodeGen/AArch64/zeroing-forms-fcvt-bfcvt.ll b/llvm/test/CodeGen/AArch64/zeroing-forms-fcvt-bfcvt.ll
index cf9ac49ca7b236..855bf9a3b3c491 100644
--- a/llvm/test/CodeGen/AArch64/zeroing-forms-fcvt-bfcvt.ll
+++ b/llvm/test/CodeGen/AArch64/zeroing-forms-fcvt-bfcvt.ll
@@ -18,7 +18,7 @@ define <vscale x 8 x half> @test_svcvt_f16_f32_x_1(<vscale x 4 x i1> %pg, <vscal
; CHECK-2p2-NEXT: fcvt z0.h, p0/z, z0.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f32(<vscale x 8 x half> undef, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f32(<vscale x 8 x half> poison, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
ret <vscale x 8 x half> %0
}
@@ -33,7 +33,7 @@ define <vscale x 8 x half> @test_svcvt_f16_f32_x_2(<vscale x 4 x i1> %pg, double
; CHECK-2p2-NEXT: fcvt z0.h, p0/z, z1.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f32(<vscale x 8 x half> undef, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f32(<vscale x 8 x half> poison, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
ret <vscale x 8 x half> %0
}
@@ -64,7 +64,7 @@ define <vscale x 8 x bfloat> @test_svcvt_bf16_f32_x_1(<vscale x 4 x i1> %pg, <vs
; CHECK-2p2-NEXT: bfcvt z0.h, p0/z, z0.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> undef, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+ %0 = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> poison, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
ret <vscale x 8 x bfloat> %0
}
@@ -79,7 +79,7 @@ define <vscale x 8 x bfloat> @test_svcvt_bf16_f32_x_2(<vscale x 4 x i1> %pg, dou
; CHECK-2p2-NEXT: bfcvt z0.h, p0/z, z1.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> undef, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+ %0 = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> poison, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
ret <vscale x 8 x bfloat> %0
}
@@ -110,7 +110,7 @@ define <vscale x 8 x half> @test_svcvt_f16_f64_x_1(<vscale x 2 x i1> %pg, <vscal
; CHECK-2p2-NEXT: fcvt z0.h, p0/z, z0.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f64(<vscale x 8 x half> undef, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f64(<vscale x 8 x half> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
ret <vscale x 8 x half> %0
}
@@ -125,7 +125,7 @@ define <vscale x 8 x half> @test_svcvt_f16_f64_x_2(<vscale x 2 x i1> %pg, double
; CHECK-2p2-NEXT: fcvt z0.h, p0/z, z1.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f64(<vscale x 8 x half> undef, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f64(<vscale x 8 x half> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
ret <vscale x 8 x half> %0
}
@@ -156,7 +156,7 @@ define <vscale x 4 x float> @test_svcvt_f32_f64_x_1(<vscale x 2 x i1> %pg, <vsca
; CHECK-2p2-NEXT: fcvt z0.s, p0/z, z0.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f64(<vscale x 4 x float> undef, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f64(<vscale x 4 x float> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
ret <vscale x 4 x float> %0
}
@@ -171,7 +171,7 @@ define <vscale x 4 x float> @test_svcvt_f32_f64_x_2(<vscale x 2 x i1> %pg, doubl
; CHECK-2p2-NEXT: fcvt z0.s, p0/z, z1.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f64(<vscale x 4 x float> undef, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f64(<vscale x 4 x float> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
ret <vscale x 4 x float> %0
}
@@ -202,7 +202,7 @@ define <vscale x 4 x float> @test_svcvt_f32_f16_x_1(<vscale x 4 x i1> %pg, <vsca
; CHECK-2p2-NEXT: fcvt z0.s, p0/z, z0.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f16(<vscale x 4 x float> undef, <vscale x 4 x i1> %pg, <vscale x 8 x half> %x)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f16(<vscale x 4 x float> poison, <vscale x 4 x i1> %pg, <vscale x 8 x half> %x)
ret <vscale x 4 x float> %0
}
@@ -217,7 +217,7 @@ define <vscale x 4 x float> @test_svcvt_f32_f16_x_2(<vscale x 4 x i1> %pg, doubl
; CHECK-2p2-NEXT: fcvt z0.s, p0/z, z1.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f16(<vscale x 4 x float> undef, <vscale x 4 x i1> %pg, <vscale x 8 x half> %x)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f16(<vscale x 4 x float> poison, <vscale x 4 x i1> %pg, <vscale x 8 x half> %x)
ret <vscale x 4 x float> %0
}
@@ -248,7 +248,7 @@ define <vscale x 2 x double> @test_svcvt_f64_f16_x_1(<vscale x 2 x i1> %pg, <vsc
; CHECK-2p2-NEXT: fcvt z0.d, p0/z, z0.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f16(<vscale x 2 x double> undef, <vscale x 2 x i1> %pg, <vscale x 8 x half> %x)
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f16(<vscale x 2 x double> poison, <vscale x 2 x i1> %pg, <vscale x 8 x half> %x)
ret <vscale x 2 x double> %0
}
@@ -263,7 +263,7 @@ define <vscale x 2 x double> @test_svcvt_f64_f16_x_2(<vscale x 2 x i1> %pg, doub
; CHECK-2p2-NEXT: fcvt z0.d, p0/z, z1.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f16(<vscale x 2 x double> undef, <vscale x 2 x i1> %pg, <vscale x 8 x half> %x)
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f16(<vscale x 2 x double> poison, <vscale x 2 x i1> %pg, <vscale x 8 x half> %x)
ret <vscale x 2 x double> %0
}
@@ -294,7 +294,7 @@ define <vscale x 2 x double> @test_svcvt_f64_f32_x_1(<vscale x 2 x i1> %pg, <vsc
; CHECK-2p2-NEXT: fcvt z0.d, p0/z, z0.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f32(<vscale x 2 x double> undef, <vscale x 2 x i1> %pg, <vscale x 4 x float> %x)
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f32(<vscale x 2 x double> poison, <vscale x 2 x i1> %pg, <vscale x 4 x float> %x)
ret <vscale x 2 x double> %0
}
@@ -309,7 +309,7 @@ define <vscale x 2 x double> @test_svcvt_f64_f32_x_2(<vscale x 2 x i1> %pg, doub
; CHECK-2p2-NEXT: fcvt z0.d, p0/z, z1.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f32(<vscale x 2 x double> undef, <vscale x 2 x i1> %pg, <vscale x 4 x float> %x)
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f32(<vscale x 2 x double> poison, <vscale x 2 x i1> %pg, <vscale x 4 x float> %x)
ret <vscale x 2 x double> %0
}
@@ -328,3 +328,262 @@ entry:
%0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f32(<vscale x 2 x double> zeroinitializer, <vscale x 2 x i1> %pg, <vscale x 4 x float> %x)
ret <vscale x 2 x double> %0
}
+
+define <vscale x 8 x half> @test_svcvt_f16_f32_ptrue_u(double %z0, <vscale x 4 x float> %x) {
+; CHECK-LABEL: test_svcvt_f16_f32_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fcvt z0.h, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f16_f32_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: fcvt z0.h, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f32(<vscale x 8 x half> poison, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+ ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_svcvt_f16_f32_ptrue(double %z0, <vscale x 8 x half> %x, <vscale x 4 x float> %y ) {
+; CHECK-LABEL: test_svcvt_f16_f32_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fcvt z0.h, p0/m, z2.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f16_f32_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: fcvt z0.h, p0/z, z2.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f32(<vscale x 8 x half> %x, <vscale x 4 x i1> %pg, <vscale x 4 x float> %y)
+ ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x bfloat> @test_svcvt_bf16_f32_ptrue_u(double %z0, <vscale x 4 x float> %x) {
+; CHECK-LABEL: test_svcvt_bf16_f32_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: bfcvt z0.h, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcvt_bf16_f32_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: bfcvt z0.h, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> poison, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+ ret <vscale x 8 x bfloat> %0
+}
+
+define <vscale x 8 x bfloat> @test_svcvt_bf16_f32_ptrue(double %z0, <vscale x 8 x bfloat> %x, <vscale x 4 x float> %y ) {
+; CHECK-LABEL: test_svcvt_bf16_f32_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: bfcvt z0.h, p0/m, z2.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcvt_bf16_f32_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: bfcvt z0.h, p0/z, z2.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> %x, <vscale x 4 x i1> %pg, <vscale x 4 x float> %y)
+ ret <vscale x 8 x bfloat> %0
+}
+
+define <vscale x 8 x half> @test_svcvt_f16_f64_ptrue_u(double %z0, <vscale x 2 x double> %x) {
+; CHECK-LABEL: test_svcvt_f16_f64_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvt z0.h, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f16_f64_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvt z0.h, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f64(<vscale x 8 x half> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_svcvt_f16_f64_ptrue(double %z0, <vscale x 8 x half> %x, <vscale x 2 x double> %y ) {
+; CHECK-LABEL: test_svcvt_f16_f64_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvt z0.h, p0/m, z2.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f16_f64_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvt z0.h, p0/z, z2.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f64(<vscale x 8 x half> %x, <vscale x 2 x i1> %pg, <vscale x 2 x double> %y)
+ ret <vscale x 8 x half> %0
+}
+
+define <vscale x 4 x float> @test_svcvt_f32_f64_ptrue_u(double %z0, <vscale x 2 x double> %x) {
+; CHECK-LABEL: test_svcvt_f32_f64_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvt z0.s, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f32_f64_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvt z0.s, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f64(<vscale x 4 x float> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ ret <vscale x 4 x float> %0
+}
+
+define <vscale x 4 x float> @test_svcvt_f32_f64_ptrue(double %z0, <vscale x 4 x float> %x, <vscale x 2 x double> %y ) {
+; CHECK-LABEL: test_svcvt_f32_f64_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvt z0.s, p0/m, z2.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f32_f64_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvt z0.s, p0/z, z2.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f64(<vscale x 4 x float> %x, <vscale x 2 x i1> %pg, <vscale x 2 x double> %y)
+ ret <vscale x 4 x float> %0
+}
+
+define <vscale x 4 x float> @test_svcvt_f32_f16_ptrue_u(double %z0, <vscale x 8 x half> %x) {
+; CHECK-LABEL: test_svcvt_f32_f16_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fcvt z0.s, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f32_f16_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: fcvt z0.s, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f16(<vscale x 4 x float> poison, <vscale x 4 x i1> %pg, <vscale x 8 x half> %x)
+ ret <vscale x 4 x float> %0
+}
+
+define <vscale x 4 x float> @test_svcvt_f32_f16_ptrue(double %z0, <vscale x 4 x float> %x, <vscale x 8 x half> %y ) {
+; CHECK-LABEL: test_svcvt_f32_f16_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fcvt z0.s, p0/m, z2.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f32_f16_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: fcvt z0.s, p0/z, z2.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f16(<vscale x 4 x float> %x, <vscale x 4 x i1> %pg, <vscale x 8 x half> %y)
+ ret <vscale x 4 x float> %0
+}
+
+define <vscale x 2 x double> @test_svcvt_f64_f16_ptrue_u(double %z0, <vscale x 8 x half> %x) {
+; CHECK-LABEL: test_svcvt_f64_f16_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvt z0.d, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f64_f16_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvt z0.d, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f16(<vscale x 2 x double> poison, <vscale x 2 x i1> %pg, <vscale x 8 x half> %x)
+ ret <vscale x 2 x double> %0
+}
+
+define <vscale x 2 x double> @test_svcvt_f64_f16_ptrue(double %z0, <vscale x 2 x double> %x, <vscale x 8 x half> %y ) {
+; CHECK-LABEL: test_svcvt_f64_f16_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvt z0.d, p0/m, z2.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f64_f16_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvt z0.d, p0/z, z2.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f16(<vscale x 2 x double> %x, <vscale x 2 x i1> %pg, <vscale x 8 x half> %y)
+ ret <vscale x 2 x double> %0
+}
+
+define <vscale x 2 x double> @test_svcvt_f64_f32_ptrue_u(double %z0, <vscale x 4 x float> %x) {
+; CHECK-LABEL: test_svcvt_f64_f32_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvt z0.d, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f64_f32_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvt z0.d, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f32(<vscale x 2 x double> poison, <vscale x 2 x i1> %pg, <vscale x 4 x float> %x)
+ ret <vscale x 2 x double> %0
+}
+
+define <vscale x 2 x double> @test_svcvt_f64_f32_ptrue(double %z0, <vscale x 2 x double> %x, <vscale x 4 x float> %y ) {
+; CHECK-LABEL: test_svcvt_f64_f32_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvt z0.d, p0/m, z2.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcvt_f64_f32_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvt z0.d, p0/z, z2.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f32(<vscale x 2 x double> %x, <vscale x 2 x i1> %pg, <vscale x 4 x float> %y)
+ ret <vscale x 2 x double> %0
+}
diff --git a/llvm/test/CodeGen/AArch64/zeroing-forms-fcvtlt-fcvtx.ll b/llvm/test/CodeGen/AArch64/zeroing-forms-fcvtlt-fcvtx.ll
index 60879b1529230f..c7431e11c21ca3 100644
--- a/llvm/test/CodeGen/AArch64/zeroing-forms-fcvtlt-fcvtx.ll
+++ b/llvm/test/CodeGen/AArch64/zeroing-forms-fcvtlt-fcvtx.ll
@@ -18,7 +18,7 @@ define <vscale x 4 x float> @test_svcvtlt_f32_f16_x_1(<vscale x 4 x i1> %pg, <vs
; CHECK-2p2-NEXT: fcvtlt z0.s, p0/z, z0.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvtlt.f32f16(<vscale x 4 x float> undef, <vscale x 4 x i1> %pg, <vscale x 8 x half> %x)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvtlt.f32f16(<vscale x 4 x float> poison, <vscale x 4 x i1> %pg, <vscale x 8 x half> %x)
ret <vscale x 4 x float> %0
}
@@ -33,7 +33,7 @@ define <vscale x 4 x float> @test_svcvtlt_f32_f16_x_2(<vscale x 4 x i1> %pg, dou
; CHECK-2p2-NEXT: fcvtlt z0.s, p0/z, z1.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvtlt.f32f16(<vscale x 4 x float> undef, <vscale x 4 x i1> %pg, <vscale x 8 x half> %x)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvtlt.f32f16(<vscale x 4 x float> poison, <vscale x 4 x i1> %pg, <vscale x 8 x half> %x)
ret <vscale x 4 x float> %0
}
@@ -64,7 +64,7 @@ define <vscale x 2 x double> @test_svcvtlt_f64_f32_x_1(<vscale x 2 x i1> %pg, <v
; CHECK-2p2-NEXT: fcvtlt z0.d, p0/z, z0.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvtlt.f64f32(<vscale x 2 x double> undef, <vscale x 2 x i1> %pg, <vscale x 4 x float> %x)
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvtlt.f64f32(<vscale x 2 x double> poison, <vscale x 2 x i1> %pg, <vscale x 4 x float> %x)
ret <vscale x 2 x double> %0
}
@@ -79,7 +79,7 @@ define <vscale x 2 x double> @test_svcvtlt_f64_f32_x_2(<vscale x 2 x i1> %pg, do
; CHECK-2p2-NEXT: fcvtlt z0.d, p0/z, z1.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvtlt.f64f32(<vscale x 2 x double> undef, <vscale x 2 x i1> %pg, <vscale x 4 x float> %x)
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvtlt.f64f32(<vscale x 2 x double> poison, <vscale x 2 x i1> %pg, <vscale x 4 x float> %x)
ret <vscale x 2 x double> %0
}
@@ -110,7 +110,7 @@ define <vscale x 4 x float> @test_svcvtx_f32_f64_x_1(<vscale x 2 x i1> %pg, <vsc
; CHECK-2p2-NEXT: fcvtx z0.s, p0/z, z0.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvtx.f32f64(<vscale x 4 x float> undef, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvtx.f32f64(<vscale x 4 x float> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
ret <vscale x 4 x float> %0
}
@@ -125,7 +125,7 @@ define <vscale x 4 x float> @test_svcvtx_f32_f64_x_2(<vscale x 2 x i1> %pg, doub
; CHECK-2p2-NEXT: fcvtx z0.s, p0/z, z1.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvtx.f32f64(<vscale x 4 x float> undef, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvtx.f32f64(<vscale x 4 x float> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
ret <vscale x 4 x float> %0
}
@@ -144,3 +144,114 @@ entry:
%0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvtx.f32f64(<vscale x 4 x float> zeroinitializer, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
ret <vscale x 4 x float> %0
}
+
+define <vscale x 4 x float> @test_svcvtlt_f32_f16_ptrue_u(double %z0, <vscale x 8 x half> %x) {
+; CHECK-LABEL: test_svcvtlt_f32_f16_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fcvtlt z0.s, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcvtlt_f32_f16_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: fcvtlt z0.s, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvtlt.f32f16(<vscale x 4 x float> poison, <vscale x 4 x i1> %pg, <vscale x 8 x half> %x)
+ ret <vscale x 4 x float> %0
+}
+
+define <vscale x 4 x float> @test_svcvtlt_f32_f16_ptrue(double %z0, <vscale x 4 x float> %x, <vscale x 8 x half> %y) {
+; CHECK-LABEL: test_svcvtlt_f32_f16_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fcvtlt z0.s, p0/m, z2.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcvtlt_f32_f16_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: fcvtlt z0.s, p0/z, z2.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvtlt.f32f16(<vscale x 4 x float> %x, <vscale x 4 x i1> %pg, <vscale x 8 x half> %y)
+ ret <vscale x 4 x float> %0
+}
+
+define <vscale x 2 x double> @test_svcvtlt_f64_f32_ptrue_u(double %z0, <vscale x 4 x float> %x) {
+; CHECK-LABEL: test_svcvtlt_f64_f32_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtlt z0.d, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcvtlt_f64_f32_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvtlt z0.d, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvtlt.f64f32(<vscale x 2 x double> poison, <vscale x 2 x i1> %pg, <vscale x 4 x float> %x)
+ ret <vscale x 2 x double> %0
+}
+
+define <vscale x 2 x double> @test_svcvtlt_f64_f32_ptrue(double %z0, <vscale x 2 x double> %x, <vscale x 4 x float> %y) {
+; CHECK-LABEL: test_svcvtlt_f64_f32_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtlt z0.d, p0/m, z2.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcvtlt_f64_f32_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvtlt z0.d, p0/z, z2.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvtlt.f64f32(<vscale x 2 x double> %x, <vscale x 2 x i1> %pg, <vscale x 4 x float> %y)
+ ret <vscale x 2 x double> %0
+}
+
+define <vscale x 4 x float> @test_svcvtx_f32_f64_ptrue_u(double %z0, <vscale x 2 x double> %x) {
+; CHECK-LABEL: test_svcvtx_f32_f64_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtx z0.s, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcvtx_f32_f64_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvtx z0.s, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvtx.f32f64(<vscale x 4 x float> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ ret <vscale x 4 x float> %0
+}
+
+define <vscale x 4 x float> @test_svcvtx_f32_f64_ptrue(double %z0, <vscale x 4 x float> %x, <vscale x 2 x double> %y) {
+; CHECK-LABEL: test_svcvtx_f32_f64_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtx z0.s, p0/m, z2.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svcvtx_f32_f64_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvtx z0.s, p0/z, z2.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvtx.f32f64(<vscale x 4 x float> %x, <vscale x 2 x i1> %pg, <vscale x 2 x double> %y)
+ ret <vscale x 4 x float> %0
+}
diff --git a/llvm/test/CodeGen/AArch64/zeroing-forms-fcvtzsu.ll b/llvm/test/CodeGen/AArch64/zeroing-forms-fcvtzsu.ll
index b8b36d390330af..7259502bf44002 100644
--- a/llvm/test/CodeGen/AArch64/zeroing-forms-fcvtzsu.ll
+++ b/llvm/test/CodeGen/AArch64/zeroing-forms-fcvtzsu.ll
@@ -18,7 +18,7 @@ define <vscale x 4 x i32> @test_fcvtzs_s32_f64_x_1(<vscale x 2 x i1> %pg, <vscal
; CHECK-2p2-NEXT: fcvtzs z0.s, p0/z, z0.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.i32f64(<vscale x 4 x i32> undef, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.i32f64(<vscale x 4 x i32> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
ret <vscale x 4 x i32> %0
}
@@ -33,7 +33,7 @@ define <vscale x 4 x i32> @test_fcvtzs_s32_f64_x_2(<vscale x 2 x i1> %pg, double
; CHECK-2p2-NEXT: fcvtzs z0.s, p0/z, z1.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.i32f64(<vscale x 4 x i32> undef, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.i32f64(<vscale x 4 x i32> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
ret <vscale x 4 x i32> %0
}
@@ -64,7 +64,7 @@ define <vscale x 2 x i64> @test_fcvtzs_s64_f32_x_1(<vscale x 2 x i1> %pg, <vscal
; CHECK-2p2-NEXT: fcvtzs z0.d, p0/z, z0.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.i64f32(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 4 x float> %x)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.i64f32(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 4 x float> %x)
ret <vscale x 2 x i64> %0
}
@@ -79,7 +79,7 @@ define <vscale x 2 x i64> @test_fcvtzs_s64_f32_x_2(<vscale x 2 x i1> %pg, double
; CHECK-2p2-NEXT: fcvtzs z0.d, p0/z, z1.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.i64f32(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 4 x float> %x)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.i64f32(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 4 x float> %x)
ret <vscale x 2 x i64> %0
}
@@ -110,7 +110,7 @@ define <vscale x 4 x i32> @test_fcvtzs_s32_f16_x_1(<vscale x 4 x i1> %pg, <vscal
; CHECK-2p2-NEXT: fcvtzs z0.s, p0/z, z0.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.i32f16(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 8 x half> %x)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.i32f16(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 8 x half> %x)
ret <vscale x 4 x i32> %0
}
@@ -125,7 +125,7 @@ define <vscale x 4 x i32> @test_fcvtzs_s32_f16_x_2(<vscale x 4 x i1> %pg, double
; CHECK-2p2-NEXT: fcvtzs z0.s, p0/z, z1.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.i32f16(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 8 x half> %x)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.i32f16(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 8 x half> %x)
ret <vscale x 4 x i32> %0
}
@@ -156,7 +156,7 @@ define <vscale x 2 x i64> @test_fcvtzs_s64_f16_x_1(<vscale x 2 x i1> %pg, <vscal
; CHECK-2p2-NEXT: fcvtzs z0.d, p0/z, z0.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.i64f16(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 8 x half> %x)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.i64f16(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 8 x half> %x)
ret <vscale x 2 x i64> %0
}
@@ -171,7 +171,7 @@ define <vscale x 2 x i64> @test_fcvtzs_s64_f16_x_2(<vscale x 2 x i1> %pg, double
; CHECK-2p2-NEXT: fcvtzs z0.d, p0/z, z1.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.i64f16(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 8 x half> %x)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.i64f16(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 8 x half> %x)
ret <vscale x 2 x i64> %0
}
@@ -202,7 +202,7 @@ define <vscale x 4 x i32> @test_fcvtzu_u32_f64_x_1(<vscale x 2 x i1> %pg, <vscal
; CHECK-2p2-NEXT: fcvtzu z0.s, p0/z, z0.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.i32f64(<vscale x 4 x i32> undef, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.i32f64(<vscale x 4 x i32> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
ret <vscale x 4 x i32> %0
}
@@ -217,7 +217,7 @@ define <vscale x 4 x i32> @test_fcvtzu_u32_f64_x_2(<vscale x 2 x i1> %pg, double
; CHECK-2p2-NEXT: fcvtzu z0.s, p0/z, z1.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.i32f64(<vscale x 4 x i32> undef, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.i32f64(<vscale x 4 x i32> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
ret <vscale x 4 x i32> %0
}
@@ -248,7 +248,7 @@ define <vscale x 2 x i64> @test_fcvtzu_u64_f32_x_1(<vscale x 2 x i1> %pg, <vscal
; CHECK-2p2-NEXT: fcvtzu z0.d, p0/z, z0.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.i64f32(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 4 x float> %x)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.i64f32(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 4 x float> %x)
ret <vscale x 2 x i64> %0
}
@@ -263,7 +263,7 @@ define <vscale x 2 x i64> @test_fcvtzu_u64_f32_x_2(<vscale x 2 x i1> %pg, double
; CHECK-2p2-NEXT: fcvtzu z0.d, p0/z, z1.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.i64f32(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 4 x float> %x)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.i64f32(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 4 x float> %x)
ret <vscale x 2 x i64> %0
}
@@ -294,7 +294,7 @@ define <vscale x 4 x i32> @test_fcvtzu_u32_f16_x_1(<vscale x 4 x i1> %pg, <vscal
; CHECK-2p2-NEXT: fcvtzu z0.s, p0/z, z0.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.i32f16(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 8 x half> %x)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.i32f16(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 8 x half> %x)
ret <vscale x 4 x i32> %0
}
@@ -309,7 +309,7 @@ define <vscale x 4 x i32> @test_fcvtzu_u32_f16_x_2(<vscale x 4 x i1> %pg, double
; CHECK-2p2-NEXT: fcvtzu z0.s, p0/z, z1.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.i32f16(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 8 x half> %x)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.i32f16(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 8 x half> %x)
ret <vscale x 4 x i32> %0
}
@@ -340,7 +340,7 @@ define <vscale x 2 x i64> @test_fcvtzu_u64_f16_x_1(<vscale x 2 x i1> %pg, <vscal
; CHECK-2p2-NEXT: fcvtzu z0.d, p0/z, z0.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.i64f16(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 8 x half> %x)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.i64f16(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 8 x half> %x)
ret <vscale x 2 x i64> %0
}
@@ -355,7 +355,7 @@ define <vscale x 2 x i64> @test_fcvtzu_u64_f16_x_2(<vscale x 2 x i1> %pg, double
; CHECK-2p2-NEXT: fcvtzu z0.d, p0/z, z1.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.i64f16(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 8 x half> %x)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.i64f16(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 8 x half> %x)
ret <vscale x 2 x i64> %0
}
@@ -387,7 +387,7 @@ define <vscale x 8 x i16> @test_svcvt_s16_f16_x_1(<vscale x 8 x i1> %pg, <vscale
; CHECK-2p2-NEXT: fcvtzs z0.h, p0/z, z0.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzs.nxv8i16.nxv8f16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzs.nxv8i16.nxv8f16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
ret <vscale x 8 x i16> %0
}
@@ -403,7 +403,7 @@ define <vscale x 8 x i16> @test_svcvt_s16_f16_x_2(<vscale x 8 x i1> %pg, double
; CHECK-2p2-NEXT: fcvtzs z0.h, p0/z, z1.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzs.nxv8i16.nxv8f16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzs.nxv8i16.nxv8f16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
ret <vscale x 8 x i16> %0
}
@@ -434,7 +434,7 @@ define <vscale x 8 x i16> @test_svcvt_u16_f16_x_1(<vscale x 8 x i1> %pg, <vscale
; CHECK-2p2-NEXT: fcvtzu z0.h, p0/z, z0.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzu.nxv8i16.nxv8f16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzu.nxv8i16.nxv8f16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
ret <vscale x 8 x i16> %0
}
@@ -450,7 +450,7 @@ define <vscale x 8 x i16> @test_svcvt_u16_f16_x_2(<vscale x 8 x i1> %pg, double
; CHECK-2p2-NEXT: fcvtzu z0.h, p0/z, z1.h
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzu.nxv8i16.nxv8f16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzu.nxv8i16.nxv8f16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
ret <vscale x 8 x i16> %0
}
@@ -481,7 +481,7 @@ define <vscale x 4 x i32> @test_svcvt_s32_f32_x_1(<vscale x 4 x i1> %pg, <vscale
; CHECK-2p2-NEXT: fcvtzs z0.s, p0/z, z0.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.nxv4i32.nxv4f32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.nxv4i32.nxv4f32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
ret <vscale x 4 x i32> %0
}
@@ -497,7 +497,7 @@ define <vscale x 4 x i32> @test_svcvt_s32_f32_x_2(<vscale x 4 x i1> %pg, double
; CHECK-2p2-NEXT: fcvtzs z0.s, p0/z, z1.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.nxv4i32.nxv4f32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.nxv4i32.nxv4f32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
ret <vscale x 4 x i32> %0
}
@@ -528,7 +528,7 @@ define <vscale x 4 x i32> @test_svcvt_u32_f32_x_1(<vscale x 4 x i1> %pg, <vscale
; CHECK-2p2-NEXT: fcvtzu z0.s, p0/z, z0.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.nxv4i32.nxv4f32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.nxv4i32.nxv4f32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
ret <vscale x 4 x i32> %0
}
@@ -544,7 +544,7 @@ define <vscale x 4 x i32> @test_svcvt_u32_f32_x_2(<vscale x 4 x i1> %pg, double
; CHECK-2p2-NEXT: fcvtzu z0.s, p0/z, z1.s
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.nxv4i32.nxv4f32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.nxv4i32.nxv4f32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
ret <vscale x 4 x i32> %0
}
@@ -575,7 +575,7 @@ define <vscale x 2 x i64> @test_svcvt_s64_f64_x_1(<vscale x 2 x i1> %pg, <vscale
; CHECK-2p2-NEXT: fcvtzs z0.d, p0/z, z0.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.nxv2i64.nxv2f64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.nxv2i64.nxv2f64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
ret <vscale x 2 x i64> %0
}
@@ -591,7 +591,7 @@ define <vscale x 2 x i64> @test_svcvt_s64_f64_x_2(<vscale x 2 x i1> %pg, double
; CHECK-2p2-NEXT: fcvtzs z0.d, p0/z, z1.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.nxv2i64.nxv2f64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.nxv2i64.nxv2f64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
ret <vscale x 2 x i64> %0
}
@@ -622,7 +622,7 @@ define <vscale x 2 x i64> @test_svcvt_u64_f64_x_1(<vscale x 2 x i1> %pg, <vscale
; CHECK-2p2-NEXT: fcvtzu z0.d, p0/z, z0.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.nxv2i64.nxv2f64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.nxv2i64.nxv2f64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
ret <vscale x 2 x i64> %0
}
@@ -638,7 +638,7 @@ define <vscale x 2 x i64> @test_svcvt_u64_f64_x_2(<vscale x 2 x i1> %pg, double
; CHECK-2p2-NEXT: fcvtzu z0.d, p0/z, z1.d
; CHECK-2p2-NEXT: ret
entry:
- %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.nxv2i64.nxv2f64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.nxv2i64.nxv2f64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
ret <vscale x 2 x i64> %0
}
@@ -657,3 +657,527 @@ entry:
%0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.nxv2i64.nxv2f64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
ret <vscale x 2 x i64> %0
}
+
+define <vscale x 4 x i32> @test_fcvtzs_i32_f64_ptrue_u(double %z0, <vscale x 2 x double> %x) {
+; CHECK-LABEL: test_fcvtzs_i32_f64_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzs z0.s, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzs_i32_f64_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvtzs z0.s, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.i32f64(<vscale x 4 x i32> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_fcvtzs_i32_f64_ptrue(double %z0, <vscale x 4 x i32> %x, <vscale x 2 x double> %y) {
+; CHECK-LABEL: test_fcvtzs_i32_f64_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzs z0.s, p0/m, z2.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzs_i32_f64_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvtzs z0.s, p0/z, z2.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.i32f64(<vscale x 4 x i32> %x, <vscale x 2 x i1> %pg, <vscale x 2 x double> %y)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_fcvtzu_i32_f64_ptrue_u(double %z0, <vscale x 2 x double> %x) {
+; CHECK-LABEL: test_fcvtzu_i32_f64_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzu z0.s, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzu_i32_f64_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvtzu z0.s, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.i32f64(<vscale x 4 x i32> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_fcvtzu_i32_f64_ptrue(double %z0, <vscale x 4 x i32> %x, <vscale x 2 x double> %y) {
+; CHECK-LABEL: test_fcvtzu_i32_f64_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzu z0.s, p0/m, z2.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzu_i32_f64_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvtzu z0.s, p0/z, z2.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.i32f64(<vscale x 4 x i32> %x, <vscale x 2 x i1> %pg, <vscale x 2 x double> %y)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 2 x i64> @test_fcvtzs_i64_f32_ptrue_u(double %z0, <vscale x 4 x float> %x) {
+; CHECK-LABEL: test_fcvtzs_i64_f32_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzs_i64_f32_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvtzs z0.d, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.i64f32(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 4 x float> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_fcvtzs_i64_f32_ptrue(double %z0, <vscale x 2 x i64> %x, <vscale x 4 x float> %y) {
+; CHECK-LABEL: test_fcvtzs_i64_f32_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z2.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzs_i64_f32_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvtzs z0.d, p0/z, z2.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.i64f32(<vscale x 2 x i64> %x, <vscale x 2 x i1> %pg, <vscale x 4 x float> %y)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_fcvtzu_i64_f32_ptrue_u(double %z0, <vscale x 4 x float> %x) {
+; CHECK-LABEL: test_fcvtzu_i64_f32_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzu z0.d, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzu_i64_f32_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvtzu z0.d, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.i64f32(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 4 x float> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_fcvtzu_i64_f32_ptrue(double %z0, <vscale x 2 x i64> %x, <vscale x 4 x float> %y) {
+; CHECK-LABEL: test_fcvtzu_i64_f32_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzu z0.d, p0/m, z2.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzu_i64_f32_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvtzu z0.d, p0/z, z2.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.i64f32(<vscale x 2 x i64> %x, <vscale x 2 x i1> %pg, <vscale x 4 x float> %y)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 4 x i32> @test_fcvtzs_i32_f16_ptrue_u(double %z0, <vscale x 8 x half> %x) {
+; CHECK-LABEL: test_fcvtzs_i32_f16_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fcvtzs z0.s, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzs_i32_f16_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: fcvtzs z0.s, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.i32f16(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 8 x half> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_fcvtzs_i32_f16_ptrue(double %z0, <vscale x 4 x i32> %x, <vscale x 8 x half> %y) {
+; CHECK-LABEL: test_fcvtzs_i32_f16_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fcvtzs z0.s, p0/m, z2.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzs_i32_f16_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: fcvtzs z0.s, p0/z, z2.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.i32f16(<vscale x 4 x i32> %x, <vscale x 4 x i1> %pg, <vscale x 8 x half> %y)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_fcvtzu_i32_f16_ptrue_u(double %z0, <vscale x 8 x half> %x) {
+; CHECK-LABEL: test_fcvtzu_i32_f16_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fcvtzu z0.s, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzu_i32_f16_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: fcvtzu z0.s, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.i32f16(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 8 x half> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_fcvtzu_i32_f16_ptrue(double %z0, <vscale x 4 x i32> %x, <vscale x 8 x half> %y) {
+; CHECK-LABEL: test_fcvtzu_i32_f16_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fcvtzu z0.s, p0/m, z2.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzu_i32_f16_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: fcvtzu z0.s, p0/z, z2.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.i32f16(<vscale x 4 x i32> %x, <vscale x 4 x i1> %pg, <vscale x 8 x half> %y)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 2 x i64> @test_fcvtzs_i64_f16_ptrue_u(double %z0, <vscale x 8 x half> %x) {
+; CHECK-LABEL: test_fcvtzs_i64_f16_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzs_i64_f16_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvtzs z0.d, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.i64f16(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 8 x half> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_fcvtzs_i64_f16_ptrue(double %z0, <vscale x 2 x i64> %x, <vscale x 8 x half> %y) {
+; CHECK-LABEL: test_fcvtzs_i64_f16_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z2.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzs_i64_f16_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvtzs z0.d, p0/z, z2.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.i64f16(<vscale x 2 x i64> %x, <vscale x 2 x i1> %pg, <vscale x 8 x half> %y)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_fcvtzu_i64_f16_ptrue_u(double %z0, <vscale x 8 x half> %x) {
+; CHECK-LABEL: test_fcvtzu_i64_f16_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzu z0.d, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzu_i64_f16_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvtzu z0.d, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.i64f16(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 8 x half> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_fcvtzu_i64_f16_ptrue(double %z0, <vscale x 2 x i64> %x, <vscale x 8 x half> %y) {
+; CHECK-LABEL: test_fcvtzu_i64_f16_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzu z0.d, p0/m, z2.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzu_i64_f16_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvtzu z0.d, p0/z, z2.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.i64f16(<vscale x 2 x i64> %x, <vscale x 2 x i1> %pg, <vscale x 8 x half> %y)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 8 x i16> @test_fcvtzs_i16_f16_ptrue_u(double %z0, <vscale x 8 x half> %x) {
+; CHECK-LABEL: test_fcvtzs_i16_f16_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: fcvtzs z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzs_i16_f16_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.h
+; CHECK-2p2-NEXT: fcvtzs z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzs.nxv8i16.nxv8f16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_fcvtzs_i16_f16_ptrue(double %z0, <vscale x 8 x i16> %x, <vscale x 8 x half> %y) {
+; CHECK-LABEL: test_fcvtzs_i16_f16_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: fcvtzs z0.h, p0/m, z2.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzs_i16_f16_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.h
+; CHECK-2p2-NEXT: fcvtzs z0.h, p0/z, z2.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzs.nxv8i16.nxv8f16(<vscale x 8 x i16> %x, <vscale x 8 x i1> %pg, <vscale x 8 x half> %y)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_fcvtzu_i16_f16_ptrue_u(double %z0, <vscale x 8 x half> %x) {
+; CHECK-LABEL: test_fcvtzu_i16_f16_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: fcvtzu z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzu_i16_f16_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.h
+; CHECK-2p2-NEXT: fcvtzu z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzu.nxv8i16.nxv8f16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_fcvtzu_i16_f16_ptrue(double %z0, <vscale x 8 x i16> %x, <vscale x 8 x half> %y) {
+; CHECK-LABEL: test_fcvtzu_i16_f16_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: fcvtzu z0.h, p0/m, z2.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzu_i16_f16_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.h
+; CHECK-2p2-NEXT: fcvtzu z0.h, p0/z, z2.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzu.nxv8i16.nxv8f16(<vscale x 8 x i16> %x, <vscale x 8 x i1> %pg, <vscale x 8 x half> %y)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 4 x i32> @test_fcvtzs_i32_f32_ptrue_u(double %z0, <vscale x 4 x float> %x) {
+; CHECK-LABEL: test_fcvtzs_i32_f32_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: fcvtzs z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzs_i32_f32_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: fcvtzs z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.nxv4i32.nxv4f32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_fcvtzs_i32_f32_ptrue(double %z0, <vscale x 4 x i32> %x, <vscale x 4 x float> %y) {
+; CHECK-LABEL: test_fcvtzs_i32_f32_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: fcvtzs z0.s, p0/m, z2.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzs_i32_f32_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: fcvtzs z0.s, p0/z, z2.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.nxv4i32.nxv4f32(<vscale x 4 x i32> %x, <vscale x 4 x i1> %pg, <vscale x 4 x float> %y)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_fcvtzu_i32_f32_ptrue_u(double %z0, <vscale x 4 x float> %x) {
+; CHECK-LABEL: test_fcvtzu_i32_f32_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: fcvtzu z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzu_i32_f32_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: fcvtzu z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.nxv4i32.nxv4f32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_fcvtzu_i32_f32_ptrue(double %z0, <vscale x 4 x i32> %x, <vscale x 4 x float> %y) {
+; CHECK-LABEL: test_fcvtzu_i32_f32_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: fcvtzu z0.s, p0/m, z2.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzu_i32_f32_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: fcvtzu z0.s, p0/z, z2.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.nxv4i32.nxv4f32(<vscale x 4 x i32> %x, <vscale x 4 x i1> %pg, <vscale x 4 x float> %y)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 2 x i64> @test_fcvtzs_i64_f64_ptrue_u(double %z0, <vscale x 2 x double> %x) {
+; CHECK-LABEL: test_fcvtzs_i64_f64_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzs_i64_f64_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvtzs z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.nxv2i64.nxv2f64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_fcvtzs_i64_f64_ptrue(double %z0, <vscale x 2 x i64> %x, <vscale x 2 x double> %y) {
+; CHECK-LABEL: test_fcvtzs_i64_f64_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z2.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzs_i64_f64_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvtzs z0.d, p0/z, z2.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.nxv2i64.nxv2f64(<vscale x 2 x i64> %x, <vscale x 2 x i1> %pg, <vscale x 2 x double> %y)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_fcvtzu_i64_f64_ptrue_u(double %z0, <vscale x 2 x double> %x) {
+; CHECK-LABEL: test_fcvtzu_i64_f64_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: fcvtzu z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzu_i64_f64_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvtzu z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.nxv2i64.nxv2f64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_fcvtzu_i64_f64_ptrue(double %z0, <vscale x 2 x i64> %x, <vscale x 2 x double> %y) {
+; CHECK-LABEL: test_fcvtzu_i64_f64_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: fcvtzu z0.d, p0/m, z2.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzu_i64_f64_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvtzu z0.d, p0/z, z2.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.nxv2i64.nxv2f64(<vscale x 2 x i64> %x, <vscale x 2 x i1> %pg, <vscale x 2 x double> %y)
+ ret <vscale x 2 x i64> %0
+}
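
For readers unfamiliar with the source-level pattern: a minimal, hedged C sketch (illustration only, not part of the commit) of the kind of code the _ptrue_u tests above model, assuming the ACLE intrinsics svcvt_f16_f32_x and svptrue_b32 from <arm_sve.h>:

#include <arm_sve.h>

svfloat16_t cvt_f16_f32_all_active(svfloat32_t x) {
  // The _x variant leaves inactive lanes unspecified (a poison passthru
  // at the IR level). With an all-true predicate every lane is active,
  // so an SVE2.2-capable backend may select the zeroing form
  //   fcvt z0.h, p0/z, z1.s
  // rather than a merging form that could require a movprfx.
  return svcvt_f16_f32_x(svptrue_b32(), x);
}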