[llvm] 81eafb8 - [AArch64][SVE] Break false dependencies for inactive lanes of unary operations
Bradley Smith via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 26 08:01:39 PDT 2021
Author: Bradley Smith
Date: 2021-07-26T15:01:21Z
New Revision: 81eafb8a37c9f0eecd3b73642d94eee3bae490bf
URL: https://github.com/llvm/llvm-project/commit/81eafb8a37c9f0eecd3b73642d94eee3bae490bf
DIFF: https://github.com/llvm/llvm-project/commit/81eafb8a37c9f0eecd3b73642d94eee3bae490bf.diff
LOG: [AArch64][SVE] Break false dependencies for inactive lanes of unary operations
Differential Revision: https://reviews.llvm.org/D105889
Added:
llvm/test/CodeGen/AArch64/sve-unary-movprfx.ll
llvm/test/CodeGen/AArch64/sve2-unary-movprfx.ll
Modified:
llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
llvm/lib/Target/AArch64/AArch64InstrFormats.td
llvm/lib/Target/AArch64/AArch64InstrInfo.h
llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
llvm/lib/Target/AArch64/SVEInstrFormats.td
llvm/test/CodeGen/AArch64/sve-forward-st-to-ld.ll
llvm/test/CodeGen/AArch64/sve-gather-scatter-dag-combine.ll
llvm/test/CodeGen/AArch64/sve-masked-gather-legalize.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index f8fa9ff3c068..b2eee2845ba9 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -466,6 +466,9 @@ bool AArch64ExpandPseudo::expand_DestructiveOp(
case AArch64::DestructiveBinaryImm:
std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 2, 3);
break;
+ case AArch64::DestructiveUnaryPassthru:
+ std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(2, 3, 3);
+ break;
case AArch64::DestructiveTernaryCommWithRev:
std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 2, 3, 4);
if (DstReg == MI.getOperand(3).getReg()) {
@@ -494,6 +497,7 @@ bool AArch64ExpandPseudo::expand_DestructiveOp(
DstReg != MI.getOperand(DOPIdx).getReg() ||
MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg();
break;
+ case AArch64::DestructiveUnaryPassthru:
case AArch64::DestructiveBinaryImm:
DOPRegIsUnique = true;
break;
@@ -578,6 +582,11 @@ bool AArch64ExpandPseudo::expand_DestructiveOp(
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead));
switch (DType) {
+ case AArch64::DestructiveUnaryPassthru:
+ DOP.addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
+ .add(MI.getOperand(PredIdx))
+ .add(MI.getOperand(SrcIdx));
+ break;
case AArch64::DestructiveBinaryImm:
case AArch64::DestructiveBinaryComm:
case AArch64::DestructiveBinaryCommWithRev:
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 80b8a3fdd9e2..9bc2539e95f0 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -36,6 +36,7 @@ def DestructiveBinary : DestructiveInstTypeEnum<5>;
def DestructiveBinaryComm : DestructiveInstTypeEnum<6>;
def DestructiveBinaryCommWithRev : DestructiveInstTypeEnum<7>;
def DestructiveTernaryCommWithRev : DestructiveInstTypeEnum<8>;
+def DestructiveUnaryPassthru : DestructiveInstTypeEnum<9>;
class FalseLanesEnum<bits<2> val> {
bits<2> Value = val;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index 406cfb03d584..e25189e409a3 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -482,6 +482,7 @@ enum DestructiveInstType {
DestructiveBinaryComm = TSFLAG_DESTRUCTIVE_INST_TYPE(0x6),
DestructiveBinaryCommWithRev = TSFLAG_DESTRUCTIVE_INST_TYPE(0x7),
DestructiveTernaryCommWithRev = TSFLAG_DESTRUCTIVE_INST_TYPE(0x8),
+ DestructiveUnaryPassthru = TSFLAG_DESTRUCTIVE_INST_TYPE(0x9),
};
enum FalseLaneType {
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 456093294706..91c3aec30a15 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1715,12 +1715,12 @@ let Predicates = [HasSVE] in {
def : Pat<(nxv2i64 (AArch64ld1rq_z PPR:$gp, (add GPR64:$base, (i64 simm4s16:$imm)))),
(LD1RQ_D_IMM $gp, $base, simm4s16:$imm)>;
- def : Pat<(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (SXTW_ZPmZ_D (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$Zs)>;
- def : Pat<(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i16), (SXTH_ZPmZ_D (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$Zs)>;
- def : Pat<(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i8), (SXTB_ZPmZ_D (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$Zs)>;
- def : Pat<(sext_inreg (nxv4i32 ZPR:$Zs), nxv4i16), (SXTH_ZPmZ_S (IMPLICIT_DEF), (PTRUE_S 31), ZPR:$Zs)>;
- def : Pat<(sext_inreg (nxv4i32 ZPR:$Zs), nxv4i8), (SXTB_ZPmZ_S (IMPLICIT_DEF), (PTRUE_S 31), ZPR:$Zs)>;
- def : Pat<(sext_inreg (nxv8i16 ZPR:$Zs), nxv8i8), (SXTB_ZPmZ_H (IMPLICIT_DEF), (PTRUE_H 31), ZPR:$Zs)>;
+ def : Pat<(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (SXTW_ZPmZ_UNDEF_D (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$Zs)>;
+ def : Pat<(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i16), (SXTH_ZPmZ_UNDEF_D (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$Zs)>;
+ def : Pat<(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i8), (SXTB_ZPmZ_UNDEF_D (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$Zs)>;
+ def : Pat<(sext_inreg (nxv4i32 ZPR:$Zs), nxv4i16), (SXTH_ZPmZ_UNDEF_S (IMPLICIT_DEF), (PTRUE_S 31), ZPR:$Zs)>;
+ def : Pat<(sext_inreg (nxv4i32 ZPR:$Zs), nxv4i8), (SXTB_ZPmZ_UNDEF_S (IMPLICIT_DEF), (PTRUE_S 31), ZPR:$Zs)>;
+ def : Pat<(sext_inreg (nxv8i16 ZPR:$Zs), nxv8i8), (SXTB_ZPmZ_UNDEF_H (IMPLICIT_DEF), (PTRUE_H 31), ZPR:$Zs)>;
// General case that we ideally never want to match.
def : Pat<(vscale GPR64:$scale), (MADDXrrr (UBFMXri (RDVLI_XI 1), 4, 63), $scale, XZR)>;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 005a22cab82d..02d3a765a802 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -340,6 +340,15 @@ class SVE_1_Op_Passthru_Pat<ValueType vtd, SDPatternOperator op, ValueType pg,
: Pat<(vtd (op pg:$Op1, vts:$Op2, vtd:$Op3)),
(inst $Op3, $Op1, $Op2)>;
+
+multiclass SVE_1_Op_PassthruUndef_Pat<ValueType vtd, SDPatternOperator op, ValueType pg,
+ ValueType vts, Instruction inst> {
+ def : Pat<(vtd (op pg:$Op1, vts:$Op2, (vtd undef))),
+ (inst (IMPLICIT_DEF), $Op1, $Op2)>;
+ def : Pat<(vtd (op (pg (SVEAllActive:$Op1)), vts:$Op2, vtd:$Op3)),
+ (inst $Op3, $Op1, $Op2)>;
+}
+
// Used to match FP_ROUND_MERGE_PASSTHRU, which has an additional flag for the
// type of rounding. This is matched by timm0_1 in pattern below and ignored.
class SVE_1_Op_Passthru_Round_Pat<ValueType vtd, SDPatternOperator op, ValueType pg,
@@ -389,6 +398,14 @@ class SVE_3_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
: Pat<(vtd (op vt1:$Op1, vt2:$Op2, vt3:$Op3)),
(inst $Op1, $Op2, $Op3)>;
+multiclass SVE_3_Op_Undef_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
+ ValueType vt2, ValueType vt3, Instruction inst> {
+ def : Pat<(vtd (op (vt1 undef), vt2:$Op1, vt3:$Op2)),
+ (inst (IMPLICIT_DEF), $Op1, $Op2)>;
+ def : Pat<(vtd (op vt1:$Op1, (vt2 (SVEAllActive:$Op2)), vt3:$Op3)),
+ (inst $Op1, $Op2, $Op3)>;
+}
+
class SVE_4_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
ValueType vt2, ValueType vt3, ValueType vt4,
Instruction inst>
@@ -447,6 +464,14 @@ class SVE_InReg_Extend<ValueType vt, SDPatternOperator op, ValueType pt,
: Pat<(vt (op pt:$Pg, vt:$Src, inreg_vt, vt:$PassThru)),
(inst $PassThru, $Pg, $Src)>;
+multiclass SVE_InReg_Extend_PassthruUndef<ValueType vt, SDPatternOperator op, ValueType pt,
+ ValueType inreg_vt, Instruction inst> {
+ def : Pat<(vt (op pt:$Pg, vt:$Src, inreg_vt, (vt undef))),
+ (inst (IMPLICIT_DEF), $Pg, $Src)>;
+ def : Pat<(vt (op (pt (SVEAllActive:$Pg)), vt:$Src, inreg_vt, vt:$PassThru)),
+ (inst $PassThru, $Pg, $Src)>;
+}
+
class SVE_Shift_DupImm_Pred_Pat<ValueType vt, SDPatternOperator op,
ValueType pt, ValueType it,
ComplexPattern cast, Instruction inst>
@@ -524,6 +549,15 @@ let hasNoSchedulingInfo = 1 in {
}
}
+//
+// Pseudos for passthru operands
+//
+let hasNoSchedulingInfo = 1 in {
+ class PredOneOpPassthruPseudo<string name, ZPRRegOp zprty>
+ : SVEPseudo2Instr<name, 0>,
+ Pseudo<(outs zprty:$Zd), (ins zprty:$Passthru, PPR3bAny:$Pg, zprty:$Zs), []>;
+}
+
//===----------------------------------------------------------------------===//
// SVE Predicate Misc Group
//===----------------------------------------------------------------------===//
@@ -3252,26 +3286,46 @@ class sve2_int_un_pred_arit<bits<2> sz, bit Q, bits<2> opc,
let Inst{4-0} = Zd;
let Constraints = "$Zd = $_Zd";
- let DestructiveInstType = DestructiveOther;
+ let DestructiveInstType = DestructiveUnaryPassthru;
let ElementSize = zprty.ElementSize;
}
multiclass sve2_int_un_pred_arit_s<bits<3> opc, string asm,
SDPatternOperator op> {
- def _S : sve2_int_un_pred_arit<0b10, opc{2}, opc{1-0}, asm, ZPR32>;
+ def _S : sve2_int_un_pred_arit<0b10, opc{2}, opc{1-0}, asm, ZPR32>,
+ SVEPseudo2Instr<NAME # _S, 1>;
+
def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
+
+ def _UNDEF_S : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
+
+ defm : SVE_3_Op_Undef_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv4i32, !cast<Pseudo>(NAME # _UNDEF_S)>;
}
multiclass sve2_int_un_pred_arit<bits<3> opc, string asm, SDPatternOperator op> {
- def _B : sve2_int_un_pred_arit<0b00, opc{2}, opc{1-0}, asm, ZPR8>;
- def _H : sve2_int_un_pred_arit<0b01, opc{2}, opc{1-0}, asm, ZPR16>;
- def _S : sve2_int_un_pred_arit<0b10, opc{2}, opc{1-0}, asm, ZPR32>;
- def _D : sve2_int_un_pred_arit<0b11, opc{2}, opc{1-0}, asm, ZPR64>;
+ def _B : sve2_int_un_pred_arit<0b00, opc{2}, opc{1-0}, asm, ZPR8>,
+ SVEPseudo2Instr<NAME # _B, 1>;
+ def _H : sve2_int_un_pred_arit<0b01, opc{2}, opc{1-0}, asm, ZPR16>,
+ SVEPseudo2Instr<NAME # _H, 1>;
+ def _S : sve2_int_un_pred_arit<0b10, opc{2}, opc{1-0}, asm, ZPR32>,
+ SVEPseudo2Instr<NAME # _S, 1>;
+ def _D : sve2_int_un_pred_arit<0b11, opc{2}, opc{1-0}, asm, ZPR64>,
+ SVEPseudo2Instr<NAME # _D, 1>;
def : SVE_3_Op_Pat<nxv16i8, op, nxv16i8, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
+
+ def _UNDEF_B : PredOneOpPassthruPseudo<NAME # _B, ZPR8>;
+ def _UNDEF_H : PredOneOpPassthruPseudo<NAME # _H, ZPR16>;
+ def _UNDEF_S : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
+ def _UNDEF_D : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
+
+ defm : SVE_3_Op_Undef_Pat<nxv16i8, op, nxv16i8, nxv16i1, nxv16i8, !cast<Pseudo>(NAME # _UNDEF_B)>;
+ defm : SVE_3_Op_Undef_Pat<nxv8i16, op, nxv8i16, nxv8i1, nxv8i16, !cast<Pseudo>(NAME # _UNDEF_H)>;
+ defm : SVE_3_Op_Undef_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv4i32, !cast<Pseudo>(NAME # _UNDEF_S)>;
+ defm : SVE_3_Op_Undef_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv2i64, !cast<Pseudo>(NAME # _UNDEF_D)>;
}
//===----------------------------------------------------------------------===//
@@ -3872,67 +3926,122 @@ class sve_int_un_pred_arit<bits<2> sz8_64, bits<4> opc,
let Inst{4-0} = Zd;
let Constraints = "$Zd = $_Zd";
- let DestructiveInstType = DestructiveOther;
+ let DestructiveInstType = DestructiveUnaryPassthru;
let ElementSize = zprty.ElementSize;
}
multiclass sve_int_un_pred_arit_0<bits<3> opc, string asm,
SDPatternOperator op> {
- def _B : sve_int_un_pred_arit<0b00, { opc, 0b0 }, asm, ZPR8>;
- def _H : sve_int_un_pred_arit<0b01, { opc, 0b0 }, asm, ZPR16>;
- def _S : sve_int_un_pred_arit<0b10, { opc, 0b0 }, asm, ZPR32>;
- def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>;
+ def _B : sve_int_un_pred_arit<0b00, { opc, 0b0 }, asm, ZPR8>,
+ SVEPseudo2Instr<NAME # _B, 1>;
+ def _H : sve_int_un_pred_arit<0b01, { opc, 0b0 }, asm, ZPR16>,
+ SVEPseudo2Instr<NAME # _H, 1>;
+ def _S : sve_int_un_pred_arit<0b10, { opc, 0b0 }, asm, ZPR32>,
+ SVEPseudo2Instr<NAME # _S, 1>;
+ def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>,
+ SVEPseudo2Instr<NAME # _D, 1>;
def : SVE_1_Op_Passthru_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
def : SVE_1_Op_Passthru_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
def : SVE_1_Op_Passthru_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
def : SVE_1_Op_Passthru_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
+
+ def _UNDEF_B : PredOneOpPassthruPseudo<NAME # _B, ZPR8>;
+ def _UNDEF_H : PredOneOpPassthruPseudo<NAME # _H, ZPR16>;
+ def _UNDEF_S : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
+ def _UNDEF_D : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
+
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Pseudo>(NAME # _UNDEF_B)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Pseudo>(NAME # _UNDEF_H)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Pseudo>(NAME # _UNDEF_S)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Pseudo>(NAME # _UNDEF_D)>;
}
multiclass sve_int_un_pred_arit_0_h<bits<3> opc, string asm,
SDPatternOperator op> {
- def _H : sve_int_un_pred_arit<0b01, { opc, 0b0 }, asm, ZPR16>;
- def _S : sve_int_un_pred_arit<0b10, { opc, 0b0 }, asm, ZPR32>;
- def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>;
+ def _H : sve_int_un_pred_arit<0b01, { opc, 0b0 }, asm, ZPR16>,
+ SVEPseudo2Instr<NAME # _H, 1>;
+ def _S : sve_int_un_pred_arit<0b10, { opc, 0b0 }, asm, ZPR32>,
+ SVEPseudo2Instr<NAME # _S, 1>;
+ def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>,
+ SVEPseudo2Instr<NAME # _D, 1>;
def : SVE_InReg_Extend<nxv8i16, op, nxv8i1, nxv8i8, !cast<Instruction>(NAME # _H)>;
def : SVE_InReg_Extend<nxv4i32, op, nxv4i1, nxv4i8, !cast<Instruction>(NAME # _S)>;
def : SVE_InReg_Extend<nxv2i64, op, nxv2i1, nxv2i8, !cast<Instruction>(NAME # _D)>;
+
+ def _UNDEF_H : PredOneOpPassthruPseudo<NAME # _H, ZPR16>;
+ def _UNDEF_S : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
+ def _UNDEF_D : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
+
+ defm : SVE_InReg_Extend_PassthruUndef<nxv8i16, op, nxv8i1, nxv8i8, !cast<Pseudo>(NAME # _UNDEF_H)>;
+ defm : SVE_InReg_Extend_PassthruUndef<nxv4i32, op, nxv4i1, nxv4i8, !cast<Pseudo>(NAME # _UNDEF_S)>;
+ defm : SVE_InReg_Extend_PassthruUndef<nxv2i64, op, nxv2i1, nxv2i8, !cast<Pseudo>(NAME # _UNDEF_D)>;
}
multiclass sve_int_un_pred_arit_0_w<bits<3> opc, string asm,
SDPatternOperator op> {
- def _S : sve_int_un_pred_arit<0b10, { opc, 0b0 }, asm, ZPR32>;
- def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>;
+ def _S : sve_int_un_pred_arit<0b10, { opc, 0b0 }, asm, ZPR32>,
+ SVEPseudo2Instr<NAME # _S, 1>;
+ def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>,
+ SVEPseudo2Instr<NAME # _D, 1>;
def : SVE_InReg_Extend<nxv4i32, op, nxv4i1, nxv4i16, !cast<Instruction>(NAME # _S)>;
def : SVE_InReg_Extend<nxv2i64, op, nxv2i1, nxv2i16, !cast<Instruction>(NAME # _D)>;
+
+ def _UNDEF_S : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
+ def _UNDEF_D : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
+
+ defm : SVE_InReg_Extend_PassthruUndef<nxv4i32, op, nxv4i1, nxv4i16, !cast<Pseudo>(NAME # _UNDEF_S)>;
+ defm : SVE_InReg_Extend_PassthruUndef<nxv2i64, op, nxv2i1, nxv2i16, !cast<Pseudo>(NAME # _UNDEF_D)>;
}
multiclass sve_int_un_pred_arit_0_d<bits<3> opc, string asm,
SDPatternOperator op> {
- def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>;
+ def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>,
+ SVEPseudo2Instr<NAME # _D, 1>;
def : SVE_InReg_Extend<nxv2i64, op, nxv2i1, nxv2i32, !cast<Instruction>(NAME # _D)>;
+
+ def _UNDEF_D : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
+
+ defm : SVE_InReg_Extend_PassthruUndef<nxv2i64, op, nxv2i1, nxv2i32, !cast<Pseudo>(NAME # _UNDEF_D)>;
}
multiclass sve_int_un_pred_arit_1<bits<3> opc, string asm,
SDPatternOperator op> {
- def _B : sve_int_un_pred_arit<0b00, { opc, 0b1 }, asm, ZPR8>;
- def _H : sve_int_un_pred_arit<0b01, { opc, 0b1 }, asm, ZPR16>;
- def _S : sve_int_un_pred_arit<0b10, { opc, 0b1 }, asm, ZPR32>;
- def _D : sve_int_un_pred_arit<0b11, { opc, 0b1 }, asm, ZPR64>;
+ def _B : sve_int_un_pred_arit<0b00, { opc, 0b1 }, asm, ZPR8>,
+ SVEPseudo2Instr<NAME # _B, 1>;
+ def _H : sve_int_un_pred_arit<0b01, { opc, 0b1 }, asm, ZPR16>,
+ SVEPseudo2Instr<NAME # _H, 1>;
+ def _S : sve_int_un_pred_arit<0b10, { opc, 0b1 }, asm, ZPR32>,
+ SVEPseudo2Instr<NAME # _S, 1>;
+ def _D : sve_int_un_pred_arit<0b11, { opc, 0b1 }, asm, ZPR64>,
+ SVEPseudo2Instr<NAME # _D, 1>;
def : SVE_1_Op_Passthru_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
def : SVE_1_Op_Passthru_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
def : SVE_1_Op_Passthru_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
def : SVE_1_Op_Passthru_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
+
+ def _UNDEF_B : PredOneOpPassthruPseudo<NAME # _B, ZPR8>;
+ def _UNDEF_H : PredOneOpPassthruPseudo<NAME # _H, ZPR16>;
+ def _UNDEF_S : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
+ def _UNDEF_D : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
+
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Pseudo>(NAME # _UNDEF_B)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Pseudo>(NAME # _UNDEF_H)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Pseudo>(NAME # _UNDEF_S)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Pseudo>(NAME # _UNDEF_D)>;
}
multiclass sve_int_un_pred_arit_1_fp<bits<3> opc, string asm, SDPatternOperator op> {
- def _H : sve_int_un_pred_arit<0b01, { opc, 0b1 }, asm, ZPR16>;
- def _S : sve_int_un_pred_arit<0b10, { opc, 0b1 }, asm, ZPR32>;
- def _D : sve_int_un_pred_arit<0b11, { opc, 0b1 }, asm, ZPR64>;
+ def _H : sve_int_un_pred_arit<0b01, { opc, 0b1 }, asm, ZPR16>,
+ SVEPseudo2Instr<NAME # _H, 1>;
+ def _S : sve_int_un_pred_arit<0b10, { opc, 0b1 }, asm, ZPR32>,
+ SVEPseudo2Instr<NAME # _S, 1>;
+ def _D : sve_int_un_pred_arit<0b11, { opc, 0b1 }, asm, ZPR64>,
+ SVEPseudo2Instr<NAME # _D, 1>;
def : SVE_1_Op_Passthru_Pat<nxv8f16, op, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
def : SVE_1_Op_Passthru_Pat<nxv4f16, op, nxv4i1, nxv4f16, !cast<Instruction>(NAME # _H)>;
@@ -3940,6 +4049,17 @@ multiclass sve_int_un_pred_arit_1_fp<bits<3> opc, string asm, SDPatternOperator
def : SVE_1_Op_Passthru_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
def : SVE_1_Op_Passthru_Pat<nxv2f32, op, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>;
def : SVE_1_Op_Passthru_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
+
+ def _UNDEF_H : PredOneOpPassthruPseudo<NAME # _H, ZPR16>;
+ def _UNDEF_S : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
+ def _UNDEF_D : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
+
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv8f16, op, nxv8i1, nxv8f16, !cast<Pseudo>(NAME # _UNDEF_H)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv4f16, op, nxv4i1, nxv4f16, !cast<Pseudo>(NAME # _UNDEF_H)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv2f16, op, nxv2i1, nxv2f16, !cast<Pseudo>(NAME # _UNDEF_H)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Pseudo>(NAME # _UNDEF_S)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv2f32, op, nxv2i1, nxv2f32, !cast<Pseudo>(NAME # _UNDEF_S)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Pseudo>(NAME # _UNDEF_D)>;
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AArch64/sve-forward-st-to-ld.ll b/llvm/test/CodeGen/AArch64/sve-forward-st-to-ld.ll
index 854c477491ba..ba967cd5cba5 100644
--- a/llvm/test/CodeGen/AArch64/sve-forward-st-to-ld.ll
+++ b/llvm/test/CodeGen/AArch64/sve-forward-st-to-ld.ll
@@ -33,6 +33,7 @@ define <vscale x 2 x i64> @sti32ldi32ext(<vscale x 2 x i32>* nocapture %P, <vsca
; CHECK-LABEL: sti32ldi32ext:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z1, z0
; CHECK-NEXT: sxtw z1.d, p0/m, z0.d
; CHECK-NEXT: st1w { z0.d }, p0, [x0]
; CHECK-NEXT: mov z0.d, z1.d
diff --git a/llvm/test/CodeGen/AArch64/sve-gather-scatter-dag-combine.ll b/llvm/test/CodeGen/AArch64/sve-gather-scatter-dag-combine.ll
index dffeee2df570..69fa65733333 100644
--- a/llvm/test/CodeGen/AArch64/sve-gather-scatter-dag-combine.ll
+++ b/llvm/test/CodeGen/AArch64/sve-gather-scatter-dag-combine.ll
@@ -32,6 +32,7 @@ define <vscale x 2 x i64> @no_dag_combine_sext(<vscale x 2 x i1> %pg,
; CHECK-LABEL: no_dag_combine_sext
; CHECK: ld1b { z1.d }, p0/z, [z0.d, #16]
; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z1
; CHECK-NEXT: sxtb z0.d, p0/m, z1.d
; CHECK-NEXT: st1b { z1.d }, p1, [x0]
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sve-masked-gather-legalize.ll b/llvm/test/CodeGen/AArch64/sve-masked-gather-legalize.ll
index e71ec8178034..f97fa34392a2 100644
--- a/llvm/test/CodeGen/AArch64/sve-masked-gather-legalize.ll
+++ b/llvm/test/CodeGen/AArch64/sve-masked-gather-legalize.ll
@@ -9,6 +9,7 @@ define <vscale x 2 x i64> @masked_sgather_sext(i8* %base, <vscale x 2 x i64> %of
; CHECK: // %bb.0:
; CHECK-NEXT: ld1sb { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z2, z0
; CHECK-NEXT: sxtb z2.d, p0/m, z0.d
; CHECK-NEXT: add z0.d, z0.d, z1.d
; CHECK-NEXT: sxtb z0.d, p0/m, z0.d
diff --git a/llvm/test/CodeGen/AArch64/sve-unary-movprfx.ll b/llvm/test/CodeGen/AArch64/sve-unary-movprfx.ll
new file mode 100644
index 000000000000..a29aebfb001b
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-unary-movprfx.ll
@@ -0,0 +1,1000 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+;
+; ABS (sve_int_un_pred_arit_0)
+;
+
+; Check movprfx is inserted when no passthru/predicate is present
+define <vscale x 16 x i8> @abs_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: abs_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: abs z0.b, p0/m, z1.b
+; CHECK-NEXT: ret
+ %ret = tail call <vscale x 16 x i8> @llvm.abs.nxv16i8(<vscale x 16 x i8> %b, i1 0)
+ ret <vscale x 16 x i8> %ret
+}
+
+; Check movprfx is not inserted when dstReg == srcReg
+define <vscale x 16 x i8> @abs_i8_dupreg(<vscale x 16 x i8> %a) #0 {
+; CHECK-LABEL: abs_i8_dupreg:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: abs z0.b, p0/m, z0.b
+; CHECK-NEXT: ret
+ %ret = tail call <vscale x 16 x i8> @llvm.abs.nxv16i8(<vscale x 16 x i8> %a, i1 0)
+ ret <vscale x 16 x i8> %ret
+}
+
+; Check movprfx is inserted when passthru is undef
+define <vscale x 16 x i8> @abs_i8_undef(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: abs_i8_undef:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: abs z0.b, p0/m, z1.b
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+ %ret = tail call <vscale x 16 x i8> @llvm.aarch64.sve.abs.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i8> %ret
+}
+
+; Check movprfx is inserted when predicate is all active, making the passthru dead
+define <vscale x 16 x i8> @abs_i8_active(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: abs_i8_active:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: abs z0.b, p0/m, z1.b
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+ %ret = tail call <vscale x 16 x i8> @llvm.aarch64.sve.abs.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i8> %ret
+}
+
+; Check movprfx is not inserted when predicate is not all active, making the passthru used
+define <vscale x 16 x i8> @abs_i8_not_active(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: abs_i8_not_active:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: abs z0.b, p0/m, z1.b
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %pg.to = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg)
+ %ret = tail call <vscale x 16 x i8> @llvm.aarch64.sve.abs.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg.to, <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i8> %ret
+}
+
+define <vscale x 8 x i16> @abs_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: abs_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: abs z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+ %ret = tail call <vscale x 8 x i16> @llvm.abs.nxv8i16(<vscale x 8 x i16> %b, i1 0)
+ ret <vscale x 8 x i16> %ret
+}
+
+define <vscale x 8 x i16> @abs_i16_dupreg(<vscale x 8 x i16> %a) #0 {
+; CHECK-LABEL: abs_i16_dupreg:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: abs z0.h, p0/m, z0.h
+; CHECK-NEXT: ret
+ %ret = tail call <vscale x 8 x i16> @llvm.abs.nxv8i16(<vscale x 8 x i16> %a, i1 0)
+ ret <vscale x 8 x i16> %ret
+}
+
+define <vscale x 8 x i16> @abs_i16_undef(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: abs_i16_undef:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: abs z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %ret = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b)
+ ret <vscale x 8 x i16> %ret
+}
+
+define <vscale x 8 x i16> @abs_i16_active(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: abs_i16_active:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: abs z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %ret = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b)
+ ret <vscale x 8 x i16> %ret
+}
+
+define <vscale x 8 x i16> @abs_i16_not_active(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: abs_i16_not_active:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: abs z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %pg.to = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg)
+ %pg.from = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg.to)
+ %ret = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg.from, <vscale x 8 x i16> %b)
+ ret <vscale x 8 x i16> %ret
+}
+
+define <vscale x 4 x i32> @abs_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: abs_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: abs z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+ %ret = tail call <vscale x 4 x i32> @llvm.abs.nxv4i32(<vscale x 4 x i32> %b, i1 0)
+ ret <vscale x 4 x i32> %ret
+}
+
+define <vscale x 4 x i32> @abs_i32_dupreg(<vscale x 4 x i32> %a) #0 {
+; CHECK-LABEL: abs_i32_dupreg:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: abs z0.s, p0/m, z0.s
+; CHECK-NEXT: ret
+ %ret = tail call <vscale x 4 x i32> @llvm.abs.nxv4i32(<vscale x 4 x i32> %a, i1 0)
+ ret <vscale x 4 x i32> %ret
+}
+
+define <vscale x 4 x i32> @abs_i32_undef(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: abs_i32_undef:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: abs z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.abs.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %ret
+}
+
+define <vscale x 4 x i32> @abs_i32_active(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: abs_i32_active:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: abs z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.abs.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %ret
+}
+
+define <vscale x 4 x i32> @abs_i32_not_active(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: abs_i32_not_active:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: abs z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %pg.to = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg)
+ %pg.from = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.to)
+ %ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.abs.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg.from, <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %ret
+}
+
+define <vscale x 2 x i64> @abs_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: abs_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: abs z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+ %ret = tail call <vscale x 2 x i64> @llvm.abs.nxv2i64(<vscale x 2 x i64> %b, i1 0)
+ ret <vscale x 2 x i64> %ret
+}
+
+define <vscale x 2 x i64> @abs_i64_dupreg(<vscale x 2 x i64> %a) #0 {
+; CHECK-LABEL: abs_i64_dupreg:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: abs z0.d, p0/m, z0.d
+; CHECK-NEXT: ret
+ %ret = tail call <vscale x 2 x i64> @llvm.abs.nxv2i64(<vscale x 2 x i64> %a, i1 0)
+ ret <vscale x 2 x i64> %ret
+}
+
+define <vscale x 2 x i64> @abs_i64_undef(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: abs_i64_undef:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: abs z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %ret = tail call <vscale x 2 x i64> @llvm.aarch64.sve.abs.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %ret
+}
+
+define <vscale x 2 x i64> @abs_i64_active(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: abs_i64_active:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: abs z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %ret = tail call <vscale x 2 x i64> @llvm.aarch64.sve.abs.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %ret
+}
+
+define <vscale x 2 x i64> @abs_i64_not_active(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: abs_i64_not_active:
+; CHECK: // %bb.0:
+; CHECK: abs z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+ %ret = tail call <vscale x 2 x i64> @llvm.aarch64.sve.abs.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %ret
+}
+
+;
+; CLS (sve_int_un_pred_arit_1)
+;
+
+define <vscale x 16 x i8> @cls_i8_dupreg(<vscale x 16 x i8> %a) #0 {
+; CHECK-LABEL: cls_i8_dupreg:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: cls z0.b, p0/m, z0.b
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+ %ret = tail call <vscale x 16 x i8> @llvm.aarch64.sve.cls.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a)
+ ret <vscale x 16 x i8> %ret
+}
+
+define <vscale x 16 x i8> @cls_i8_undef(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: cls_i8_undef:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: cls z0.b, p0/m, z1.b
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+ %ret = tail call <vscale x 16 x i8> @llvm.aarch64.sve.cls.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i8> %ret
+}
+
+define <vscale x 16 x i8> @cls_i8_active(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: cls_i8_active:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: cls z0.b, p0/m, z1.b
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+ %ret = tail call <vscale x 16 x i8> @llvm.aarch64.sve.cls.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i8> %ret
+}
+
+define <vscale x 16 x i8> @cls_i8_not_active(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: cls_i8_not_active:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: cls z0.b, p0/m, z1.b
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %pg.to = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg)
+ %ret = tail call <vscale x 16 x i8> @llvm.aarch64.sve.cls.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg.to, <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i8> %ret
+}
+
+define <vscale x 8 x i16> @cls_i16_dupreg(<vscale x 8 x i16> %a) #0 {
+; CHECK-LABEL: cls_i16_dupreg:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: cls z0.h, p0/m, z0.h
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %ret = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cls.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a)
+ ret <vscale x 8 x i16> %ret
+}
+
+define <vscale x 8 x i16> @cls_i16_undef(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: cls_i16_undef:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: cls z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %ret = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cls.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b)
+ ret <vscale x 8 x i16> %ret
+}
+
+define <vscale x 8 x i16> @cls_i16_active(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: cls_i16_active:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: cls z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %ret = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cls.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b)
+ ret <vscale x 8 x i16> %ret
+}
+
+define <vscale x 8 x i16> @cls_i16_not_active(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: cls_i16_not_active:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: cls z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %pg.to = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg)
+ %pg.from = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg.to)
+ %ret = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cls.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg.from, <vscale x 8 x i16> %b)
+ ret <vscale x 8 x i16> %ret
+}
+
+define <vscale x 4 x i32> @cls_i32_dupreg(<vscale x 4 x i32> %a) #0 {
+; CHECK-LABEL: cls_i32_dupreg:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: cls z0.s, p0/m, z0.s
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cls.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a)
+ ret <vscale x 4 x i32> %ret
+}
+
+define <vscale x 4 x i32> @cls_i32_undef(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: cls_i32_undef:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: cls z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cls.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %ret
+}
+
+define <vscale x 4 x i32> @cls_i32_active(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: cls_i32_active:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: cls z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cls.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %ret
+}
+
+define <vscale x 4 x i32> @cls_i32_not_active(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: cls_i32_not_active:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: cls z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %pg.to = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg)
+ %pg.from = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.to)
+ %ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cls.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg.from, <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %ret
+}
+
+define <vscale x 2 x i64> @cls_i64_dupreg(<vscale x 2 x i64> %a) #0 {
+; CHECK-LABEL: cls_i64_dupreg:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: cls z0.d, p0/m, z0.d
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %ret = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cls.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %a)
+ ret <vscale x 2 x i64> %ret
+}
+
+define <vscale x 2 x i64> @cls_i64_undef(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: cls_i64_undef:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: cls z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %ret = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cls.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %ret
+}
+
+define <vscale x 2 x i64> @cls_i64_active(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: cls_i64_active:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: cls z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %ret = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cls.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %ret
+}
+
+define <vscale x 2 x i64> @cls_i64_not_active(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: cls_i64_not_active:
+; CHECK: // %bb.0:
+; CHECK: cls z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+ %ret = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cls.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %ret
+}
+
+;
+; FABS (sve_int_un_pred_arit_1_fp)
+;
+
+define <vscale x 8 x half> @fabs_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+; CHECK-LABEL: fabs_f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: fabs z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+ %ret = tail call <vscale x 8 x half> @llvm.fabs.nxv8f16(<vscale x 8 x half> %b)
+ ret <vscale x 8 x half> %ret
+}
+
+define <vscale x 8 x half> @fabs_f16_dupreg(<vscale x 8 x half> %a) #0 {
+; CHECK-LABEL: fabs_f16_dupreg:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: fabs z0.h, p0/m, z0.h
+; CHECK-NEXT: ret
+ %ret = tail call <vscale x 8 x half> @llvm.fabs.nxv8f16(<vscale x 8 x half> %a)
+ ret <vscale x 8 x half> %ret
+}
+
+define <vscale x 8 x half> @fabs_f16_undef(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+; CHECK-LABEL: fabs_f16_undef:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: fabs z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %ret = tail call <vscale x 8 x half> @llvm.aarch64.sve.fabs.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> %pg, <vscale x 8 x half> %b)
+ ret <vscale x 8 x half> %ret
+}
+
+define <vscale x 8 x half> @fabs_f16_active(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+; CHECK-LABEL: fabs_f16_active:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: fabs z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %ret = tail call <vscale x 8 x half> @llvm.aarch64.sve.fabs.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x i1> %pg, <vscale x 8 x half> %b)
+ ret <vscale x 8 x half> %ret
+}
+
+define <vscale x 8 x half> @fabs_f16_not_active(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+; CHECK-LABEL: fabs_f16_not_active:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fabs z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %pg.to = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg)
+ %pg.from = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg.to)
+ %ret = tail call <vscale x 8 x half> @llvm.aarch64.sve.fabs.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x i1> %pg.from, <vscale x 8 x half> %b)
+ ret <vscale x 8 x half> %ret
+}
+
+define <vscale x 4 x float> @fabs_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: fabs_f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: fabs z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+ %ret = tail call <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float> %b)
+ ret <vscale x 4 x float> %ret
+}
+
+define <vscale x 4 x float> @fabs_f32_dupreg(<vscale x 4 x float> %a) #0 {
+; CHECK-LABEL: fabs_f32_dupreg:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fabs z0.s, p0/m, z0.s
+; CHECK-NEXT: ret
+ %ret = tail call <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float> %a)
+ ret <vscale x 4 x float> %ret
+}
+
+define <vscale x 4 x float> @fabs_f32_undef(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: fabs_f32_undef:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: fabs z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %ret = tail call <vscale x 4 x float> @llvm.aarch64.sve.fabs.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> %pg, <vscale x 4 x float> %b)
+ ret <vscale x 4 x float> %ret
+}
+
+define <vscale x 4 x float> @fabs_f32_active(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: fabs_f32_active:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: fabs z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %ret = tail call <vscale x 4 x float> @llvm.aarch64.sve.fabs.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x i1> %pg, <vscale x 4 x float> %b)
+ ret <vscale x 4 x float> %ret
+}
+
+define <vscale x 4 x float> @fabs_f32_not_active(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: fabs_f32_not_active:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fabs z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %pg.to = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg)
+ %pg.from = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.to)
+ %ret = tail call <vscale x 4 x float> @llvm.aarch64.sve.fabs.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x i1> %pg.from, <vscale x 4 x float> %b)
+ ret <vscale x 4 x float> %ret
+}
+
+define <vscale x 2 x double> @fabs_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: fabs_f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: fabs z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+ %ret = tail call <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> %b)
+ ret <vscale x 2 x double> %ret
+}
+
+define <vscale x 2 x double> @fabs_f64_dupreg(<vscale x 2 x double> %a) #0 {
+; CHECK-LABEL: fabs_f64_dupreg:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fabs z0.d, p0/m, z0.d
+; CHECK-NEXT: ret
+ %ret = tail call <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> %a)
+ ret <vscale x 2 x double> %ret
+}
+
+define <vscale x 2 x double> @fabs_f64_undef(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: fabs_f64_undef:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: fabs z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %ret = tail call <vscale x 2 x double> @llvm.aarch64.sve.fabs.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b)
+ ret <vscale x 2 x double> %ret
+}
+
+define <vscale x 2 x double> @fabs_f64_active(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: fabs_f64_active:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: fabs z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %ret = tail call <vscale x 2 x double> @llvm.aarch64.sve.fabs.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b)
+ ret <vscale x 2 x double> %ret
+}
+
+define <vscale x 2 x double> @fabs_f64_not_active(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: fabs_f64_not_active:
+; CHECK: // %bb.0:
+; CHECK: fabs z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+ %ret = tail call <vscale x 2 x double> @llvm.aarch64.sve.fabs.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b)
+ ret <vscale x 2 x double> %ret
+}
+
+;
+; SXTB (sve_int_un_pred_arit_0_h)
+;
+
+define <vscale x 8 x i16> @sxtb_i16(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) #0 {
+; CHECK-LABEL: sxtb_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: sxtb z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+ %ret = sext <vscale x 8 x i8> %b to <vscale x 8 x i16>
+ ret <vscale x 8 x i16> %ret
+}
+
+define <vscale x 8 x i16> @sxtb_i16_dupreg(<vscale x 8 x i8> %a) #0 {
+; CHECK-LABEL: sxtb_i16_dupreg:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: sxtb z0.h, p0/m, z0.h
+; CHECK-NEXT: ret
+ %ret = sext <vscale x 8 x i8> %a to <vscale x 8 x i16>
+ ret <vscale x 8 x i16> %ret
+}
+
+define <vscale x 8 x i16> @sxtb_i16_undef(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: sxtb_i16_undef:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: sxtb z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %ret = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sxtb.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b)
+ ret <vscale x 8 x i16> %ret
+}
+
+define <vscale x 8 x i16> @sxtb_i16_active(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: sxtb_i16_active:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: sxtb z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %ret = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sxtb.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b)
+ ret <vscale x 8 x i16> %ret
+}
+
+define <vscale x 8 x i16> @sxtb_i16_not_active(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: sxtb_i16_not_active:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: sxtb z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %pg.to = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg)
+ %pg.from = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg.to)
+ %ret = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sxtb.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg.from, <vscale x 8 x i16> %b)
+ ret <vscale x 8 x i16> %ret
+}
+
+define <vscale x 4 x i32> @sxtb_i32(<vscale x 4 x i8> %a, <vscale x 4 x i8> %b) #0 {
+; CHECK-LABEL: sxtb_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: sxtb z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+ %ret = sext <vscale x 4 x i8> %b to <vscale x 4 x i32>
+ ret <vscale x 4 x i32> %ret
+}
+
+define <vscale x 4 x i32> @sxtb_i32_dupreg(<vscale x 4 x i8> %a) #0 {
+; CHECK-LABEL: sxtb_i32_dupreg:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: sxtb z0.s, p0/m, z0.s
+; CHECK-NEXT: ret
+ %ret = sext <vscale x 4 x i8> %a to <vscale x 4 x i32>
+ ret <vscale x 4 x i32> %ret
+}
+
+define <vscale x 4 x i32> @sxtb_i32_undef(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: sxtb_i32_undef:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: sxtb z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sxtb.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %ret
+}
+
+define <vscale x 4 x i32> @sxtb_i32_active(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: sxtb_i32_active:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: sxtb z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sxtb.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %ret
+}
+
+define <vscale x 4 x i32> @sxtb_i32_not_active(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: sxtb_i32_not_active:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: sxtb z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %pg.to = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg)
+ %pg.from = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.to)
+ %ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sxtb.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg.from, <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %ret
+}
+
+define <vscale x 2 x i64> @sxtb_i64(<vscale x 2 x i8> %a, <vscale x 2 x i8> %b) #0 {
+; CHECK-LABEL: sxtb_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: sxtb z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+ %ret = sext <vscale x 2 x i8> %b to <vscale x 2 x i64>
+ ret <vscale x 2 x i64> %ret
+}
+
+define <vscale x 2 x i64> @sxtb_i64_dupreg(<vscale x 2 x i8> %a) #0 {
+; CHECK-LABEL: sxtb_i64_dupreg:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: sxtb z0.d, p0/m, z0.d
+; CHECK-NEXT: ret
+ %ret = sext <vscale x 2 x i8> %a to <vscale x 2 x i64>
+ ret <vscale x 2 x i64> %ret
+}
+
+define <vscale x 2 x i64> @sxtb_i64_undef(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: sxtb_i64_undef:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: sxtb z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %ret = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sxtb.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %ret
+}
+
+define <vscale x 2 x i64> @sxtb_i64_active(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: sxtb_i64_active:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: sxtb z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %ret = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sxtb.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %ret
+}
+
+define <vscale x 2 x i64> @sxtb_i64_not_active(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: sxtb_i64_not_active:
+; CHECK: // %bb.0:
+; CHECK: sxtb z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+ %ret = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sxtb.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %ret
+}
+
+;
+; SXTH (sve_int_un_pred_arit_0_w)
+;
+
+define <vscale x 4 x i32> @sxth_i32(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b) #0 {
+; CHECK-LABEL: sxth_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: sxth z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+ %ret = sext <vscale x 4 x i16> %b to <vscale x 4 x i32>
+ ret <vscale x 4 x i32> %ret
+}
+
+define <vscale x 4 x i32> @sxth_i32_dupreg(<vscale x 4 x i16> %a) #0 {
+; CHECK-LABEL: sxth_i32_dupreg:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: sxth z0.s, p0/m, z0.s
+; CHECK-NEXT: ret
+ %ret = sext <vscale x 4 x i16> %a to <vscale x 4 x i32>
+ ret <vscale x 4 x i32> %ret
+}
+
+define <vscale x 4 x i32> @sxth_i32_undef(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: sxth_i32_undef:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: sxth z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sxth.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %ret
+}
+
+define <vscale x 4 x i32> @sxth_i32_active(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: sxth_i32_active:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: sxth z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sxth.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %ret
+}
+
+define <vscale x 4 x i32> @sxth_i32_not_active(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: sxth_i32_not_active:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: sxth z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %pg.to = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg)
+ %pg.from = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.to)
+ %ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sxth.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg.from, <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %ret
+}
+
+define <vscale x 2 x i64> @sxth_i64(<vscale x 2 x i16> %a, <vscale x 2 x i16> %b) #0 {
+; CHECK-LABEL: sxth_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: sxth z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+ %ret = sext <vscale x 2 x i16> %b to <vscale x 2 x i64>
+ ret <vscale x 2 x i64> %ret
+}
+
+define <vscale x 2 x i64> @sxth_i64_dupreg(<vscale x 2 x i16> %a) #0 {
+; CHECK-LABEL: sxth_i64_dupreg:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: sxth z0.d, p0/m, z0.d
+; CHECK-NEXT: ret
+ %ret = sext <vscale x 2 x i16> %a to <vscale x 2 x i64>
+ ret <vscale x 2 x i64> %ret
+}
+
+define <vscale x 2 x i64> @sxth_i64_undef(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: sxth_i64_undef:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: sxth z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %ret = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sxth.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %ret
+}
+
+define <vscale x 2 x i64> @sxth_i64_active(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: sxth_i64_active:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: sxth z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %ret = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sxth.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %ret
+}
+
+define <vscale x 2 x i64> @sxth_i64_not_active(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: sxth_i64_not_active:
+; CHECK: // %bb.0:
+; CHECK: sxth z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+ %ret = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sxth.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %ret
+}
+
+;
+; SXTW (sve_int_un_pred_arit_0_d)
+;
+
+define <vscale x 2 x i64> @sxtw_i64(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b) #0 {
+; CHECK-LABEL: sxtw_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: sxtw z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+ %ret = sext <vscale x 2 x i32> %b to <vscale x 2 x i64>
+ ret <vscale x 2 x i64> %ret
+}
+
+define <vscale x 2 x i64> @sxtw_i64_dupreg(<vscale x 2 x i32> %a) #0 {
+; CHECK-LABEL: sxtw_i64_dupreg:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: sxtw z0.d, p0/m, z0.d
+; CHECK-NEXT: ret
+ %ret = sext <vscale x 2 x i32> %a to <vscale x 2 x i64>
+ ret <vscale x 2 x i64> %ret
+}
+
+define <vscale x 2 x i64> @sxtw_i64_undef(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: sxtw_i64_undef:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: sxtw z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %ret = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %ret
+}
+
+define <vscale x 2 x i64> @sxtw_i64_active(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: sxtw_i64_active:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: sxtw z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %ret = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %ret
+}
+
+define <vscale x 2 x i64> @sxtw_i64_not_active(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: sxtw_i64_not_active:
+; CHECK: // %bb.0:
+; CHECK: sxtw z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+ %ret = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %ret
+}
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
+declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32)
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1>)
+
+declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
+declare <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.abs.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.abs.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.abs.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.abs.nxv16i8(<vscale x 16 x i8>, i1)
+declare <vscale x 8 x i16> @llvm.abs.nxv8i16(<vscale x 8 x i16>, i1)
+declare <vscale x 4 x i32> @llvm.abs.nxv4i32(<vscale x 4 x i32>, i1)
+declare <vscale x 2 x i64> @llvm.abs.nxv2i64(<vscale x 2 x i64>, i1)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.cls.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.cls.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.cls.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.cls.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fabs.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fabs.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fabs.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.fabs.nxv8f16(<vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double>)
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sxtb.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sxtb.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sxtb.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sxth.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sxth.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)
+
+attributes #0 = { nounwind "target-features"="+sve" }
diff --git a/llvm/test/CodeGen/AArch64/sve2-unary-movprfx.ll b/llvm/test/CodeGen/AArch64/sve2-unary-movprfx.ll
new file mode 100644
index 000000000000..83b240791edd
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2-unary-movprfx.ll
@@ -0,0 +1,273 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+;
+; SQABS (sve2_int_un_pred_arit)
+;
+
+; Check movprfx is not inserted when dstReg == srcReg
+define <vscale x 16 x i8> @sqabs_i8_dupreg(<vscale x 16 x i8> %a) #0 {
+; CHECK-LABEL: sqabs_i8_dupreg:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: sqabs z0.b, p0/m, z0.b
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+ %ret = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqabs.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a)
+ ret <vscale x 16 x i8> %ret
+}
+
+; Check movprfx is inserted when passthru is undef
+define <vscale x 16 x i8> @sqabs_i8_undef(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: sqabs_i8_undef:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: sqabs z0.b, p0/m, z1.b
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+ %ret = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqabs.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i8> %ret
+}
+
+; Check movprfx is inserted when predicate is all active, making the passthru dead
+define <vscale x 16 x i8> @sqabs_i8_active(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: sqabs_i8_active:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: sqabs z0.b, p0/m, z1.b
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+ %ret = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqabs.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i8> %ret
+}
+
+; Check movprfx is not inserted when predicate is not all active, making the passthru used
+define <vscale x 16 x i8> @sqabs_i8_not_active(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: sqabs_i8_not_active:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: sqabs z0.b, p0/m, z1.b
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %pg.to = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg)
+ %ret = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqabs.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg.to, <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i8> %ret
+}
+
+define <vscale x 8 x i16> @sqabs_i16_dupreg(<vscale x 8 x i16> %a) #0 {
+; CHECK-LABEL: sqabs_i16_dupreg:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: sqabs z0.h, p0/m, z0.h
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %ret = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqabs.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a)
+ ret <vscale x 8 x i16> %ret
+}
+
+define <vscale x 8 x i16> @sqabs_i16_undef(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: sqabs_i16_undef:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: sqabs z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %ret = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqabs.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b)
+ ret <vscale x 8 x i16> %ret
+}
+
+define <vscale x 8 x i16> @sqabs_i16_active(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: sqabs_i16_active:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: sqabs z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %ret = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqabs.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b)
+ ret <vscale x 8 x i16> %ret
+}
+
+define <vscale x 8 x i16> @sqabs_i16_not_active(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: sqabs_i16_not_active:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: sqabs z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %pg.to = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg)
+ %pg.from = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg.to)
+ %ret = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqabs.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg.from, <vscale x 8 x i16> %b)
+ ret <vscale x 8 x i16> %ret
+}
+
+define <vscale x 4 x i32> @sqabs_i32_dupreg(<vscale x 4 x i32> %a) #0 {
+; CHECK-LABEL: sqabs_i32_dupreg:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: sqabs z0.s, p0/m, z0.s
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqabs.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a)
+ ret <vscale x 4 x i32> %ret
+}
+
+define <vscale x 4 x i32> @sqabs_i32_undef(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: sqabs_i32_undef:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: sqabs z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqabs.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %ret
+}
+
+define <vscale x 4 x i32> @sqabs_i32_active(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: sqabs_i32_active:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: sqabs z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqabs.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %ret
+}
+
+define <vscale x 4 x i32> @sqabs_i32_not_active(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: sqabs_i32_not_active:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: sqabs z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %pg.to = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg)
+ %pg.from = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.to)
+ %ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqabs.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg.from, <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %ret
+}
+
+define <vscale x 2 x i64> @sqabs_i64_dupreg(<vscale x 2 x i64> %a) #0 {
+; CHECK-LABEL: sqabs_i64_dupreg:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: sqabs z0.d, p0/m, z0.d
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %ret = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqabs.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %a)
+ ret <vscale x 2 x i64> %ret
+}
+
+define <vscale x 2 x i64> @sqabs_i64_undef(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: sqabs_i64_undef:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: sqabs z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %ret = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqabs.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %ret
+}
+
+define <vscale x 2 x i64> @sqabs_i64_active(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: sqabs_i64_active:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: sqabs z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %ret = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqabs.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %ret
+}
+
+define <vscale x 2 x i64> @sqabs_i64_not_active(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: sqabs_i64_not_active:
+; CHECK: // %bb.0:
+; CHECK: sqabs z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+ %ret = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqabs.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %ret
+}
+
+;
+; URECPE (sve2_int_un_pred_arit_s)
+;
+
+define <vscale x 4 x i32> @urecpe_i32_dupreg(<vscale x 4 x i32> %a) #0 {
+; CHECK-LABEL: urecpe_i32_dupreg:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: urecpe z0.s, p0/m, z0.s
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.urecpe.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a)
+ ret <vscale x 4 x i32> %ret
+}
+
+define <vscale x 4 x i32> @urecpe_i32_undef(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: urecpe_i32_undef:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: urecpe z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.urecpe.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %ret
+}
+
+define <vscale x 4 x i32> @urecpe_i32_active(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: urecpe_i32_active:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: urecpe z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.urecpe.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %ret
+}
+
+define <vscale x 4 x i32> @urecpe_i32_not_active(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: urecpe_i32_not_active:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: urecpe z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+ %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %pg.to = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg)
+ %pg.from = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.to)
+ %ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.urecpe.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg.from, <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %ret
+}
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
+declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32)
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1>)
+
+declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
+declare <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sqabs.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqabs.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqabs.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sqabs.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.urecpe.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
+
+attributes #0 = { nounwind "target-features"="+sve2" }
More information about the llvm-commits
mailing list