[llvm] 871815e - [AArch64][SVE2p1] Add SVE2.1 while (predicate-pair) intrinsics
David Sherwood via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 19 01:32:30 PST 2023
Author: David Sherwood
Date: 2023-01-19T09:32:20Z
New Revision: 871815e062a9e1d143f29333e6129f1cad0f83bb
URL: https://github.com/llvm/llvm-project/commit/871815e062a9e1d143f29333e6129f1cad0f83bb
DIFF: https://github.com/llvm/llvm-project/commit/871815e062a9e1d143f29333e6129f1cad0f83bb.diff
LOG: [AArch64][SVE2p1] Add SVE2.1 while (predicate-pair) intrinsics
Adds intrinsics for the following instructions (a short IR usage sketch follows the list):
* WHILEGE (predicate pair)
* WHILEGT (predicate pair)
* WHILEHI (predicate pair)
* WHILEHS (predicate pair)
* WHILELE (predicate pair)
* WHILELO (predicate pair)
* WHILELS (predicate pair)
* WHILELT (predicate pair)
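As a rough sketch of how these are used at the IR level (this mirrors the
tests added below; the function name here is illustrative), each intrinsic
returns a pair of scalable predicate vectors whose halves are extracted
individually:

  declare { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilelo.x2.nxv16i1(i64, i64)

  define <vscale x 16 x i1> @use_whilelo_pair(i64 %m, i64 %n) {
    ; Build the predicate pair; each result predicate covers one
    ; vector's worth of lanes.
    %pp = call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilelo.x2.nxv16i1(i64 %m, i64 %n)
    ; Extract both halves of the returned struct.
    %lo = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } %pp, 0
    %hi = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1> } %pp, 1
    ; Combine them (the AND here is just for illustration).
    %res = and <vscale x 16 x i1> %lo, %hi
    ret <vscale x 16 x i1> %res
  }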
I've added an opcode selector called SelectOpcodeFromVT to
AArch64ISelDAGToDAG.cpp, which we will extend in the future to select
opcodes for other MVTs. For now, its only use is selecting predicate
types.
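For illustration, a sketch of the mapping distilled from the opcode tables
in the diff below (the element count of the scalable result type picks the
instruction form):

  <vscale x 16 x i1> -> 8-bit form,  e.g. WHILEGE_2PXX_B: whilege { p0.b, p1.b }, x0, x1
  <vscale x 8 x i1>  -> 16-bit form, e.g. WHILEGE_2PXX_H: whilege { p0.h, p1.h }, x0, x1
  <vscale x 4 x i1>  -> 32-bit form, e.g. WHILEGE_2PXX_S: whilege { p0.s, p1.s }, x0, x1
  <vscale x 2 x i1>  -> 64-bit form, e.g. WHILEGE_2PXX_D: whilege { p0.d, p1.d }, x0, x1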
NOTE: These intrinsics are still in development and are subject
to future changes.
Differential Revision: https://reviews.llvm.org/D141936
Added:
llvm/test/CodeGen/AArch64/sve2p1-intrinsics-while-pp.ll
Modified:
llvm/include/llvm/IR/IntrinsicsAArch64.td
llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index a6ecbb66a5d7b..172d844598528 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -2683,4 +2683,13 @@ let TargetPrefix = "aarch64" in {
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
LLVMMatchType<0>, llvm_i32_ty]>;
+
+ //
+ // Predicate-pair intrinsics
+ //
+ foreach cmp = ["ge", "gt", "hi", "hs", "le", "lo", "ls", "lt"] in {
+ def int_aarch64_sve_while # cmp # _x2
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
+ [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
+ }
}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 2ee78b9861ec3..666b6292e28fc 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -356,6 +356,7 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
unsigned Opc_rr, unsigned Opc_ri,
bool IsIntr = false);
+ void SelectWhilePair(SDNode *N, unsigned Opc);
bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
/// SVE Reg+Imm addressing mode.
@@ -1688,6 +1689,64 @@ AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr,
return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset);
}
+enum class SelectTypeKind {
+ Int1 = 0,
+};
+
+/// This function selects an opcode from a list of opcodes, which is
+/// expected to contain the opcodes for the { 8-bit, 16-bit, 32-bit,
+/// 64-bit } element types, in this order.
+template <SelectTypeKind Kind>
+static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef<unsigned> Opcodes) {
+ // Only match scalable vector VTs
+ if (!VT.isScalableVector())
+ return 0;
+
+ EVT EltVT = VT.getVectorElementType();
+ switch (Kind) {
+ case SelectTypeKind::Int1:
+ if (EltVT != MVT::i1)
+ return 0;
+ break;
+ }
+
+ unsigned Offset;
+ switch (VT.getVectorMinNumElements()) {
+ case 16: // 8-bit
+ Offset = 0;
+ break;
+ case 8: // 16-bit
+ Offset = 1;
+ break;
+ case 4: // 32-bit
+ Offset = 2;
+ break;
+ case 2: // 64-bit
+ Offset = 3;
+ break;
+ default:
+ return 0;
+ }
+
+ return (Opcodes.size() <= Offset) ? 0 : Opcodes[Offset];
+}
+
+void AArch64DAGToDAGISel::SelectWhilePair(SDNode *N, unsigned Opc) {
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+
+ SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
+
+ SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
+ SDValue SuperReg = SDValue(WhilePair, 0);
+
+ for (unsigned I = 0; I < 2; ++I)
+ ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
+ AArch64::psub0 + I, DL, VT, SuperReg));
+
+ CurDAG->RemoveDeadNode(N);
+}
+
void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
unsigned Scale, unsigned Opc_ri,
unsigned Opc_rr, bool IsIntr) {
@@ -4623,6 +4682,62 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
if (tryMULLV64LaneV128(IntNo, Node))
return;
break;
+ case Intrinsic::aarch64_sve_whilege_x2:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
+ Node->getValueType(0),
+ {AArch64::WHILEGE_2PXX_B, AArch64::WHILEGE_2PXX_H,
+ AArch64::WHILEGE_2PXX_S, AArch64::WHILEGE_2PXX_D}))
+ SelectWhilePair(Node, Op);
+ return;
+ case Intrinsic::aarch64_sve_whilegt_x2:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
+ Node->getValueType(0),
+ {AArch64::WHILEGT_2PXX_B, AArch64::WHILEGT_2PXX_H,
+ AArch64::WHILEGT_2PXX_S, AArch64::WHILEGT_2PXX_D}))
+ SelectWhilePair(Node, Op);
+ return;
+ case Intrinsic::aarch64_sve_whilehi_x2:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
+ Node->getValueType(0),
+ {AArch64::WHILEHI_2PXX_B, AArch64::WHILEHI_2PXX_H,
+ AArch64::WHILEHI_2PXX_S, AArch64::WHILEHI_2PXX_D}))
+ SelectWhilePair(Node, Op);
+ return;
+ case Intrinsic::aarch64_sve_whilehs_x2:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
+ Node->getValueType(0),
+ {AArch64::WHILEHS_2PXX_B, AArch64::WHILEHS_2PXX_H,
+ AArch64::WHILEHS_2PXX_S, AArch64::WHILEHS_2PXX_D}))
+ SelectWhilePair(Node, Op);
+ return;
+ case Intrinsic::aarch64_sve_whilele_x2:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
+ Node->getValueType(0),
+ {AArch64::WHILELE_2PXX_B, AArch64::WHILELE_2PXX_H,
+ AArch64::WHILELE_2PXX_S, AArch64::WHILELE_2PXX_D}))
+ SelectWhilePair(Node, Op);
+ return;
+ case Intrinsic::aarch64_sve_whilelo_x2:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
+ Node->getValueType(0),
+ {AArch64::WHILELO_2PXX_B, AArch64::WHILELO_2PXX_H,
+ AArch64::WHILELO_2PXX_S, AArch64::WHILELO_2PXX_D}))
+ SelectWhilePair(Node, Op);
+ return;
+ case Intrinsic::aarch64_sve_whilels_x2:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
+ Node->getValueType(0),
+ {AArch64::WHILELS_2PXX_B, AArch64::WHILELS_2PXX_H,
+ AArch64::WHILELS_2PXX_S, AArch64::WHILELS_2PXX_D}))
+ SelectWhilePair(Node, Op);
+ return;
+ case Intrinsic::aarch64_sve_whilelt_x2:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
+ Node->getValueType(0),
+ {AArch64::WHILELT_2PXX_B, AArch64::WHILELT_2PXX_H,
+ AArch64::WHILELT_2PXX_S, AArch64::WHILELT_2PXX_D}))
+ SelectWhilePair(Node, Op);
+ return;
}
break;
}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index d8a3738df505f..324d1cf0d007c 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -299,6 +299,14 @@ static bool isZeroingInactiveLanes(SDValue Op) {
case Intrinsic::aarch64_sve_whilelt:
case Intrinsic::aarch64_sve_match:
case Intrinsic::aarch64_sve_nmatch:
+ case Intrinsic::aarch64_sve_whilege_x2:
+ case Intrinsic::aarch64_sve_whilegt_x2:
+ case Intrinsic::aarch64_sve_whilehi_x2:
+ case Intrinsic::aarch64_sve_whilehs_x2:
+ case Intrinsic::aarch64_sve_whilele_x2:
+ case Intrinsic::aarch64_sve_whilelo_x2:
+ case Intrinsic::aarch64_sve_whilels_x2:
+ case Intrinsic::aarch64_sve_whilelt_x2:
return true;
}
}
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-while-pp.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-while-pp.ll
new file mode 100644
index 0000000000000..ab70f57b48874
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-while-pp.ll
@@ -0,0 +1,663 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64 -mattr=+sve2p1 < %s | FileCheck %s
+
+; == WHILEGE ==
+
+define <vscale x 16 x i1> @whilege_x2_nxv16i1(i64 %m, i64 %n) nounwind {
+; CHECK-LABEL: whilege_x2_nxv16i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: whilege { p0.b, p1.b }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
+; CHECK-NEXT: ret
+ %pp = call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilege.x2.nxv16i1(i64 %m, i64 %n)
+ %res = extractvalue {<vscale x 16 x i1>, <vscale x 16 x i1>} %pp, 0
+ ret <vscale x 16 x i1> %res
+}
+
+define <vscale x 8 x i1> @whilege_x2_nxv8i1(i64 %m, i64 %n) nounwind {
+; CHECK-LABEL: whilege_x2_nxv8i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: whilege { p0.h, p1.h }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
+; CHECK-NEXT: ret
+ %pp = call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilege.x2.nxv8i1(i64 %m, i64 %n)
+ %res = extractvalue {<vscale x 8 x i1>, <vscale x 8 x i1>} %pp, 0
+ ret <vscale x 8 x i1> %res
+}
+
+define <vscale x 4 x i1> @whilege_x2_nxv4i1(i64 %m, i64 %n) nounwind {
+; CHECK-LABEL: whilege_x2_nxv4i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: whilege { p0.s, p1.s }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
+; CHECK-NEXT: ret
+ %pp = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilege.x2.nxv4i1(i64 %m, i64 %n)
+ %res = extractvalue {<vscale x 4 x i1>, <vscale x 4 x i1>} %pp, 0
+ ret <vscale x 4 x i1> %res
+}
+
+define <vscale x 2 x i1> @whilege_x2_nxv2i1(i64 %m, i64 %n) nounwind {
+; CHECK-LABEL: whilege_x2_nxv2i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: whilege { p0.d, p1.d }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
+; CHECK-NEXT: ret
+ %pp = call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilege.x2.nxv2i1(i64 %m, i64 %n)
+ %res = extractvalue {<vscale x 2 x i1>, <vscale x 2 x i1>} %pp, 0
+ ret <vscale x 2 x i1> %res
+}
+
+
+; == WHILEGT ==
+
+define <vscale x 16 x i1> @whilegt_x2_nxv16i1(i64 %m, i64 %n) nounwind {
+; CHECK-LABEL: whilegt_x2_nxv16i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: whilegt { p0.b, p1.b }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
+; CHECK-NEXT: ret
+ %pp = call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilegt.x2.nxv16i1(i64 %m, i64 %n)
+ %res = extractvalue {<vscale x 16 x i1>, <vscale x 16 x i1>} %pp, 0
+ ret <vscale x 16 x i1> %res
+}
+
+define <vscale x 8 x i1> @whilegt_x2_nxv8i1(i64 %m, i64 %n) nounwind {
+; CHECK-LABEL: whilegt_x2_nxv8i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: whilegt { p0.h, p1.h }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
+; CHECK-NEXT: ret
+ %pp = call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilegt.x2.nxv8i1(i64 %m, i64 %n)
+ %res = extractvalue {<vscale x 8 x i1>, <vscale x 8 x i1>} %pp, 0
+ ret <vscale x 8 x i1> %res
+}
+
+define <vscale x 4 x i1> @whilegt_x2_nxv4i1(i64 %m, i64 %n) nounwind {
+; CHECK-LABEL: whilegt_x2_nxv4i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: whilegt { p0.s, p1.s }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
+; CHECK-NEXT: ret
+ %pp = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilegt.x2.nxv4i1(i64 %m, i64 %n)
+ %res = extractvalue {<vscale x 4 x i1>, <vscale x 4 x i1>} %pp, 0
+ ret <vscale x 4 x i1> %res
+}
+
+define <vscale x 2 x i1> @whilegt_x2_nxv2i1(i64 %m, i64 %n) nounwind {
+; CHECK-LABEL: whilegt_x2_nxv2i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: whilegt { p0.d, p1.d }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
+; CHECK-NEXT: ret
+ %pp = call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilegt.x2.nxv2i1(i64 %m, i64 %n)
+ %res = extractvalue {<vscale x 2 x i1>, <vscale x 2 x i1>} %pp, 0
+ ret <vscale x 2 x i1> %res
+}
+
+
+; == WHILEHI ==
+
+define <vscale x 16 x i1> @whilehi_x2_nxv16i1(i64 %m, i64 %n) nounwind {
+; CHECK-LABEL: whilehi_x2_nxv16i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: whilehi { p0.b, p1.b }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
+; CHECK-NEXT: ret
+ %pp = call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilehi.x2.nxv16i1(i64 %m, i64 %n)
+ %res = extractvalue {<vscale x 16 x i1>, <vscale x 16 x i1>} %pp, 0
+ ret <vscale x 16 x i1> %res
+}
+
+define <vscale x 8 x i1> @whilehi_x2_nxv8i1(i64 %m, i64 %n) nounwind {
+; CHECK-LABEL: whilehi_x2_nxv8i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: whilehi { p0.h, p1.h }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
+; CHECK-NEXT: ret
+ %pp = call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilehi.x2.nxv8i1(i64 %m, i64 %n)
+ %res = extractvalue {<vscale x 8 x i1>, <vscale x 8 x i1>} %pp, 0
+ ret <vscale x 8 x i1> %res
+}
+
+define <vscale x 4 x i1> @whilehi_x2_nxv4i1(i64 %m, i64 %n) nounwind {
+; CHECK-LABEL: whilehi_x2_nxv4i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: whilehi { p0.s, p1.s }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
+; CHECK-NEXT: ret
+ %pp = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilehi.x2.nxv4i1(i64 %m, i64 %n)
+ %res = extractvalue {<vscale x 4 x i1>, <vscale x 4 x i1>} %pp, 0
+ ret <vscale x 4 x i1> %res
+}
+
+define <vscale x 2 x i1> @whilehi_x2_nxv2i1(i64 %m, i64 %n) nounwind {
+; CHECK-LABEL: whilehi_x2_nxv2i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: whilehi { p0.d, p1.d }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
+; CHECK-NEXT: ret
+ %pp = call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilehi.x2.nxv2i1(i64 %m, i64 %n)
+ %res = extractvalue {<vscale x 2 x i1>, <vscale x 2 x i1>} %pp, 0
+ ret <vscale x 2 x i1> %res
+}
+
+
+; == WHILEHS ==
+
+define <vscale x 16 x i1> @whilehs_x2_nxv16i1(i64 %m, i64 %n) nounwind {
+; CHECK-LABEL: whilehs_x2_nxv16i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: whilehs { p0.b, p1.b }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
+; CHECK-NEXT: ret
+ %pp = call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilehs.x2.nxv16i1(i64 %m, i64 %n)
+ %res = extractvalue {<vscale x 16 x i1>, <vscale x 16 x i1>} %pp, 0
+ ret <vscale x 16 x i1> %res
+}
+
+define <vscale x 8 x i1> @whilehs_x2_nxv8i1(i64 %m, i64 %n) nounwind {
+; CHECK-LABEL: whilehs_x2_nxv8i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: whilehs { p0.h, p1.h }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
+; CHECK-NEXT: ret
+ %pp = call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilehs.x2.nxv8i1(i64 %m, i64 %n)
+ %res = extractvalue {<vscale x 8 x i1>, <vscale x 8 x i1>} %pp, 0
+ ret <vscale x 8 x i1> %res
+}
+
+define <vscale x 4 x i1> @whilehs_x2_nxv4i1(i64 %m, i64 %n) nounwind {
+; CHECK-LABEL: whilehs_x2_nxv4i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: whilehs { p0.s, p1.s }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
+; CHECK-NEXT: ret
+ %pp = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilehs.x2.nxv4i1(i64 %m, i64 %n)
+ %res = extractvalue {<vscale x 4 x i1>, <vscale x 4 x i1>} %pp, 0
+ ret <vscale x 4 x i1> %res
+}
+
+define <vscale x 2 x i1> @whilehs_x2_nxv2i1(i64 %m, i64 %n) nounwind {
+; CHECK-LABEL: whilehs_x2_nxv2i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: whilehs { p0.d, p1.d }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
+; CHECK-NEXT: ret
+ %pp = call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilehs.x2.nxv2i1(i64 %m, i64 %n)
+ %res = extractvalue {<vscale x 2 x i1>, <vscale x 2 x i1>} %pp, 0
+ ret <vscale x 2 x i1> %res
+}
+
+
+; == WHILELE ==
+
+define <vscale x 16 x i1> @whilele_x2_nxv16i1(i64 %m, i64 %n) nounwind {
+; CHECK-LABEL: whilele_x2_nxv16i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: whilele { p0.b, p1.b }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
+; CHECK-NEXT: ret
+ %pp = call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilele.x2.nxv16i1(i64 %m, i64 %n)
+ %res = extractvalue {<vscale x 16 x i1>, <vscale x 16 x i1>} %pp, 0
+ ret <vscale x 16 x i1> %res
+}
+
+define <vscale x 8 x i1> @whilele_x2_nxv8i1(i64 %m, i64 %n) nounwind {
+; CHECK-LABEL: whilele_x2_nxv8i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: whilele { p0.h, p1.h }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
+; CHECK-NEXT: ret
+ %pp = call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilele.x2.nxv8i1(i64 %m, i64 %n)
+ %res = extractvalue {<vscale x 8 x i1>, <vscale x 8 x i1>} %pp, 0
+ ret <vscale x 8 x i1> %res
+}
+
+define <vscale x 4 x i1> @whilele_x2_nxv4i1(i64 %m, i64 %n) nounwind {
+; CHECK-LABEL: whilele_x2_nxv4i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: whilele { p0.s, p1.s }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
+; CHECK-NEXT: ret
+ %pp = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilele.x2.nxv4i1(i64 %m, i64 %n)
+ %res = extractvalue {<vscale x 4 x i1>, <vscale x 4 x i1>} %pp, 0
+ ret <vscale x 4 x i1> %res
+}
+
+define <vscale x 2 x i1> @whilele_x2_nxv2i1(i64 %m, i64 %n) nounwind {
+; CHECK-LABEL: whilele_x2_nxv2i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: whilele { p0.d, p1.d }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
+; CHECK-NEXT: ret
+ %pp = call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilele.x2.nxv2i1(i64 %m, i64 %n)
+ %res = extractvalue {<vscale x 2 x i1>, <vscale x 2 x i1>} %pp, 0
+ ret <vscale x 2 x i1> %res
+}
+
+
+; == WHILELO ==
+
+define <vscale x 16 x i1> @whilelo_x2_nxv16i1(i64 %m, i64 %n) nounwind {
+; CHECK-LABEL: whilelo_x2_nxv16i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: whilelo { p0.b, p1.b }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
+; CHECK-NEXT: ret
+ %pp = call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilelo.x2.nxv16i1(i64 %m, i64 %n)
+ %res = extractvalue {<vscale x 16 x i1>, <vscale x 16 x i1>} %pp, 0
+ ret <vscale x 16 x i1> %res
+}
+
+define <vscale x 8 x i1> @whilelo_x2_nxv8i1(i64 %m, i64 %n) nounwind {
+; CHECK-LABEL: whilelo_x2_nxv8i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: whilelo { p0.h, p1.h }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
+; CHECK-NEXT: ret
+ %pp = call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilelo.x2.nxv8i1(i64 %m, i64 %n)
+ %res = extractvalue {<vscale x 8 x i1>, <vscale x 8 x i1>} %pp, 0
+ ret <vscale x 8 x i1> %res
+}
+
+define <vscale x 4 x i1> @whilelo_x2_nxv4i1(i64 %m, i64 %n) nounwind {
+; CHECK-LABEL: whilelo_x2_nxv4i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: whilelo { p0.s, p1.s }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
+; CHECK-NEXT: ret
+ %pp = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilelo.x2.nxv4i1(i64 %m, i64 %n)
+ %res = extractvalue {<vscale x 4 x i1>, <vscale x 4 x i1>} %pp, 0
+ ret <vscale x 4 x i1> %res
+}
+
+define <vscale x 2 x i1> @whilelo_x2_nxv2i1(i64 %m, i64 %n) nounwind {
+; CHECK-LABEL: whilelo_x2_nxv2i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: whilelo { p0.d, p1.d }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
+; CHECK-NEXT: ret
+ %pp = call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilelo.x2.nxv2i1(i64 %m, i64 %n)
+ %res = extractvalue {<vscale x 2 x i1>, <vscale x 2 x i1>} %pp, 0
+ ret <vscale x 2 x i1> %res
+}
+
+
+; == WHILELS ==
+
+define <vscale x 16 x i1> @whilels_x2_nxv16i1(i64 %m, i64 %n) nounwind {
+; CHECK-LABEL: whilels_x2_nxv16i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: whilels { p0.b, p1.b }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
+; CHECK-NEXT: ret
+ %pp = call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilels.x2.nxv16i1(i64 %m, i64 %n)
+ %res = extractvalue {<vscale x 16 x i1>, <vscale x 16 x i1>} %pp, 0
+ ret <vscale x 16 x i1> %res
+}
+
+define <vscale x 8 x i1> @whilels_x2_nxv8i1(i64 %m, i64 %n) nounwind {
+; CHECK-LABEL: whilels_x2_nxv8i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: whilels { p0.h, p1.h }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
+; CHECK-NEXT: ret
+ %pp = call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilels.x2.nxv8i1(i64 %m, i64 %n)
+ %res = extractvalue {<vscale x 8 x i1>, <vscale x 8 x i1>} %pp, 0
+ ret <vscale x 8 x i1> %res
+}
+
+define <vscale x 4 x i1> @whilels_x2_nxv4i1(i64 %m, i64 %n) nounwind {
+; CHECK-LABEL: whilels_x2_nxv4i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: whilels { p0.s, p1.s }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
+; CHECK-NEXT: ret
+ %pp = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilels.x2.nxv4i1(i64 %m, i64 %n)
+ %res = extractvalue {<vscale x 4 x i1>, <vscale x 4 x i1>} %pp, 0
+ ret <vscale x 4 x i1> %res
+}
+
+define <vscale x 2 x i1> @whilels_x2_nxv2i1(i64 %m, i64 %n) nounwind {
+; CHECK-LABEL: whilels_x2_nxv2i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: whilels { p0.d, p1.d }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
+; CHECK-NEXT: ret
+ %pp = call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilels.x2.nxv2i1(i64 %m, i64 %n)
+ %res = extractvalue {<vscale x 2 x i1>, <vscale x 2 x i1>} %pp, 0
+ ret <vscale x 2 x i1> %res
+}
+
+
+; == WHILELT ==
+
+define <vscale x 16 x i1> @whilelt_x2_nxv16i1(i64 %m, i64 %n) nounwind {
+; CHECK-LABEL: whilelt_x2_nxv16i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: whilelt { p0.b, p1.b }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
+; CHECK-NEXT: ret
+ %pp = call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilelt.x2.nxv16i1(i64 %m, i64 %n)
+ %res = extractvalue {<vscale x 16 x i1>, <vscale x 16 x i1>} %pp, 0
+ ret <vscale x 16 x i1> %res
+}
+
+define <vscale x 8 x i1> @whilelt_x2_nxv8i1(i64 %m, i64 %n) nounwind {
+; CHECK-LABEL: whilelt_x2_nxv8i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: whilelt { p0.h, p1.h }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
+; CHECK-NEXT: ret
+ %pp = call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilelt.x2.nxv8i1(i64 %m, i64 %n)
+ %res = extractvalue {<vscale x 8 x i1>, <vscale x 8 x i1>} %pp, 0
+ ret <vscale x 8 x i1> %res
+}
+
+define <vscale x 4 x i1> @whilelt_x2_nxv4i1(i64 %m, i64 %n) nounwind {
+; CHECK-LABEL: whilelt_x2_nxv4i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: whilelt { p0.s, p1.s }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
+; CHECK-NEXT: ret
+ %pp = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilelt.x2.nxv4i1(i64 %m, i64 %n)
+ %res = extractvalue {<vscale x 4 x i1>, <vscale x 4 x i1>} %pp, 0
+ ret <vscale x 4 x i1> %res
+}
+
+define <vscale x 2 x i1> @whilelt_x2_nxv2i1(i64 %m, i64 %n) nounwind {
+; CHECK-LABEL: whilelt_x2_nxv2i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: whilelt { p0.d, p1.d }, x0, x1
+; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1
+; CHECK-NEXT: ret
+ %pp = call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilelt.x2.nxv2i1(i64 %m, i64 %n)
+ %res = extractvalue {<vscale x 2 x i1>, <vscale x 2 x i1>} %pp, 0
+ ret <vscale x 2 x i1> %res
+}
+
+
+; Test that we get good code quality when using the while (predicate-pair) intrinsics in combination with other intrinsics
+
+define <vscale x 32 x i1> @codegen_whilege_b16_x2(i64 noundef %op1, i64 noundef %op2) nounwind {
+; CHECK-LABEL: codegen_whilege_b16_x2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: whilege { p0.h, p1.h }, x0, x1
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilege.x2.nxv8i1(i64 %op1, i64 %op2)
+ %1 = extractvalue { <vscale x 8 x i1>, <vscale x 8 x i1> } %0, 0
+ %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %1)
+ %3 = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> %2, i64 0)
+ %4 = extractvalue { <vscale x 8 x i1>, <vscale x 8 x i1> } %0, 1
+ %5 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %4)
+ %6 = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> %3, <vscale x 16 x i1> %5, i64 16)
+ ret <vscale x 32 x i1> %6
+}
+
+define <vscale x 32 x i1> @codegen_whilegt_b32_x2(i64 noundef %op1, i64 noundef %op2) nounwind {
+; CHECK-LABEL: codegen_whilegt_b32_x2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: whilegt { p0.s, p1.s }, x0, x1
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilegt.x2.nxv4i1(i64 %op1, i64 %op2)
+ %1 = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } %0, 0
+ %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %1)
+ %3 = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> %2, i64 0)
+ %4 = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } %0, 1
+ %5 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %4)
+ %6 = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> %3, <vscale x 16 x i1> %5, i64 16)
+ ret <vscale x 32 x i1> %6
+}
+
+define <vscale x 32 x i1> @codegen_whilehi_b64_x2(i64 noundef %op1, i64 noundef %op2) nounwind {
+; CHECK-LABEL: codegen_whilehi_b64_x2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: whilehi { p0.d, p1.d }, x0, x1
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilehi.x2.nxv2i1(i64 %op1, i64 %op2)
+ %1 = extractvalue { <vscale x 2 x i1>, <vscale x 2 x i1> } %0, 0
+ %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %1)
+ %3 = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> %2, i64 0)
+ %4 = extractvalue { <vscale x 2 x i1>, <vscale x 2 x i1> } %0, 1
+ %5 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %4)
+ %6 = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> %3, <vscale x 16 x i1> %5, i64 16)
+ ret <vscale x 32 x i1> %6
+}
+
+define <vscale x 32 x i1> @codegen_whilehs_b16_x2(i64 noundef %op1, i64 noundef %op2) nounwind {
+; CHECK-LABEL: codegen_whilehs_b16_x2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: whilehs { p0.h, p1.h }, x0, x1
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilehs.x2.nxv8i1(i64 %op1, i64 %op2)
+ %1 = extractvalue { <vscale x 8 x i1>, <vscale x 8 x i1> } %0, 0
+ %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %1)
+ %3 = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> %2, i64 0)
+ %4 = extractvalue { <vscale x 8 x i1>, <vscale x 8 x i1> } %0, 1
+ %5 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %4)
+ %6 = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> %3, <vscale x 16 x i1> %5, i64 16)
+ ret <vscale x 32 x i1> %6
+}
+
+define <vscale x 32 x i1> @codegen_whilele_b32_x2(i64 noundef %op1, i64 noundef %op2) nounwind {
+; CHECK-LABEL: codegen_whilele_b32_x2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: whilele { p0.s, p1.s }, x0, x1
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilele.x2.nxv4i1(i64 %op1, i64 %op2)
+ %1 = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } %0, 0
+ %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %1)
+ %3 = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> %2, i64 0)
+ %4 = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } %0, 1
+ %5 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %4)
+ %6 = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> %3, <vscale x 16 x i1> %5, i64 16)
+ ret <vscale x 32 x i1> %6
+}
+
+define <vscale x 32 x i1> @codegen_whilelo_b64_x2(i64 noundef %op1, i64 noundef %op2) nounwind {
+; CHECK-LABEL: codegen_whilelo_b64_x2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: whilelo { p0.d, p1.d }, x0, x1
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilelo.x2.nxv2i1(i64 %op1, i64 %op2)
+ %1 = extractvalue { <vscale x 2 x i1>, <vscale x 2 x i1> } %0, 0
+ %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %1)
+ %3 = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> %2, i64 0)
+ %4 = extractvalue { <vscale x 2 x i1>, <vscale x 2 x i1> } %0, 1
+ %5 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %4)
+ %6 = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> %3, <vscale x 16 x i1> %5, i64 16)
+ ret <vscale x 32 x i1> %6
+}
+
+define <vscale x 32 x i1> @codegen_whilels_b16_x2(i64 noundef %op1, i64 noundef %op2) nounwind {
+; CHECK-LABEL: codegen_whilels_b16_x2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: whilels { p0.h, p1.h }, x0, x1
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilels.x2.nxv8i1(i64 %op1, i64 %op2)
+ %1 = extractvalue { <vscale x 8 x i1>, <vscale x 8 x i1> } %0, 0
+ %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %1)
+ %3 = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> %2, i64 0)
+ %4 = extractvalue { <vscale x 8 x i1>, <vscale x 8 x i1> } %0, 1
+ %5 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %4)
+ %6 = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> %3, <vscale x 16 x i1> %5, i64 16)
+ ret <vscale x 32 x i1> %6
+}
+
+define <vscale x 32 x i1> @codegen_whilelt_b32_x2(i64 noundef %op1, i64 noundef %op2) nounwind {
+; CHECK-LABEL: codegen_whilelt_b32_x2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: whilelt { p0.s, p1.s }, x0, x1
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilelt.x2.nxv4i1(i64 %op1, i64 %op2)
+ %1 = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } %0, 0
+ %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %1)
+ %3 = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> poison, <vscale x 16 x i1> %2, i64 0)
+ %4 = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } %0, 1
+ %5 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %4)
+ %6 = tail call <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1> %3, <vscale x 16 x i1> %5, i64 16)
+ ret <vscale x 32 x i1> %6
+}
+
+
+; == Test that we use predicate registers starting at a multiple of 2 ==
+
+define <vscale x 16 x i1> @whilege_x2_nxv16i1_reg_off(<vscale x 16 x i1> %p0, i64 %m, i64 %n) nounwind {
+; CHECK-LABEL: whilege_x2_nxv16i1_reg_off:
+; CHECK: // %bb.0:
+; CHECK-NEXT: whilege { p2.b, p3.b }, x0, x1
+; CHECK-NEXT: and p0.b, p2/z, p2.b, p0.b
+; CHECK-NEXT: ret
+ %pp = call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilege.x2.nxv16i1(i64 %m, i64 %n)
+ %part1 = extractvalue {<vscale x 16 x i1>, <vscale x 16 x i1>} %pp, 0
+ %res = and <vscale x 16 x i1> %part1, %p0
+ ret <vscale x 16 x i1> %res
+}
+
+define <vscale x 8 x i1> @whilegt_x2_nxv8i1_reg_off(<vscale x 8 x i1> %p0, i64 %m, i64 %n) nounwind {
+; CHECK-LABEL: whilegt_x2_nxv8i1_reg_off:
+; CHECK: // %bb.0:
+; CHECK-NEXT: whilegt { p2.h, p3.h }, x0, x1
+; CHECK-NEXT: and p0.b, p2/z, p2.b, p0.b
+; CHECK-NEXT: ret
+ %pp = call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilegt.x2.nxv8i1(i64 %m, i64 %n)
+ %part1 = extractvalue {<vscale x 8 x i1>, <vscale x 8 x i1>} %pp, 0
+ %res = and <vscale x 8 x i1> %part1, %p0
+ ret <vscale x 8 x i1> %res
+}
+
+define <vscale x 4 x i1> @whilehi_x2_nxv4i1_reg_off(<vscale x 4 x i1> %p0, i64 %m, i64 %n) nounwind {
+; CHECK-LABEL: whilehi_x2_nxv4i1_reg_off:
+; CHECK: // %bb.0:
+; CHECK-NEXT: whilehi { p2.s, p3.s }, x0, x1
+; CHECK-NEXT: and p0.b, p2/z, p2.b, p0.b
+; CHECK-NEXT: ret
+ %pp = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilehi.x2.nxv4i1(i64 %m, i64 %n)
+ %part1 = extractvalue {<vscale x 4 x i1>, <vscale x 4 x i1>} %pp, 0
+ %res = and <vscale x 4 x i1> %part1, %p0
+ ret <vscale x 4 x i1> %res
+}
+
+define <vscale x 2 x i1> @whilehs_x2_nxv2i1_reg_off(<vscale x 2 x i1> %p0, i64 %m, i64 %n) nounwind {
+; CHECK-LABEL: whilehs_x2_nxv2i1_reg_off:
+; CHECK: // %bb.0:
+; CHECK-NEXT: whilehs { p2.d, p3.d }, x0, x1
+; CHECK-NEXT: and p0.b, p2/z, p2.b, p0.b
+; CHECK-NEXT: ret
+ %pp = call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilehs.x2.nxv2i1(i64 %m, i64 %n)
+ %part1 = extractvalue {<vscale x 2 x i1>, <vscale x 2 x i1>} %pp, 0
+ %res = and <vscale x 2 x i1> %part1, %p0
+ ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 16 x i1> @whilele_x2_nxv16i1_reg_off(<vscale x 16 x i1> %p0, i64 %m, i64 %n) nounwind {
+; CHECK-LABEL: whilele_x2_nxv16i1_reg_off:
+; CHECK: // %bb.0:
+; CHECK-NEXT: whilele { p2.b, p3.b }, x0, x1
+; CHECK-NEXT: and p0.b, p2/z, p2.b, p0.b
+; CHECK-NEXT: ret
+ %pp = call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilele.x2.nxv16i1(i64 %m, i64 %n)
+ %part1 = extractvalue {<vscale x 16 x i1>, <vscale x 16 x i1>} %pp, 0
+ %res = and <vscale x 16 x i1> %part1, %p0
+ ret <vscale x 16 x i1> %res
+}
+
+define <vscale x 8 x i1> @whilelo_x2_nxv8i1_reg_off(<vscale x 8 x i1> %p0, i64 %m, i64 %n) nounwind {
+; CHECK-LABEL: whilelo_x2_nxv8i1_reg_off:
+; CHECK: // %bb.0:
+; CHECK-NEXT: whilelo { p2.h, p3.h }, x0, x1
+; CHECK-NEXT: and p0.b, p2/z, p2.b, p0.b
+; CHECK-NEXT: ret
+ %pp = call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilelo.x2.nxv8i1(i64 %m, i64 %n)
+ %part1 = extractvalue {<vscale x 8 x i1>, <vscale x 8 x i1>} %pp, 0
+ %res = and <vscale x 8 x i1> %part1, %p0
+ ret <vscale x 8 x i1> %res
+}
+
+define <vscale x 4 x i1> @whilels_x2_nxv4i1_reg_off(<vscale x 4 x i1> %p0, i64 %m, i64 %n) nounwind {
+; CHECK-LABEL: whilels_x2_nxv4i1_reg_off:
+; CHECK: // %bb.0:
+; CHECK-NEXT: whilels { p2.s, p3.s }, x0, x1
+; CHECK-NEXT: and p0.b, p2/z, p2.b, p0.b
+; CHECK-NEXT: ret
+ %pp = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilels.x2.nxv4i1(i64 %m, i64 %n)
+ %part1 = extractvalue {<vscale x 4 x i1>, <vscale x 4 x i1>} %pp, 0
+ %res = and <vscale x 4 x i1> %part1, %p0
+ ret <vscale x 4 x i1> %res
+}
+
+define <vscale x 2 x i1> @whilelt_x2_nxv2i1_reg_off(<vscale x 2 x i1> %p0, i64 %m, i64 %n) nounwind {
+; CHECK-LABEL: whilelt_x2_nxv2i1_reg_off:
+; CHECK: // %bb.0:
+; CHECK-NEXT: whilelt { p2.d, p3.d }, x0, x1
+; CHECK-NEXT: and p0.b, p2/z, p2.b, p0.b
+; CHECK-NEXT: ret
+ %pp = call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilelt.x2.nxv2i1(i64 %m, i64 %n)
+ %part1 = extractvalue {<vscale x 2 x i1>, <vscale x 2 x i1>} %pp, 0
+ %res = and <vscale x 2 x i1> %part1, %p0
+ ret <vscale x 2 x i1> %res
+}
+
+; == WHILEGE ==
+declare { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilege.x2.nxv16i1(i64, i64)
+declare { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilege.x2.nxv8i1(i64, i64)
+declare { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilege.x2.nxv4i1(i64, i64)
+declare { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilege.x2.nxv2i1(i64, i64)
+
+; == WHILEGT ==
+declare { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilegt.x2.nxv16i1(i64, i64)
+declare { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilegt.x2.nxv8i1(i64, i64)
+declare { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilegt.x2.nxv4i1(i64, i64)
+declare { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilegt.x2.nxv2i1(i64, i64)
+
+; == WHILEHI ==
+declare { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilehi.x2.nxv16i1(i64, i64)
+declare { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilehi.x2.nxv8i1(i64, i64)
+declare { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilehi.x2.nxv4i1(i64, i64)
+declare { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilehi.x2.nxv2i1(i64, i64)
+
+; == WHILEHS ==
+declare { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilehs.x2.nxv16i1(i64, i64)
+declare { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilehs.x2.nxv8i1(i64, i64)
+declare { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilehs.x2.nxv4i1(i64, i64)
+declare { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilehs.x2.nxv2i1(i64, i64)
+
+; == WHILELE ==
+declare { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilele.x2.nxv16i1(i64, i64)
+declare { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilele.x2.nxv8i1(i64, i64)
+declare { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilele.x2.nxv4i1(i64, i64)
+declare { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilele.x2.nxv2i1(i64, i64)
+
+; == WHILELO ==
+declare { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilelo.x2.nxv16i1(i64, i64)
+declare { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilelo.x2.nxv8i1(i64, i64)
+declare { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilelo.x2.nxv4i1(i64, i64)
+declare { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilelo.x2.nxv2i1(i64, i64)
+
+; == WHILELS ==
+declare { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilels.x2.nxv16i1(i64, i64)
+declare { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilels.x2.nxv8i1(i64, i64)
+declare { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilels.x2.nxv4i1(i64, i64)
+declare { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilels.x2.nxv2i1(i64, i64)
+
+; == WHILELT ==
+declare { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilelt.x2.nxv16i1(i64, i64)
+declare { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilelt.x2.nxv8i1(i64, i64)
+declare { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilelt.x2.nxv4i1(i64, i64)
+declare { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilelt.x2.nxv2i1(i64, i64)
+
+; == SVBOOL CONVERSION ==
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
+
+; == VECTOR INSERTS ==
+declare <vscale x 32 x i1> @llvm.vector.insert.nxv32i1.nxv16i1(<vscale x 32 x i1>, <vscale x 16 x i1>, i64 immarg)