[llvm] 61e54fd - [SVE][AArch64] Adding patterns for while intrinsics.

Wed Dec 4 09:34:41 PST 2019

Author: Mikhail Gudim
Date: 2019-12-04T12:33:50-05:00
New Revision: 61e54fd60c4388602cb029c984353b1f4319c1ac

URL: https://github.com/llvm/llvm-project/commit/61e54fd60c4388602cb029c984353b1f4319c1ac
DIFF: https://github.com/llvm/llvm-project/commit/61e54fd60c4388602cb029c984353b1f4319c1ac.diff

LOG: [SVE][AArch64] Adding patterns for while intrinsics.

Added: 
    llvm/test/CodeGen/AArch64/sve-intrinsics-while.ll

Modified: 
    llvm/include/llvm/IR/IntrinsicsAArch64.td
    llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
    llvm/lib/Target/AArch64/SVEInstrFormats.td

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 1044f071e78d..8d81b325f083 100644

--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -989,6 +989,10 @@ class AdvSIMD_GatherLoad_32bitOffset_Intrinsic
 // SVE
 
 let TargetPrefix = "aarch64" in {  // All intrinsics start with "llvm.aarch64.".
+  class AdvSIMD_SVE_WHILE_Intrinsic
+    : Intrinsic<[llvm_anyvector_ty],
+                [llvm_anyint_ty, LLVMMatchType<1>],
+                [IntrNoMem]>;
 
 class AdvSIMD_GatherLoad_VecTorBase_Intrinsic
     : Intrinsic<[llvm_anyvector_ty],
@@ -1099,6 +1103,19 @@ def int_aarch64_sve_uxtb : AdvSIMD_Merged1VectorArg_Intrinsic;
 def int_aarch64_sve_uxth : AdvSIMD_Merged1VectorArg_Intrinsic;
 def int_aarch64_sve_uxtw : AdvSIMD_Merged1VectorArg_Intrinsic;
 
+//
+// While comparisons
+//
+
+def int_aarch64_sve_whilele : AdvSIMD_SVE_WHILE_Intrinsic;
+def int_aarch64_sve_whilelo : AdvSIMD_SVE_WHILE_Intrinsic;
+def int_aarch64_sve_whilels : AdvSIMD_SVE_WHILE_Intrinsic;
+def int_aarch64_sve_whilelt : AdvSIMD_SVE_WHILE_Intrinsic;
+def int_aarch64_sve_whilege : AdvSIMD_SVE_WHILE_Intrinsic;
+def int_aarch64_sve_whilegt : AdvSIMD_SVE_WHILE_Intrinsic;
+def int_aarch64_sve_whilehs : AdvSIMD_SVE_WHILE_Intrinsic;
+def int_aarch64_sve_whilehi : AdvSIMD_SVE_WHILE_Intrinsic;
+
 //
 // Floating-point arithmetic
 //

diff  --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index cf0e5c516c14..d5c890898e9e 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -769,15 +769,15 @@ let Predicates = [HasSVE] in {
   defm FCMEQ_PPzZ0 : sve_fp_2op_p_pd<0b100, "fcmeq">;
   defm FCMNE_PPzZ0 : sve_fp_2op_p_pd<0b110, "fcmne">;
 
-  defm WHILELT_PWW : sve_int_while4_rr<0b010, "whilelt">;
-  defm WHILELE_PWW : sve_int_while4_rr<0b011, "whilele">;
-  defm WHILELO_PWW : sve_int_while4_rr<0b110, "whilelo">;
-  defm WHILELS_PWW : sve_int_while4_rr<0b111, "whilels">;
+  defm WHILELT_PWW : sve_int_while4_rr<0b010, "whilelt", int_aarch64_sve_whilelt>;
+  defm WHILELE_PWW : sve_int_while4_rr<0b011, "whilele", int_aarch64_sve_whilele>;
+  defm WHILELO_PWW : sve_int_while4_rr<0b110, "whilelo", int_aarch64_sve_whilelo>;
+  defm WHILELS_PWW : sve_int_while4_rr<0b111, "whilels", int_aarch64_sve_whilels>;
 
-  defm WHILELT_PXX : sve_int_while8_rr<0b010, "whilelt">;
-  defm WHILELE_PXX : sve_int_while8_rr<0b011, "whilele">;
-  defm WHILELO_PXX : sve_int_while8_rr<0b110, "whilelo">;
-  defm WHILELS_PXX : sve_int_while8_rr<0b111, "whilels">;
+  defm WHILELT_PXX : sve_int_while8_rr<0b010, "whilelt", int_aarch64_sve_whilelt>;
+  defm WHILELE_PXX : sve_int_while8_rr<0b011, "whilele", int_aarch64_sve_whilele>;
+  defm WHILELO_PXX : sve_int_while8_rr<0b110, "whilelo", int_aarch64_sve_whilelo>;
+  defm WHILELS_PXX : sve_int_while8_rr<0b111, "whilels", int_aarch64_sve_whilels>;
 
   def CTERMEQ_WW : sve_int_cterm<0b0, 0b0, "ctermeq", GPR32>;
   def CTERMNE_WW : sve_int_cterm<0b0, 0b1, "ctermne", GPR32>;
@@ -1511,15 +1511,16 @@ let Predicates = [HasSVE2] in {
   defm TBX_ZZZ  : sve2_int_perm_tbx<"tbx">;
 
   // SVE2 integer compare scalar count and limit
-  defm WHILEGE_PWW : sve_int_while4_rr<0b000, "whilege">;
-  defm WHILEGT_PWW : sve_int_while4_rr<0b001, "whilegt">;
-  defm WHILEHS_PWW : sve_int_while4_rr<0b100, "whilehs">;
-  defm WHILEHI_PWW : sve_int_while4_rr<0b101, "whilehi">;
-
-  defm WHILEGE_PXX : sve_int_while8_rr<0b000, "whilege">;
-  defm WHILEGT_PXX : sve_int_while8_rr<0b001, "whilegt">;
-  defm WHILEHS_PXX : sve_int_while8_rr<0b100, "whilehs">;
-  defm WHILEHI_PXX : sve_int_while8_rr<0b101, "whilehi">;
+  defm WHILEGE_PWW : sve_int_while4_rr<0b000, "whilege", null_frag>;
+  defm WHILEGT_PWW : sve_int_while4_rr<0b001, "whilegt", null_frag>;
+  defm WHILEHS_PWW : sve_int_while4_rr<0b100, "whilehs", null_frag>;
+  defm WHILEHI_PWW : sve_int_while4_rr<0b101, "whilehi", null_frag>;
+
+  defm WHILEGE_PXX : sve_int_while8_rr<0b000, "whilege", null_frag>;
+  defm WHILEGT_PXX : sve_int_while8_rr<0b001, "whilegt", null_frag>;
+  defm WHILEHS_PXX : sve_int_while8_rr<0b100, "whilehs", null_frag>;
+  defm WHILEHI_PXX : sve_int_while8_rr<0b101, "whilehi", null_frag>;
+
 
   // SVE2 pointer conflict compare
   defm WHILEWR_PXX : sve2_int_while_rr<0b0, "whilewr">;

diff  --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 7ce831cd3b59..bda517e16c73 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -3630,7 +3630,8 @@ class sve_int_cterm<bit sz, bit opc, string asm, RegisterClass rt>
 }
 
 class sve_int_while_rr<bits<2> sz8_64, bits<4> opc, string asm,
-                       RegisterClass gprty, PPRRegOp pprty>
+                       RegisterClass gprty, PPRRegOp pprty,
+                       ValueType vt, SDPatternOperator op>
 : I<(outs pprty:$Pd), (ins gprty:$Rn, gprty:$Rm),
   asm, "\t$Pd, $Rn, $Rm",
   "", []>, Sched<[]> {
@@ -3650,18 +3651,28 @@ class sve_int_while_rr<bits<2> sz8_64, bits<4> opc, string asm,
   let Defs = [NZCV];
 }
 
-multiclass sve_int_while4_rr<bits<3> opc, string asm> {
-  def _B : sve_int_while_rr<0b00, { 0, opc }, asm, GPR32, PPR8>;
-  def _H : sve_int_while_rr<0b01, { 0, opc }, asm, GPR32, PPR16>;
-  def _S : sve_int_while_rr<0b10, { 0, opc }, asm, GPR32, PPR32>;
-  def _D : sve_int_while_rr<0b11, { 0, opc }, asm, GPR32, PPR64>;
+multiclass sve_int_while4_rr<bits<3> opc, string asm, SDPatternOperator op> {
+  def _B : sve_int_while_rr<0b00, { 0, opc }, asm, GPR32, PPR8, nxv16i1, op>;
+  def _H : sve_int_while_rr<0b01, { 0, opc }, asm, GPR32, PPR16, nxv8i1, op>;
+  def _S : sve_int_while_rr<0b10, { 0, opc }, asm, GPR32, PPR32, nxv4i1, op>;
+  def _D : sve_int_while_rr<0b11, { 0, opc }, asm, GPR32, PPR64, nxv2i1, op>;
+
+  def : SVE_2_Op_Pat<nxv16i1, op, i32, i32, !cast<Instruction>(NAME # _B)>;
+  def : SVE_2_Op_Pat<nxv8i1, op, i32, i32, !cast<Instruction>(NAME # _H)>;
+  def : SVE_2_Op_Pat<nxv4i1, op, i32, i32, !cast<Instruction>(NAME # _S)>;
+  def : SVE_2_Op_Pat<nxv2i1, op, i32, i32, !cast<Instruction>(NAME # _D)>;
 }
 
-multiclass sve_int_while8_rr<bits<3> opc, string asm> {
-  def _B : sve_int_while_rr<0b00, { 1, opc }, asm, GPR64, PPR8>;
-  def _H : sve_int_while_rr<0b01, { 1, opc }, asm, GPR64, PPR16>;
-  def _S : sve_int_while_rr<0b10, { 1, opc }, asm, GPR64, PPR32>;
-  def _D : sve_int_while_rr<0b11, { 1, opc }, asm, GPR64, PPR64>;
+multiclass sve_int_while8_rr<bits<3> opc, string asm, SDPatternOperator op> {
+  def _B : sve_int_while_rr<0b00, { 1, opc }, asm, GPR64, PPR8, nxv16i1, op>;
+  def _H : sve_int_while_rr<0b01, { 1, opc }, asm, GPR64, PPR16, nxv8i1, op>;
+  def _S : sve_int_while_rr<0b10, { 1, opc }, asm, GPR64, PPR32, nxv4i1, op>;
+  def _D : sve_int_while_rr<0b11, { 1, opc }, asm, GPR64, PPR64, nxv2i1, op>;
+
+  def : SVE_2_Op_Pat<nxv16i1, op, i64, i64, !cast<Instruction>(NAME # _B)>;
+  def : SVE_2_Op_Pat<nxv8i1, op, i64, i64, !cast<Instruction>(NAME # _H)>;
+  def : SVE_2_Op_Pat<nxv4i1, op, i64, i64, !cast<Instruction>(NAME # _S)>;
+  def : SVE_2_Op_Pat<nxv2i1, op, i64, i64, !cast<Instruction>(NAME # _D)>;
 }
 
 class sve2_int_while_rr<bits<2> sz8_64, bits<1> rw, string asm,

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-while.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-while.ll
new file mode 100644
index 000000000000..0590c74d2efc
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-while.ll
@@ -0,0 +1,309 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; WHILELE
+;
+
+define <vscale x 16 x i1> @whilele_b_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilele_b_ww:
+; CHECK: whilele p0.b, w0, w1
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilele.nxv16i1.i32(i32 %a, i32 %b)
+  ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @whilele_b_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilele_b_xx:
+; CHECK: whilele p0.b, x0, x1
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilele.nxv16i1.i64(i64 %a, i64 %b)
+  ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 8 x i1> @whilele_h_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilele_h_ww:
+; CHECK: whilele p0.h, w0, w1
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.whilele.nxv8i1.i32(i32 %a, i32 %b)
+  ret <vscale x 8 x i1> %out
+}
+
+define <vscale x 8 x i1> @whilele_h_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilele_h_xx:
+; CHECK: whilele p0.h, x0, x1
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.whilele.nxv8i1.i64(i64 %a, i64 %b)
+  ret <vscale x 8 x i1> %out
+}
+
+define <vscale x 4 x i1> @whilele_s_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilele_s_ww:
+; CHECK: whilele p0.s, w0, w1
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.whilele.nxv4i1.i32(i32 %a, i32 %b)
+  ret <vscale x 4 x i1> %out
+}
+
+define <vscale x 4 x i1> @whilele_s_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilele_s_xx:
+; CHECK: whilele p0.s, x0, x1
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.whilele.nxv4i1.i64(i64 %a, i64 %b)
+  ret <vscale x 4 x i1> %out
+}
+
+define <vscale x 2 x i1> @whilele_d_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilele_d_ww:
+; CHECK: whilele p0.d, w0, w1
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilele.nxv2i1.i32(i32 %a, i32 %b)
+  ret <vscale x 2 x i1> %out
+}
+
+define <vscale x 2 x i1> @whilele_d_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilele_d_xx:
+; CHECK: whilele p0.d, x0, x1
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilele.nxv2i1.i64(i64 %a, i64 %b)
+  ret <vscale x 2 x i1> %out
+}
+
+;
+; WHILELO
+;
+
+define <vscale x 16 x i1> @whilelo_b_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilelo_b_ww:
+; CHECK: whilelo p0.b, w0, w1
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelo.nxv16i1.i32(i32 %a, i32 %b)
+  ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @whilelo_b_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilelo_b_xx:
+; CHECK: whilelo p0.b, x0, x1
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelo.nxv16i1.i64(i64 %a, i64 %b)
+  ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 8 x i1> @whilelo_h_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilelo_h_ww:
+; CHECK: whilelo p0.h, w0, w1
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.whilelo.nxv8i1.i32(i32 %a, i32 %b)
+  ret <vscale x 8 x i1> %out
+}
+
+define <vscale x 8 x i1> @whilelo_h_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilelo_h_xx:
+; CHECK: whilelo p0.h, x0, x1
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.whilelo.nxv8i1.i64(i64 %a, i64 %b)
+  ret <vscale x 8 x i1> %out
+}
+
+define <vscale x 4 x i1> @whilelo_s_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilelo_s_ww:
+; CHECK: whilelo p0.s, w0, w1
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.whilelo.nxv4i1.i32(i32 %a, i32 %b)
+  ret <vscale x 4 x i1> %out
+}
+
+define <vscale x 4 x i1> @whilelo_s_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilelo_s_xx:
+; CHECK: whilelo p0.s, x0, x1
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.whilelo.nxv4i1.i64(i64 %a, i64 %b)
+  ret <vscale x 4 x i1> %out
+}
+
+define <vscale x 2 x i1> @whilelo_d_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilelo_d_ww:
+; CHECK: whilelo p0.d, w0, w1
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilelo.nxv2i1.i32(i32 %a, i32 %b)
+  ret <vscale x 2 x i1> %out
+}
+
+define <vscale x 2 x i1> @whilelo_d_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilelo_d_xx:
+; CHECK: whilelo p0.d, x0, x1
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilelo.nxv2i1.i64(i64 %a, i64 %b)
+  ret <vscale x 2 x i1> %out
+}
+
+;
+; WHILELS
+;
+
+define <vscale x 16 x i1> @whilels_b_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilels_b_ww:
+; CHECK: whilels p0.b, w0, w1
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilels.nxv16i1.i32(i32 %a, i32 %b)
+  ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @whilels_b_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilels_b_xx:
+; CHECK: whilels p0.b, x0, x1
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilels.nxv16i1.i64(i64 %a, i64 %b)
+  ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 8 x i1> @whilels_h_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilels_h_ww:
+; CHECK: whilels p0.h, w0, w1
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.whilels.nxv8i1.i32(i32 %a, i32 %b)
+  ret <vscale x 8 x i1> %out
+}
+
+define <vscale x 8 x i1> @whilels_h_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilels_h_xx:
+; CHECK: whilels p0.h, x0, x1
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.whilels.nxv8i1.i64(i64 %a, i64 %b)
+  ret <vscale x 8 x i1> %out
+}
+
+define <vscale x 4 x i1> @whilels_s_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilels_s_ww:
+; CHECK: whilels p0.s, w0, w1
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.whilels.nxv4i1.i32(i32 %a, i32 %b)
+  ret <vscale x 4 x i1> %out
+}
+
+define <vscale x 4 x i1> @whilels_s_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilels_s_xx:
+; CHECK: whilels p0.s, x0, x1
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.whilels.nxv4i1.i64(i64 %a, i64 %b)
+  ret <vscale x 4 x i1> %out
+}
+
+define <vscale x 2 x i1> @whilels_d_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilels_d_ww:
+; CHECK: whilels p0.d, w0, w1
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilels.nxv2i1.i32(i32 %a, i32 %b)
+  ret <vscale x 2 x i1> %out
+}
+
+define <vscale x 2 x i1> @whilels_d_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilels_d_xx:
+; CHECK: whilels p0.d, x0, x1
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilels.nxv2i1.i64(i64 %a, i64 %b)
+  ret <vscale x 2 x i1> %out
+}
+
+;
+; WHILELT
+;
+
+define <vscale x 16 x i1> @whilelt_b_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilelt_b_ww:
+; CHECK: whilelt p0.b, w0, w1
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelt.nxv16i1.i32(i32 %a, i32 %b)
+  ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @whilelt_b_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilelt_b_xx:
+; CHECK: whilelt p0.b, x0, x1
+; CHECK-NEXT: ret
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelt.nxv16i1.i64(i64 %a, i64 %b)
+  ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 8 x i1> @whilelt_h_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilelt_h_ww:
+; CHECK: whilelt p0.h, w0, w1
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.whilelt.nxv8i1.i32(i32 %a, i32 %b)
+  ret <vscale x 8 x i1> %out
+}
+
+define <vscale x 8 x i1> @whilelt_h_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilelt_h_xx:
+; CHECK: whilelt p0.h, x0, x1
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.whilelt.nxv8i1.i64(i64 %a, i64 %b)
+  ret <vscale x 8 x i1> %out
+}
+
+define <vscale x 4 x i1> @whilelt_s_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilelt_s_ww:
+; CHECK: whilelt p0.s, w0, w1
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.whilelt.nxv4i1.i32(i32 %a, i32 %b)
+  ret <vscale x 4 x i1> %out
+}
+
+define <vscale x 4 x i1> @whilelt_s_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilelt_s_xx:
+; CHECK: whilelt p0.s, x0, x1
+; CHECK-NEXT: ret
+  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.whilelt.nxv4i1.i64(i64 %a, i64 %b)
+  ret <vscale x 4 x i1> %out
+}
+
+define <vscale x 2 x i1> @whilelt_d_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilelt_d_ww:
+; CHECK: whilelt p0.d, w0, w1
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilelt.nxv2i1.i32(i32 %a, i32 %b)
+  ret <vscale x 2 x i1> %out
+}
+
+define <vscale x 2 x i1> @whilelt_d_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilelt_d_xx:
+; CHECK: whilelt p0.d, x0, x1
+; CHECK-NEXT: ret
+  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilelt.nxv2i1.i64(i64 %a, i64 %b)
+  ret <vscale x 2 x i1> %out
+}
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.whilele.nxv16i1.i32(i32, i32)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.whilele.nxv16i1.i64(i64, i64)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.whilele.nxv8i1.i32(i32, i32)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.whilele.nxv8i1.i64(i64, i64)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.whilele.nxv4i1.i32(i32, i32)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.whilele.nxv4i1.i64(i64, i64)
+declare <vscale x 2 x i1> @llvm.aarch64.sve.whilele.nxv2i1.i32(i32, i32)
+declare <vscale x 2 x i1> @llvm.aarch64.sve.whilele.nxv2i1.i64(i64, i64)
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.whilelo.nxv16i1.i32(i32, i32)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.whilelo.nxv16i1.i64(i64, i64)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.whilelo.nxv8i1.i32(i32, i32)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.whilelo.nxv8i1.i64(i64, i64)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.whilelo.nxv4i1.i32(i32, i32)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.whilelo.nxv4i1.i64(i64, i64)
+declare <vscale x 2 x i1> @llvm.aarch64.sve.whilelo.nxv2i1.i32(i32, i32)
+declare <vscale x 2 x i1> @llvm.aarch64.sve.whilelo.nxv2i1.i64(i64, i64)
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.whilels.nxv16i1.i32(i32, i32)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.whilels.nxv16i1.i64(i64, i64)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.whilels.nxv8i1.i32(i32, i32)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.whilels.nxv8i1.i64(i64, i64)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.whilels.nxv4i1.i32(i32, i32)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.whilels.nxv4i1.i64(i64, i64)
+declare <vscale x 2 x i1> @llvm.aarch64.sve.whilels.nxv2i1.i32(i32, i32)
+declare <vscale x 2 x i1> @llvm.aarch64.sve.whilels.nxv2i1.i64(i64, i64)
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.whilelt.nxv16i1.i32(i32, i32)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.whilelt.nxv16i1.i64(i64, i64)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.whilelt.nxv8i1.i32(i32, i32)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.whilelt.nxv8i1.i64(i64, i64)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.whilelt.nxv4i1.i32(i32, i32)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.whilelt.nxv4i1.i64(i64, i64)
+declare <vscale x 2 x i1> @llvm.aarch64.sve.whilelt.nxv2i1.i32(i32, i32)
+declare <vscale x 2 x i1> @llvm.aarch64.sve.whilelt.nxv2i1.i64(i64, i64)