[llvm] [LLVM][CodeGen][SVE] rev(whilelo(a, b)) -> whilehi(b, a). (PR #88294)

Wed Apr 17 03:43:32 PDT 2024

https://github.com/paulwalker-arm updated https://github.com/llvm/llvm-project/pull/88294

>From ff5a750c722bf4a4450b251f23ad6aa792a938e2 Mon Sep 17 00:00:00 2001
From: Paul Walker <paul.walker at arm.com>
Date: Wed, 10 Apr 2024 14:59:15 +0100
Subject: [PATCH 1/3] Add tests for rev(while##(a,b)).

---
 .../AArch64/sve2-intrinsics-while-reversed.ll | 741 ++++++++++++++++++
 1 file changed, 741 insertions(+)
 create mode 100644 llvm/test/CodeGen/AArch64/sve2-intrinsics-while-reversed.ll

diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-while-reversed.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-while-reversed.ll
new file mode 100644
index 00000000000000..82b82b59c3f989
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-while-reversed.ll
@@ -0,0 +1,741 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mattr=+sve2 < %s | FileCheck %s
+; RUN: llc -mattr=+sme < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+;
+; WHILEGE
+;
+
+define <vscale x 16 x i1> @whilege_b_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilege_b_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilele p0.b, w1, w0
+; CHECK-NEXT:    rev p0.b, p0.b
+; CHECK-NEXT:    ret
+  %while = call <vscale x 16 x i1> @llvm.aarch64.sve.whilele.nxv16i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %while)
+  ret <vscale x 16 x i1> %while.rev
+}
+
+define <vscale x 16 x i1> @whilege_b_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilege_b_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilele p0.b, x1, x0
+; CHECK-NEXT:    rev p0.b, p0.b
+; CHECK-NEXT:    ret
+  %while = call <vscale x 16 x i1> @llvm.aarch64.sve.whilele.nxv16i1.i64(i64 %b, i64 %a)
+  %while.rev = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %while)
+  ret <vscale x 16 x i1> %while.rev
+}
+
+define <vscale x 8 x i1> @whilege_h_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilege_h_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilele p0.h, w1, w0
+; CHECK-NEXT:    rev p0.h, p0.h
+; CHECK-NEXT:    ret
+  %while = call <vscale x 8 x i1> @llvm.aarch64.sve.whilele.nxv8i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %while)
+  ret <vscale x 8 x i1> %while.rev
+}
+
+define <vscale x 8 x i1> @whilege_h_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilege_h_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilele p0.h, x1, x0
+; CHECK-NEXT:    rev p0.h, p0.h
+; CHECK-NEXT:    ret
+  %while = call <vscale x 8 x i1> @llvm.aarch64.sve.whilele.nxv8i1.i64(i64 %b, i64 %a)
+  %while.rev = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %while)
+  ret <vscale x 8 x i1> %while.rev
+}
+
+define <vscale x 4 x i1> @whilege_s_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilege_s_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilele p0.s, w1, w0
+; CHECK-NEXT:    rev p0.s, p0.s
+; CHECK-NEXT:    ret
+  %while = call <vscale x 4 x i1> @llvm.aarch64.sve.whilele.nxv4i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %while)
+  ret <vscale x 4 x i1> %while.rev
+}
+
+define <vscale x 4 x i1> @whilege_s_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilege_s_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilele p0.s, x1, x0
+; CHECK-NEXT:    rev p0.s, p0.s
+; CHECK-NEXT:    ret
+  %while = call <vscale x 4 x i1> @llvm.aarch64.sve.whilele.nxv4i1.i64(i64 %b, i64 %a)
+  %while.rev = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %while)
+  ret <vscale x 4 x i1> %while.rev
+}
+
+define <vscale x 2 x i1> @whilege_d_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilege_d_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilele p0.d, w1, w0
+; CHECK-NEXT:    rev p0.d, p0.d
+; CHECK-NEXT:    ret
+  %while = call <vscale x 2 x i1> @llvm.aarch64.sve.whilele.nxv2i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %while)
+  ret <vscale x 2 x i1> %while.rev
+}
+
+define <vscale x 2 x i1> @whilege_d_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilege_d_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilele p0.d, x1, x0
+; CHECK-NEXT:    rev p0.d, p0.d
+; CHECK-NEXT:    ret
+  %while = call <vscale x 2 x i1> @llvm.aarch64.sve.whilele.nxv2i1.i64(i64 %b, i64 %a)
+  %while.rev = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %while)
+  ret <vscale x 2 x i1> %while.rev
+}
+
+;
+; WHILEHS
+;
+
+define <vscale x 16 x i1> @whilehs_b_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilehs_b_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilels p0.b, w1, w0
+; CHECK-NEXT:    rev p0.b, p0.b
+; CHECK-NEXT:    ret
+  %while = call <vscale x 16 x i1> @llvm.aarch64.sve.whilels.nxv16i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %while)
+  ret <vscale x 16 x i1> %while.rev
+}
+
+define <vscale x 16 x i1> @whilehs_b_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilehs_b_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilels p0.b, x1, x0
+; CHECK-NEXT:    rev p0.b, p0.b
+; CHECK-NEXT:    ret
+  %while = call <vscale x 16 x i1> @llvm.aarch64.sve.whilels.nxv16i1.i64(i64 %b, i64 %a)
+  %while.rev = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %while)
+  ret <vscale x 16 x i1> %while.rev
+}
+
+define <vscale x 8 x i1> @whilehs_h_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilehs_h_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilels p0.h, w1, w0
+; CHECK-NEXT:    rev p0.h, p0.h
+; CHECK-NEXT:    ret
+  %while = call <vscale x 8 x i1> @llvm.aarch64.sve.whilels.nxv8i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %while)
+  ret <vscale x 8 x i1> %while.rev
+}
+
+define <vscale x 8 x i1> @whilehs_h_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilehs_h_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilels p0.h, x1, x0
+; CHECK-NEXT:    rev p0.h, p0.h
+; CHECK-NEXT:    ret
+  %while = call <vscale x 8 x i1> @llvm.aarch64.sve.whilels.nxv8i1.i64(i64 %b, i64 %a)
+  %while.rev = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %while)
+  ret <vscale x 8 x i1> %while.rev
+}
+
+define <vscale x 4 x i1> @whilehs_s_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilehs_s_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilels p0.s, w1, w0
+; CHECK-NEXT:    rev p0.s, p0.s
+; CHECK-NEXT:    ret
+  %while = call <vscale x 4 x i1> @llvm.aarch64.sve.whilels.nxv4i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %while)
+  ret <vscale x 4 x i1> %while.rev
+}
+
+define <vscale x 4 x i1> @whilehs_s_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilehs_s_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilels p0.s, x1, x0
+; CHECK-NEXT:    rev p0.s, p0.s
+; CHECK-NEXT:    ret
+  %while = call <vscale x 4 x i1> @llvm.aarch64.sve.whilels.nxv4i1.i64(i64 %b, i64 %a)
+  %while.rev = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %while)
+  ret <vscale x 4 x i1> %while.rev
+}
+
+define <vscale x 2 x i1> @whilehs_d_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilehs_d_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilels p0.d, w1, w0
+; CHECK-NEXT:    rev p0.d, p0.d
+; CHECK-NEXT:    ret
+  %while = call <vscale x 2 x i1> @llvm.aarch64.sve.whilels.nxv2i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %while)
+  ret <vscale x 2 x i1> %while.rev
+}
+
+define <vscale x 2 x i1> @whilehs_d_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilehs_d_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilels p0.d, x1, x0
+; CHECK-NEXT:    rev p0.d, p0.d
+; CHECK-NEXT:    ret
+  %while = call <vscale x 2 x i1> @llvm.aarch64.sve.whilels.nxv2i1.i64(i64 %b, i64 %a)
+  %while.rev = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %while)
+  ret <vscale x 2 x i1> %while.rev
+}
+
+;
+; WHILEGT
+;
+
+define <vscale x 16 x i1> @whilegt_b_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilegt_b_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilelt p0.b, w1, w0
+; CHECK-NEXT:    rev p0.b, p0.b
+; CHECK-NEXT:    ret
+  %while = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelt.nxv16i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %while)
+  ret <vscale x 16 x i1> %while.rev
+}
+
+define <vscale x 16 x i1> @whilegt_b_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilegt_b_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilelt p0.b, x1, x0
+; CHECK-NEXT:    rev p0.b, p0.b
+; CHECK-NEXT:    ret
+  %while = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelt.nxv16i1.i64(i64 %b, i64 %a)
+  %while.rev = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %while)
+  ret <vscale x 16 x i1> %while.rev
+}
+
+define <vscale x 8 x i1> @whilegt_h_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilegt_h_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilelt p0.h, w1, w0
+; CHECK-NEXT:    rev p0.h, p0.h
+; CHECK-NEXT:    ret
+  %while = call <vscale x 8 x i1> @llvm.aarch64.sve.whilelt.nxv8i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %while)
+  ret <vscale x 8 x i1> %while.rev
+}
+
+define <vscale x 8 x i1> @whilegt_h_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilegt_h_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilelt p0.h, x1, x0
+; CHECK-NEXT:    rev p0.h, p0.h
+; CHECK-NEXT:    ret
+  %while = call <vscale x 8 x i1> @llvm.aarch64.sve.whilelt.nxv8i1.i64(i64 %b, i64 %a)
+  %while.rev = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %while)
+  ret <vscale x 8 x i1> %while.rev
+}
+
+define <vscale x 4 x i1> @whilegt_s_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilegt_s_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilelt p0.s, w1, w0
+; CHECK-NEXT:    rev p0.s, p0.s
+; CHECK-NEXT:    ret
+  %while = call <vscale x 4 x i1> @llvm.aarch64.sve.whilelt.nxv4i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %while)
+  ret <vscale x 4 x i1> %while.rev
+}
+
+define <vscale x 4 x i1> @whilegt_s_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilegt_s_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilelt p0.s, x1, x0
+; CHECK-NEXT:    rev p0.s, p0.s
+; CHECK-NEXT:    ret
+  %while = call <vscale x 4 x i1> @llvm.aarch64.sve.whilelt.nxv4i1.i64(i64 %b, i64 %a)
+  %while.rev = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %while)
+  ret <vscale x 4 x i1> %while.rev
+}
+
+define <vscale x 2 x i1> @whilegt_d_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilegt_d_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilelt p0.d, w1, w0
+; CHECK-NEXT:    rev p0.d, p0.d
+; CHECK-NEXT:    ret
+  %while = call <vscale x 2 x i1> @llvm.aarch64.sve.whilelt.nxv2i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %while)
+  ret <vscale x 2 x i1> %while.rev
+}
+
+define <vscale x 2 x i1> @whilegt_d_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilegt_d_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilelt p0.d, x1, x0
+; CHECK-NEXT:    rev p0.d, p0.d
+; CHECK-NEXT:    ret
+  %while = call <vscale x 2 x i1> @llvm.aarch64.sve.whilelt.nxv2i1.i64(i64 %b, i64 %a)
+  %while.rev = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %while)
+  ret <vscale x 2 x i1> %while.rev
+}
+
+;
+; WHILEHI
+;
+
+define <vscale x 16 x i1> @whilehi_b_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilehi_b_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilelo p0.b, w1, w0
+; CHECK-NEXT:    rev p0.b, p0.b
+; CHECK-NEXT:    ret
+  %while = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelo.nxv16i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %while)
+  ret <vscale x 16 x i1> %while.rev
+}
+
+define <vscale x 16 x i1> @whilehi_b_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilehi_b_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilelo p0.b, x1, x0
+; CHECK-NEXT:    rev p0.b, p0.b
+; CHECK-NEXT:    ret
+  %while = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelo.nxv16i1.i64(i64 %b, i64 %a)
+  %while.rev = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %while)
+  ret <vscale x 16 x i1> %while.rev
+}
+
+define <vscale x 8 x i1> @whilehi_h_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilehi_h_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilelo p0.h, w1, w0
+; CHECK-NEXT:    rev p0.h, p0.h
+; CHECK-NEXT:    ret
+  %while = call <vscale x 8 x i1> @llvm.aarch64.sve.whilelo.nxv8i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %while)
+  ret <vscale x 8 x i1> %while.rev
+}
+
+define <vscale x 8 x i1> @whilehi_h_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilehi_h_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilelo p0.h, x1, x0
+; CHECK-NEXT:    rev p0.h, p0.h
+; CHECK-NEXT:    ret
+  %while = call <vscale x 8 x i1> @llvm.aarch64.sve.whilelo.nxv8i1.i64(i64 %b, i64 %a)
+  %while.rev = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %while)
+  ret <vscale x 8 x i1> %while.rev
+}
+
+define <vscale x 4 x i1> @whilehi_s_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilehi_s_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilelo p0.s, w1, w0
+; CHECK-NEXT:    rev p0.s, p0.s
+; CHECK-NEXT:    ret
+  %while = call <vscale x 4 x i1> @llvm.aarch64.sve.whilelo.nxv4i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %while)
+  ret <vscale x 4 x i1> %while.rev
+}
+
+define <vscale x 4 x i1> @whilehi_s_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilehi_s_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilelo p0.s, x1, x0
+; CHECK-NEXT:    rev p0.s, p0.s
+; CHECK-NEXT:    ret
+  %while = call <vscale x 4 x i1> @llvm.aarch64.sve.whilelo.nxv4i1.i64(i64 %b, i64 %a)
+  %while.rev = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %while)
+  ret <vscale x 4 x i1> %while.rev
+}
+
+define <vscale x 2 x i1> @whilehi_d_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilehi_d_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilelo p0.d, w1, w0
+; CHECK-NEXT:    rev p0.d, p0.d
+; CHECK-NEXT:    ret
+  %while = call <vscale x 2 x i1> @llvm.aarch64.sve.whilelo.nxv2i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %while)
+  ret <vscale x 2 x i1> %while.rev
+}
+
+define <vscale x 2 x i1> @whilehi_d_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilehi_d_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilelo p0.d, x1, x0
+; CHECK-NEXT:    rev p0.d, p0.d
+; CHECK-NEXT:    ret
+  %while = call <vscale x 2 x i1> @llvm.aarch64.sve.whilelo.nxv2i1.i64(i64 %b, i64 %a)
+  %while.rev = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %while)
+  ret <vscale x 2 x i1> %while.rev
+}
+
+;
+; WHILELE
+;
+
+define <vscale x 16 x i1> @whilele_b_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilele_b_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilege p0.b, w1, w0
+; CHECK-NEXT:    rev p0.b, p0.b
+; CHECK-NEXT:    ret
+  %while = call <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %while)
+  ret <vscale x 16 x i1> %while.rev
+}
+
+define <vscale x 16 x i1> @whilele_b_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilele_b_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilege p0.b, x1, x0
+; CHECK-NEXT:    rev p0.b, p0.b
+; CHECK-NEXT:    ret
+  %while = call <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i64(i64 %b, i64 %a)
+  %while.rev = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %while)
+  ret <vscale x 16 x i1> %while.rev
+}
+
+define <vscale x 8 x i1> @whilele_h_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilele_h_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilege p0.h, w1, w0
+; CHECK-NEXT:    rev p0.h, p0.h
+; CHECK-NEXT:    ret
+  %while = call <vscale x 8 x i1> @llvm.aarch64.sve.whilege.nxv8i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %while)
+  ret <vscale x 8 x i1> %while.rev
+}
+
+define <vscale x 8 x i1> @whilele_h_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilele_h_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilege p0.h, x1, x0
+; CHECK-NEXT:    rev p0.h, p0.h
+; CHECK-NEXT:    ret
+  %while = call <vscale x 8 x i1> @llvm.aarch64.sve.whilege.nxv8i1.i64(i64 %b, i64 %a)
+  %while.rev = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %while)
+  ret <vscale x 8 x i1> %while.rev
+}
+
+define <vscale x 4 x i1> @whilele_s_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilele_s_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilege p0.s, w1, w0
+; CHECK-NEXT:    rev p0.s, p0.s
+; CHECK-NEXT:    ret
+  %while = call <vscale x 4 x i1> @llvm.aarch64.sve.whilege.nxv4i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %while)
+  ret <vscale x 4 x i1> %while.rev
+}
+
+define <vscale x 4 x i1> @whilele_s_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilele_s_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilege p0.s, x1, x0
+; CHECK-NEXT:    rev p0.s, p0.s
+; CHECK-NEXT:    ret
+  %while = call <vscale x 4 x i1> @llvm.aarch64.sve.whilege.nxv4i1.i64(i64 %b, i64 %a)
+  %while.rev = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %while)
+  ret <vscale x 4 x i1> %while.rev
+}
+
+define <vscale x 2 x i1> @whilele_d_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilele_d_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilege p0.d, w1, w0
+; CHECK-NEXT:    rev p0.d, p0.d
+; CHECK-NEXT:    ret
+  %while = call <vscale x 2 x i1> @llvm.aarch64.sve.whilege.nxv2i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %while)
+  ret <vscale x 2 x i1> %while.rev
+}
+
+define <vscale x 2 x i1> @whilele_d_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilele_d_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilege p0.d, x1, x0
+; CHECK-NEXT:    rev p0.d, p0.d
+; CHECK-NEXT:    ret
+  %while = call <vscale x 2 x i1> @llvm.aarch64.sve.whilege.nxv2i1.i64(i64 %b, i64 %a)
+  %while.rev = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %while)
+  ret <vscale x 2 x i1> %while.rev
+}
+
+;
+; WHILELO
+;
+
+define <vscale x 16 x i1> @whilelo_b_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilelo_b_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehi p0.b, w1, w0
+; CHECK-NEXT:    rev p0.b, p0.b
+; CHECK-NEXT:    ret
+  %while = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehi.nxv16i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %while)
+  ret <vscale x 16 x i1> %while.rev
+}
+
+define <vscale x 16 x i1> @whilelo_b_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilelo_b_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehi p0.b, x1, x0
+; CHECK-NEXT:    rev p0.b, p0.b
+; CHECK-NEXT:    ret
+  %while = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehi.nxv16i1.i64(i64 %b, i64 %a)
+  %while.rev = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %while)
+  ret <vscale x 16 x i1> %while.rev
+}
+
+define <vscale x 8 x i1> @whilelo_h_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilelo_h_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehi p0.h, w1, w0
+; CHECK-NEXT:    rev p0.h, p0.h
+; CHECK-NEXT:    ret
+  %while = call <vscale x 8 x i1> @llvm.aarch64.sve.whilehi.nxv8i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %while)
+  ret <vscale x 8 x i1> %while.rev
+}
+
+define <vscale x 8 x i1> @whilelo_h_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilelo_h_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehi p0.h, x1, x0
+; CHECK-NEXT:    rev p0.h, p0.h
+; CHECK-NEXT:    ret
+  %while = call <vscale x 8 x i1> @llvm.aarch64.sve.whilehi.nxv8i1.i64(i64 %b, i64 %a)
+  %while.rev = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %while)
+  ret <vscale x 8 x i1> %while.rev
+}
+
+define <vscale x 4 x i1> @whilelo_s_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilelo_s_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehi p0.s, w1, w0
+; CHECK-NEXT:    rev p0.s, p0.s
+; CHECK-NEXT:    ret
+  %while = call <vscale x 4 x i1> @llvm.aarch64.sve.whilehi.nxv4i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %while)
+  ret <vscale x 4 x i1> %while.rev
+}
+
+define <vscale x 4 x i1> @whilelo_s_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilelo_s_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehi p0.s, x1, x0
+; CHECK-NEXT:    rev p0.s, p0.s
+; CHECK-NEXT:    ret
+  %while = call <vscale x 4 x i1> @llvm.aarch64.sve.whilehi.nxv4i1.i64(i64 %b, i64 %a)
+  %while.rev = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %while)
+  ret <vscale x 4 x i1> %while.rev
+}
+
+define <vscale x 2 x i1> @whilelo_d_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilelo_d_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehi p0.d, w1, w0
+; CHECK-NEXT:    rev p0.d, p0.d
+; CHECK-NEXT:    ret
+  %while = call <vscale x 2 x i1> @llvm.aarch64.sve.whilehi.nxv2i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %while)
+  ret <vscale x 2 x i1> %while.rev
+}
+
+define <vscale x 2 x i1> @whilelo_d_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilelo_d_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehi p0.d, x1, x0
+; CHECK-NEXT:    rev p0.d, p0.d
+; CHECK-NEXT:    ret
+  %while = call <vscale x 2 x i1> @llvm.aarch64.sve.whilehi.nxv2i1.i64(i64 %b, i64 %a)
+  %while.rev = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %while)
+  ret <vscale x 2 x i1> %while.rev
+}
+
+;
+; WHILELS
+;
+
+define <vscale x 16 x i1> @whilels_b_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilels_b_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehs p0.b, w1, w0
+; CHECK-NEXT:    rev p0.b, p0.b
+; CHECK-NEXT:    ret
+  %while = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehs.nxv16i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %while)
+  ret <vscale x 16 x i1> %while.rev
+}
+
+define <vscale x 16 x i1> @whilels_b_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilels_b_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehs p0.b, x1, x0
+; CHECK-NEXT:    rev p0.b, p0.b
+; CHECK-NEXT:    ret
+  %while = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehs.nxv16i1.i64(i64 %b, i64 %a)
+  %while.rev = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %while)
+  ret <vscale x 16 x i1> %while.rev
+}
+
+define <vscale x 8 x i1> @whilels_h_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilels_h_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehs p0.h, w1, w0
+; CHECK-NEXT:    rev p0.h, p0.h
+; CHECK-NEXT:    ret
+  %while = call <vscale x 8 x i1> @llvm.aarch64.sve.whilehs.nxv8i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %while)
+  ret <vscale x 8 x i1> %while.rev
+}
+
+define <vscale x 8 x i1> @whilels_h_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilels_h_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehs p0.h, x1, x0
+; CHECK-NEXT:    rev p0.h, p0.h
+; CHECK-NEXT:    ret
+  %while = call <vscale x 8 x i1> @llvm.aarch64.sve.whilehs.nxv8i1.i64(i64 %b, i64 %a)
+  %while.rev = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %while)
+  ret <vscale x 8 x i1> %while.rev
+}
+
+define <vscale x 4 x i1> @whilels_s_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilels_s_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehs p0.s, w1, w0
+; CHECK-NEXT:    rev p0.s, p0.s
+; CHECK-NEXT:    ret
+  %while = call <vscale x 4 x i1> @llvm.aarch64.sve.whilehs.nxv4i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %while)
+  ret <vscale x 4 x i1> %while.rev
+}
+
+define <vscale x 4 x i1> @whilels_s_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilels_s_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehs p0.s, x1, x0
+; CHECK-NEXT:    rev p0.s, p0.s
+; CHECK-NEXT:    ret
+  %while = call <vscale x 4 x i1> @llvm.aarch64.sve.whilehs.nxv4i1.i64(i64 %b, i64 %a)
+  %while.rev = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %while)
+  ret <vscale x 4 x i1> %while.rev
+}
+
+define <vscale x 2 x i1> @whilels_d_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilels_d_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehs p0.d, w1, w0
+; CHECK-NEXT:    rev p0.d, p0.d
+; CHECK-NEXT:    ret
+  %while = call <vscale x 2 x i1> @llvm.aarch64.sve.whilehs.nxv2i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %while)
+  ret <vscale x 2 x i1> %while.rev
+}
+
+define <vscale x 2 x i1> @whilels_d_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilels_d_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehs p0.d, x1, x0
+; CHECK-NEXT:    rev p0.d, p0.d
+; CHECK-NEXT:    ret
+  %while = call <vscale x 2 x i1> @llvm.aarch64.sve.whilehs.nxv2i1.i64(i64 %b, i64 %a)
+  %while.rev = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %while)
+  ret <vscale x 2 x i1> %while.rev
+}
+
+;
+; WHILELT
+;
+
+define <vscale x 16 x i1> @whilelt_b_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilelt_b_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilegt p0.b, w1, w0
+; CHECK-NEXT:    rev p0.b, p0.b
+; CHECK-NEXT:    ret
+  %while = call <vscale x 16 x i1> @llvm.aarch64.sve.whilegt.nxv16i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %while)
+  ret <vscale x 16 x i1> %while.rev
+}
+
+define <vscale x 16 x i1> @whilelt_b_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilelt_b_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilegt p0.b, x1, x0
+; CHECK-NEXT:    rev p0.b, p0.b
+; CHECK-NEXT:    ret
+  %while = call <vscale x 16 x i1> @llvm.aarch64.sve.whilegt.nxv16i1.i64(i64 %b, i64 %a)
+  %while.rev = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %while)
+  ret <vscale x 16 x i1> %while.rev
+}
+
+define <vscale x 8 x i1> @whilelt_h_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilelt_h_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilegt p0.h, w1, w0
+; CHECK-NEXT:    rev p0.h, p0.h
+; CHECK-NEXT:    ret
+  %while = call <vscale x 8 x i1> @llvm.aarch64.sve.whilegt.nxv8i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %while)
+  ret <vscale x 8 x i1> %while.rev
+}
+
+define <vscale x 8 x i1> @whilelt_h_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilelt_h_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilegt p0.h, x1, x0
+; CHECK-NEXT:    rev p0.h, p0.h
+; CHECK-NEXT:    ret
+  %while = call <vscale x 8 x i1> @llvm.aarch64.sve.whilegt.nxv8i1.i64(i64 %b, i64 %a)
+  %while.rev = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %while)
+  ret <vscale x 8 x i1> %while.rev
+}
+
+define <vscale x 4 x i1> @whilelt_s_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilelt_s_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilegt p0.s, w1, w0
+; CHECK-NEXT:    rev p0.s, p0.s
+; CHECK-NEXT:    ret
+  %while = call <vscale x 4 x i1> @llvm.aarch64.sve.whilegt.nxv4i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %while)
+  ret <vscale x 4 x i1> %while.rev
+}
+
+define <vscale x 4 x i1> @whilelt_s_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilelt_s_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilegt p0.s, x1, x0
+; CHECK-NEXT:    rev p0.s, p0.s
+; CHECK-NEXT:    ret
+  %while = call <vscale x 4 x i1> @llvm.aarch64.sve.whilegt.nxv4i1.i64(i64 %b, i64 %a)
+  %while.rev = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %while)
+  ret <vscale x 4 x i1> %while.rev
+}
+
+define <vscale x 2 x i1> @whilelt_d_ww(i32 %a, i32 %b) {
+; CHECK-LABEL: whilelt_d_ww:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilegt p0.d, w1, w0
+; CHECK-NEXT:    rev p0.d, p0.d
+; CHECK-NEXT:    ret
+  %while = call <vscale x 2 x i1> @llvm.aarch64.sve.whilegt.nxv2i1.i32(i32 %b, i32 %a)
+  %while.rev = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %while)
+  ret <vscale x 2 x i1> %while.rev
+}
+
+define <vscale x 2 x i1> @whilelt_d_xx(i64 %a, i64 %b) {
+; CHECK-LABEL: whilelt_d_xx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilegt p0.d, x1, x0
+; CHECK-NEXT:    rev p0.d, p0.d
+; CHECK-NEXT:    ret
+  %while = call <vscale x 2 x i1> @llvm.aarch64.sve.whilegt.nxv2i1.i64(i64 %b, i64 %a)
+  %while.rev = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %while)
+  ret <vscale x 2 x i1> %while.rev
+}

>From 4090b5bc5959befc4b08330af0ac5ee8e6ef400a Mon Sep 17 00:00:00 2001
From: Paul Walker <paul.walker at arm.com>
Date: Wed, 10 Apr 2024 16:23:24 +0100
Subject: [PATCH 2/3] [LLVM][CodeGen][SVE] rev(whilelo(a,b)) -> whilehi(b,a).

Add similar isel patterns for all comparison types.
---
 .../lib/Target/AArch64/AArch64SVEInstrInfo.td |  34 ++--
 llvm/lib/Target/AArch64/SVEInstrFormats.td    |  24 ++-
 .../AArch64/sve2-intrinsics-while-reversed.ll | 192 ++++++------------
 3 files changed, 103 insertions(+), 147 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index a519d81362a73a..b37d926ab1816b 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -2037,15 +2037,15 @@ let Predicates = [HasSVEorSME] in {
   defm FCMEQ_PPzZ0 : sve_fp_2op_p_pd<0b100, "fcmeq", SETOEQ, SETEQ, SETOEQ, SETEQ>;
   defm FCMNE_PPzZ0 : sve_fp_2op_p_pd<0b110, "fcmne", SETUNE, SETNE, SETUNE, SETNE>;
 
-  defm WHILELT_PWW : sve_int_while4_rr<0b010, "whilelt", int_aarch64_sve_whilelt>;
-  defm WHILELE_PWW : sve_int_while4_rr<0b011, "whilele", int_aarch64_sve_whilele>;
-  defm WHILELO_PWW : sve_int_while4_rr<0b110, "whilelo", int_aarch64_sve_whilelo>;
-  defm WHILELS_PWW : sve_int_while4_rr<0b111, "whilels", int_aarch64_sve_whilels>;
+  defm WHILELT_PWW : sve_int_while4_rr<0b010, "whilelt", int_aarch64_sve_whilelt, int_aarch64_sve_whilegt>;
+  defm WHILELE_PWW : sve_int_while4_rr<0b011, "whilele", int_aarch64_sve_whilele, int_aarch64_sve_whilege>;
+  defm WHILELO_PWW : sve_int_while4_rr<0b110, "whilelo", int_aarch64_sve_whilelo, int_aarch64_sve_whilehi>;
+  defm WHILELS_PWW : sve_int_while4_rr<0b111, "whilels", int_aarch64_sve_whilels, int_aarch64_sve_whilehs>;
 
-  defm WHILELT_PXX : sve_int_while8_rr<0b010, "whilelt", int_aarch64_sve_whilelt>;
-  defm WHILELE_PXX : sve_int_while8_rr<0b011, "whilele", int_aarch64_sve_whilele>;
-  defm WHILELO_PXX : sve_int_while8_rr<0b110, "whilelo", int_aarch64_sve_whilelo>;
-  defm WHILELS_PXX : sve_int_while8_rr<0b111, "whilels", int_aarch64_sve_whilels>;
+  defm WHILELT_PXX : sve_int_while8_rr<0b010, "whilelt", int_aarch64_sve_whilelt, int_aarch64_sve_whilegt>;
+  defm WHILELE_PXX : sve_int_while8_rr<0b011, "whilele", int_aarch64_sve_whilele, int_aarch64_sve_whilege>;
+  defm WHILELO_PXX : sve_int_while8_rr<0b110, "whilelo", int_aarch64_sve_whilelo, int_aarch64_sve_whilehi>;
+  defm WHILELS_PXX : sve_int_while8_rr<0b111, "whilels", int_aarch64_sve_whilels, int_aarch64_sve_whilehs>;
 
   def CTERMEQ_WW : sve_int_cterm<0b0, 0b0, "ctermeq", GPR32>;
   def CTERMNE_WW : sve_int_cterm<0b0, 0b1, "ctermne", GPR32>;
@@ -3792,15 +3792,15 @@ let Predicates = [HasSVE2orSME] in {
   defm TBX_ZZZ  : sve2_int_perm_tbx<"tbx", 0b01, int_aarch64_sve_tbx>;
 
   // SVE2 integer compare scalar count and limit
-  defm WHILEGE_PWW : sve_int_while4_rr<0b000, "whilege", int_aarch64_sve_whilege>;
-  defm WHILEGT_PWW : sve_int_while4_rr<0b001, "whilegt", int_aarch64_sve_whilegt>;
-  defm WHILEHS_PWW : sve_int_while4_rr<0b100, "whilehs", int_aarch64_sve_whilehs>;
-  defm WHILEHI_PWW : sve_int_while4_rr<0b101, "whilehi", int_aarch64_sve_whilehi>;
-
-  defm WHILEGE_PXX : sve_int_while8_rr<0b000, "whilege", int_aarch64_sve_whilege>;
-  defm WHILEGT_PXX : sve_int_while8_rr<0b001, "whilegt", int_aarch64_sve_whilegt>;
-  defm WHILEHS_PXX : sve_int_while8_rr<0b100, "whilehs", int_aarch64_sve_whilehs>;
-  defm WHILEHI_PXX : sve_int_while8_rr<0b101, "whilehi", int_aarch64_sve_whilehi>;
+  defm WHILEGE_PWW : sve_int_while4_rr<0b000, "whilege", int_aarch64_sve_whilege, int_aarch64_sve_whilele>;
+  defm WHILEGT_PWW : sve_int_while4_rr<0b001, "whilegt", int_aarch64_sve_whilegt, int_aarch64_sve_whilelt>;
+  defm WHILEHS_PWW : sve_int_while4_rr<0b100, "whilehs", int_aarch64_sve_whilehs, int_aarch64_sve_whilels>;
+  defm WHILEHI_PWW : sve_int_while4_rr<0b101, "whilehi", int_aarch64_sve_whilehi, int_aarch64_sve_whilelo>;
+
+  defm WHILEGE_PXX : sve_int_while8_rr<0b000, "whilege", int_aarch64_sve_whilege, int_aarch64_sve_whilele>;
+  defm WHILEGT_PXX : sve_int_while8_rr<0b001, "whilegt", int_aarch64_sve_whilegt, int_aarch64_sve_whilelt>;
+  defm WHILEHS_PXX : sve_int_while8_rr<0b100, "whilehs", int_aarch64_sve_whilehs, int_aarch64_sve_whilels>;
+  defm WHILEHI_PXX : sve_int_while8_rr<0b101, "whilehi", int_aarch64_sve_whilehi, int_aarch64_sve_whilelo>;
 
   // SVE2 pointer conflict compare
   defm WHILEWR_PXX : sve2_int_while_rr<0b0, "whilewr", "int_aarch64_sve_whilewr">;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index ee8292fdd8839a..e34f26956f3bca 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -5359,7 +5359,8 @@ class sve_int_while_rr<bits<2> sz8_64, bits<4> opc, string asm,
   let isWhile = 1;
 }
 
-multiclass sve_int_while4_rr<bits<3> opc, string asm, SDPatternOperator op> {
+multiclass sve_int_while4_rr<bits<3> opc, string asm, SDPatternOperator op,
+                             SDPatternOperator rev_op> {
   def _B : sve_int_while_rr<0b00, { 0, opc }, asm, GPR32, PPR8>;
   def _H : sve_int_while_rr<0b01, { 0, opc }, asm, GPR32, PPR16>;
   def _S : sve_int_while_rr<0b10, { 0, opc }, asm, GPR32, PPR32>;
@@ -5369,9 +5370,19 @@ multiclass sve_int_while4_rr<bits<3> opc, string asm, SDPatternOperator op> {
   def : SVE_2_Op_Pat<nxv8i1,  op, i32, i32, !cast<Instruction>(NAME # _H)>;
   def : SVE_2_Op_Pat<nxv4i1,  op, i32, i32, !cast<Instruction>(NAME # _S)>;
   def : SVE_2_Op_Pat<nxv2i1,  op, i32, i32, !cast<Instruction>(NAME # _D)>;
+
+  def : Pat<(nxv16i1 (vector_reverse (rev_op i32:$op2, i32:$op1))),
+            (!cast<Instruction>(NAME # "_B") $op1, $op2)>;
+  def : Pat<(nxv8i1 (vector_reverse (rev_op i32:$op2, i32:$op1))),
+            (!cast<Instruction>(NAME # "_H") $op1, $op2)>;
+  def : Pat<(nxv4i1 (vector_reverse (rev_op i32:$op2, i32:$op1))),
+            (!cast<Instruction>(NAME # "_S") $op1, $op2)>;
+  def : Pat<(nxv2i1 (vector_reverse (rev_op i32:$op2, i32:$op1))),
+            (!cast<Instruction>(NAME # "_D") $op1, $op2)>;
 }
 
-multiclass sve_int_while8_rr<bits<3> opc, string asm, SDPatternOperator op> {
+multiclass sve_int_while8_rr<bits<3> opc, string asm, SDPatternOperator op,
+                             SDPatternOperator rev_op> {
   def _B : sve_int_while_rr<0b00, { 1, opc }, asm, GPR64, PPR8>;
   def _H : sve_int_while_rr<0b01, { 1, opc }, asm, GPR64, PPR16>;
   def _S : sve_int_while_rr<0b10, { 1, opc }, asm, GPR64, PPR32>;
@@ -5381,6 +5392,15 @@ multiclass sve_int_while8_rr<bits<3> opc, string asm, SDPatternOperator op> {
   def : SVE_2_Op_Pat<nxv8i1,  op, i64, i64, !cast<Instruction>(NAME # _H)>;
   def : SVE_2_Op_Pat<nxv4i1,  op, i64, i64, !cast<Instruction>(NAME # _S)>;
   def : SVE_2_Op_Pat<nxv2i1,  op, i64, i64, !cast<Instruction>(NAME # _D)>;
+
+  def : Pat<(nxv16i1 (vector_reverse (rev_op i64:$op2, i64:$op1))),
+            (!cast<Instruction>(NAME # "_B") $op1, $op2)>;
+  def : Pat<(nxv8i1 (vector_reverse (rev_op i64:$op2, i64:$op1))),
+            (!cast<Instruction>(NAME # "_H") $op1, $op2)>;
+  def : Pat<(nxv4i1 (vector_reverse (rev_op i64:$op2, i64:$op1))),
+            (!cast<Instruction>(NAME # "_S") $op1, $op2)>;
+  def : Pat<(nxv2i1 (vector_reverse (rev_op i64:$op2, i64:$op1))),
+            (!cast<Instruction>(NAME # "_D") $op1, $op2)>;
 }
 
 class sve2_int_while_rr<bits<2> sz8_64, bits<1> rw, string asm,
diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-while-reversed.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-while-reversed.ll
index 82b82b59c3f989..b31922b8bc30ad 100644
--- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-while-reversed.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-while-reversed.ll
@@ -11,8 +11,7 @@ target triple = "aarch64-unknown-linux-gnu"
 define <vscale x 16 x i1> @whilege_b_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilege_b_ww:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilele p0.b, w1, w0
-; CHECK-NEXT:    rev p0.b, p0.b
+; CHECK-NEXT:    whilege p0.b, w0, w1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 16 x i1> @llvm.aarch64.sve.whilele.nxv16i1.i32(i32 %b, i32 %a)
   %while.rev = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %while)
@@ -22,8 +21,7 @@ define <vscale x 16 x i1> @whilege_b_ww(i32 %a, i32 %b) {
 define <vscale x 16 x i1> @whilege_b_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilege_b_xx:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilele p0.b, x1, x0
-; CHECK-NEXT:    rev p0.b, p0.b
+; CHECK-NEXT:    whilege p0.b, x0, x1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 16 x i1> @llvm.aarch64.sve.whilele.nxv16i1.i64(i64 %b, i64 %a)
   %while.rev = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %while)
@@ -33,8 +31,7 @@ define <vscale x 16 x i1> @whilege_b_xx(i64 %a, i64 %b) {
 define <vscale x 8 x i1> @whilege_h_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilege_h_ww:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilele p0.h, w1, w0
-; CHECK-NEXT:    rev p0.h, p0.h
+; CHECK-NEXT:    whilege p0.h, w0, w1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 8 x i1> @llvm.aarch64.sve.whilele.nxv8i1.i32(i32 %b, i32 %a)
   %while.rev = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %while)
@@ -44,8 +41,7 @@ define <vscale x 8 x i1> @whilege_h_ww(i32 %a, i32 %b) {
 define <vscale x 8 x i1> @whilege_h_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilege_h_xx:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilele p0.h, x1, x0
-; CHECK-NEXT:    rev p0.h, p0.h
+; CHECK-NEXT:    whilege p0.h, x0, x1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 8 x i1> @llvm.aarch64.sve.whilele.nxv8i1.i64(i64 %b, i64 %a)
   %while.rev = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %while)
@@ -55,8 +51,7 @@ define <vscale x 8 x i1> @whilege_h_xx(i64 %a, i64 %b) {
 define <vscale x 4 x i1> @whilege_s_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilege_s_ww:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilele p0.s, w1, w0
-; CHECK-NEXT:    rev p0.s, p0.s
+; CHECK-NEXT:    whilege p0.s, w0, w1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 4 x i1> @llvm.aarch64.sve.whilele.nxv4i1.i32(i32 %b, i32 %a)
   %while.rev = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %while)
@@ -66,8 +61,7 @@ define <vscale x 4 x i1> @whilege_s_ww(i32 %a, i32 %b) {
 define <vscale x 4 x i1> @whilege_s_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilege_s_xx:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilele p0.s, x1, x0
-; CHECK-NEXT:    rev p0.s, p0.s
+; CHECK-NEXT:    whilege p0.s, x0, x1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 4 x i1> @llvm.aarch64.sve.whilele.nxv4i1.i64(i64 %b, i64 %a)
   %while.rev = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %while)
@@ -77,8 +71,7 @@ define <vscale x 4 x i1> @whilege_s_xx(i64 %a, i64 %b) {
 define <vscale x 2 x i1> @whilege_d_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilege_d_ww:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilele p0.d, w1, w0
-; CHECK-NEXT:    rev p0.d, p0.d
+; CHECK-NEXT:    whilege p0.d, w0, w1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 2 x i1> @llvm.aarch64.sve.whilele.nxv2i1.i32(i32 %b, i32 %a)
   %while.rev = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %while)
@@ -88,8 +81,7 @@ define <vscale x 2 x i1> @whilege_d_ww(i32 %a, i32 %b) {
 define <vscale x 2 x i1> @whilege_d_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilege_d_xx:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilele p0.d, x1, x0
-; CHECK-NEXT:    rev p0.d, p0.d
+; CHECK-NEXT:    whilege p0.d, x0, x1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 2 x i1> @llvm.aarch64.sve.whilele.nxv2i1.i64(i64 %b, i64 %a)
   %while.rev = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %while)
@@ -103,8 +95,7 @@ define <vscale x 2 x i1> @whilege_d_xx(i64 %a, i64 %b) {
 define <vscale x 16 x i1> @whilehs_b_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilehs_b_ww:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilels p0.b, w1, w0
-; CHECK-NEXT:    rev p0.b, p0.b
+; CHECK-NEXT:    whilehs p0.b, w0, w1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 16 x i1> @llvm.aarch64.sve.whilels.nxv16i1.i32(i32 %b, i32 %a)
   %while.rev = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %while)
@@ -114,8 +105,7 @@ define <vscale x 16 x i1> @whilehs_b_ww(i32 %a, i32 %b) {
 define <vscale x 16 x i1> @whilehs_b_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilehs_b_xx:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilels p0.b, x1, x0
-; CHECK-NEXT:    rev p0.b, p0.b
+; CHECK-NEXT:    whilehs p0.b, x0, x1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 16 x i1> @llvm.aarch64.sve.whilels.nxv16i1.i64(i64 %b, i64 %a)
   %while.rev = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %while)
@@ -125,8 +115,7 @@ define <vscale x 16 x i1> @whilehs_b_xx(i64 %a, i64 %b) {
 define <vscale x 8 x i1> @whilehs_h_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilehs_h_ww:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilels p0.h, w1, w0
-; CHECK-NEXT:    rev p0.h, p0.h
+; CHECK-NEXT:    whilehs p0.h, w0, w1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 8 x i1> @llvm.aarch64.sve.whilels.nxv8i1.i32(i32 %b, i32 %a)
   %while.rev = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %while)
@@ -136,8 +125,7 @@ define <vscale x 8 x i1> @whilehs_h_ww(i32 %a, i32 %b) {
 define <vscale x 8 x i1> @whilehs_h_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilehs_h_xx:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilels p0.h, x1, x0
-; CHECK-NEXT:    rev p0.h, p0.h
+; CHECK-NEXT:    whilehs p0.h, x0, x1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 8 x i1> @llvm.aarch64.sve.whilels.nxv8i1.i64(i64 %b, i64 %a)
   %while.rev = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %while)
@@ -147,8 +135,7 @@ define <vscale x 8 x i1> @whilehs_h_xx(i64 %a, i64 %b) {
 define <vscale x 4 x i1> @whilehs_s_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilehs_s_ww:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilels p0.s, w1, w0
-; CHECK-NEXT:    rev p0.s, p0.s
+; CHECK-NEXT:    whilehs p0.s, w0, w1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 4 x i1> @llvm.aarch64.sve.whilels.nxv4i1.i32(i32 %b, i32 %a)
   %while.rev = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %while)
@@ -158,8 +145,7 @@ define <vscale x 4 x i1> @whilehs_s_ww(i32 %a, i32 %b) {
 define <vscale x 4 x i1> @whilehs_s_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilehs_s_xx:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilels p0.s, x1, x0
-; CHECK-NEXT:    rev p0.s, p0.s
+; CHECK-NEXT:    whilehs p0.s, x0, x1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 4 x i1> @llvm.aarch64.sve.whilels.nxv4i1.i64(i64 %b, i64 %a)
   %while.rev = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %while)
@@ -169,8 +155,7 @@ define <vscale x 4 x i1> @whilehs_s_xx(i64 %a, i64 %b) {
 define <vscale x 2 x i1> @whilehs_d_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilehs_d_ww:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilels p0.d, w1, w0
-; CHECK-NEXT:    rev p0.d, p0.d
+; CHECK-NEXT:    whilehs p0.d, w0, w1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 2 x i1> @llvm.aarch64.sve.whilels.nxv2i1.i32(i32 %b, i32 %a)
   %while.rev = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %while)
@@ -180,8 +165,7 @@ define <vscale x 2 x i1> @whilehs_d_ww(i32 %a, i32 %b) {
 define <vscale x 2 x i1> @whilehs_d_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilehs_d_xx:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilels p0.d, x1, x0
-; CHECK-NEXT:    rev p0.d, p0.d
+; CHECK-NEXT:    whilehs p0.d, x0, x1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 2 x i1> @llvm.aarch64.sve.whilels.nxv2i1.i64(i64 %b, i64 %a)
   %while.rev = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %while)
@@ -195,8 +179,7 @@ define <vscale x 2 x i1> @whilehs_d_xx(i64 %a, i64 %b) {
 define <vscale x 16 x i1> @whilegt_b_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilegt_b_ww:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilelt p0.b, w1, w0
-; CHECK-NEXT:    rev p0.b, p0.b
+; CHECK-NEXT:    whilegt p0.b, w0, w1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelt.nxv16i1.i32(i32 %b, i32 %a)
   %while.rev = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %while)
@@ -206,8 +189,7 @@ define <vscale x 16 x i1> @whilegt_b_ww(i32 %a, i32 %b) {
 define <vscale x 16 x i1> @whilegt_b_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilegt_b_xx:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilelt p0.b, x1, x0
-; CHECK-NEXT:    rev p0.b, p0.b
+; CHECK-NEXT:    whilegt p0.b, x0, x1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelt.nxv16i1.i64(i64 %b, i64 %a)
   %while.rev = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %while)
@@ -217,8 +199,7 @@ define <vscale x 16 x i1> @whilegt_b_xx(i64 %a, i64 %b) {
 define <vscale x 8 x i1> @whilegt_h_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilegt_h_ww:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilelt p0.h, w1, w0
-; CHECK-NEXT:    rev p0.h, p0.h
+; CHECK-NEXT:    whilegt p0.h, w0, w1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 8 x i1> @llvm.aarch64.sve.whilelt.nxv8i1.i32(i32 %b, i32 %a)
   %while.rev = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %while)
@@ -228,8 +209,7 @@ define <vscale x 8 x i1> @whilegt_h_ww(i32 %a, i32 %b) {
 define <vscale x 8 x i1> @whilegt_h_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilegt_h_xx:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilelt p0.h, x1, x0
-; CHECK-NEXT:    rev p0.h, p0.h
+; CHECK-NEXT:    whilegt p0.h, x0, x1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 8 x i1> @llvm.aarch64.sve.whilelt.nxv8i1.i64(i64 %b, i64 %a)
   %while.rev = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %while)
@@ -239,8 +219,7 @@ define <vscale x 8 x i1> @whilegt_h_xx(i64 %a, i64 %b) {
 define <vscale x 4 x i1> @whilegt_s_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilegt_s_ww:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilelt p0.s, w1, w0
-; CHECK-NEXT:    rev p0.s, p0.s
+; CHECK-NEXT:    whilegt p0.s, w0, w1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 4 x i1> @llvm.aarch64.sve.whilelt.nxv4i1.i32(i32 %b, i32 %a)
   %while.rev = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %while)
@@ -250,8 +229,7 @@ define <vscale x 4 x i1> @whilegt_s_ww(i32 %a, i32 %b) {
 define <vscale x 4 x i1> @whilegt_s_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilegt_s_xx:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilelt p0.s, x1, x0
-; CHECK-NEXT:    rev p0.s, p0.s
+; CHECK-NEXT:    whilegt p0.s, x0, x1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 4 x i1> @llvm.aarch64.sve.whilelt.nxv4i1.i64(i64 %b, i64 %a)
   %while.rev = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %while)
@@ -261,8 +239,7 @@ define <vscale x 4 x i1> @whilegt_s_xx(i64 %a, i64 %b) {
 define <vscale x 2 x i1> @whilegt_d_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilegt_d_ww:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilelt p0.d, w1, w0
-; CHECK-NEXT:    rev p0.d, p0.d
+; CHECK-NEXT:    whilegt p0.d, w0, w1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 2 x i1> @llvm.aarch64.sve.whilelt.nxv2i1.i32(i32 %b, i32 %a)
   %while.rev = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %while)
@@ -272,8 +249,7 @@ define <vscale x 2 x i1> @whilegt_d_ww(i32 %a, i32 %b) {
 define <vscale x 2 x i1> @whilegt_d_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilegt_d_xx:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilelt p0.d, x1, x0
-; CHECK-NEXT:    rev p0.d, p0.d
+; CHECK-NEXT:    whilegt p0.d, x0, x1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 2 x i1> @llvm.aarch64.sve.whilelt.nxv2i1.i64(i64 %b, i64 %a)
   %while.rev = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %while)
@@ -287,8 +263,7 @@ define <vscale x 2 x i1> @whilegt_d_xx(i64 %a, i64 %b) {
 define <vscale x 16 x i1> @whilehi_b_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilehi_b_ww:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilelo p0.b, w1, w0
-; CHECK-NEXT:    rev p0.b, p0.b
+; CHECK-NEXT:    whilehi p0.b, w0, w1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelo.nxv16i1.i32(i32 %b, i32 %a)
   %while.rev = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %while)
@@ -298,8 +273,7 @@ define <vscale x 16 x i1> @whilehi_b_ww(i32 %a, i32 %b) {
 define <vscale x 16 x i1> @whilehi_b_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilehi_b_xx:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilelo p0.b, x1, x0
-; CHECK-NEXT:    rev p0.b, p0.b
+; CHECK-NEXT:    whilehi p0.b, x0, x1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelo.nxv16i1.i64(i64 %b, i64 %a)
   %while.rev = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %while)
@@ -309,8 +283,7 @@ define <vscale x 16 x i1> @whilehi_b_xx(i64 %a, i64 %b) {
 define <vscale x 8 x i1> @whilehi_h_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilehi_h_ww:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilelo p0.h, w1, w0
-; CHECK-NEXT:    rev p0.h, p0.h
+; CHECK-NEXT:    whilehi p0.h, w0, w1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 8 x i1> @llvm.aarch64.sve.whilelo.nxv8i1.i32(i32 %b, i32 %a)
   %while.rev = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %while)
@@ -320,8 +293,7 @@ define <vscale x 8 x i1> @whilehi_h_ww(i32 %a, i32 %b) {
 define <vscale x 8 x i1> @whilehi_h_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilehi_h_xx:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilelo p0.h, x1, x0
-; CHECK-NEXT:    rev p0.h, p0.h
+; CHECK-NEXT:    whilehi p0.h, x0, x1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 8 x i1> @llvm.aarch64.sve.whilelo.nxv8i1.i64(i64 %b, i64 %a)
   %while.rev = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %while)
@@ -331,8 +303,7 @@ define <vscale x 8 x i1> @whilehi_h_xx(i64 %a, i64 %b) {
 define <vscale x 4 x i1> @whilehi_s_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilehi_s_ww:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilelo p0.s, w1, w0
-; CHECK-NEXT:    rev p0.s, p0.s
+; CHECK-NEXT:    whilehi p0.s, w0, w1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 4 x i1> @llvm.aarch64.sve.whilelo.nxv4i1.i32(i32 %b, i32 %a)
   %while.rev = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %while)
@@ -342,8 +313,7 @@ define <vscale x 4 x i1> @whilehi_s_ww(i32 %a, i32 %b) {
 define <vscale x 4 x i1> @whilehi_s_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilehi_s_xx:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilelo p0.s, x1, x0
-; CHECK-NEXT:    rev p0.s, p0.s
+; CHECK-NEXT:    whilehi p0.s, x0, x1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 4 x i1> @llvm.aarch64.sve.whilelo.nxv4i1.i64(i64 %b, i64 %a)
   %while.rev = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %while)
@@ -353,8 +323,7 @@ define <vscale x 4 x i1> @whilehi_s_xx(i64 %a, i64 %b) {
 define <vscale x 2 x i1> @whilehi_d_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilehi_d_ww:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilelo p0.d, w1, w0
-; CHECK-NEXT:    rev p0.d, p0.d
+; CHECK-NEXT:    whilehi p0.d, w0, w1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 2 x i1> @llvm.aarch64.sve.whilelo.nxv2i1.i32(i32 %b, i32 %a)
   %while.rev = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %while)
@@ -364,8 +333,7 @@ define <vscale x 2 x i1> @whilehi_d_ww(i32 %a, i32 %b) {
 define <vscale x 2 x i1> @whilehi_d_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilehi_d_xx:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilelo p0.d, x1, x0
-; CHECK-NEXT:    rev p0.d, p0.d
+; CHECK-NEXT:    whilehi p0.d, x0, x1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 2 x i1> @llvm.aarch64.sve.whilelo.nxv2i1.i64(i64 %b, i64 %a)
   %while.rev = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %while)
@@ -379,8 +347,7 @@ define <vscale x 2 x i1> @whilehi_d_xx(i64 %a, i64 %b) {
 define <vscale x 16 x i1> @whilele_b_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilele_b_ww:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilege p0.b, w1, w0
-; CHECK-NEXT:    rev p0.b, p0.b
+; CHECK-NEXT:    whilele p0.b, w0, w1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i32(i32 %b, i32 %a)
   %while.rev = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %while)
@@ -390,8 +357,7 @@ define <vscale x 16 x i1> @whilele_b_ww(i32 %a, i32 %b) {
 define <vscale x 16 x i1> @whilele_b_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilele_b_xx:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilege p0.b, x1, x0
-; CHECK-NEXT:    rev p0.b, p0.b
+; CHECK-NEXT:    whilele p0.b, x0, x1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i64(i64 %b, i64 %a)
   %while.rev = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %while)
@@ -401,8 +367,7 @@ define <vscale x 16 x i1> @whilele_b_xx(i64 %a, i64 %b) {
 define <vscale x 8 x i1> @whilele_h_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilele_h_ww:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilege p0.h, w1, w0
-; CHECK-NEXT:    rev p0.h, p0.h
+; CHECK-NEXT:    whilele p0.h, w0, w1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 8 x i1> @llvm.aarch64.sve.whilege.nxv8i1.i32(i32 %b, i32 %a)
   %while.rev = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %while)
@@ -412,8 +377,7 @@ define <vscale x 8 x i1> @whilele_h_ww(i32 %a, i32 %b) {
 define <vscale x 8 x i1> @whilele_h_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilele_h_xx:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilege p0.h, x1, x0
-; CHECK-NEXT:    rev p0.h, p0.h
+; CHECK-NEXT:    whilele p0.h, x0, x1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 8 x i1> @llvm.aarch64.sve.whilege.nxv8i1.i64(i64 %b, i64 %a)
   %while.rev = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %while)
@@ -423,8 +387,7 @@ define <vscale x 8 x i1> @whilele_h_xx(i64 %a, i64 %b) {
 define <vscale x 4 x i1> @whilele_s_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilele_s_ww:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilege p0.s, w1, w0
-; CHECK-NEXT:    rev p0.s, p0.s
+; CHECK-NEXT:    whilele p0.s, w0, w1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 4 x i1> @llvm.aarch64.sve.whilege.nxv4i1.i32(i32 %b, i32 %a)
   %while.rev = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %while)
@@ -434,8 +397,7 @@ define <vscale x 4 x i1> @whilele_s_ww(i32 %a, i32 %b) {
 define <vscale x 4 x i1> @whilele_s_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilele_s_xx:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilege p0.s, x1, x0
-; CHECK-NEXT:    rev p0.s, p0.s
+; CHECK-NEXT:    whilele p0.s, x0, x1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 4 x i1> @llvm.aarch64.sve.whilege.nxv4i1.i64(i64 %b, i64 %a)
   %while.rev = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %while)
@@ -445,8 +407,7 @@ define <vscale x 4 x i1> @whilele_s_xx(i64 %a, i64 %b) {
 define <vscale x 2 x i1> @whilele_d_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilele_d_ww:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilege p0.d, w1, w0
-; CHECK-NEXT:    rev p0.d, p0.d
+; CHECK-NEXT:    whilele p0.d, w0, w1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 2 x i1> @llvm.aarch64.sve.whilege.nxv2i1.i32(i32 %b, i32 %a)
   %while.rev = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %while)
@@ -456,8 +417,7 @@ define <vscale x 2 x i1> @whilele_d_ww(i32 %a, i32 %b) {
 define <vscale x 2 x i1> @whilele_d_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilele_d_xx:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilege p0.d, x1, x0
-; CHECK-NEXT:    rev p0.d, p0.d
+; CHECK-NEXT:    whilele p0.d, x0, x1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 2 x i1> @llvm.aarch64.sve.whilege.nxv2i1.i64(i64 %b, i64 %a)
   %while.rev = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %while)
@@ -471,8 +431,7 @@ define <vscale x 2 x i1> @whilele_d_xx(i64 %a, i64 %b) {
 define <vscale x 16 x i1> @whilelo_b_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilelo_b_ww:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilehi p0.b, w1, w0
-; CHECK-NEXT:    rev p0.b, p0.b
+; CHECK-NEXT:    whilelo p0.b, w0, w1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehi.nxv16i1.i32(i32 %b, i32 %a)
   %while.rev = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %while)
@@ -482,8 +441,7 @@ define <vscale x 16 x i1> @whilelo_b_ww(i32 %a, i32 %b) {
 define <vscale x 16 x i1> @whilelo_b_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilelo_b_xx:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilehi p0.b, x1, x0
-; CHECK-NEXT:    rev p0.b, p0.b
+; CHECK-NEXT:    whilelo p0.b, x0, x1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehi.nxv16i1.i64(i64 %b, i64 %a)
   %while.rev = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %while)
@@ -493,8 +451,7 @@ define <vscale x 16 x i1> @whilelo_b_xx(i64 %a, i64 %b) {
 define <vscale x 8 x i1> @whilelo_h_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilelo_h_ww:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilehi p0.h, w1, w0
-; CHECK-NEXT:    rev p0.h, p0.h
+; CHECK-NEXT:    whilelo p0.h, w0, w1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 8 x i1> @llvm.aarch64.sve.whilehi.nxv8i1.i32(i32 %b, i32 %a)
   %while.rev = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %while)
@@ -504,8 +461,7 @@ define <vscale x 8 x i1> @whilelo_h_ww(i32 %a, i32 %b) {
 define <vscale x 8 x i1> @whilelo_h_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilelo_h_xx:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilehi p0.h, x1, x0
-; CHECK-NEXT:    rev p0.h, p0.h
+; CHECK-NEXT:    whilelo p0.h, x0, x1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 8 x i1> @llvm.aarch64.sve.whilehi.nxv8i1.i64(i64 %b, i64 %a)
   %while.rev = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %while)
@@ -515,8 +471,7 @@ define <vscale x 8 x i1> @whilelo_h_xx(i64 %a, i64 %b) {
 define <vscale x 4 x i1> @whilelo_s_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilelo_s_ww:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilehi p0.s, w1, w0
-; CHECK-NEXT:    rev p0.s, p0.s
+; CHECK-NEXT:    whilelo p0.s, w0, w1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 4 x i1> @llvm.aarch64.sve.whilehi.nxv4i1.i32(i32 %b, i32 %a)
   %while.rev = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %while)
@@ -526,8 +481,7 @@ define <vscale x 4 x i1> @whilelo_s_ww(i32 %a, i32 %b) {
 define <vscale x 4 x i1> @whilelo_s_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilelo_s_xx:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilehi p0.s, x1, x0
-; CHECK-NEXT:    rev p0.s, p0.s
+; CHECK-NEXT:    whilelo p0.s, x0, x1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 4 x i1> @llvm.aarch64.sve.whilehi.nxv4i1.i64(i64 %b, i64 %a)
   %while.rev = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %while)
@@ -537,8 +491,7 @@ define <vscale x 4 x i1> @whilelo_s_xx(i64 %a, i64 %b) {
 define <vscale x 2 x i1> @whilelo_d_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilelo_d_ww:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilehi p0.d, w1, w0
-; CHECK-NEXT:    rev p0.d, p0.d
+; CHECK-NEXT:    whilelo p0.d, w0, w1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 2 x i1> @llvm.aarch64.sve.whilehi.nxv2i1.i32(i32 %b, i32 %a)
   %while.rev = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %while)
@@ -548,8 +501,7 @@ define <vscale x 2 x i1> @whilelo_d_ww(i32 %a, i32 %b) {
 define <vscale x 2 x i1> @whilelo_d_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilelo_d_xx:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilehi p0.d, x1, x0
-; CHECK-NEXT:    rev p0.d, p0.d
+; CHECK-NEXT:    whilelo p0.d, x0, x1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 2 x i1> @llvm.aarch64.sve.whilehi.nxv2i1.i64(i64 %b, i64 %a)
   %while.rev = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %while)
@@ -563,8 +515,7 @@ define <vscale x 2 x i1> @whilelo_d_xx(i64 %a, i64 %b) {
 define <vscale x 16 x i1> @whilels_b_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilels_b_ww:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilehs p0.b, w1, w0
-; CHECK-NEXT:    rev p0.b, p0.b
+; CHECK-NEXT:    whilels p0.b, w0, w1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehs.nxv16i1.i32(i32 %b, i32 %a)
   %while.rev = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %while)
@@ -574,8 +525,7 @@ define <vscale x 16 x i1> @whilels_b_ww(i32 %a, i32 %b) {
 define <vscale x 16 x i1> @whilels_b_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilels_b_xx:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilehs p0.b, x1, x0
-; CHECK-NEXT:    rev p0.b, p0.b
+; CHECK-NEXT:    whilels p0.b, x0, x1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehs.nxv16i1.i64(i64 %b, i64 %a)
   %while.rev = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %while)
@@ -585,8 +535,7 @@ define <vscale x 16 x i1> @whilels_b_xx(i64 %a, i64 %b) {
 define <vscale x 8 x i1> @whilels_h_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilels_h_ww:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilehs p0.h, w1, w0
-; CHECK-NEXT:    rev p0.h, p0.h
+; CHECK-NEXT:    whilels p0.h, w0, w1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 8 x i1> @llvm.aarch64.sve.whilehs.nxv8i1.i32(i32 %b, i32 %a)
   %while.rev = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %while)
@@ -596,8 +545,7 @@ define <vscale x 8 x i1> @whilels_h_ww(i32 %a, i32 %b) {
 define <vscale x 8 x i1> @whilels_h_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilels_h_xx:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilehs p0.h, x1, x0
-; CHECK-NEXT:    rev p0.h, p0.h
+; CHECK-NEXT:    whilels p0.h, x0, x1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 8 x i1> @llvm.aarch64.sve.whilehs.nxv8i1.i64(i64 %b, i64 %a)
   %while.rev = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %while)
@@ -607,8 +555,7 @@ define <vscale x 8 x i1> @whilels_h_xx(i64 %a, i64 %b) {
 define <vscale x 4 x i1> @whilels_s_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilels_s_ww:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilehs p0.s, w1, w0
-; CHECK-NEXT:    rev p0.s, p0.s
+; CHECK-NEXT:    whilels p0.s, w0, w1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 4 x i1> @llvm.aarch64.sve.whilehs.nxv4i1.i32(i32 %b, i32 %a)
   %while.rev = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %while)
@@ -618,8 +565,7 @@ define <vscale x 4 x i1> @whilels_s_ww(i32 %a, i32 %b) {
 define <vscale x 4 x i1> @whilels_s_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilels_s_xx:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilehs p0.s, x1, x0
-; CHECK-NEXT:    rev p0.s, p0.s
+; CHECK-NEXT:    whilels p0.s, x0, x1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 4 x i1> @llvm.aarch64.sve.whilehs.nxv4i1.i64(i64 %b, i64 %a)
   %while.rev = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %while)
@@ -629,8 +575,7 @@ define <vscale x 4 x i1> @whilels_s_xx(i64 %a, i64 %b) {
 define <vscale x 2 x i1> @whilels_d_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilels_d_ww:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilehs p0.d, w1, w0
-; CHECK-NEXT:    rev p0.d, p0.d
+; CHECK-NEXT:    whilels p0.d, w0, w1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 2 x i1> @llvm.aarch64.sve.whilehs.nxv2i1.i32(i32 %b, i32 %a)
   %while.rev = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %while)
@@ -640,8 +585,7 @@ define <vscale x 2 x i1> @whilels_d_ww(i32 %a, i32 %b) {
 define <vscale x 2 x i1> @whilels_d_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilels_d_xx:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilehs p0.d, x1, x0
-; CHECK-NEXT:    rev p0.d, p0.d
+; CHECK-NEXT:    whilels p0.d, x0, x1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 2 x i1> @llvm.aarch64.sve.whilehs.nxv2i1.i64(i64 %b, i64 %a)
   %while.rev = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %while)
@@ -655,8 +599,7 @@ define <vscale x 2 x i1> @whilels_d_xx(i64 %a, i64 %b) {
 define <vscale x 16 x i1> @whilelt_b_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilelt_b_ww:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilegt p0.b, w1, w0
-; CHECK-NEXT:    rev p0.b, p0.b
+; CHECK-NEXT:    whilelt p0.b, w0, w1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 16 x i1> @llvm.aarch64.sve.whilegt.nxv16i1.i32(i32 %b, i32 %a)
   %while.rev = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %while)
@@ -666,8 +609,7 @@ define <vscale x 16 x i1> @whilelt_b_ww(i32 %a, i32 %b) {
 define <vscale x 16 x i1> @whilelt_b_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilelt_b_xx:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilegt p0.b, x1, x0
-; CHECK-NEXT:    rev p0.b, p0.b
+; CHECK-NEXT:    whilelt p0.b, x0, x1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 16 x i1> @llvm.aarch64.sve.whilegt.nxv16i1.i64(i64 %b, i64 %a)
   %while.rev = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %while)
@@ -677,8 +619,7 @@ define <vscale x 16 x i1> @whilelt_b_xx(i64 %a, i64 %b) {
 define <vscale x 8 x i1> @whilelt_h_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilelt_h_ww:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilegt p0.h, w1, w0
-; CHECK-NEXT:    rev p0.h, p0.h
+; CHECK-NEXT:    whilelt p0.h, w0, w1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 8 x i1> @llvm.aarch64.sve.whilegt.nxv8i1.i32(i32 %b, i32 %a)
   %while.rev = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %while)
@@ -688,8 +629,7 @@ define <vscale x 8 x i1> @whilelt_h_ww(i32 %a, i32 %b) {
 define <vscale x 8 x i1> @whilelt_h_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilelt_h_xx:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilegt p0.h, x1, x0
-; CHECK-NEXT:    rev p0.h, p0.h
+; CHECK-NEXT:    whilelt p0.h, x0, x1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 8 x i1> @llvm.aarch64.sve.whilegt.nxv8i1.i64(i64 %b, i64 %a)
   %while.rev = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %while)
@@ -699,8 +639,7 @@ define <vscale x 8 x i1> @whilelt_h_xx(i64 %a, i64 %b) {
 define <vscale x 4 x i1> @whilelt_s_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilelt_s_ww:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilegt p0.s, w1, w0
-; CHECK-NEXT:    rev p0.s, p0.s
+; CHECK-NEXT:    whilelt p0.s, w0, w1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 4 x i1> @llvm.aarch64.sve.whilegt.nxv4i1.i32(i32 %b, i32 %a)
   %while.rev = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %while)
@@ -710,8 +649,7 @@ define <vscale x 4 x i1> @whilelt_s_ww(i32 %a, i32 %b) {
 define <vscale x 4 x i1> @whilelt_s_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilelt_s_xx:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilegt p0.s, x1, x0
-; CHECK-NEXT:    rev p0.s, p0.s
+; CHECK-NEXT:    whilelt p0.s, x0, x1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 4 x i1> @llvm.aarch64.sve.whilegt.nxv4i1.i64(i64 %b, i64 %a)
   %while.rev = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %while)
@@ -721,8 +659,7 @@ define <vscale x 4 x i1> @whilelt_s_xx(i64 %a, i64 %b) {
 define <vscale x 2 x i1> @whilelt_d_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilelt_d_ww:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilegt p0.d, w1, w0
-; CHECK-NEXT:    rev p0.d, p0.d
+; CHECK-NEXT:    whilelt p0.d, w0, w1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 2 x i1> @llvm.aarch64.sve.whilegt.nxv2i1.i32(i32 %b, i32 %a)
   %while.rev = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %while)
@@ -732,8 +669,7 @@ define <vscale x 2 x i1> @whilelt_d_ww(i32 %a, i32 %b) {
 define <vscale x 2 x i1> @whilelt_d_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilelt_d_xx:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilegt p0.d, x1, x0
-; CHECK-NEXT:    rev p0.d, p0.d
+; CHECK-NEXT:    whilelt p0.d, x0, x1
 ; CHECK-NEXT:    ret
   %while = call <vscale x 2 x i1> @llvm.aarch64.sve.whilegt.nxv2i1.i64(i64 %b, i64 %a)
   %while.rev = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %while)

>From 8af2994ca59e33d62247ae030ff4687d5c8fe2fc Mon Sep 17 00:00:00 2001
From: Paul Walker <paul.walker at arm.com>
Date: Wed, 17 Apr 2024 10:38:36 +0000
Subject: [PATCH 3/3] Prevent unsafe transformation for the equality variants.

---
 .../lib/Target/AArch64/AArch64SVEInstrInfo.td |  16 +--
 .../AArch64/sve2-intrinsics-while-reversed.ll | 128 +++++++++++++-----
 2 files changed, 104 insertions(+), 40 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index b37d926ab1816b..826dc092472b0a 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -2038,14 +2038,14 @@ let Predicates = [HasSVEorSME] in {
   defm FCMNE_PPzZ0 : sve_fp_2op_p_pd<0b110, "fcmne", SETUNE, SETNE, SETUNE, SETNE>;
 
   defm WHILELT_PWW : sve_int_while4_rr<0b010, "whilelt", int_aarch64_sve_whilelt, int_aarch64_sve_whilegt>;
-  defm WHILELE_PWW : sve_int_while4_rr<0b011, "whilele", int_aarch64_sve_whilele, int_aarch64_sve_whilege>;
+  defm WHILELE_PWW : sve_int_while4_rr<0b011, "whilele", int_aarch64_sve_whilele, null_frag>;
   defm WHILELO_PWW : sve_int_while4_rr<0b110, "whilelo", int_aarch64_sve_whilelo, int_aarch64_sve_whilehi>;
-  defm WHILELS_PWW : sve_int_while4_rr<0b111, "whilels", int_aarch64_sve_whilels, int_aarch64_sve_whilehs>;
+  defm WHILELS_PWW : sve_int_while4_rr<0b111, "whilels", int_aarch64_sve_whilels, null_frag>;
 
   defm WHILELT_PXX : sve_int_while8_rr<0b010, "whilelt", int_aarch64_sve_whilelt, int_aarch64_sve_whilegt>;
-  defm WHILELE_PXX : sve_int_while8_rr<0b011, "whilele", int_aarch64_sve_whilele, int_aarch64_sve_whilege>;
+  defm WHILELE_PXX : sve_int_while8_rr<0b011, "whilele", int_aarch64_sve_whilele, null_frag>;
   defm WHILELO_PXX : sve_int_while8_rr<0b110, "whilelo", int_aarch64_sve_whilelo, int_aarch64_sve_whilehi>;
-  defm WHILELS_PXX : sve_int_while8_rr<0b111, "whilels", int_aarch64_sve_whilels, int_aarch64_sve_whilehs>;
+  defm WHILELS_PXX : sve_int_while8_rr<0b111, "whilels", int_aarch64_sve_whilels, null_frag>;
 
   def CTERMEQ_WW : sve_int_cterm<0b0, 0b0, "ctermeq", GPR32>;
   def CTERMNE_WW : sve_int_cterm<0b0, 0b1, "ctermne", GPR32>;
@@ -3792,14 +3792,14 @@ let Predicates = [HasSVE2orSME] in {
   defm TBX_ZZZ  : sve2_int_perm_tbx<"tbx", 0b01, int_aarch64_sve_tbx>;
 
   // SVE2 integer compare scalar count and limit
-  defm WHILEGE_PWW : sve_int_while4_rr<0b000, "whilege", int_aarch64_sve_whilege, int_aarch64_sve_whilele>;
+  defm WHILEGE_PWW : sve_int_while4_rr<0b000, "whilege", int_aarch64_sve_whilege, null_frag>;
   defm WHILEGT_PWW : sve_int_while4_rr<0b001, "whilegt", int_aarch64_sve_whilegt, int_aarch64_sve_whilelt>;
-  defm WHILEHS_PWW : sve_int_while4_rr<0b100, "whilehs", int_aarch64_sve_whilehs, int_aarch64_sve_whilels>;
+  defm WHILEHS_PWW : sve_int_while4_rr<0b100, "whilehs", int_aarch64_sve_whilehs, null_frag>;
   defm WHILEHI_PWW : sve_int_while4_rr<0b101, "whilehi", int_aarch64_sve_whilehi, int_aarch64_sve_whilelo>;
 
-  defm WHILEGE_PXX : sve_int_while8_rr<0b000, "whilege", int_aarch64_sve_whilege, int_aarch64_sve_whilele>;
+  defm WHILEGE_PXX : sve_int_while8_rr<0b000, "whilege", int_aarch64_sve_whilege, null_frag>;
   defm WHILEGT_PXX : sve_int_while8_rr<0b001, "whilegt", int_aarch64_sve_whilegt, int_aarch64_sve_whilelt>;
-  defm WHILEHS_PXX : sve_int_while8_rr<0b100, "whilehs", int_aarch64_sve_whilehs, int_aarch64_sve_whilels>;
+  defm WHILEHS_PXX : sve_int_while8_rr<0b100, "whilehs", int_aarch64_sve_whilehs, null_frag>;
   defm WHILEHI_PXX : sve_int_while8_rr<0b101, "whilehi", int_aarch64_sve_whilehi, int_aarch64_sve_whilelo>;
 
   // SVE2 pointer conflict compare
diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-while-reversed.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-while-reversed.ll
index b31922b8bc30ad..cb74cd8032abea 100644
--- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-while-reversed.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-while-reversed.ll
@@ -8,80 +8,96 @@ target triple = "aarch64-unknown-linux-gnu"
 ; WHILEGE
 ;
 
+; Cannot invert the while predicate when %a is MAX_INT.
 define <vscale x 16 x i1> @whilege_b_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilege_b_ww:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilege p0.b, w0, w1
+; CHECK-NEXT:    whilele p0.b, w1, w0
+; CHECK-NEXT:    rev p0.b, p0.b
 ; CHECK-NEXT:    ret
   %while = call <vscale x 16 x i1> @llvm.aarch64.sve.whilele.nxv16i1.i32(i32 %b, i32 %a)
   %while.rev = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %while)
   ret <vscale x 16 x i1> %while.rev
 }
 
+; Cannot invert the while predicate when %a is MAX_INT.
 define <vscale x 16 x i1> @whilege_b_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilege_b_xx:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilege p0.b, x0, x1
+; CHECK-NEXT:    whilele p0.b, x1, x0
+; CHECK-NEXT:    rev p0.b, p0.b
 ; CHECK-NEXT:    ret
   %while = call <vscale x 16 x i1> @llvm.aarch64.sve.whilele.nxv16i1.i64(i64 %b, i64 %a)
   %while.rev = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %while)
   ret <vscale x 16 x i1> %while.rev
 }
 
+; Cannot invert the while predicate when %a is MAX_INT.
 define <vscale x 8 x i1> @whilege_h_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilege_h_ww:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilege p0.h, w0, w1
+; CHECK-NEXT:    whilele p0.h, w1, w0
+; CHECK-NEXT:    rev p0.h, p0.h
 ; CHECK-NEXT:    ret
   %while = call <vscale x 8 x i1> @llvm.aarch64.sve.whilele.nxv8i1.i32(i32 %b, i32 %a)
   %while.rev = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %while)
   ret <vscale x 8 x i1> %while.rev
 }
 
+; Cannot invert the while predicate when %a is MAX_INT.
 define <vscale x 8 x i1> @whilege_h_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilege_h_xx:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilege p0.h, x0, x1
+; CHECK-NEXT:    whilele p0.h, x1, x0
+; CHECK-NEXT:    rev p0.h, p0.h
 ; CHECK-NEXT:    ret
   %while = call <vscale x 8 x i1> @llvm.aarch64.sve.whilele.nxv8i1.i64(i64 %b, i64 %a)
   %while.rev = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %while)
   ret <vscale x 8 x i1> %while.rev
 }
 
+; Cannot invert the while predicate when %a is MAX_INT.
 define <vscale x 4 x i1> @whilege_s_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilege_s_ww:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilege p0.s, w0, w1
+; CHECK-NEXT:    whilele p0.s, w1, w0
+; CHECK-NEXT:    rev p0.s, p0.s
 ; CHECK-NEXT:    ret
   %while = call <vscale x 4 x i1> @llvm.aarch64.sve.whilele.nxv4i1.i32(i32 %b, i32 %a)
   %while.rev = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %while)
   ret <vscale x 4 x i1> %while.rev
 }
 
+; Cannot invert the while predicate when %a is MAX_INT.
 define <vscale x 4 x i1> @whilege_s_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilege_s_xx:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilege p0.s, x0, x1
+; CHECK-NEXT:    whilele p0.s, x1, x0
+; CHECK-NEXT:    rev p0.s, p0.s
 ; CHECK-NEXT:    ret
   %while = call <vscale x 4 x i1> @llvm.aarch64.sve.whilele.nxv4i1.i64(i64 %b, i64 %a)
   %while.rev = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %while)
   ret <vscale x 4 x i1> %while.rev
 }
 
+; Cannot invert the while predicate when %a is MAX_INT.
 define <vscale x 2 x i1> @whilege_d_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilege_d_ww:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilege p0.d, w0, w1
+; CHECK-NEXT:    whilele p0.d, w1, w0
+; CHECK-NEXT:    rev p0.d, p0.d
 ; CHECK-NEXT:    ret
   %while = call <vscale x 2 x i1> @llvm.aarch64.sve.whilele.nxv2i1.i32(i32 %b, i32 %a)
   %while.rev = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %while)
   ret <vscale x 2 x i1> %while.rev
 }
 
+; Cannot invert the while predicate when %a is MAX_INT.
 define <vscale x 2 x i1> @whilege_d_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilege_d_xx:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilege p0.d, x0, x1
+; CHECK-NEXT:    whilele p0.d, x1, x0
+; CHECK-NEXT:    rev p0.d, p0.d
 ; CHECK-NEXT:    ret
   %while = call <vscale x 2 x i1> @llvm.aarch64.sve.whilele.nxv2i1.i64(i64 %b, i64 %a)
   %while.rev = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %while)
@@ -92,80 +108,96 @@ define <vscale x 2 x i1> @whilege_d_xx(i64 %a, i64 %b) {
 ; WHILEHS
 ;
 
+; Cannot invert the while predicate when %a is MAX_UINT.
 define <vscale x 16 x i1> @whilehs_b_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilehs_b_ww:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilehs p0.b, w0, w1
+; CHECK-NEXT:    whilels p0.b, w1, w0
+; CHECK-NEXT:    rev p0.b, p0.b
 ; CHECK-NEXT:    ret
   %while = call <vscale x 16 x i1> @llvm.aarch64.sve.whilels.nxv16i1.i32(i32 %b, i32 %a)
   %while.rev = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %while)
   ret <vscale x 16 x i1> %while.rev
 }
 
+; Cannot invert the while predicate when %a is MAX_UINT.
 define <vscale x 16 x i1> @whilehs_b_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilehs_b_xx:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilehs p0.b, x0, x1
+; CHECK-NEXT:    whilels p0.b, x1, x0
+; CHECK-NEXT:    rev p0.b, p0.b
 ; CHECK-NEXT:    ret
   %while = call <vscale x 16 x i1> @llvm.aarch64.sve.whilels.nxv16i1.i64(i64 %b, i64 %a)
   %while.rev = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %while)
   ret <vscale x 16 x i1> %while.rev
 }
 
+; Cannot invert the while predicate when %a is MAX_UINT.
 define <vscale x 8 x i1> @whilehs_h_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilehs_h_ww:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilehs p0.h, w0, w1
+; CHECK-NEXT:    whilels p0.h, w1, w0
+; CHECK-NEXT:    rev p0.h, p0.h
 ; CHECK-NEXT:    ret
   %while = call <vscale x 8 x i1> @llvm.aarch64.sve.whilels.nxv8i1.i32(i32 %b, i32 %a)
   %while.rev = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %while)
   ret <vscale x 8 x i1> %while.rev
 }
 
+; Cannot invert the while predicate when %a is MAX_UINT.
 define <vscale x 8 x i1> @whilehs_h_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilehs_h_xx:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilehs p0.h, x0, x1
+; CHECK-NEXT:    whilels p0.h, x1, x0
+; CHECK-NEXT:    rev p0.h, p0.h
 ; CHECK-NEXT:    ret
   %while = call <vscale x 8 x i1> @llvm.aarch64.sve.whilels.nxv8i1.i64(i64 %b, i64 %a)
   %while.rev = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %while)
   ret <vscale x 8 x i1> %while.rev
 }
 
+; Cannot invert the while predicate when %a is MAX_UINT.
 define <vscale x 4 x i1> @whilehs_s_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilehs_s_ww:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilehs p0.s, w0, w1
+; CHECK-NEXT:    whilels p0.s, w1, w0
+; CHECK-NEXT:    rev p0.s, p0.s
 ; CHECK-NEXT:    ret
   %while = call <vscale x 4 x i1> @llvm.aarch64.sve.whilels.nxv4i1.i32(i32 %b, i32 %a)
   %while.rev = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %while)
   ret <vscale x 4 x i1> %while.rev
 }
 
+; Cannot invert the while predicate when %a is MAX_UINT.
 define <vscale x 4 x i1> @whilehs_s_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilehs_s_xx:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilehs p0.s, x0, x1
+; CHECK-NEXT:    whilels p0.s, x1, x0
+; CHECK-NEXT:    rev p0.s, p0.s
 ; CHECK-NEXT:    ret
   %while = call <vscale x 4 x i1> @llvm.aarch64.sve.whilels.nxv4i1.i64(i64 %b, i64 %a)
   %while.rev = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %while)
   ret <vscale x 4 x i1> %while.rev
 }
 
+; Cannot invert the while predicate when %a is MAX_UINT.
 define <vscale x 2 x i1> @whilehs_d_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilehs_d_ww:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilehs p0.d, w0, w1
+; CHECK-NEXT:    whilels p0.d, w1, w0
+; CHECK-NEXT:    rev p0.d, p0.d
 ; CHECK-NEXT:    ret
   %while = call <vscale x 2 x i1> @llvm.aarch64.sve.whilels.nxv2i1.i32(i32 %b, i32 %a)
   %while.rev = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %while)
   ret <vscale x 2 x i1> %while.rev
 }
 
+; Cannot invert the while predicate when %a is MAX_UINT.
 define <vscale x 2 x i1> @whilehs_d_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilehs_d_xx:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilehs p0.d, x0, x1
+; CHECK-NEXT:    whilels p0.d, x1, x0
+; CHECK-NEXT:    rev p0.d, p0.d
 ; CHECK-NEXT:    ret
   %while = call <vscale x 2 x i1> @llvm.aarch64.sve.whilels.nxv2i1.i64(i64 %b, i64 %a)
   %while.rev = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %while)
@@ -344,80 +376,96 @@ define <vscale x 2 x i1> @whilehi_d_xx(i64 %a, i64 %b) {
 ; WHILELE
 ;
 
+; Cannot invert the while predicate when %a is MIN_INT.
 define <vscale x 16 x i1> @whilele_b_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilele_b_ww:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilele p0.b, w0, w1
+; CHECK-NEXT:    whilege p0.b, w1, w0
+; CHECK-NEXT:    rev p0.b, p0.b
 ; CHECK-NEXT:    ret
   %while = call <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i32(i32 %b, i32 %a)
   %while.rev = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %while)
   ret <vscale x 16 x i1> %while.rev
 }
 
+; Cannot invert the while predicate when %a is MIN_INT.
 define <vscale x 16 x i1> @whilele_b_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilele_b_xx:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilele p0.b, x0, x1
+; CHECK-NEXT:    whilege p0.b, x1, x0
+; CHECK-NEXT:    rev p0.b, p0.b
 ; CHECK-NEXT:    ret
   %while = call <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i64(i64 %b, i64 %a)
   %while.rev = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %while)
   ret <vscale x 16 x i1> %while.rev
 }
 
+; Cannot invert the while predicate when %a is MIN_INT.
 define <vscale x 8 x i1> @whilele_h_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilele_h_ww:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilele p0.h, w0, w1
+; CHECK-NEXT:    whilege p0.h, w1, w0
+; CHECK-NEXT:    rev p0.h, p0.h
 ; CHECK-NEXT:    ret
   %while = call <vscale x 8 x i1> @llvm.aarch64.sve.whilege.nxv8i1.i32(i32 %b, i32 %a)
   %while.rev = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %while)
   ret <vscale x 8 x i1> %while.rev
 }
 
+; Cannot invert the while predicate when %a is MIN_INT.
 define <vscale x 8 x i1> @whilele_h_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilele_h_xx:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilele p0.h, x0, x1
+; CHECK-NEXT:    whilege p0.h, x1, x0
+; CHECK-NEXT:    rev p0.h, p0.h
 ; CHECK-NEXT:    ret
   %while = call <vscale x 8 x i1> @llvm.aarch64.sve.whilege.nxv8i1.i64(i64 %b, i64 %a)
   %while.rev = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %while)
   ret <vscale x 8 x i1> %while.rev
 }
 
+; Cannot invert the while predicate when %a is MIN_INT.
 define <vscale x 4 x i1> @whilele_s_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilele_s_ww:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilele p0.s, w0, w1
+; CHECK-NEXT:    whilege p0.s, w1, w0
+; CHECK-NEXT:    rev p0.s, p0.s
 ; CHECK-NEXT:    ret
   %while = call <vscale x 4 x i1> @llvm.aarch64.sve.whilege.nxv4i1.i32(i32 %b, i32 %a)
   %while.rev = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %while)
   ret <vscale x 4 x i1> %while.rev
 }
 
+; Cannot invert the while predicate when %a is MIN_INT.
 define <vscale x 4 x i1> @whilele_s_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilele_s_xx:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilele p0.s, x0, x1
+; CHECK-NEXT:    whilege p0.s, x1, x0
+; CHECK-NEXT:    rev p0.s, p0.s
 ; CHECK-NEXT:    ret
   %while = call <vscale x 4 x i1> @llvm.aarch64.sve.whilege.nxv4i1.i64(i64 %b, i64 %a)
   %while.rev = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %while)
   ret <vscale x 4 x i1> %while.rev
 }
 
+; Cannot invert the while predicate when %a is MIN_INT.
 define <vscale x 2 x i1> @whilele_d_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilele_d_ww:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilele p0.d, w0, w1
+; CHECK-NEXT:    whilege p0.d, w1, w0
+; CHECK-NEXT:    rev p0.d, p0.d
 ; CHECK-NEXT:    ret
   %while = call <vscale x 2 x i1> @llvm.aarch64.sve.whilege.nxv2i1.i32(i32 %b, i32 %a)
   %while.rev = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %while)
   ret <vscale x 2 x i1> %while.rev
 }
 
+; Cannot invert the while predicate when %a is MIN_INT.
 define <vscale x 2 x i1> @whilele_d_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilele_d_xx:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilele p0.d, x0, x1
+; CHECK-NEXT:    whilege p0.d, x1, x0
+; CHECK-NEXT:    rev p0.d, p0.d
 ; CHECK-NEXT:    ret
   %while = call <vscale x 2 x i1> @llvm.aarch64.sve.whilege.nxv2i1.i64(i64 %b, i64 %a)
   %while.rev = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %while)
@@ -512,80 +560,96 @@ define <vscale x 2 x i1> @whilelo_d_xx(i64 %a, i64 %b) {
 ; WHILELS
 ;
 
+; Cannot invert the while predicate when %a is MIN_UINT.
 define <vscale x 16 x i1> @whilels_b_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilels_b_ww:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilels p0.b, w0, w1
+; CHECK-NEXT:    whilehs p0.b, w1, w0
+; CHECK-NEXT:    rev p0.b, p0.b
 ; CHECK-NEXT:    ret
   %while = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehs.nxv16i1.i32(i32 %b, i32 %a)
   %while.rev = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %while)
   ret <vscale x 16 x i1> %while.rev
 }
 
+; Cannot invert the while predicate when %a is MIN_UINT.
 define <vscale x 16 x i1> @whilels_b_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilels_b_xx:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilels p0.b, x0, x1
+; CHECK-NEXT:    whilehs p0.b, x1, x0
+; CHECK-NEXT:    rev p0.b, p0.b
 ; CHECK-NEXT:    ret
   %while = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehs.nxv16i1.i64(i64 %b, i64 %a)
   %while.rev = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %while)
   ret <vscale x 16 x i1> %while.rev
 }
 
+; Cannot invert the while predicate when %a is MIN_UINT.
 define <vscale x 8 x i1> @whilels_h_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilels_h_ww:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilels p0.h, w0, w1
+; CHECK-NEXT:    whilehs p0.h, w1, w0
+; CHECK-NEXT:    rev p0.h, p0.h
 ; CHECK-NEXT:    ret
   %while = call <vscale x 8 x i1> @llvm.aarch64.sve.whilehs.nxv8i1.i32(i32 %b, i32 %a)
   %while.rev = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %while)
   ret <vscale x 8 x i1> %while.rev
 }
 
+; Cannot invert the while predicate when %a is MIN_UINT.
 define <vscale x 8 x i1> @whilels_h_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilels_h_xx:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilels p0.h, x0, x1
+; CHECK-NEXT:    whilehs p0.h, x1, x0
+; CHECK-NEXT:    rev p0.h, p0.h
 ; CHECK-NEXT:    ret
   %while = call <vscale x 8 x i1> @llvm.aarch64.sve.whilehs.nxv8i1.i64(i64 %b, i64 %a)
   %while.rev = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %while)
   ret <vscale x 8 x i1> %while.rev
 }
 
+; Cannot invert the while predicate when %a is MIN_UINT.
 define <vscale x 4 x i1> @whilels_s_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilels_s_ww:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilels p0.s, w0, w1
+; CHECK-NEXT:    whilehs p0.s, w1, w0
+; CHECK-NEXT:    rev p0.s, p0.s
 ; CHECK-NEXT:    ret
   %while = call <vscale x 4 x i1> @llvm.aarch64.sve.whilehs.nxv4i1.i32(i32 %b, i32 %a)
   %while.rev = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %while)
   ret <vscale x 4 x i1> %while.rev
 }
 
+; Cannot invert the while predicate when %a is MIN_UINT.
 define <vscale x 4 x i1> @whilels_s_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilels_s_xx:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilels p0.s, x0, x1
+; CHECK-NEXT:    whilehs p0.s, x1, x0
+; CHECK-NEXT:    rev p0.s, p0.s
 ; CHECK-NEXT:    ret
   %while = call <vscale x 4 x i1> @llvm.aarch64.sve.whilehs.nxv4i1.i64(i64 %b, i64 %a)
   %while.rev = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %while)
   ret <vscale x 4 x i1> %while.rev
 }
 
+; Cannot invert the while predicate when %a is MIN_UINT.
 define <vscale x 2 x i1> @whilels_d_ww(i32 %a, i32 %b) {
 ; CHECK-LABEL: whilels_d_ww:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilels p0.d, w0, w1
+; CHECK-NEXT:    whilehs p0.d, w1, w0
+; CHECK-NEXT:    rev p0.d, p0.d
 ; CHECK-NEXT:    ret
   %while = call <vscale x 2 x i1> @llvm.aarch64.sve.whilehs.nxv2i1.i32(i32 %b, i32 %a)
   %while.rev = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %while)
   ret <vscale x 2 x i1> %while.rev
 }
 
+; Cannot invert the while predicate when %a is MIN_UINT.
 define <vscale x 2 x i1> @whilels_d_xx(i64 %a, i64 %b) {
 ; CHECK-LABEL: whilels_d_xx:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    whilels p0.d, x0, x1
+; CHECK-NEXT:    whilehs p0.d, x1, x0
+; CHECK-NEXT:    rev p0.d, p0.d
 ; CHECK-NEXT:    ret
   %while = call <vscale x 2 x i1> @llvm.aarch64.sve.whilehs.nxv2i1.i64(i64 %b, i64 %a)
   %while.rev = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %while)