[clang] [llvm] [AArch64][SVE] Lower svrev_* to llvm.vector.reverse (PR #116422)

Jorge Botto via cfe-commits cfe-commits at lists.llvm.org
Sun Feb 9 02:55:25 PST 2025


https://github.com/jf-botto updated https://github.com/llvm/llvm-project/pull/116422

>From 75cc7d90fa8a7f0cde0df969577556ac1098256b Mon Sep 17 00:00:00 2001
From: Jorge Botto <jorge.botto.16 at ucl.ac.uk>
Date: Fri, 15 Nov 2024 18:56:54 +0000
Subject: [PATCH 1/4] Making Clang emit llvm.vector.reverse instead of
 llvm.aarch64.sve.rev

---
 clang/include/clang/Basic/arm_sve.td          |  2 +-
 .../AArch64/sve-intrinsics/acle_sve_rev.c     | 44 +++++++++----------
 2 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index b20383e72e66a37..c954a6582171728 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -1060,7 +1060,7 @@ let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in {
 def SVEXT        : SInst<"svext[_{d}]",       "dddi", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_ext", [VerifyRuntimeMode], [ImmCheck<2, ImmCheckExtract, 1>]>;
 defm SVLASTA     : SVEPerm<"svlasta[_{d}]",   "sPd",  "aarch64_sve_lasta">;
 defm SVLASTB     : SVEPerm<"svlastb[_{d}]",   "sPd",  "aarch64_sve_lastb">;
-def SVREV        : SInst<"svrev[_{d}]",       "dd",   "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_rev", [VerifyRuntimeMode]>;
+def SVREV        : SInst<"svrev[_{d}]",       "dd",   "csilUcUsUiUlhfd", MergeNone, "vector_reverse", [VerifyRuntimeMode]>;
 def SVSEL        : SInst<"svsel[_{d}]",       "dPdd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_sel", [VerifyRuntimeMode]>;
 def SVSPLICE     : SInst<"svsplice[_{d}]",    "dPdd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_splice", [VerifyRuntimeMode]>;
 def SVTBL        : SInst<"svtbl[_{d}]",       "ddu",  "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_tbl", [VerifyRuntimeMode]>;
diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rev.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rev.c
index 3c0ae7df79644fa..835d1c616aebcb0 100644
--- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rev.c
+++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rev.c
@@ -24,12 +24,12 @@
 
 // CHECK-LABEL: @test_svrev_s8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.rev.nxv16i8(<vscale x 16 x i8> [[OP:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.reverse.nxv16i8(<vscale x 16 x i8> [[OP:%.*]])
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z13test_svrev_s8u10__SVInt8_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.rev.nxv16i8(<vscale x 16 x i8> [[OP:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.reverse.nxv16i8(<vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
 svint8_t test_svrev_s8(svint8_t op) MODE_ATTR
@@ -39,12 +39,12 @@ svint8_t test_svrev_s8(svint8_t op) MODE_ATTR
 
 // CHECK-LABEL: @test_svrev_s16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.rev.nxv8i16(<vscale x 8 x i16> [[OP:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.reverse.nxv8i16(<vscale x 8 x i16> [[OP:%.*]])
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z14test_svrev_s16u11__SVInt16_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.rev.nxv8i16(<vscale x 8 x i16> [[OP:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.reverse.nxv8i16(<vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
 svint16_t test_svrev_s16(svint16_t op) MODE_ATTR
@@ -54,12 +54,12 @@ svint16_t test_svrev_s16(svint16_t op) MODE_ATTR
 
 // CHECK-LABEL: @test_svrev_s32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.rev.nxv4i32(<vscale x 4 x i32> [[OP:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[OP:%.*]])
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z14test_svrev_s32u11__SVInt32_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.rev.nxv4i32(<vscale x 4 x i32> [[OP:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 svint32_t test_svrev_s32(svint32_t op) MODE_ATTR
@@ -69,12 +69,12 @@ svint32_t test_svrev_s32(svint32_t op) MODE_ATTR
 
 // CHECK-LABEL: @test_svrev_s64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.rev.nxv2i64(<vscale x 2 x i64> [[OP:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.reverse.nxv2i64(<vscale x 2 x i64> [[OP:%.*]])
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z14test_svrev_s64u11__SVInt64_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.rev.nxv2i64(<vscale x 2 x i64> [[OP:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.reverse.nxv2i64(<vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 svint64_t test_svrev_s64(svint64_t op) MODE_ATTR
@@ -84,12 +84,12 @@ svint64_t test_svrev_s64(svint64_t op) MODE_ATTR
 
 // CHECK-LABEL: @test_svrev_u8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.rev.nxv16i8(<vscale x 16 x i8> [[OP:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.reverse.nxv16i8(<vscale x 16 x i8> [[OP:%.*]])
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z13test_svrev_u8u11__SVUint8_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.rev.nxv16i8(<vscale x 16 x i8> [[OP:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.reverse.nxv16i8(<vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
 svuint8_t test_svrev_u8(svuint8_t op) MODE_ATTR
@@ -99,12 +99,12 @@ svuint8_t test_svrev_u8(svuint8_t op) MODE_ATTR
 
 // CHECK-LABEL: @test_svrev_u16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.rev.nxv8i16(<vscale x 8 x i16> [[OP:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.reverse.nxv8i16(<vscale x 8 x i16> [[OP:%.*]])
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z14test_svrev_u16u12__SVUint16_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.rev.nxv8i16(<vscale x 8 x i16> [[OP:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.reverse.nxv8i16(<vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
 svuint16_t test_svrev_u16(svuint16_t op) MODE_ATTR
@@ -114,12 +114,12 @@ svuint16_t test_svrev_u16(svuint16_t op) MODE_ATTR
 
 // CHECK-LABEL: @test_svrev_u32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.rev.nxv4i32(<vscale x 4 x i32> [[OP:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[OP:%.*]])
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z14test_svrev_u32u12__SVUint32_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.rev.nxv4i32(<vscale x 4 x i32> [[OP:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
 svuint32_t test_svrev_u32(svuint32_t op) MODE_ATTR
@@ -129,12 +129,12 @@ svuint32_t test_svrev_u32(svuint32_t op) MODE_ATTR
 
 // CHECK-LABEL: @test_svrev_u64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.rev.nxv2i64(<vscale x 2 x i64> [[OP:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.reverse.nxv2i64(<vscale x 2 x i64> [[OP:%.*]])
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z14test_svrev_u64u12__SVUint64_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.rev.nxv2i64(<vscale x 2 x i64> [[OP:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.reverse.nxv2i64(<vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
 svuint64_t test_svrev_u64(svuint64_t op) MODE_ATTR
@@ -144,12 +144,12 @@ svuint64_t test_svrev_u64(svuint64_t op) MODE_ATTR
 
 // CHECK-LABEL: @test_svrev_f16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.rev.nxv8f16(<vscale x 8 x half> [[OP:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.reverse.nxv8f16(<vscale x 8 x half> [[OP:%.*]])
 // CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z14test_svrev_f16u13__SVFloat16_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.rev.nxv8f16(<vscale x 8 x half> [[OP:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.reverse.nxv8f16(<vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
 svfloat16_t test_svrev_f16(svfloat16_t op) MODE_ATTR
@@ -159,12 +159,12 @@ svfloat16_t test_svrev_f16(svfloat16_t op) MODE_ATTR
 
 // CHECK-LABEL: @test_svrev_f32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.rev.nxv4f32(<vscale x 4 x float> [[OP:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> [[OP:%.*]])
 // CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z14test_svrev_f32u13__SVFloat32_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.rev.nxv4f32(<vscale x 4 x float> [[OP:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
 svfloat32_t test_svrev_f32(svfloat32_t op) MODE_ATTR
@@ -174,12 +174,12 @@ svfloat32_t test_svrev_f32(svfloat32_t op) MODE_ATTR
 
 // CHECK-LABEL: @test_svrev_f64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.rev.nxv2f64(<vscale x 2 x double> [[OP:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.vector.reverse.nxv2f64(<vscale x 2 x double> [[OP:%.*]])
 // CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z14test_svrev_f64u13__SVFloat64_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.rev.nxv2f64(<vscale x 2 x double> [[OP:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.vector.reverse.nxv2f64(<vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
 svfloat64_t test_svrev_f64(svfloat64_t op) MODE_ATTR
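
For reference, a minimal IR-level sketch of what this change means for one overload (intrinsic name and types taken from the updated test above; illustrative only, not part of the patch): a call such as svrev_s32(op) now produces the generic reverse intrinsic, which the AArch64 backend lowers to the same rev instruction.

define <vscale x 4 x i32> @example_svrev_s32(<vscale x 4 x i32> %op) {
entry:
  ; Previously the call target emitted by Clang here was
  ; @llvm.aarch64.sve.rev.nxv4i32; the builtin now maps to the
  ; target-independent intrinsic instead.
  %rev = tail call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> %op)
  ret <vscale x 4 x i32> %rev
}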

>From 8386dde658d8572d943a09661e584af9f71b1158 Mon Sep 17 00:00:00 2001
From: Jorge Botto <jorge.botto.16 at ucl.ac.uk>
Date: Mon, 27 Jan 2025 20:21:17 +0000
Subject: [PATCH 2/4] Making Clang emit llvm.vector.reverse instead of
 llvm.aarch64.sve.rev for svrev_bf16 and svrev_b8

---
 clang/include/clang/Basic/arm_sve.td                          | 4 ++--
 .../test/CodeGen/AArch64/sve-intrinsics/acle_sve_rev-bfloat.c | 4 ++--
 clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rev.c      | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index c954a6582171728..5b7c64490fff4ec 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -1082,7 +1082,7 @@ def SVZIP2       : SInst<"svzip2[_{d}]",      "ddd",  "csilUcUsUiUlhfd", MergeNo
 
 let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in {
 def SVEXT_BF16    : SInst<"svext[_{d}]",    "dddi", "b", MergeNone, "aarch64_sve_ext", [VerifyRuntimeMode], [ImmCheck<2, ImmCheckExtract, 1>]>;
-def SVREV_BF16    : SInst<"svrev[_{d}]",    "dd",   "b", MergeNone, "aarch64_sve_rev", [VerifyRuntimeMode]>;
+def SVREV_BF16    : SInst<"svrev[_{d}]",    "dd",   "b", MergeNone, "vector_reverse", [VerifyRuntimeMode]>;
 def SVSEL_BF16    : SInst<"svsel[_{d}]",    "dPdd", "b", MergeNone, "aarch64_sve_sel", [VerifyRuntimeMode]>;
 def SVSPLICE_BF16 : SInst<"svsplice[_{d}]", "dPdd", "b", MergeNone, "aarch64_sve_splice", [VerifyRuntimeMode]>;
 def SVTRN1_BF16   : SInst<"svtrn1[_{d}]",   "ddd",  "b", MergeNone, "aarch64_sve_trn1", [VerifyRuntimeMode]>;
@@ -1093,7 +1093,7 @@ def SVZIP1_BF16   : SInst<"svzip1[_{d}]",   "ddd",  "b", MergeNone, "aarch64_sve
 def SVZIP2_BF16   : SInst<"svzip2[_{d}]",   "ddd",  "b", MergeNone, "aarch64_sve_zip2", [VerifyRuntimeMode]>;
 }
 
-def SVREV_B8   : SInst<"svrev_b8",     "PP",   "Pc", MergeNone, "aarch64_sve_rev", [VerifyRuntimeMode]>;
+def SVREV_B8   : SInst<"svrev_b8",     "PP",   "Pc", MergeNone, "vector_reverse", [VerifyRuntimeMode]>;
 def SVREV_B16  : SInst<"svrev_b16",    "PP",   "Pc", MergeNone, "aarch64_sve_rev_b16",  [IsOverloadNone, VerifyRuntimeMode]>;
 def SVREV_B32  : SInst<"svrev_b32",    "PP",   "Pc", MergeNone, "aarch64_sve_rev_b32",  [IsOverloadNone, VerifyRuntimeMode]>;
 def SVREV_B64  : SInst<"svrev_b64",    "PP",   "Pc", MergeNone, "aarch64_sve_rev_b64",  [IsOverloadNone, VerifyRuntimeMode]>;
diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rev-bfloat.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rev-bfloat.c
index 9b3e813fa969472..43c0da842001c88 100644
--- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rev-bfloat.c
+++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rev-bfloat.c
@@ -25,12 +25,12 @@
 
 // CHECK-LABEL: @test_svrev_bf16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.rev.nxv8bf16(<vscale x 8 x bfloat> [[OP:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.reverse.nxv8bf16(<vscale x 8 x bfloat> [[OP:%.*]])
 // CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z15test_svrev_bf16u14__SVBfloat16_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.rev.nxv8bf16(<vscale x 8 x bfloat> [[OP:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.reverse.nxv8bf16(<vscale x 8 x bfloat> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
 //
 svbfloat16_t test_svrev_bf16(svbfloat16_t op) MODE_ATTR
diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rev.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rev.c
index 835d1c616aebcb0..856f76e67afcf5a 100644
--- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rev.c
+++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rev.c
@@ -189,12 +189,12 @@ svfloat64_t test_svrev_f64(svfloat64_t op) MODE_ATTR
 
 // CHECK-LABEL: @test_svrev_b8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.rev.nxv16i1(<vscale x 16 x i1> [[OP:%.*]])
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.vector.reverse.nxv16i1(<vscale x 16 x i1> [[OP:%.*]])
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
 // CPP-CHECK-LABEL: @_Z13test_svrev_b8u10__SVBool_t(
 // CPP-CHECK-NEXT:  entry:
-// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.rev.nxv16i1(<vscale x 16 x i1> [[OP:%.*]])
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.vector.reverse.nxv16i1(<vscale x 16 x i1> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
 svbool_t test_svrev_b8(svbool_t op) MODE_ATTR

>From a10ac09fafbafe5fc3798437a5ebc940bf81022e Mon Sep 17 00:00:00 2001
From: Jorge Botto <jorge.botto.16 at ucl.ac.uk>
Date: Wed, 5 Feb 2025 00:00:34 +0000
Subject: [PATCH 3/4] Precommitting missing optimisation tests

---
 llvm/test/CodeGen/AArch64/aarch64_sve_rev.ll | 182 +++++++++++++++++++
 1 file changed, 182 insertions(+)
 create mode 100644 llvm/test/CodeGen/AArch64/aarch64_sve_rev.ll

diff --git a/llvm/test/CodeGen/AArch64/aarch64_sve_rev.ll b/llvm/test/CodeGen/AArch64/aarch64_sve_rev.ll
new file mode 100644
index 000000000000000..8455f2e5118ef7b
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/aarch64_sve_rev.ll
@@ -0,0 +1,182 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -force-streaming -verify-machineinstrs < %s | FileCheck %s
+
+define <vscale x 16 x i1> @aarch64_sve_rev_inv(<vscale x 16 x i1> %0) {
+; CHECK-LABEL: aarch64_sve_rev_inv:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    rev p0.b, p0.b
+; CHECK-NEXT:    rev p0.b, p0.b
+; CHECK-NEXT:    ret
+entry:
+  %1 = call <vscale x 16 x i1> @llvm.aarch64.sve.rev(<vscale x 16 x i1> %0)
+  %2 = call <vscale x 16 x i1> @llvm.aarch64.sve.rev(<vscale x 16 x i1> %1)
+  ret <vscale x 16 x i1> %2
+}
+
+define <vscale x 16 x i1> @aarch64_sve_rev_b16_inv(<vscale x 16 x i1> %0) {
+; CHECK-LABEL: aarch64_sve_rev_b16_inv:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    rev p0.h, p0.h
+; CHECK-NEXT:    rev p0.h, p0.h
+; CHECK-NEXT:    ret
+entry:
+  %1 = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b16(<vscale x 16 x i1> %0)
+  %2 = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b16(<vscale x 16 x i1> %1)
+  ret <vscale x 16 x i1> %2
+}
+
+define <vscale x 16 x i1> @aarch64_sve_rev_b32_inv(<vscale x 16 x i1> %0) {
+; CHECK-LABEL: aarch64_sve_rev_b32_inv:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    rev p0.s, p0.s
+; CHECK-NEXT:    rev p0.s, p0.s
+; CHECK-NEXT:    ret
+entry:
+  %1 = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b32(<vscale x 16 x i1> %0)
+  %2 = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b32(<vscale x 16 x i1> %1)
+  ret <vscale x 16 x i1> %2
+}
+
+define <vscale x 16 x i1> @aarch64_sve_rev_b64_inv(<vscale x 16 x i1> %0) {
+; CHECK-LABEL: aarch64_sve_rev_b64_inv:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    rev p0.d, p0.d
+; CHECK-NEXT:    rev p0.d, p0.d
+; CHECK-NEXT:    ret
+entry:
+  %1 = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b64(<vscale x 16 x i1> %0)
+  %2 = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b64(<vscale x 16 x i1> %1)
+  ret <vscale x 16 x i1> %2
+}
+
+define <vscale x 4 x i32> @aarch64_sve_revb_inv(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: aarch64_sve_revb_inv:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    revb z0.s, p0/m, z1.s
+; CHECK-NEXT:    revb z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.aarch64.sve.revb.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
+  %2 = call <vscale x 4 x i32> @llvm.aarch64.sve.revb.nxv4i32(<vscale x 4 x i32> %1, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 16 x i8> @aarch64_sve_revd_inv(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: aarch64_sve_revd_inv:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    revd z0.q, p0/m, z1.q
+; CHECK-NEXT:    revd z0.q, p0/m, z1.q
+; CHECK-NEXT:    ret
+  %1 = call <vscale x 16 x i8> @llvm.aarch64.sve.revd.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b)
+  %2 = call <vscale x 16 x i8> @llvm.aarch64.sve.revd.nxv16i8(<vscale x 16 x i8> %1, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %2
+}
+
+define <vscale x 4 x i32> @aarch64_sve_revh_inv(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: aarch64_sve_revh_inv:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    revh z0.s, p0/m, z1.s
+; CHECK-NEXT:    revh z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.aarch64.sve.revh.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
+  %2 = call <vscale x 4 x i32> @llvm.aarch64.sve.revh.nxv4i32(<vscale x 4 x i32> %1, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 2 x i64> @aarch64_sve_revw_inv(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: aarch64_sve_revw_inv:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    revw z0.d, p0/m, z1.d
+; CHECK-NEXT:    revw z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
+  %1 = call <vscale x 2 x i64> @llvm.aarch64.sve.revw.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
+  %2 = call <vscale x 2 x i64> @llvm.aarch64.sve.revw.nxv2i64(<vscale x 2 x i64> %1, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+define <vscale x 4 x i32> @test_aarch64_sve_revb_pg_mismatch(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i1> %pg1, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: test_aarch64_sve_revb_pg_mismatch:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    revb z0.s, p0/m, z1.s
+; CHECK-NEXT:    revb z0.s, p1/m, z1.s
+; CHECK-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.aarch64.sve.revb.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
+  %2 = call <vscale x 4 x i32> @llvm.aarch64.sve.revb.nxv4i32(<vscale x 4 x i32> %1, <vscale x 4 x i1> %pg1, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @test_aarch64_sve_revb_b_mismatch(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b, <vscale x 4 x i32> %b1) {
+; CHECK-LABEL: test_aarch64_sve_revb_b_mismatch:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    revb z0.s, p0/m, z1.s
+; CHECK-NEXT:    revb z0.s, p0/m, z2.s
+; CHECK-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.aarch64.sve.revb.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
+  %2 = call <vscale x 4 x i32> @llvm.aarch64.sve.revb.nxv4i32(<vscale x 4 x i32> %1, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b1)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 16 x i8> @test_aarch64_sve_revd_pg_mismatch(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i1> %pg1, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: test_aarch64_sve_revd_pg_mismatch:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    revd z0.q, p0/m, z1.q
+; CHECK-NEXT:    revd z0.q, p1/m, z1.q
+; CHECK-NEXT:    ret
+  %1 = call <vscale x 16 x i8> @llvm.aarch64.sve.revd.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b)
+  %2 = call <vscale x 16 x i8> @llvm.aarch64.sve.revd.nxv16i8(<vscale x 16 x i8> %1, <vscale x 16 x i1> %pg1, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %2
+}
+
+define <vscale x 16 x i8> @test_aarch64_sve_revd_b_mismatch(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b, <vscale x 16 x i8> %b1) {
+; CHECK-LABEL: test_aarch64_sve_revd_b_mismatch:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    revd z0.q, p0/m, z1.q
+; CHECK-NEXT:    revd z0.q, p0/m, z2.q
+; CHECK-NEXT:    ret
+  %1 = call <vscale x 16 x i8> @llvm.aarch64.sve.revd.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b)
+  %2 = call <vscale x 16 x i8> @llvm.aarch64.sve.revd.nxv16i8(<vscale x 16 x i8> %1, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b1)
+  ret <vscale x 16 x i8> %2
+}
+
+define <vscale x 4 x i32> @test_aarch64_sve_revh_pg_mismatch(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i1> %pg1, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: test_aarch64_sve_revh_pg_mismatch:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    revh z0.s, p0/m, z1.s
+; CHECK-NEXT:    revh z0.s, p1/m, z1.s
+; CHECK-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.aarch64.sve.revh.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
+  %2 = call <vscale x 4 x i32> @llvm.aarch64.sve.revh.nxv4i32(<vscale x 4 x i32> %1, <vscale x 4 x i1> %pg1, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @test_aarch64_sve_revh_b_mismatch(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b, <vscale x 4 x i32> %b1) {
+; CHECK-LABEL: test_aarch64_sve_revh_b_mismatch:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    revh z0.s, p0/m, z1.s
+; CHECK-NEXT:    revh z0.s, p0/m, z2.s
+; CHECK-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.aarch64.sve.revh.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
+  %2 = call <vscale x 4 x i32> @llvm.aarch64.sve.revh.nxv4i32(<vscale x 4 x i32> %1, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b1)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 2 x i64> @test_aarch64_sve_revw_pg_mismatch(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i1> %pg1, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: test_aarch64_sve_revw_pg_mismatch:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    revw z0.d, p0/m, z1.d
+; CHECK-NEXT:    revw z0.d, p1/m, z1.d
+; CHECK-NEXT:    ret
+  %1 = call <vscale x 2 x i64> @llvm.aarch64.sve.revw.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
+  %2 = call <vscale x 2 x i64> @llvm.aarch64.sve.revw.nxv2i64(<vscale x 2 x i64> %1, <vscale x 2 x i1> %pg1, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %2
+}
+
+define <vscale x 2 x i64> @test_aarch64_sve_revw_b_mismatch(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b, <vscale x 2 x i64> %b1) {
+; CHECK-LABEL: test_aarch64_sve_revw_b_mismatch:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    revw z0.d, p0/m, z1.d
+; CHECK-NEXT:    revw z0.d, p0/m, z2.d
+; CHECK-NEXT:    ret
+  %1 = call <vscale x 2 x i64> @llvm.aarch64.sve.revw.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
+  %2 = call <vscale x 2 x i64> @llvm.aarch64.sve.revw.nxv2i64(<vscale x 2 x i64> %1, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b1)
+  ret <vscale x 2 x i64> %2
+}

>From 5a1829ff82467df54ef4f3dcbb65190a1aaa8fdd Mon Sep 17 00:00:00 2001
From: Jorge Botto <jorge.botto.16 at ucl.ac.uk>
Date: Thu, 6 Feb 2025 01:04:42 +0000
Subject: [PATCH 4/4] Adding missed optimisation

---
 .../Target/AArch64/AArch64ISelLowering.cpp    | 38 +++++++++++++++++++
 llvm/test/CodeGen/AArch64/aarch64_sve_rev.ll  | 24 ++++--------
 2 files changed, 46 insertions(+), 16 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 34464d317beafe4..323fbde74bf1974 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -21962,6 +21962,35 @@ SDValue tryLowerPartialReductionToWideAdd(SDNode *N,
   return DAG.getNode(TopOpcode, DL, AccVT, BottomNode, ExtOp);
 }
 
+static SDValue foldRevInvolution(SDNode *N) {
+  SDValue InnerRev = N->getOperand(1);
+  if (!InnerRev.hasOneUse())
+    return SDValue();
+
+  unsigned OuterIId = getIntrinsicID(N);
+  unsigned InnerIId = getIntrinsicID(InnerRev.getNode());
+  if (OuterIId != InnerIId)
+    return SDValue();
+
+  switch (OuterIId) {
+  case Intrinsic::aarch64_sve_revb:
+  case Intrinsic::aarch64_sve_revd:
+  case Intrinsic::aarch64_sve_revh:
+  case Intrinsic::aarch64_sve_revw:
+    if (N->getOperand(2) != InnerRev.getOperand(2) ||
+        N->getOperand(3) != InnerRev.getOperand(3))
+      return SDValue();
+    [[fallthrough]];
+  case Intrinsic::aarch64_sve_rev:
+  case Intrinsic::aarch64_sve_rev_b16:
+  case Intrinsic::aarch64_sve_rev_b32:
+  case Intrinsic::aarch64_sve_rev_b64:
+    return InnerRev.getOperand(1);
+  default:
+    return SDValue();
+  }
+}
+
 static SDValue performIntrinsicCombine(SDNode *N,
                                        TargetLowering::DAGCombinerInfo &DCI,
                                        const AArch64Subtarget *Subtarget) {
@@ -22270,6 +22299,15 @@ static SDValue performIntrinsicCombine(SDNode *N,
     return tryConvertSVEWideCompare(N, ISD::SETULT, DCI, DAG);
   case Intrinsic::aarch64_sve_cmpls_wide:
     return tryConvertSVEWideCompare(N, ISD::SETULE, DCI, DAG);
+  case Intrinsic::aarch64_sve_rev:
+  case Intrinsic::aarch64_sve_rev_b16:
+  case Intrinsic::aarch64_sve_rev_b32:
+  case Intrinsic::aarch64_sve_rev_b64:
+  case Intrinsic::aarch64_sve_revb:
+  case Intrinsic::aarch64_sve_revd:
+  case Intrinsic::aarch64_sve_revh:
+  case Intrinsic::aarch64_sve_revw:
+    return foldRevInvolution(N);
   case Intrinsic::aarch64_sve_ptest_any:
     return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
                     AArch64CC::ANY_ACTIVE);
diff --git a/llvm/test/CodeGen/AArch64/aarch64_sve_rev.ll b/llvm/test/CodeGen/AArch64/aarch64_sve_rev.ll
index 8455f2e5118ef7b..984845363501b24 100644
--- a/llvm/test/CodeGen/AArch64/aarch64_sve_rev.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64_sve_rev.ll
@@ -4,8 +4,6 @@
 define <vscale x 16 x i1> @aarch64_sve_rev_inv(<vscale x 16 x i1> %0) {
 ; CHECK-LABEL: aarch64_sve_rev_inv:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    rev p0.b, p0.b
-; CHECK-NEXT:    rev p0.b, p0.b
 ; CHECK-NEXT:    ret
 entry:
   %1 = call <vscale x 16 x i1> @llvm.aarch64.sve.rev(<vscale x 16 x i1> %0)
@@ -16,8 +14,6 @@ entry:
 define <vscale x 16 x i1> @aarch64_sve_rev_b16_inv(<vscale x 16 x i1> %0) {
 ; CHECK-LABEL: aarch64_sve_rev_b16_inv:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    rev p0.h, p0.h
-; CHECK-NEXT:    rev p0.h, p0.h
 ; CHECK-NEXT:    ret
 entry:
   %1 = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b16(<vscale x 16 x i1> %0)
@@ -28,8 +24,6 @@ entry:
 define <vscale x 16 x i1> @aarch64_sve_rev_b32_inv(<vscale x 16 x i1> %0) {
 ; CHECK-LABEL: aarch64_sve_rev_b32_inv:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    rev p0.s, p0.s
-; CHECK-NEXT:    rev p0.s, p0.s
 ; CHECK-NEXT:    ret
 entry:
   %1 = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b32(<vscale x 16 x i1> %0)
@@ -40,8 +34,6 @@ entry:
 define <vscale x 16 x i1> @aarch64_sve_rev_b64_inv(<vscale x 16 x i1> %0) {
 ; CHECK-LABEL: aarch64_sve_rev_b64_inv:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    rev p0.d, p0.d
-; CHECK-NEXT:    rev p0.d, p0.d
 ; CHECK-NEXT:    ret
 entry:
   %1 = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b64(<vscale x 16 x i1> %0)
@@ -52,8 +44,6 @@ entry:
 define <vscale x 4 x i32> @aarch64_sve_revb_inv(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: aarch64_sve_revb_inv:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    revb z0.s, p0/m, z1.s
-; CHECK-NEXT:    revb z0.s, p0/m, z1.s
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i32> @llvm.aarch64.sve.revb.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
   %2 = call <vscale x 4 x i32> @llvm.aarch64.sve.revb.nxv4i32(<vscale x 4 x i32> %1, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
@@ -63,8 +53,6 @@ define <vscale x 4 x i32> @aarch64_sve_revb_inv(<vscale x 4 x i32> %a, <vscale x
 define <vscale x 16 x i8> @aarch64_sve_revd_inv(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: aarch64_sve_revd_inv:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    revd z0.q, p0/m, z1.q
-; CHECK-NEXT:    revd z0.q, p0/m, z1.q
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 16 x i8> @llvm.aarch64.sve.revd.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b)
   %2 = call <vscale x 16 x i8> @llvm.aarch64.sve.revd.nxv16i8(<vscale x 16 x i8> %1, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b)
@@ -74,8 +62,6 @@ define <vscale x 16 x i8> @aarch64_sve_revd_inv(<vscale x 16 x i8> %a, <vscale x
 define <vscale x 4 x i32> @aarch64_sve_revh_inv(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: aarch64_sve_revh_inv:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    revh z0.s, p0/m, z1.s
-; CHECK-NEXT:    revh z0.s, p0/m, z1.s
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 4 x i32> @llvm.aarch64.sve.revh.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
   %2 = call <vscale x 4 x i32> @llvm.aarch64.sve.revh.nxv4i32(<vscale x 4 x i32> %1, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
@@ -85,14 +71,13 @@ define <vscale x 4 x i32> @aarch64_sve_revh_inv(<vscale x 4 x i32> %a, <vscale x
 define <vscale x 2 x i64> @aarch64_sve_revw_inv(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: aarch64_sve_revw_inv:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    revw z0.d, p0/m, z1.d
-; CHECK-NEXT:    revw z0.d, p0/m, z1.d
 ; CHECK-NEXT:    ret
   %1 = call <vscale x 2 x i64> @llvm.aarch64.sve.revw.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
   %2 = call <vscale x 2 x i64> @llvm.aarch64.sve.revw.nxv2i64(<vscale x 2 x i64> %1, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
   ret <vscale x 2 x i64> %2
 }
 
+; negative test
 define <vscale x 4 x i32> @test_aarch64_sve_revb_pg_mismatch(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i1> %pg1, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: test_aarch64_sve_revb_pg_mismatch:
 ; CHECK:       // %bb.0:
@@ -104,6 +89,7 @@ define <vscale x 4 x i32> @test_aarch64_sve_revb_pg_mismatch(<vscale x 4 x i32>
   ret <vscale x 4 x i32> %2
 }
 
+; negative test
 define <vscale x 4 x i32> @test_aarch64_sve_revb_b_mismatch(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b, <vscale x 4 x i32> %b1) {
 ; CHECK-LABEL: test_aarch64_sve_revb_b_mismatch:
 ; CHECK:       // %bb.0:
@@ -115,6 +101,7 @@ define <vscale x 4 x i32> @test_aarch64_sve_revb_b_mismatch(<vscale x 4 x i32> %
   ret <vscale x 4 x i32> %2
 }
 
+; negative test
 define <vscale x 16 x i8> @test_aarch64_sve_revd_pg_mismatch(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i1> %pg1, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: test_aarch64_sve_revd_pg_mismatch:
 ; CHECK:       // %bb.0:
@@ -126,6 +113,7 @@ define <vscale x 16 x i8> @test_aarch64_sve_revd_pg_mismatch(<vscale x 16 x i8>
   ret <vscale x 16 x i8> %2
 }
 
+; negative test
 define <vscale x 16 x i8> @test_aarch64_sve_revd_b_mismatch(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b, <vscale x 16 x i8> %b1) {
 ; CHECK-LABEL: test_aarch64_sve_revd_b_mismatch:
 ; CHECK:       // %bb.0:
@@ -137,6 +125,7 @@ define <vscale x 16 x i8> @test_aarch64_sve_revd_b_mismatch(<vscale x 16 x i8> %
   ret <vscale x 16 x i8> %2
 }
 
+; negative test
 define <vscale x 4 x i32> @test_aarch64_sve_revh_pg_mismatch(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i1> %pg1, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: test_aarch64_sve_revh_pg_mismatch:
 ; CHECK:       // %bb.0:
@@ -148,6 +137,7 @@ define <vscale x 4 x i32> @test_aarch64_sve_revh_pg_mismatch(<vscale x 4 x i32>
   ret <vscale x 4 x i32> %2
 }
 
+; negative test
 define <vscale x 4 x i32> @test_aarch64_sve_revh_b_mismatch(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b, <vscale x 4 x i32> %b1) {
 ; CHECK-LABEL: test_aarch64_sve_revh_b_mismatch:
 ; CHECK:       // %bb.0:
@@ -159,6 +149,7 @@ define <vscale x 4 x i32> @test_aarch64_sve_revh_b_mismatch(<vscale x 4 x i32> %
   ret <vscale x 4 x i32> %2
 }
 
+; negative test
 define <vscale x 2 x i64> @test_aarch64_sve_revw_pg_mismatch(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i1> %pg1, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: test_aarch64_sve_revw_pg_mismatch:
 ; CHECK:       // %bb.0:
@@ -170,6 +161,7 @@ define <vscale x 2 x i64> @test_aarch64_sve_revw_pg_mismatch(<vscale x 2 x i64>
   ret <vscale x 2 x i64> %2
 }
 
+; negative test
 define <vscale x 2 x i64> @test_aarch64_sve_revw_b_mismatch(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b, <vscale x 2 x i64> %b1) {
 ; CHECK-LABEL: test_aarch64_sve_revw_b_mismatch:
 ; CHECK:       // %bb.0:
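
To illustrate the combine this patch adds for the unpredicated case: the svrev-style intrinsics are involutions, so two nested calls to the same intrinsic cancel when the inner call is only used by the outer one (and, for the merging revb/revd/revh/revw forms, when the remaining operands match). A hypothetical IR input of the kind the new DAG combine simplifies (function name invented for illustration; the intrinsic and types match the tests above):

define <vscale x 16 x i1> @example_double_rev(<vscale x 16 x i1> %p) {
entry:
  ; rev reverses the predicate elements, so applying it twice restores %p;
  ; with this patch the backend folds both calls away and returns %p
  ; directly (compare the updated aarch64_sve_rev_inv CHECK lines above).
  %inner = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.nxv16i1(<vscale x 16 x i1> %p)
  %outer = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.nxv16i1(<vscale x 16 x i1> %inner)
  ret <vscale x 16 x i1> %outer
}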


