[llvm] [AArch64][SVE] Lower svrev_* to llvm.vector.reverse and fold svrev(svrev(x)) -> x (PR #116422)
Jorge Botto via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 25 10:11:28 PDT 2025
https://github.com/jf-botto updated https://github.com/llvm/llvm-project/pull/116422
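For context, a minimal sketch of the source-level pattern this fold targets. It is illustrative only and not part of the patch: it assumes the standard ACLE predicate-reverse intrinsic svrev_b8 from arm_sve.h, and the function name is made up.

#include <arm_sve.h>

// A predicate reversed twice is just the original predicate. Before this
// patch the two calls lower to back-to-back llvm.aarch64.sve.rev calls that
// survive InstCombine; with the fold added below, the function simply
// returns its argument unchanged.
svbool_t double_rev(svbool_t p) {
  return svrev_b8(svrev_b8(p));
}

Built for an SVE target (for example -march=armv8-a+sve) at -O2, the expectation is that the paired rev predicate instructions disappear from the generated code.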
From 9d7071811dc975c0aed231f2367bfea73c90cb14 Mon Sep 17 00:00:00 2001
From: Jorge Botto <jorge.botto.16 at ucl.ac.uk>
Date: Fri, 25 Apr 2025 00:22:59 +0100
Subject: [PATCH 1/2] Pre-committing tests
---
.../AArch64/sve-intrinsic-rev-inv.ll | 277 ++++++++++++++++++
1 file changed, 277 insertions(+)
create mode 100644 llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-rev-inv.ll
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-rev-inv.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-rev-inv.ll
new file mode 100644
index 0000000000000..204208dde5f45
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-rev-inv.ll
@@ -0,0 +1,277 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define <vscale x 16 x i1> @aarch64_sve_rev_inv(<vscale x 16 x i1> %0) #0{
+; CHECK-LABEL: define <vscale x 16 x i1> @aarch64_sve_rev_inv(
+; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.nxv16i1(<vscale x 16 x i1> [[TMP0]])
+; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.nxv16i1(<vscale x 16 x i1> [[TMP1]])
+; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP2]]
+;
+entry:
+ %1 = call <vscale x 16 x i1> @llvm.aarch64.sve.rev(<vscale x 16 x i1> %0)
+ %2 = call <vscale x 16 x i1> @llvm.aarch64.sve.rev(<vscale x 16 x i1> %1)
+ ret <vscale x 16 x i1> %2
+}
+
+define <vscale x 16 x i1> @aarch64_sve_rev_b16_inv(<vscale x 16 x i1> %0) #0{
+; CHECK-LABEL: define <vscale x 16 x i1> @aarch64_sve_rev_b16_inv(
+; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b16(<vscale x 16 x i1> [[TMP0]])
+; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b16(<vscale x 16 x i1> [[TMP1]])
+; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP2]]
+;
+entry:
+ %1 = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b16(<vscale x 16 x i1> %0)
+ %2 = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b16(<vscale x 16 x i1> %1)
+ ret <vscale x 16 x i1> %2
+}
+
+define <vscale x 16 x i1> @aarch64_sve_rev_b32_inv(<vscale x 16 x i1> %0) #0{
+; CHECK-LABEL: define <vscale x 16 x i1> @aarch64_sve_rev_b32_inv(
+; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b32(<vscale x 16 x i1> [[TMP0]])
+; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b32(<vscale x 16 x i1> [[TMP1]])
+; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP2]]
+;
+entry:
+ %1 = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b32(<vscale x 16 x i1> %0)
+ %2 = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b32(<vscale x 16 x i1> %1)
+ ret <vscale x 16 x i1> %2
+}
+
+define <vscale x 16 x i1> @aarch64_sve_rev_b64_inv(<vscale x 16 x i1> %0)#0 {
+; CHECK-LABEL: define <vscale x 16 x i1> @aarch64_sve_rev_b64_inv(
+; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b64(<vscale x 16 x i1> [[TMP0]])
+; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b64(<vscale x 16 x i1> [[TMP1]])
+; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP2]]
+;
+entry:
+ %1 = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b64(<vscale x 16 x i1> %0)
+ %2 = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b64(<vscale x 16 x i1> %1)
+ ret <vscale x 16 x i1> %2
+}
+
+
+define <vscale x 4 x i32> @aarch64_sve_revb_inv(<vscale x 4 x i32> %pred, <vscale x 4 x i1> %passthru, <vscale x 4 x i32> %a) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @aarch64_sve_revb_inv(
+; CHECK-SAME: <vscale x 4 x i32> [[PRED:%.*]], <vscale x 4 x i1> [[PASSTHRU:%.*]], <vscale x 4 x i32> [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.revb.nxv4i32(<vscale x 4 x i32> [[PRED]], <vscale x 4 x i1> [[PASSTHRU]], <vscale x 4 x i32> [[A]])
+; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.revb.nxv4i32(<vscale x 4 x i32> [[PRED]], <vscale x 4 x i1> [[PASSTHRU]], <vscale x 4 x i32> [[TMP1]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
+;
+ %1 = call <vscale x 4 x i32> @llvm.aarch64.sve.revb.nxv4i32(<vscale x 4 x i32> %pred, <vscale x 4 x i1> %passthru, <vscale x 4 x i32> %a)
+ %2 = call <vscale x 4 x i32> @llvm.aarch64.sve.revb.nxv4i32(<vscale x 4 x i32> %pred, <vscale x 4 x i1> %passthru, <vscale x 4 x i32> %1)
+ ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 16 x i8> @aarch64_sve_revd_inv(<vscale x 16 x i8> %pred, <vscale x 16 x i1> %passthru, <vscale x 16 x i8> %a) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @aarch64_sve_revd_inv(
+; CHECK-SAME: <vscale x 16 x i8> [[PRED:%.*]], <vscale x 16 x i1> [[PASSTHRU:%.*]], <vscale x 16 x i8> [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.revd.nxv16i8(<vscale x 16 x i8> [[PRED]], <vscale x 16 x i1> [[PASSTHRU]], <vscale x 16 x i8> [[A]])
+; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.revd.nxv16i8(<vscale x 16 x i8> [[PRED]], <vscale x 16 x i1> [[PASSTHRU]], <vscale x 16 x i8> [[TMP1]])
+; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP2]]
+;
+ %1 = call <vscale x 16 x i8> @llvm.aarch64.sve.revd.nxv16i8(<vscale x 16 x i8> %pred, <vscale x 16 x i1> %passthru, <vscale x 16 x i8> %a)
+ %2 = call <vscale x 16 x i8> @llvm.aarch64.sve.revd.nxv16i8(<vscale x 16 x i8> %pred, <vscale x 16 x i1> %passthru, <vscale x 16 x i8> %1)
+ ret <vscale x 16 x i8> %2
+}
+
+define <vscale x 4 x i32> @aarch64_sve_revh_inv(<vscale x 4 x i32> %pred, <vscale x 4 x i1> %passthru, <vscale x 4 x i32> %a) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @aarch64_sve_revh_inv(
+; CHECK-SAME: <vscale x 4 x i32> [[PRED:%.*]], <vscale x 4 x i1> [[PASSTHRU:%.*]], <vscale x 4 x i32> [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.revh.nxv4i32(<vscale x 4 x i32> [[PRED]], <vscale x 4 x i1> [[PASSTHRU]], <vscale x 4 x i32> [[A]])
+; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.revh.nxv4i32(<vscale x 4 x i32> [[PRED]], <vscale x 4 x i1> [[PASSTHRU]], <vscale x 4 x i32> [[TMP1]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
+;
+ %1 = call <vscale x 4 x i32> @llvm.aarch64.sve.revh.nxv4i32(<vscale x 4 x i32> %pred, <vscale x 4 x i1> %passthru, <vscale x 4 x i32> %a)
+ %2 = call <vscale x 4 x i32> @llvm.aarch64.sve.revh.nxv4i32(<vscale x 4 x i32> %pred, <vscale x 4 x i1> %passthru, <vscale x 4 x i32> %1)
+ ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 2 x i64> @aarch64_sve_revw_inv(<vscale x 2 x i64> %pred, <vscale x 2 x i1> %passthru, <vscale x 2 x i64> %a) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @aarch64_sve_revw_inv(
+; CHECK-SAME: <vscale x 2 x i64> [[PRED:%.*]], <vscale x 2 x i1> [[PASSTHRU:%.*]], <vscale x 2 x i64> [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.revw.nxv2i64(<vscale x 2 x i64> [[PRED]], <vscale x 2 x i1> [[PASSTHRU]], <vscale x 2 x i64> [[A]])
+; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.revw.nxv2i64(<vscale x 2 x i64> [[PRED]], <vscale x 2 x i1> [[PASSTHRU]], <vscale x 2 x i64> [[TMP1]])
+; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
+;
+ %1 = call <vscale x 2 x i64> @llvm.aarch64.sve.revw.nxv2i64(<vscale x 2 x i64> %pred, <vscale x 2 x i1> %passthru, <vscale x 2 x i64> %a)
+ %2 = call <vscale x 2 x i64> @llvm.aarch64.sve.revw.nxv2i64(<vscale x 2 x i64> %pred, <vscale x 2 x i1> %passthru, <vscale x 2 x i64> %1)
+ ret <vscale x 2 x i64> %2
+}
+
+
+; negative test
+define <vscale x 4 x i32> @aarch64_sve_revb_inv_pred_mismatch(<vscale x 4 x i32> %pred, <vscale x 4 x i32> %pred1, <vscale x 4 x i1> %passthru, <vscale x 4 x i32> %a) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @aarch64_sve_revb_inv_pred_mismatch(
+; CHECK-SAME: <vscale x 4 x i32> [[PRED:%.*]], <vscale x 4 x i32> [[PRED1:%.*]], <vscale x 4 x i1> [[PASSTHRU:%.*]], <vscale x 4 x i32> [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.revb.nxv4i32(<vscale x 4 x i32> [[PRED]], <vscale x 4 x i1> [[PASSTHRU]], <vscale x 4 x i32> [[A]])
+; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.revb.nxv4i32(<vscale x 4 x i32> [[PRED1]], <vscale x 4 x i1> [[PASSTHRU]], <vscale x 4 x i32> [[TMP1]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
+;
+ %1 = call <vscale x 4 x i32> @llvm.aarch64.sve.revb.nxv4i32(<vscale x 4 x i32> %pred, <vscale x 4 x i1> %passthru, <vscale x 4 x i32> %a)
+ %2 = call <vscale x 4 x i32> @llvm.aarch64.sve.revb.nxv4i32(<vscale x 4 x i32> %pred1, <vscale x 4 x i1> %passthru, <vscale x 4 x i32> %1)
+ ret <vscale x 4 x i32> %2
+}
+
+; negative test
+define <vscale x 16 x i8> @aarch64_sve_revd_inv_pred_mismatch(<vscale x 16 x i8> %pred, <vscale x 16 x i8> %pred1, <vscale x 16 x i1> %passthru, <vscale x 16 x i8> %a) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @aarch64_sve_revd_inv_pred_mismatch(
+; CHECK-SAME: <vscale x 16 x i8> [[PRED:%.*]], <vscale x 16 x i8> [[PRED1:%.*]], <vscale x 16 x i1> [[PASSTHRU:%.*]], <vscale x 16 x i8> [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.revd.nxv16i8(<vscale x 16 x i8> [[PRED]], <vscale x 16 x i1> [[PASSTHRU]], <vscale x 16 x i8> [[A]])
+; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.revd.nxv16i8(<vscale x 16 x i8> [[PRED1]], <vscale x 16 x i1> [[PASSTHRU]], <vscale x 16 x i8> [[TMP1]])
+; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP2]]
+;
+ %1 = call <vscale x 16 x i8> @llvm.aarch64.sve.revd.nxv16i8(<vscale x 16 x i8> %pred, <vscale x 16 x i1> %passthru, <vscale x 16 x i8> %a)
+ %2 = call <vscale x 16 x i8> @llvm.aarch64.sve.revd.nxv16i8(<vscale x 16 x i8> %pred1, <vscale x 16 x i1> %passthru, <vscale x 16 x i8> %1)
+ ret <vscale x 16 x i8> %2
+}
+
+; negative test
+define <vscale x 4 x i32> @aarch64_sve_revh_inv_pred_mismatch(<vscale x 4 x i32> %pred, <vscale x 4 x i32> %pred1, <vscale x 4 x i1> %passthru, <vscale x 4 x i32> %a) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @aarch64_sve_revh_inv_pred_mismatch(
+; CHECK-SAME: <vscale x 4 x i32> [[PRED:%.*]], <vscale x 4 x i32> [[PRED1:%.*]], <vscale x 4 x i1> [[PASSTHRU:%.*]], <vscale x 4 x i32> [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.revh.nxv4i32(<vscale x 4 x i32> [[PRED]], <vscale x 4 x i1> [[PASSTHRU]], <vscale x 4 x i32> [[A]])
+; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.revh.nxv4i32(<vscale x 4 x i32> [[PRED1]], <vscale x 4 x i1> [[PASSTHRU]], <vscale x 4 x i32> [[TMP1]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
+;
+ %1 = call <vscale x 4 x i32> @llvm.aarch64.sve.revh.nxv4i32(<vscale x 4 x i32> %pred, <vscale x 4 x i1> %passthru, <vscale x 4 x i32> %a)
+ %2 = call <vscale x 4 x i32> @llvm.aarch64.sve.revh.nxv4i32(<vscale x 4 x i32> %pred1, <vscale x 4 x i1> %passthru, <vscale x 4 x i32> %1)
+ ret <vscale x 4 x i32> %2
+}
+
+; negative test
+define <vscale x 2 x i64> @aarch64_sve_revw_inv_pred_mismatch(<vscale x 2 x i64> %pred, <vscale x 2 x i64> %pred1, <vscale x 2 x i1> %passthru, <vscale x 2 x i64> %a) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @aarch64_sve_revw_inv_pred_mismatch(
+; CHECK-SAME: <vscale x 2 x i64> [[PRED:%.*]], <vscale x 2 x i64> [[PRED1:%.*]], <vscale x 2 x i1> [[PASSTHRU:%.*]], <vscale x 2 x i64> [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.revw.nxv2i64(<vscale x 2 x i64> [[PRED]], <vscale x 2 x i1> [[PASSTHRU]], <vscale x 2 x i64> [[A]])
+; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.revw.nxv2i64(<vscale x 2 x i64> [[PRED1]], <vscale x 2 x i1> [[PASSTHRU]], <vscale x 2 x i64> [[TMP1]])
+; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
+;
+ %1 = call <vscale x 2 x i64> @llvm.aarch64.sve.revw.nxv2i64(<vscale x 2 x i64> %pred, <vscale x 2 x i1> %passthru, <vscale x 2 x i64> %a)
+ %2 = call <vscale x 2 x i64> @llvm.aarch64.sve.revw.nxv2i64(<vscale x 2 x i64> %pred1, <vscale x 2 x i1> %passthru, <vscale x 2 x i64> %1)
+ ret <vscale x 2 x i64> %2
+}
+
+; negative test
+define <vscale x 4 x i32> @aarch64_sve_revb_inv_passthru_mismatch(<vscale x 4 x i32> %pred, <vscale x 4 x i1> %passthru, <vscale x 4 x i1> %passthru1, <vscale x 4 x i32> %a) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @aarch64_sve_revb_inv_passthru_mismatch(
+; CHECK-SAME: <vscale x 4 x i32> [[PRED:%.*]], <vscale x 4 x i1> [[PASSTHRU:%.*]], <vscale x 4 x i1> [[PASSTHRU1:%.*]], <vscale x 4 x i32> [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.revb.nxv4i32(<vscale x 4 x i32> [[PRED]], <vscale x 4 x i1> [[PASSTHRU]], <vscale x 4 x i32> [[A]])
+; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.revb.nxv4i32(<vscale x 4 x i32> [[PRED]], <vscale x 4 x i1> [[PASSTHRU1]], <vscale x 4 x i32> [[TMP1]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
+;
+ %1 = call <vscale x 4 x i32> @llvm.aarch64.sve.revb.nxv4i32(<vscale x 4 x i32> %pred, <vscale x 4 x i1> %passthru, <vscale x 4 x i32> %a)
+ %2 = call <vscale x 4 x i32> @llvm.aarch64.sve.revb.nxv4i32(<vscale x 4 x i32> %pred, <vscale x 4 x i1> %passthru1, <vscale x 4 x i32> %1)
+ ret <vscale x 4 x i32> %2
+}
+
+; negative test
+define <vscale x 16 x i8> @aarch64_sve_revd_inv_passthru_mismatch(<vscale x 16 x i8> %pred, <vscale x 16 x i1> %passthru, <vscale x 16 x i1> %passthru1, <vscale x 16 x i8> %a) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @aarch64_sve_revd_inv_passthru_mismatch(
+; CHECK-SAME: <vscale x 16 x i8> [[PRED:%.*]], <vscale x 16 x i1> [[PASSTHRU:%.*]], <vscale x 16 x i1> [[PASSTHRU1:%.*]], <vscale x 16 x i8> [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.revd.nxv16i8(<vscale x 16 x i8> [[PRED]], <vscale x 16 x i1> [[PASSTHRU]], <vscale x 16 x i8> [[A]])
+; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.revd.nxv16i8(<vscale x 16 x i8> [[PRED]], <vscale x 16 x i1> [[PASSTHRU1]], <vscale x 16 x i8> [[TMP1]])
+; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP2]]
+;
+ %1 = call <vscale x 16 x i8> @llvm.aarch64.sve.revd.nxv16i8(<vscale x 16 x i8> %pred, <vscale x 16 x i1> %passthru, <vscale x 16 x i8> %a)
+ %2 = call <vscale x 16 x i8> @llvm.aarch64.sve.revd.nxv16i8(<vscale x 16 x i8> %pred, <vscale x 16 x i1> %passthru1, <vscale x 16 x i8> %1)
+ ret <vscale x 16 x i8> %2
+}
+
+; negative test
+define <vscale x 4 x i32> @aarch64_sve_revh_inv_passthru_mismatch(<vscale x 4 x i32> %pred, <vscale x 4 x i1> %passthru, <vscale x 4 x i1> %passthru1, <vscale x 4 x i32> %a) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @aarch64_sve_revh_inv_passthru_mismatch(
+; CHECK-SAME: <vscale x 4 x i32> [[PRED:%.*]], <vscale x 4 x i1> [[PASSTHRU:%.*]], <vscale x 4 x i1> [[PASSTHRU1:%.*]], <vscale x 4 x i32> [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.revh.nxv4i32(<vscale x 4 x i32> [[PRED]], <vscale x 4 x i1> [[PASSTHRU]], <vscale x 4 x i32> [[A]])
+; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.revh.nxv4i32(<vscale x 4 x i32> [[PRED]], <vscale x 4 x i1> [[PASSTHRU1]], <vscale x 4 x i32> [[TMP1]])
+; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
+;
+ %1 = call <vscale x 4 x i32> @llvm.aarch64.sve.revh.nxv4i32(<vscale x 4 x i32> %pred, <vscale x 4 x i1> %passthru, <vscale x 4 x i32> %a)
+ %2 = call <vscale x 4 x i32> @llvm.aarch64.sve.revh.nxv4i32(<vscale x 4 x i32> %pred, <vscale x 4 x i1> %passthru1, <vscale x 4 x i32> %1)
+ ret <vscale x 4 x i32> %2
+}
+
+; negative test
+define <vscale x 2 x i64> @aarch64_sve_revw_inv_passthru_mismatch(<vscale x 2 x i64> %pred, <vscale x 2 x i1> %passthru, <vscale x 2 x i1> %passthru1, <vscale x 2 x i64> %a) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @aarch64_sve_revw_inv_passthru_mismatch(
+; CHECK-SAME: <vscale x 2 x i64> [[PRED:%.*]], <vscale x 2 x i1> [[PASSTHRU:%.*]], <vscale x 2 x i1> [[PASSTHRU1:%.*]], <vscale x 2 x i64> [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.revw.nxv2i64(<vscale x 2 x i64> [[PRED]], <vscale x 2 x i1> [[PASSTHRU]], <vscale x 2 x i64> [[A]])
+; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.revw.nxv2i64(<vscale x 2 x i64> [[PRED]], <vscale x 2 x i1> [[PASSTHRU1]], <vscale x 2 x i64> [[TMP1]])
+; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
+;
+ %1 = call <vscale x 2 x i64> @llvm.aarch64.sve.revw.nxv2i64(<vscale x 2 x i64> %pred, <vscale x 2 x i1> %passthru, <vscale x 2 x i64> %a)
+ %2 = call <vscale x 2 x i64> @llvm.aarch64.sve.revw.nxv2i64(<vscale x 2 x i64> %pred, <vscale x 2 x i1> %passthru1, <vscale x 2 x i64> %1)
+ ret <vscale x 2 x i64> %2
+}
+
+; negative test
+define <vscale x 16 x i1> @aarch64_sve_rev_mismatch(<vscale x 16 x i1> %0) #0{
+; CHECK-LABEL: define <vscale x 16 x i1> @aarch64_sve_rev_mismatch(
+; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b16(<vscale x 16 x i1> [[TMP0]])
+; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b32(<vscale x 16 x i1> [[TMP1]])
+; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP2]]
+;
+entry:
+ %1 = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b16(<vscale x 16 x i1> %0)
+ %2 = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b32(<vscale x 16 x i1> %1)
+ ret <vscale x 16 x i1> %2
+}
+
+; negative test
+define <vscale x 2 x i64> @aarch64_sve_rev_mismatch_1(<vscale x 2 x i64> %pred, <vscale x 2 x i1> %passthru, <vscale x 2 x i64> %a) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @aarch64_sve_rev_mismatch_1(
+; CHECK-SAME: <vscale x 2 x i64> [[PRED:%.*]], <vscale x 2 x i1> [[PASSTHRU:%.*]], <vscale x 2 x i64> [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.revw.nxv2i64(<vscale x 2 x i64> [[PRED]], <vscale x 2 x i1> [[PASSTHRU]], <vscale x 2 x i64> [[A]])
+; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.revh.nxv2i64(<vscale x 2 x i64> [[PRED]], <vscale x 2 x i1> [[PASSTHRU]], <vscale x 2 x i64> [[TMP1]])
+; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
+;
+ %1 = call <vscale x 2 x i64> @llvm.aarch64.sve.revw.nxv2i64(<vscale x 2 x i64> %pred, <vscale x 2 x i1> %passthru, <vscale x 2 x i64> %a)
+ %2 = call <vscale x 2 x i64> @llvm.aarch64.sve.revh.nxv2i64(<vscale x 2 x i64> %pred, <vscale x 2 x i1> %passthru, <vscale x 2 x i64> %1)
+ ret <vscale x 2 x i64> %2
+}
+
+; negative test
+define <vscale x 16 x i1> @aarch64_sve_rev_inv_multi_use(<vscale x 16 x i1> %0) #0 {
+; CHECK-LABEL: define <vscale x 16 x i1> @aarch64_sve_rev_inv_multi_use(
+; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b16(<vscale x 16 x i1> [[TMP0]])
+; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b16(<vscale x 16 x i1> [[TMP1]])
+; CHECK-NEXT: [[TMP3:%.*]] = or <vscale x 16 x i1> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP3]]
+;
+entry:
+ %1 = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b16(<vscale x 16 x i1> %0)
+ %2 = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b16(<vscale x 16 x i1> %1)
+ %3 = or <vscale x 16 x i1> %1, %2
+ ret <vscale x 16 x i1> %3
+}
+
+
+; negative test
+define <vscale x 2 x i64> @aarch64_sve_revw_inv_multi_use(<vscale x 2 x i64> %pred, <vscale x 2 x i1> %passthru, <vscale x 2 x i64> %a) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @aarch64_sve_revw_inv_multi_use(
+; CHECK-SAME: <vscale x 2 x i64> [[PRED:%.*]], <vscale x 2 x i1> [[PASSTHRU:%.*]], <vscale x 2 x i64> [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.revw.nxv2i64(<vscale x 2 x i64> [[PRED]], <vscale x 2 x i1> [[PASSTHRU]], <vscale x 2 x i64> [[A]])
+; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.revw.nxv2i64(<vscale x 2 x i64> [[PRED]], <vscale x 2 x i1> [[PASSTHRU]], <vscale x 2 x i64> [[TMP1]])
+; CHECK-NEXT: [[TMP3:%.*]] = or <vscale x 2 x i64> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP3]]
+;
+ %1 = call <vscale x 2 x i64> @llvm.aarch64.sve.revw.nxv2i64(<vscale x 2 x i64> %pred, <vscale x 2 x i1> %passthru, <vscale x 2 x i64> %a)
+ %2 = call <vscale x 2 x i64> @llvm.aarch64.sve.revw.nxv2i64(<vscale x 2 x i64> %pred, <vscale x 2 x i1> %passthru, <vscale x 2 x i64> %1)
+ %3 = or <vscale x 2 x i64> %1, %2
+ ret <vscale x 2 x i64> %3
+}
+
+attributes #0 = { "target-features"="+sve" }
From 3569d1ceeed9c38b0d978b6a30eefd602ef6e455 Mon Sep 17 00:00:00 2001
From: Jorge Botto <jorge.botto.16 at ucl.ac.uk>
Date: Fri, 25 Apr 2025 18:06:25 +0100
Subject: [PATCH 2/2] Adding rev(rev(x)) -> x optimisation
---
.../AArch64/AArch64TargetTransformInfo.cpp | 56 +++++++++++++++++++
.../AArch64/sve-intrinsic-rev-inv.ll | 32 +++--------
2 files changed, 64 insertions(+), 24 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index fcc5eb1c05ba0..f9dca609f6828 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -2368,6 +2368,7 @@ static std::optional<Instruction *> instCombineSVEUzp1(InstCombiner &IC,
// uzp1(to_svbool(A), to_svbool(B)) --> <A, B>
// uzp1(from_svbool(to_svbool(A)), from_svbool(to_svbool(B))) --> <A, B>
+
if ((match(II.getArgOperand(0),
m_Intrinsic<FromSVB>(m_Intrinsic<ToSVB>(m_Value(A)))) &&
match(II.getArgOperand(1),
@@ -2674,6 +2675,52 @@ static std::optional<Instruction *> instCombinePTrue(InstCombiner &IC,
return std::nullopt;
}
+static std::optional<Instruction *> instCombineSVERev(InstCombiner &IC,
+ IntrinsicInst &II) {
+ // rev(rev(x)) -> x
+ switch (II.getIntrinsicID()) {
+ default:
+ return std::nullopt;
+
+ case Intrinsic::aarch64_sve_rev:
+ case Intrinsic::aarch64_sve_rev_b16:
+ case Intrinsic::aarch64_sve_rev_b32:
+ case Intrinsic::aarch64_sve_rev_b64: {
+ Value *InnerArg = II.getArgOperand(0);
+ IntrinsicInst *InnerRev = dyn_cast<IntrinsicInst>(InnerArg);
+ // Fold rev(rev(x)) -> x, if intrinsic IDs match and InnerRev has one use
+ if (InnerRev && InnerRev->getIntrinsicID() == II.getIntrinsicID() &&
+ InnerRev->hasOneUse())
+ return IC.replaceInstUsesWith(II, InnerRev->getArgOperand(0));
+
+ return std::nullopt;
+ }
+
+ case Intrinsic::aarch64_sve_revb:
+ case Intrinsic::aarch64_sve_revd:
+ case Intrinsic::aarch64_sve_revh:
+ case Intrinsic::aarch64_sve_revw: {
+ Value *InnerArg = II.getArgOperand(2);
+ IntrinsicInst *InnerRev = dyn_cast<IntrinsicInst>(InnerArg);
+
+ // Bail out unless the inner operand is the same rev intrinsic and it has a single use.
+ if (!InnerRev || InnerRev->getIntrinsicID() != II.getIntrinsicID() ||
+ !InnerRev->hasOneUse())
+ return std::nullopt;
+
+ Value *OuterPred = II.getArgOperand(0);
+ Value *OuterPassThru = II.getArgOperand(1);
+ Value *InnerPred = InnerRev->getArgOperand(0);
+ Value *InnerPassThru = InnerRev->getArgOperand(1);
+
+ // Fold rev(rev(x)) -> x only when the predicates and pass-thrus match
+ if (OuterPred == InnerPred && OuterPassThru == InnerPassThru)
+ return IC.replaceInstUsesWith(II, InnerRev->getArgOperand(2));
+
+ return std::nullopt;
+ }
+ }
+}
+
std::optional<Instruction *>
AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
IntrinsicInst &II) const {
@@ -2773,6 +2820,15 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
return instCombineSVEInsr(IC, II);
case Intrinsic::aarch64_sve_ptrue:
return instCombinePTrue(IC, II);
+ case Intrinsic::aarch64_sve_rev:
+ case Intrinsic::aarch64_sve_rev_b16:
+ case Intrinsic::aarch64_sve_rev_b32:
+ case Intrinsic::aarch64_sve_rev_b64:
+ case Intrinsic::aarch64_sve_revb:
+ case Intrinsic::aarch64_sve_revd:
+ case Intrinsic::aarch64_sve_revh:
+ case Intrinsic::aarch64_sve_revw:
+ return instCombineSVERev(IC, II);
}
return std::nullopt;
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-rev-inv.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-rev-inv.ll
index 204208dde5f45..e9541eda85d40 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-rev-inv.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-rev-inv.ll
@@ -7,9 +7,7 @@ define <vscale x 16 x i1> @aarch64_sve_rev_inv(<vscale x 16 x i1> %0) #0{
; CHECK-LABEL: define <vscale x 16 x i1> @aarch64_sve_rev_inv(
; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.nxv16i1(<vscale x 16 x i1> [[TMP0]])
-; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.nxv16i1(<vscale x 16 x i1> [[TMP1]])
-; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP2]]
+; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
;
entry:
%1 = call <vscale x 16 x i1> @llvm.aarch64.sve.rev(<vscale x 16 x i1> %0)
@@ -21,9 +19,7 @@ define <vscale x 16 x i1> @aarch64_sve_rev_b16_inv(<vscale x 16 x i1> %0) #0{
; CHECK-LABEL: define <vscale x 16 x i1> @aarch64_sve_rev_b16_inv(
; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b16(<vscale x 16 x i1> [[TMP0]])
-; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b16(<vscale x 16 x i1> [[TMP1]])
-; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP2]]
+; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
;
entry:
%1 = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b16(<vscale x 16 x i1> %0)
@@ -35,9 +31,7 @@ define <vscale x 16 x i1> @aarch64_sve_rev_b32_inv(<vscale x 16 x i1> %0) #0{
; CHECK-LABEL: define <vscale x 16 x i1> @aarch64_sve_rev_b32_inv(
; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b32(<vscale x 16 x i1> [[TMP0]])
-; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b32(<vscale x 16 x i1> [[TMP1]])
-; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP2]]
+; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
;
entry:
%1 = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b32(<vscale x 16 x i1> %0)
@@ -49,9 +43,7 @@ define <vscale x 16 x i1> @aarch64_sve_rev_b64_inv(<vscale x 16 x i1> %0)#0 {
; CHECK-LABEL: define <vscale x 16 x i1> @aarch64_sve_rev_b64_inv(
; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b64(<vscale x 16 x i1> [[TMP0]])
-; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b64(<vscale x 16 x i1> [[TMP1]])
-; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP2]]
+; CHECK-NEXT: ret <vscale x 16 x i1> [[TMP0]]
;
entry:
%1 = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b64(<vscale x 16 x i1> %0)
@@ -63,9 +55,7 @@ entry:
define <vscale x 4 x i32> @aarch64_sve_revb_inv(<vscale x 4 x i32> %pred, <vscale x 4 x i1> %passthru, <vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @aarch64_sve_revb_inv(
; CHECK-SAME: <vscale x 4 x i32> [[PRED:%.*]], <vscale x 4 x i1> [[PASSTHRU:%.*]], <vscale x 4 x i32> [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.revb.nxv4i32(<vscale x 4 x i32> [[PRED]], <vscale x 4 x i1> [[PASSTHRU]], <vscale x 4 x i32> [[A]])
-; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.revb.nxv4i32(<vscale x 4 x i32> [[PRED]], <vscale x 4 x i1> [[PASSTHRU]], <vscale x 4 x i32> [[TMP1]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
+; CHECK-NEXT: ret <vscale x 4 x i32> [[A]]
;
%1 = call <vscale x 4 x i32> @llvm.aarch64.sve.revb.nxv4i32(<vscale x 4 x i32> %pred, <vscale x 4 x i1> %passthru, <vscale x 4 x i32> %a)
%2 = call <vscale x 4 x i32> @llvm.aarch64.sve.revb.nxv4i32(<vscale x 4 x i32> %pred, <vscale x 4 x i1> %passthru, <vscale x 4 x i32> %1)
@@ -75,9 +65,7 @@ define <vscale x 4 x i32> @aarch64_sve_revb_inv(<vscale x 4 x i32> %pred, <vscal
define <vscale x 16 x i8> @aarch64_sve_revd_inv(<vscale x 16 x i8> %pred, <vscale x 16 x i1> %passthru, <vscale x 16 x i8> %a) #0 {
; CHECK-LABEL: define <vscale x 16 x i8> @aarch64_sve_revd_inv(
; CHECK-SAME: <vscale x 16 x i8> [[PRED:%.*]], <vscale x 16 x i1> [[PASSTHRU:%.*]], <vscale x 16 x i8> [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.revd.nxv16i8(<vscale x 16 x i8> [[PRED]], <vscale x 16 x i1> [[PASSTHRU]], <vscale x 16 x i8> [[A]])
-; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.revd.nxv16i8(<vscale x 16 x i8> [[PRED]], <vscale x 16 x i1> [[PASSTHRU]], <vscale x 16 x i8> [[TMP1]])
-; CHECK-NEXT: ret <vscale x 16 x i8> [[TMP2]]
+; CHECK-NEXT: ret <vscale x 16 x i8> [[A]]
;
%1 = call <vscale x 16 x i8> @llvm.aarch64.sve.revd.nxv16i8(<vscale x 16 x i8> %pred, <vscale x 16 x i1> %passthru, <vscale x 16 x i8> %a)
%2 = call <vscale x 16 x i8> @llvm.aarch64.sve.revd.nxv16i8(<vscale x 16 x i8> %pred, <vscale x 16 x i1> %passthru, <vscale x 16 x i8> %1)
@@ -87,9 +75,7 @@ define <vscale x 16 x i8> @aarch64_sve_revd_inv(<vscale x 16 x i8> %pred, <vscal
define <vscale x 4 x i32> @aarch64_sve_revh_inv(<vscale x 4 x i32> %pred, <vscale x 4 x i1> %passthru, <vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: define <vscale x 4 x i32> @aarch64_sve_revh_inv(
; CHECK-SAME: <vscale x 4 x i32> [[PRED:%.*]], <vscale x 4 x i1> [[PASSTHRU:%.*]], <vscale x 4 x i32> [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.revh.nxv4i32(<vscale x 4 x i32> [[PRED]], <vscale x 4 x i1> [[PASSTHRU]], <vscale x 4 x i32> [[A]])
-; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.revh.nxv4i32(<vscale x 4 x i32> [[PRED]], <vscale x 4 x i1> [[PASSTHRU]], <vscale x 4 x i32> [[TMP1]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
+; CHECK-NEXT: ret <vscale x 4 x i32> [[A]]
;
%1 = call <vscale x 4 x i32> @llvm.aarch64.sve.revh.nxv4i32(<vscale x 4 x i32> %pred, <vscale x 4 x i1> %passthru, <vscale x 4 x i32> %a)
%2 = call <vscale x 4 x i32> @llvm.aarch64.sve.revh.nxv4i32(<vscale x 4 x i32> %pred, <vscale x 4 x i1> %passthru, <vscale x 4 x i32> %1)
@@ -99,9 +85,7 @@ define <vscale x 4 x i32> @aarch64_sve_revh_inv(<vscale x 4 x i32> %pred, <vscal
define <vscale x 2 x i64> @aarch64_sve_revw_inv(<vscale x 2 x i64> %pred, <vscale x 2 x i1> %passthru, <vscale x 2 x i64> %a) #0 {
; CHECK-LABEL: define <vscale x 2 x i64> @aarch64_sve_revw_inv(
; CHECK-SAME: <vscale x 2 x i64> [[PRED:%.*]], <vscale x 2 x i1> [[PASSTHRU:%.*]], <vscale x 2 x i64> [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.revw.nxv2i64(<vscale x 2 x i64> [[PRED]], <vscale x 2 x i1> [[PASSTHRU]], <vscale x 2 x i64> [[A]])
-; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.revw.nxv2i64(<vscale x 2 x i64> [[PRED]], <vscale x 2 x i1> [[PASSTHRU]], <vscale x 2 x i64> [[TMP1]])
-; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
+; CHECK-NEXT: ret <vscale x 2 x i64> [[A]]
;
%1 = call <vscale x 2 x i64> @llvm.aarch64.sve.revw.nxv2i64(<vscale x 2 x i64> %pred, <vscale x 2 x i1> %passthru, <vscale x 2 x i64> %a)
%2 = call <vscale x 2 x i64> @llvm.aarch64.sve.revw.nxv2i64(<vscale x 2 x i64> %pred, <vscale x 2 x i1> %passthru, <vscale x 2 x i64> %1)
More information about the llvm-commits mailing list