[llvm] [InstCombine] Implement vp.reverse elimination through binop/unop (PR #143963)

Philip Reames via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 16 11:37:48 PDT 2025


https://github.com/preames updated https://github.com/llvm/llvm-project/pull/143963

>From 268a6f19c4873640aaea8ea4867ae898a09a3274 Mon Sep 17 00:00:00 2001
From: Philip Reames <preames at rivosinc.com>
Date: Thu, 12 Jun 2025 08:15:43 -0700
Subject: [PATCH 1/4] [InstCombine] Implement vp.reverse elimination through
 binop/unop

This simply copies the structure of the vector.reverse patterns from
just above, and reimplements them for the vp.reverse intrinsics
when the masks and EVLs exactly match.

It's unfortunate that we have three different ways to represent a
reverse (shuffle, vector.reverse, and vp.reverse) but I don't see
an obvious way to remove any of them because the semantics are slightly
different.

This significantly improves vectorization in TSVC_2 s112 and s1112
loops when using EVL tail folding.
---
 .../InstCombine/InstCombineCalls.cpp          | 36 +++++++++++++++++++
 .../test/Transforms/InstCombine/vp-reverse.ll | 32 ++++++-----------
 2 files changed, 46 insertions(+), 22 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index c169ab25b2106..ee7c9dbac2fa3 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -3582,6 +3582,42 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
     }
     break;
   }
+  case Intrinsic::experimental_vp_reverse: {
+    Value *BO0, *BO1, *X, *Y;
+    Value *Vec = II->getArgOperand(0);
+    Value *Mask = II->getArgOperand(1);
+    Value *EVL = II->getArgOperand(2);
+    auto m_VPReverse = [&](Value *&Vec) {
+      return m_Intrinsic<Intrinsic::experimental_vp_reverse>(
+          m_Value(Vec), m_Specific(Mask), m_Specific(EVL));
+    };
+    if (match(Vec, m_OneUse(m_BinOp(m_Value(BO0), m_Value(BO1))))) {
+      auto *OldBinOp = cast<BinaryOperator>(Vec);
+      if (match(BO0, m_VPReverse(X))) {
+        // rev(binop rev(X), rev(Y)) --> binop X, Y
+        if (match(BO1, m_VPReverse(Y)))
+          return replaceInstUsesWith(CI, BinaryOperator::CreateWithCopiedFlags(
+                                             OldBinOp->getOpcode(), X, Y,
+                                             OldBinOp, OldBinOp->getName(),
+                                             II->getIterator()));
+        // rev(binop rev(X), BO1Splat) --> binop X, BO1Splat
+        if (isSplatValue(BO1))
+          return replaceInstUsesWith(CI, BinaryOperator::CreateWithCopiedFlags(
+                                             OldBinOp->getOpcode(), X, BO1,
+                                             OldBinOp, OldBinOp->getName(),
+                                             II->getIterator()));
+      }
+    }
+    // rev(unop rev(X)) --> unop X
+    if (match(Vec, m_OneUse(m_UnOp(m_VPReverse(X))))) {
+      auto *OldUnOp = cast<UnaryOperator>(Vec);
+      auto *NewUnOp = UnaryOperator::CreateWithCopiedFlags(
+          OldUnOp->getOpcode(), X, OldUnOp, OldUnOp->getName(),
+          II->getIterator());
+      return replaceInstUsesWith(CI, NewUnOp);
+    }
+    break;
+  }
   case Intrinsic::vector_reduce_or:
   case Intrinsic::vector_reduce_and: {
     // Canonicalize logical or/and reductions:
diff --git a/llvm/test/Transforms/InstCombine/vp-reverse.ll b/llvm/test/Transforms/InstCombine/vp-reverse.ll
index 79e6c47bdf1b2..41b27be5f5248 100644
--- a/llvm/test/Transforms/InstCombine/vp-reverse.ll
+++ b/llvm/test/Transforms/InstCombine/vp-reverse.ll
@@ -3,11 +3,8 @@
 
 define <vscale x 4 x i32> @binop_reverse_elim(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 %evl) {
 ; CHECK-LABEL: @binop_reverse_elim(
-; CHECK-NEXT:    [[A:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[A1:%.*]], <vscale x 4 x i1> splat (i1 true), i32 [[EVL:%.*]])
-; CHECK-NEXT:    [[B:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[B1:%.*]], <vscale x 4 x i1> splat (i1 true), i32 [[EVL]])
-; CHECK-NEXT:    [[ADD1:%.*]] = add nsw <vscale x 4 x i32> [[A]], [[B]]
-; CHECK-NEXT:    [[ADD_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[ADD1]], <vscale x 4 x i1> splat (i1 true), i32 [[EVL]])
-; CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD_REV]]
+; CHECK-NEXT:    [[ADD1:%.*]] = add nsw <vscale x 4 x i32> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD1]]
 ;
   %a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %a, <vscale x 4 x i1> splat (i1 true), i32 %evl)
   %b.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %b, <vscale x 4 x i1> splat (i1 true), i32 %evl)
@@ -18,11 +15,8 @@ define <vscale x 4 x i32> @binop_reverse_elim(<vscale x 4 x i32> %a, <vscale x 4
 
 define <vscale x 4 x i32> @binop_reverse_elim2(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i1> %m, i32 %evl) {
 ; CHECK-LABEL: @binop_reverse_elim2(
-; CHECK-NEXT:    [[A_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i1> [[M:%.*]], i32 [[EVL:%.*]])
-; CHECK-NEXT:    [[B_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[B:%.*]], <vscale x 4 x i1> [[M]], i32 [[EVL]])
-; CHECK-NEXT:    [[ADD:%.*]] = add nsw <vscale x 4 x i32> [[A_REV]], [[B_REV]]
-; CHECK-NEXT:    [[ADD_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[ADD]], <vscale x 4 x i1> [[M]], i32 [[EVL]])
-; CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD_REV]]
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw <vscale x 4 x i32> [[A_REV:%.*]], [[B_REV:%.*]]
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD]]
 ;
   %a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %a, <vscale x 4 x i1> %m, i32 %evl)
   %b.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %b, <vscale x 4 x i1> %m, i32 %evl)
@@ -63,10 +57,8 @@ define <vscale x 4 x i32> @binop_reverse_elim_diffevl(<vscale x 4 x i32> %a, <vs
 
 define <vscale x 4 x i32> @binop_reverse_splat_elim(<vscale x 4 x i32> %a, i32 %evl) {
 ; CHECK-LABEL: @binop_reverse_splat_elim(
-; CHECK-NEXT:    [[A:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[A1:%.*]], <vscale x 4 x i1> splat (i1 true), i32 [[EVL:%.*]])
-; CHECK-NEXT:    [[ADD1:%.*]] = add nsw <vscale x 4 x i32> [[A]], splat (i32 22)
-; CHECK-NEXT:    [[ADD_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[ADD1]], <vscale x 4 x i1> splat (i1 true), i32 [[EVL]])
-; CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD_REV]]
+; CHECK-NEXT:    [[ADD1:%.*]] = add nsw <vscale x 4 x i32> [[A:%.*]], splat (i32 22)
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD1]]
 ;
   %a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %a, <vscale x 4 x i1> splat (i1 true), i32 %evl)
   %add = add nsw <vscale x 4 x i32> %a.rev, splat (i32 22)
@@ -76,10 +68,8 @@ define <vscale x 4 x i32> @binop_reverse_splat_elim(<vscale x 4 x i32> %a, i32 %
 
 define <vscale x 4 x i32> @binop_reverse_splat_elim2(<vscale x 4 x i32> %a, i32 %evl) {
 ; CHECK-LABEL: @binop_reverse_splat_elim2(
-; CHECK-NEXT:    [[A:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[A1:%.*]], <vscale x 4 x i1> splat (i1 true), i32 [[EVL:%.*]])
-; CHECK-NEXT:    [[ADD1:%.*]] = add nsw <vscale x 4 x i32> [[A]], splat (i32 22)
-; CHECK-NEXT:    [[ADD_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[ADD1]], <vscale x 4 x i1> splat (i1 true), i32 [[EVL]])
-; CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD_REV]]
+; CHECK-NEXT:    [[ADD1:%.*]] = add nsw <vscale x 4 x i32> [[A:%.*]], splat (i32 22)
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD1]]
 ;
   %a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %a, <vscale x 4 x i1> splat (i1 true), i32 %evl)
   %add = add nsw <vscale x 4 x i32> splat (i32 22), %a.rev
@@ -89,10 +79,8 @@ define <vscale x 4 x i32> @binop_reverse_splat_elim2(<vscale x 4 x i32> %a, i32
 
 define <vscale x 4 x float> @unop_reverse_splat_elim(<vscale x 4 x float> %a, <vscale x 4 x float> %b, i32 %evl) {
 ; CHECK-LABEL: @unop_reverse_splat_elim(
-; CHECK-NEXT:    [[A_REV:%.*]] = tail call <vscale x 4 x float> @llvm.experimental.vp.reverse.nxv4f32(<vscale x 4 x float> [[A:%.*]], <vscale x 4 x i1> splat (i1 true), i32 [[EVL:%.*]])
-; CHECK-NEXT:    [[OP:%.*]] = fneg <vscale x 4 x float> [[A_REV]]
-; CHECK-NEXT:    [[OP_REV:%.*]] = tail call <vscale x 4 x float> @llvm.experimental.vp.reverse.nxv4f32(<vscale x 4 x float> [[OP]], <vscale x 4 x i1> splat (i1 true), i32 [[EVL]])
-; CHECK-NEXT:    ret <vscale x 4 x float> [[OP_REV]]
+; CHECK-NEXT:    [[OP:%.*]] = fneg <vscale x 4 x float> [[A_REV:%.*]]
+; CHECK-NEXT:    ret <vscale x 4 x float> [[OP]]
 ;
   %a.rev = tail call <vscale x 4 x float> @llvm.experimental.vp.reverse.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x i1> splat (i1 true), i32 %evl)
   %op = fneg <vscale x 4 x float> %a.rev

>From 225ff0bbe4eb6b211738d9c883c988c15e817c35 Mon Sep 17 00:00:00 2001
From: Philip Reames <preames at rivosinc.com>
Date: Fri, 13 Jun 2025 07:20:26 -0700
Subject: [PATCH 2/4] Address review comment

---
 .../lib/Transforms/InstCombine/InstCombineCalls.cpp |  4 +++-
 llvm/test/Transforms/InstCombine/vp-reverse.ll      | 13 +++++++++----
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index ee7c9dbac2fa3..e6aaa01a27283 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -3586,10 +3586,12 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
     Value *BO0, *BO1, *X, *Y;
     Value *Vec = II->getArgOperand(0);
     Value *Mask = II->getArgOperand(1);
+    if (!match(Mask, m_AllOnes()))
+      break;
     Value *EVL = II->getArgOperand(2);
     auto m_VPReverse = [&](Value *&Vec) {
       return m_Intrinsic<Intrinsic::experimental_vp_reverse>(
-          m_Value(Vec), m_Specific(Mask), m_Specific(EVL));
+          m_Value(Vec), m_AllOnes(), m_Specific(EVL));
     };
     if (match(Vec, m_OneUse(m_BinOp(m_Value(BO0), m_Value(BO1))))) {
       auto *OldBinOp = cast<BinaryOperator>(Vec);
diff --git a/llvm/test/Transforms/InstCombine/vp-reverse.ll b/llvm/test/Transforms/InstCombine/vp-reverse.ll
index 41b27be5f5248..28645c5525e17 100644
--- a/llvm/test/Transforms/InstCombine/vp-reverse.ll
+++ b/llvm/test/Transforms/InstCombine/vp-reverse.ll
@@ -13,10 +13,15 @@ define <vscale x 4 x i32> @binop_reverse_elim(<vscale x 4 x i32> %a, <vscale x 4
   ret <vscale x 4 x i32> %add.rev
 }
 
-define <vscale x 4 x i32> @binop_reverse_elim2(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i1> %m, i32 %evl) {
-; CHECK-LABEL: @binop_reverse_elim2(
-; CHECK-NEXT:    [[ADD:%.*]] = add nsw <vscale x 4 x i32> [[A_REV:%.*]], [[B_REV:%.*]]
-; CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD]]
+; Negative test - the mask needs to be reversed between the inner and
+; the outer to be correct.
+define <vscale x 4 x i32> @binop_reverse_elim_samemask(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i1> %m, i32 %evl) {
+; CHECK-LABEL: @binop_reverse_elim_samemask(
+; CHECK-NEXT:    [[A_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i1> [[M:%.*]], i32 [[EVL:%.*]])
+; CHECK-NEXT:    [[B_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[B:%.*]], <vscale x 4 x i1> [[M]], i32 [[EVL]])
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw <vscale x 4 x i32> [[A_REV]], [[B_REV]]
+; CHECK-NEXT:    [[ADD_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[ADD]], <vscale x 4 x i1> [[M]], i32 [[EVL]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD_REV]]
 ;
   %a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %a, <vscale x 4 x i1> %m, i32 %evl)
   %b.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %b, <vscale x 4 x i1> %m, i32 %evl)

>From 8ba4d6c45fea0f939120079daf317e61741f50c2 Mon Sep 17 00:00:00 2001
From: Philip Reames <preames at rivosinc.com>
Date: Fri, 13 Jun 2025 07:30:35 -0700
Subject: [PATCH 3/4] Add handling for rev(binop BO0Splat, rev(Y)) --> binop
 BO0Splat, Y

---
 .../InstCombine/InstCombineCalls.cpp          |  6 ++++
 .../test/Transforms/InstCombine/vp-reverse.ll | 30 +++++++++++++++++++
 2 files changed, 36 insertions(+)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index e6aaa01a27283..9648306226447 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -3609,6 +3609,12 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
                                              OldBinOp, OldBinOp->getName(),
                                              II->getIterator()));
       }
+      // rev(binop BO0Splat, rev(Y)) --> binop BO0Splat, Y
+      if (match(BO1, m_VPReverse(Y)) && isSplatValue(BO0))
+        return replaceInstUsesWith(CI,
+                                   BinaryOperator::CreateWithCopiedFlags(
+                                       OldBinOp->getOpcode(), BO0, Y, OldBinOp,
+                                       OldBinOp->getName(), II->getIterator()));
     }
     // rev(unop rev(X)) --> unop X
     if (match(Vec, m_OneUse(m_UnOp(m_VPReverse(X))))) {
diff --git a/llvm/test/Transforms/InstCombine/vp-reverse.ll b/llvm/test/Transforms/InstCombine/vp-reverse.ll
index 28645c5525e17..ad7ab33afce3c 100644
--- a/llvm/test/Transforms/InstCombine/vp-reverse.ll
+++ b/llvm/test/Transforms/InstCombine/vp-reverse.ll
@@ -82,6 +82,36 @@ define <vscale x 4 x i32> @binop_reverse_splat_elim2(<vscale x 4 x i32> %a, i32
   ret <vscale x 4 x i32> %add.rev
 }
 
+define <vscale x 4 x i32> @binop_reverse_splat_elim3(<vscale x 4 x i32> %a, i32 %b, i32 %evl) {
+; CHECK-LABEL: @binop_reverse_splat_elim3(
+; CHECK-NEXT:    [[B_INS:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i64 0
+; CHECK-NEXT:    [[B_VEC:%.*]] = shufflevector <vscale x 4 x i32> [[B_INS]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw <vscale x 4 x i32> [[B_VEC]], [[A_REV:%.*]]
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD]]
+;
+  %b.ins = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
+  %b.vec = shufflevector <vscale x 4 x i32> %b.ins, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+  %a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %a, <vscale x 4 x i1> splat (i1 true), i32 %evl)
+  %add = add nsw <vscale x 4 x i32> %b.vec, %a.rev
+  %add.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %add, <vscale x 4 x i1> splat (i1 true), i32 %evl)
+  ret <vscale x 4 x i32> %add.rev
+}
+
+define <vscale x 4 x i32> @binop_reverse_splat_elim4(<vscale x 4 x i32> %a, i32 %b, i32 %evl) {
+; CHECK-LABEL: @binop_reverse_splat_elim4(
+; CHECK-NEXT:    [[B_INS:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i64 0
+; CHECK-NEXT:    [[B_VEC:%.*]] = shufflevector <vscale x 4 x i32> [[B_INS]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT:    [[ADD1:%.*]] = add nsw <vscale x 4 x i32> [[A:%.*]], [[B_VEC]]
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD1]]
+;
+  %b.ins = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
+  %b.vec = shufflevector <vscale x 4 x i32> %b.ins, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+  %a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %a, <vscale x 4 x i1> splat (i1 true), i32 %evl)
+  %add = add nsw <vscale x 4 x i32> %a.rev,  %b.vec
+  %add.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %add, <vscale x 4 x i1> splat (i1 true), i32 %evl)
+  ret <vscale x 4 x i32> %add.rev
+}
+
 define <vscale x 4 x float> @unop_reverse_splat_elim(<vscale x 4 x float> %a, <vscale x 4 x float> %b, i32 %evl) {
 ; CHECK-LABEL: @unop_reverse_splat_elim(
 ; CHECK-NEXT:    [[OP:%.*]] = fneg <vscale x 4 x float> [[A_REV:%.*]]

>From 098adb02af7374ab4fcdeb7adb5f4ba78f6f9f89 Mon Sep 17 00:00:00 2001
From: Philip Reames <preames at rivosinc.com>
Date: Mon, 16 Jun 2025 11:27:48 -0700
Subject: [PATCH 4/4] Rework as order canonicalization instead of elimination

---
 .../InstCombine/InstCombineCalls.cpp          | 32 ++----------------
 .../InstCombine/InstructionCombining.cpp      | 33 +++++++++++++++++++
 .../test/Transforms/InstCombine/vp-reverse.ll |  7 ++--
 3 files changed, 39 insertions(+), 33 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index cf78e9fa7cf1e..b56f552ec2253 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -3591,41 +3591,15 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
     break;
   }
   case Intrinsic::experimental_vp_reverse: {
-    Value *BO0, *BO1, *X, *Y;
+    Value *X;
     Value *Vec = II->getArgOperand(0);
     Value *Mask = II->getArgOperand(1);
     if (!match(Mask, m_AllOnes()))
       break;
     Value *EVL = II->getArgOperand(2);
-    auto m_VPReverse = [&](Value *&Vec) {
-      return m_Intrinsic<Intrinsic::experimental_vp_reverse>(
-          m_Value(Vec), m_AllOnes(), m_Specific(EVL));
-    };
-    if (match(Vec, m_OneUse(m_BinOp(m_Value(BO0), m_Value(BO1))))) {
-      auto *OldBinOp = cast<BinaryOperator>(Vec);
-      if (match(BO0, m_VPReverse(X))) {
-        // rev(binop rev(X), rev(Y)) --> binop X, Y
-        if (match(BO1, m_VPReverse(Y)))
-          return replaceInstUsesWith(CI, BinaryOperator::CreateWithCopiedFlags(
-                                             OldBinOp->getOpcode(), X, Y,
-                                             OldBinOp, OldBinOp->getName(),
-                                             II->getIterator()));
-        // rev(binop rev(X), BO1Splat) --> binop X, BO1Splat
-        if (isSplatValue(BO1))
-          return replaceInstUsesWith(CI, BinaryOperator::CreateWithCopiedFlags(
-                                             OldBinOp->getOpcode(), X, BO1,
-                                             OldBinOp, OldBinOp->getName(),
-                                             II->getIterator()));
-      }
-      // rev(binop BO0Splat, rev(Y)) --> binop BO0Splat, Y
-      if (match(BO1, m_VPReverse(Y)) && isSplatValue(BO0))
-        return replaceInstUsesWith(CI,
-                                   BinaryOperator::CreateWithCopiedFlags(
-                                       OldBinOp->getOpcode(), BO0, Y, OldBinOp,
-                                       OldBinOp->getName(), II->getIterator()));
-    }
     // rev(unop rev(X)) --> unop X
-    if (match(Vec, m_OneUse(m_UnOp(m_VPReverse(X))))) {
+    if (match(Vec, m_OneUse(m_UnOp(m_Intrinsic<Intrinsic::experimental_vp_reverse>(
+          m_Value(X), m_AllOnes(), m_Specific(EVL)))))) {
       auto *OldUnOp = cast<UnaryOperator>(Vec);
       auto *NewUnOp = UnaryOperator::CreateWithCopiedFlags(
           OldUnOp->getOpcode(), X, OldUnOp, OldUnOp->getName(),
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 4fe900e9421f8..b51e059032b6c 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2231,6 +2231,39 @@ Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
   else if (isSplatValue(LHS) && match(RHS, m_OneUse(m_VecReverse(m_Value(V2)))))
     return createBinOpReverse(LHS, V2);
 
+  auto createBinOpVPReverse = [&](Value *X, Value *Y, Value *EVL) {
+    Value *V = Builder.CreateBinOp(Opcode, X, Y, Inst.getName());
+    if (auto *BO = dyn_cast<BinaryOperator>(V))
+      BO->copyIRFlags(&Inst);
+
+    ElementCount EC = cast<VectorType>(V->getType())->getElementCount();
+    Value *AllTrueMask = Builder.CreateVectorSplat(EC, Builder.getTrue());
+    Module *M = Inst.getModule();
+    Function *F = Intrinsic::getOrInsertDeclaration(
+        M, Intrinsic::experimental_vp_reverse, V->getType());
+    return CallInst::Create(F, {V, AllTrueMask, EVL});
+  };
+
+  Value *EVL;
+  if (match(LHS, m_Intrinsic<Intrinsic::experimental_vp_reverse>(
+                     m_Value(V1), m_AllOnes(), m_Value(EVL)))) {
+    // Op(rev(V1), rev(V2)) -> rev(Op(V1, V2))
+    if (match(RHS, m_Intrinsic<Intrinsic::experimental_vp_reverse>(
+                       m_Value(V2), m_AllOnes(), m_Specific(EVL))) &&
+        (LHS->hasOneUse() || RHS->hasOneUse() ||
+         (LHS == RHS && LHS->hasNUses(2))))
+      return createBinOpVPReverse(V1, V2, EVL);
+
+    // Op(rev(V1), RHSSplat)) -> rev(Op(V1, RHSSplat))
+    if (LHS->hasOneUse() && isSplatValue(RHS))
+      return createBinOpVPReverse(V1, RHS, EVL);
+  }
+  // Op(LHSSplat, rev(V2)) -> rev(Op(LHSSplat, V2))
+  else if (isSplatValue(LHS) &&
+           match(RHS, m_Intrinsic<Intrinsic::experimental_vp_reverse>(
+                          m_Value(V2), m_AllOnes(), m_Value(EVL))))
+    return createBinOpVPReverse(LHS, V2, EVL);
+
   // It may not be safe to reorder shuffles and things like div, urem, etc.
   // because we may trap when executing those ops on unknown vector elements.
   // See PR20059.
diff --git a/llvm/test/Transforms/InstCombine/vp-reverse.ll b/llvm/test/Transforms/InstCombine/vp-reverse.ll
index ad7ab33afce3c..540b57da3475b 100644
--- a/llvm/test/Transforms/InstCombine/vp-reverse.ll
+++ b/llvm/test/Transforms/InstCombine/vp-reverse.ll
@@ -47,10 +47,9 @@ define <vscale x 4 x i32> @binop_reverse_elim_diffmask(<vscale x 4 x i32> %a, <v
 
 define <vscale x 4 x i32> @binop_reverse_elim_diffevl(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 %evl) {
 ; CHECK-LABEL: @binop_reverse_elim_diffevl(
-; CHECK-NEXT:    [[A_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i1> splat (i1 true), i32 [[EVL:%.*]])
-; CHECK-NEXT:    [[B_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[B:%.*]], <vscale x 4 x i1> splat (i1 true), i32 [[EVL]])
-; CHECK-NEXT:    [[ADD:%.*]] = add nsw <vscale x 4 x i32> [[A_REV]], [[B_REV]]
-; CHECK-NEXT:    [[ADD_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[ADD]], <vscale x 4 x i1> splat (i1 true), i32 10)
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw <vscale x 4 x i32> [[A_REV:%.*]], [[B_REV:%.*]]
+; CHECK-NEXT:    [[ADD1:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[ADD]], <vscale x 4 x i1> splat (i1 true), i32 [[EVL:%.*]])
+; CHECK-NEXT:    [[ADD_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[ADD1]], <vscale x 4 x i1> splat (i1 true), i32 10)
 ; CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD_REV]]
 ;
   %a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %a, <vscale x 4 x i1> splat (i1 true), i32 %evl)



More information about the llvm-commits mailing list