[llvm] [InstCombine] Implement vp.reverse elimination through binop/unop (PR #143963)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 12 13:23:13 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-llvm-transforms
Author: Philip Reames (preames)
Changes:
This simply copies the structure of the vector.reverse patterns from just above, and reimplements them for the vp.reverse intrinsics when the masks and EVLs exactly match.
It's unfortunate that we have three different ways to represent a reverse (shuffle, vector.reverse, and vp.reverse), but I don't see an obvious way to remove any of them because the semantics are slightly different.
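For concreteness, a rough sketch of the three forms (the values %v, %sv, %m, and %evl are placeholders; the shuffle form is shown on a fixed-length type, since a constant shuffle mask cannot express a scalable reverse):

```llvm
; 1) shufflevector with a reversed constant mask (fixed-length vectors only)
%r1 = shufflevector <4 x i32> %v, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; 2) vector.reverse -- reverses all lanes of a (possibly scalable) vector
%r2 = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> %sv)
; 3) vp.reverse -- reverses only the first EVL lanes, under a per-lane mask
%r3 = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> %sv, <vscale x 4 x i1> %m, i32 %evl)
```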
This significantly improves vectorization in TSVC_2's s112 and s1112 loops when using EVL tail folding.
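In IR terms (mirroring the binop_reverse_elim test in the diff below), the fold rewrites

```llvm
%a.rev = call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %a, <vscale x 4 x i1> splat (i1 true), i32 %evl)
%b.rev = call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %b, <vscale x 4 x i1> splat (i1 true), i32 %evl)
%add.rev = add nsw <vscale x 4 x i32> %a.rev, %b.rev
%res = call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %add.rev, <vscale x 4 x i1> splat (i1 true), i32 %evl)
```

into a single `add nsw <vscale x 4 x i32> %a, %b`: reversing both operands, applying the binop lanewise, and reversing the result back is a no-op on the lanes selected by the matching mask and EVL.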
---
Full diff: https://github.com/llvm/llvm-project/pull/143963.diff
2 Files Affected:
- (modified) llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp (+36)
- (modified) llvm/test/Transforms/InstCombine/vp-reverse.ll (+10-22)
``````````diff
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index c169ab25b2106..ee7c9dbac2fa3 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -3582,6 +3582,42 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
}
break;
}
+ case Intrinsic::experimental_vp_reverse: {
+ Value *BO0, *BO1, *X, *Y;
+ Value *Vec = II->getArgOperand(0);
+ Value *Mask = II->getArgOperand(1);
+ Value *EVL = II->getArgOperand(2);
+ auto m_VPReverse = [&](Value *&Vec) {
+ return m_Intrinsic<Intrinsic::experimental_vp_reverse>(
+ m_Value(Vec), m_Specific(Mask), m_Specific(EVL));
+ };
+ if (match(Vec, m_OneUse(m_BinOp(m_Value(BO0), m_Value(BO1))))) {
+ auto *OldBinOp = cast<BinaryOperator>(Vec);
+ if (match(BO0, m_VPReverse(X))) {
+ // rev(binop rev(X), rev(Y)) --> binop X, Y
+ if (match(BO1, m_VPReverse(Y)))
+ return replaceInstUsesWith(CI, BinaryOperator::CreateWithCopiedFlags(
+ OldBinOp->getOpcode(), X, Y,
+ OldBinOp, OldBinOp->getName(),
+ II->getIterator()));
+ // rev(binop rev(X), BO1Splat) --> binop X, BO1Splat
+ if (isSplatValue(BO1))
+ return replaceInstUsesWith(CI, BinaryOperator::CreateWithCopiedFlags(
+ OldBinOp->getOpcode(), X, BO1,
+ OldBinOp, OldBinOp->getName(),
+ II->getIterator()));
+ }
+ }
+ // rev(unop rev(X)) --> unop X
+ if (match(Vec, m_OneUse(m_UnOp(m_VPReverse(X))))) {
+ auto *OldUnOp = cast<UnaryOperator>(Vec);
+ auto *NewUnOp = UnaryOperator::CreateWithCopiedFlags(
+ OldUnOp->getOpcode(), X, OldUnOp, OldUnOp->getName(),
+ II->getIterator());
+ return replaceInstUsesWith(CI, NewUnOp);
+ }
+ break;
+ }
case Intrinsic::vector_reduce_or:
case Intrinsic::vector_reduce_and: {
// Canonicalize logical or/and reductions:
diff --git a/llvm/test/Transforms/InstCombine/vp-reverse.ll b/llvm/test/Transforms/InstCombine/vp-reverse.ll
index 79e6c47bdf1b2..41b27be5f5248 100644
--- a/llvm/test/Transforms/InstCombine/vp-reverse.ll
+++ b/llvm/test/Transforms/InstCombine/vp-reverse.ll
@@ -3,11 +3,8 @@
define <vscale x 4 x i32> @binop_reverse_elim(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 %evl) {
; CHECK-LABEL: @binop_reverse_elim(
-; CHECK-NEXT: [[A:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[A1:%.*]], <vscale x 4 x i1> splat (i1 true), i32 [[EVL:%.*]])
-; CHECK-NEXT: [[B:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[B1:%.*]], <vscale x 4 x i1> splat (i1 true), i32 [[EVL]])
-; CHECK-NEXT: [[ADD1:%.*]] = add nsw <vscale x 4 x i32> [[A]], [[B]]
-; CHECK-NEXT: [[ADD_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[ADD1]], <vscale x 4 x i1> splat (i1 true), i32 [[EVL]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[ADD_REV]]
+; CHECK-NEXT: [[ADD1:%.*]] = add nsw <vscale x 4 x i32> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: ret <vscale x 4 x i32> [[ADD1]]
;
%a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %a, <vscale x 4 x i1> splat (i1 true), i32 %evl)
%b.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %b, <vscale x 4 x i1> splat (i1 true), i32 %evl)
@@ -18,11 +15,8 @@ define <vscale x 4 x i32> @binop_reverse_elim(<vscale x 4 x i32> %a, <vscale x 4
define <vscale x 4 x i32> @binop_reverse_elim2(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i1> %m, i32 %evl) {
; CHECK-LABEL: @binop_reverse_elim2(
-; CHECK-NEXT: [[A_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i1> [[M:%.*]], i32 [[EVL:%.*]])
-; CHECK-NEXT: [[B_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[B:%.*]], <vscale x 4 x i1> [[M]], i32 [[EVL]])
-; CHECK-NEXT: [[ADD:%.*]] = add nsw <vscale x 4 x i32> [[A_REV]], [[B_REV]]
-; CHECK-NEXT: [[ADD_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[ADD]], <vscale x 4 x i1> [[M]], i32 [[EVL]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[ADD_REV]]
+; CHECK-NEXT: [[ADD:%.*]] = add nsw <vscale x 4 x i32> [[A_REV:%.*]], [[B_REV:%.*]]
+; CHECK-NEXT: ret <vscale x 4 x i32> [[ADD]]
;
%a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %a, <vscale x 4 x i1> %m, i32 %evl)
%b.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %b, <vscale x 4 x i1> %m, i32 %evl)
@@ -63,10 +57,8 @@ define <vscale x 4 x i32> @binop_reverse_elim_diffevl(<vscale x 4 x i32> %a, <vs
define <vscale x 4 x i32> @binop_reverse_splat_elim(<vscale x 4 x i32> %a, i32 %evl) {
; CHECK-LABEL: @binop_reverse_splat_elim(
-; CHECK-NEXT: [[A:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[A1:%.*]], <vscale x 4 x i1> splat (i1 true), i32 [[EVL:%.*]])
-; CHECK-NEXT: [[ADD1:%.*]] = add nsw <vscale x 4 x i32> [[A]], splat (i32 22)
-; CHECK-NEXT: [[ADD_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[ADD1]], <vscale x 4 x i1> splat (i1 true), i32 [[EVL]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[ADD_REV]]
+; CHECK-NEXT: [[ADD1:%.*]] = add nsw <vscale x 4 x i32> [[A:%.*]], splat (i32 22)
+; CHECK-NEXT: ret <vscale x 4 x i32> [[ADD1]]
;
%a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %a, <vscale x 4 x i1> splat (i1 true), i32 %evl)
%add = add nsw <vscale x 4 x i32> %a.rev, splat (i32 22)
@@ -76,10 +68,8 @@ define <vscale x 4 x i32> @binop_reverse_splat_elim(<vscale x 4 x i32> %a, i32 %
define <vscale x 4 x i32> @binop_reverse_splat_elim2(<vscale x 4 x i32> %a, i32 %evl) {
; CHECK-LABEL: @binop_reverse_splat_elim2(
-; CHECK-NEXT: [[A:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[A1:%.*]], <vscale x 4 x i1> splat (i1 true), i32 [[EVL:%.*]])
-; CHECK-NEXT: [[ADD1:%.*]] = add nsw <vscale x 4 x i32> [[A]], splat (i32 22)
-; CHECK-NEXT: [[ADD_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[ADD1]], <vscale x 4 x i1> splat (i1 true), i32 [[EVL]])
-; CHECK-NEXT: ret <vscale x 4 x i32> [[ADD_REV]]
+; CHECK-NEXT: [[ADD1:%.*]] = add nsw <vscale x 4 x i32> [[A:%.*]], splat (i32 22)
+; CHECK-NEXT: ret <vscale x 4 x i32> [[ADD1]]
;
%a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vp.reverse(<vscale x 4 x i32> %a, <vscale x 4 x i1> splat (i1 true), i32 %evl)
%add = add nsw <vscale x 4 x i32> splat (i32 22), %a.rev
@@ -89,10 +79,8 @@ define <vscale x 4 x i32> @binop_reverse_splat_elim2(<vscale x 4 x i32> %a, i32
define <vscale x 4 x float> @unop_reverse_splat_elim(<vscale x 4 x float> %a, <vscale x 4 x float> %b, i32 %evl) {
; CHECK-LABEL: @unop_reverse_splat_elim(
-; CHECK-NEXT: [[A_REV:%.*]] = tail call <vscale x 4 x float> @llvm.experimental.vp.reverse.nxv4f32(<vscale x 4 x float> [[A:%.*]], <vscale x 4 x i1> splat (i1 true), i32 [[EVL:%.*]])
-; CHECK-NEXT: [[OP:%.*]] = fneg <vscale x 4 x float> [[A_REV]]
-; CHECK-NEXT: [[OP_REV:%.*]] = tail call <vscale x 4 x float> @llvm.experimental.vp.reverse.nxv4f32(<vscale x 4 x float> [[OP]], <vscale x 4 x i1> splat (i1 true), i32 [[EVL]])
-; CHECK-NEXT: ret <vscale x 4 x float> [[OP_REV]]
+; CHECK-NEXT: [[OP:%.*]] = fneg <vscale x 4 x float> [[A_REV:%.*]]
+; CHECK-NEXT: ret <vscale x 4 x float> [[OP]]
;
%a.rev = tail call <vscale x 4 x float> @llvm.experimental.vp.reverse.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x i1> splat (i1 true), i32 %evl)
%op = fneg <vscale x 4 x float> %a.rev
``````````
https://github.com/llvm/llvm-project/pull/143963