[llvm] [VPlan] Simplify vp.merge true, (or x, y), x -> vp.merge y, true, x (PR #135017)

Luke Lau via llvm-commits llvm-commits at lists.llvm.org
Sun Apr 13 13:05:34 PDT 2025


https://github.com/lukel97 updated https://github.com/llvm/llvm-project/pull/135017

>From be5e3c074049712acce437ac5cf7d7d5aaab3901 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Wed, 9 Apr 2025 14:55:54 +0100
Subject: [PATCH 1/7] [VPlan] Simplify vp.merge true, (or x, y), x -> vp.merge
 y, true, x

With EVL tail folding, an AnyOf reduction will emit a vp.merge like

vp.merge true, (or phi, cond), phi, evl

We can remove the or and optimise this to

vp.merge cond, true, phi, evl

This makes it slightly easier to pattern match in #134898.
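
As a sanity check, here is a per-lane scalar model of the two forms (an illustrative sketch, not part of the patch). For a lane below evl whose mask bit is set, llvm.vp.merge returns the on-true operand; every other lane takes the on-false operand. Both forms therefore compute (phi | cond) on active lanes and keep phi elsewhere:

// Illustrative per-lane model; "active" stands for lane index < evl.
bool mergeBefore(bool phi, bool cond, bool active) {
  // vp.merge true, (or phi, cond), phi, evl
  return active ? (phi | cond) : phi;
}
bool mergeAfter(bool phi, bool cond, bool active) {
  // vp.merge cond, true, phi, evl; cond ? true : phi == (phi | cond)
  return active ? (cond ? true : phi) : phi;
}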

This adds a pattern matcher for VPWidenIntrinsicRecipe to help match this (only 4-ary intrinsics for now; it can be extended if other users need it).
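
For reference, the intended use looks like the snippet below (this just mirrors the VPlanTransforms.cpp hunk further down; later revisions of this PR rename the matcher to m_Intrinsic):

VPValue *X, *Y;
if (match(&R, m_WidenIntrinsic<Intrinsic::vp_merge>(
                  m_True(), m_c_BinaryOr(m_VPValue(X), m_VPValue(Y)),
                  m_Deferred(X), m_VPValue()))) {
  // R is a widened vp.merge with an all-true mask whose on-true operand is
  // (or X, Y) and whose on-false operand is X.
}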

Blended AnyOf reductions will emit an and, which we may also be able to simplify in a later patch.
---
 .../Transforms/Vectorize/VPlanPatternMatch.h  | 29 +++++++++++++++++++
 .../Transforms/Vectorize/VPlanTransforms.cpp  | 11 +++++++
 ...ze-force-tail-with-evl-inloop-reduction.ll |  6 ++--
 ...vectorize-force-tail-with-evl-reduction.ll |  6 ++--
 4 files changed, 44 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index 2cd23efcf3eab..bfe9c484beeb4 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -199,6 +199,8 @@ struct Recipe_match {
                   std::is_same<RecipeTy, VPDerivedIVRecipe>::value ||
                   std::is_same<RecipeTy, VPWidenGEPRecipe>::value)
       return DefR;
+    else if constexpr (std::is_same<RecipeTy, VPWidenIntrinsicRecipe>::value)
+      return DefR && DefR->getVectorIntrinsicID() == Opcode;
     else
       return DefR && DefR->getOpcode() == Opcode;
   }
@@ -439,6 +441,33 @@ m_DerivedIV(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2) {
   return VPDerivedIV_match<Op0_t, Op1_t, Op2_t>({Op0, Op1, Op2});
 }
 
+template <Intrinsic::ID IntrinsicID, typename... OpTys>
+using VPWidenIntrinsicMatch = Recipe_match<std::tuple<OpTys...>, IntrinsicID,
+                                           false, VPWidenIntrinsicRecipe>;
+
+template <Intrinsic::ID IntrinsicID, typename Op0_t, typename Op1_t,
+          typename Op2_t, typename Op3_t>
+inline VPWidenIntrinsicMatch<IntrinsicID, Op0_t, Op1_t, Op2_t, Op3_t>
+m_WidenIntrinsic(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2,
+                 const Op3_t &Op3) {
+  return VPWidenIntrinsicMatch<IntrinsicID, Op0_t, Op1_t, Op2_t, Op3_t>(
+      {Op0, Op1, Op2, Op3});
+}
+
+/// Intrinsic matchers.
+struct IntrinsicID_match {
+  unsigned ID;
+
+  IntrinsicID_match(Intrinsic::ID IntrID) : ID(IntrID) {}
+
+  template <typename OpTy> bool match(OpTy *V) {
+    if (const auto *CI = dyn_cast<CallInst>(V))
+      if (const auto *F = CI->getCalledFunction())
+        return F->getIntrinsicID() == ID;
+    return false;
+  }
+};
+
 } // namespace VPlanPatternMatch
 } // namespace llvm
 
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 67a55aa67c978..28b08d495f6ff 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1011,6 +1011,17 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
       TypeInfo.inferScalarType(R.getOperand(1)) ==
           TypeInfo.inferScalarType(R.getVPSingleValue()))
     return R.getVPSingleValue()->replaceAllUsesWith(R.getOperand(1));
+
+  // For i1 vp.merges produced by AnyOf reductions:
+  // vp.merge true, (or x, y), x, evl -> vp.merge y, true, x, evl
+  if (match(&R, m_WidenIntrinsic<Intrinsic::vp_merge>(
+                    m_True(), m_c_BinaryOr(m_VPValue(X), m_VPValue(Y)),
+                    m_Deferred(X), m_VPValue())) &&
+      TypeInfo.inferScalarType(R.getVPSingleValue())->isIntegerTy(1)) {
+    R.setOperand(1, R.getOperand(0));
+    R.setOperand(0, Y);
+    return;
+  }
 }
 
 void VPlanTransforms::simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy) {
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-inloop-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-inloop-reduction.ll
index 1fb84dbc79c49..990ac0bdb72ad 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-inloop-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-inloop-reduction.ll
@@ -1899,8 +1899,7 @@ define i32 @anyof_icmp(ptr %a, i64 %n, i32 %start, i32 %inv) {
 ; IF-EVL-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0
 ; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
 ; IF-EVL-NEXT:    [[TMP14:%.*]] = icmp slt <vscale x 4 x i32> [[VP_OP_LOAD]], splat (i32 3)
-; IF-EVL-NEXT:    [[TMP15:%.*]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP14]]
-; IF-EVL-NEXT:    [[TMP16]] = call <vscale x 4 x i1> @llvm.vp.merge.nxv4i1(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i1> [[TMP15]], <vscale x 4 x i1> [[VEC_PHI]], i32 [[TMP10]])
+; IF-EVL-NEXT:    [[TMP16]] = call <vscale x 4 x i1> @llvm.vp.merge.nxv4i1(<vscale x 4 x i1> [[TMP14]], <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i1> [[VEC_PHI]], i32 [[TMP10]])
 ; IF-EVL-NEXT:    [[TMP17:%.*]] = zext i32 [[TMP10]] to i64
 ; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP17]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
@@ -2024,8 +2023,7 @@ define i32 @anyof_fcmp(ptr %a, i64 %n, i32 %start, i32 %inv) {
 ; IF-EVL-NEXT:    [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i32 0
 ; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
 ; IF-EVL-NEXT:    [[TMP14:%.*]] = fcmp fast olt <vscale x 4 x float> [[VP_OP_LOAD]], splat (float 3.000000e+00)
-; IF-EVL-NEXT:    [[TMP15:%.*]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP14]]
-; IF-EVL-NEXT:    [[TMP16]] = call <vscale x 4 x i1> @llvm.vp.merge.nxv4i1(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i1> [[TMP15]], <vscale x 4 x i1> [[VEC_PHI]], i32 [[TMP10]])
+; IF-EVL-NEXT:    [[TMP16]] = call <vscale x 4 x i1> @llvm.vp.merge.nxv4i1(<vscale x 4 x i1> [[TMP14]], <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i1> [[VEC_PHI]], i32 [[TMP10]])
 ; IF-EVL-NEXT:    [[TMP17:%.*]] = zext i32 [[TMP10]] to i64
 ; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP17]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction.ll
index a9a9bf7294cca..7660d06a35d43 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction.ll
@@ -1953,8 +1953,7 @@ define i32 @anyof_icmp(ptr %a, i64 %n, i32 %start, i32 %inv) {
 ; IF-EVL-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 0
 ; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
 ; IF-EVL-NEXT:    [[TMP13:%.*]] = icmp slt <vscale x 4 x i32> [[VP_OP_LOAD]], splat (i32 3)
-; IF-EVL-NEXT:    [[TMP14:%.*]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP13]]
-; IF-EVL-NEXT:    [[TMP15]] = call <vscale x 4 x i1> @llvm.vp.merge.nxv4i1(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i1> [[TMP14]], <vscale x 4 x i1> [[VEC_PHI]], i32 [[TMP9]])
+; IF-EVL-NEXT:    [[TMP15]] = call <vscale x 4 x i1> @llvm.vp.merge.nxv4i1(<vscale x 4 x i1> [[TMP13]], <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i1> [[VEC_PHI]], i32 [[TMP9]])
 ; IF-EVL-NEXT:    [[TMP16:%.*]] = zext i32 [[TMP9]] to i64
 ; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
@@ -2078,8 +2077,7 @@ define i32 @anyof_fcmp(ptr %a, i64 %n, i32 %start, i32 %inv) {
 ; IF-EVL-NEXT:    [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i32 0
 ; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
 ; IF-EVL-NEXT:    [[TMP13:%.*]] = fcmp fast olt <vscale x 4 x float> [[VP_OP_LOAD]], splat (float 3.000000e+00)
-; IF-EVL-NEXT:    [[TMP14:%.*]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP13]]
-; IF-EVL-NEXT:    [[TMP15]] = call <vscale x 4 x i1> @llvm.vp.merge.nxv4i1(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i1> [[TMP14]], <vscale x 4 x i1> [[VEC_PHI]], i32 [[TMP9]])
+; IF-EVL-NEXT:    [[TMP15]] = call <vscale x 4 x i1> @llvm.vp.merge.nxv4i1(<vscale x 4 x i1> [[TMP13]], <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i1> [[VEC_PHI]], i32 [[TMP9]])
 ; IF-EVL-NEXT:    [[TMP16:%.*]] = zext i32 [[TMP9]] to i64
 ; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]

>From 530817a6aed6571dbe54efd21a107307be52ed70 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Wed, 9 Apr 2025 21:37:58 +0100
Subject: [PATCH 2/7] Remove unused matcher

---
 llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index bfe9c484beeb4..2830a12cf235a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -454,20 +454,6 @@ m_WidenIntrinsic(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2,
       {Op0, Op1, Op2, Op3});
 }
 
-/// Intrinsic matchers.
-struct IntrinsicID_match {
-  unsigned ID;
-
-  IntrinsicID_match(Intrinsic::ID IntrID) : ID(IntrID) {}
-
-  template <typename OpTy> bool match(OpTy *V) {
-    if (const auto *CI = dyn_cast<CallInst>(V))
-      if (const auto *F = CI->getCalledFunction())
-        return F->getIntrinsicID() == ID;
-    return false;
-  }
-};
-
 } // namespace VPlanPatternMatch
 } // namespace llvm
 

>From 5737af159552a6e4a37fdb9cdf4e490678f52836 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Wed, 9 Apr 2025 22:52:04 +0100
Subject: [PATCH 3/7] Generalize to m_Intrinsic, copy over from PatternMatch.h

---
 .../Transforms/Vectorize/VPlanPatternMatch.h  | 126 ++++++++++++++++--
 .../Transforms/Vectorize/VPlanTransforms.cpp  |   2 +-
 2 files changed, 115 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index 2830a12cf235a..634f59da4cac3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -136,11 +136,32 @@ template <typename LTy, typename RTy> struct match_combine_or {
   }
 };
 
+template <typename LTy, typename RTy> struct match_combine_and {
+  LTy L;
+  RTy R;
+
+  match_combine_and(const LTy &Left, const RTy &Right) : L(Left), R(Right) {}
+
+  template <typename ITy> bool match(ITy *V) const {
+    if (L.match(V))
+      if (R.match(V))
+        return true;
+    return false;
+  }
+};
+
 template <typename LTy, typename RTy>
+/// Combine two pattern matchers matching L || R
 inline match_combine_or<LTy, RTy> m_CombineOr(const LTy &L, const RTy &R) {
   return match_combine_or<LTy, RTy>(L, R);
 }
 
+/// Combine two pattern matchers matching L && R
+template <typename LTy, typename RTy>
+inline match_combine_and<LTy, RTy> m_CombineAnd(const LTy &L, const RTy &R) {
+  return match_combine_and<LTy, RTy>(L, R);
+}
+
 /// Match a VPValue, capturing it if we match.
 inline bind_ty<VPValue> m_VPValue(VPValue *&V) { return V; }
 
@@ -199,8 +220,6 @@ struct Recipe_match {
                   std::is_same<RecipeTy, VPDerivedIVRecipe>::value ||
                   std::is_same<RecipeTy, VPWidenGEPRecipe>::value)
       return DefR;
-    else if constexpr (std::is_same<RecipeTy, VPWidenIntrinsicRecipe>::value)
-      return DefR && DefR->getVectorIntrinsicID() == Opcode;
     else
       return DefR && DefR->getOpcode() == Opcode;
   }
@@ -441,17 +460,100 @@ m_DerivedIV(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2) {
   return VPDerivedIV_match<Op0_t, Op1_t, Op2_t>({Op0, Op1, Op2});
 }
 
-template <Intrinsic::ID IntrinsicID, typename... OpTys>
-using VPWidenIntrinsicMatch = Recipe_match<std::tuple<OpTys...>, IntrinsicID,
-                                           false, VPWidenIntrinsicRecipe>;
+template <typename Opnd_t> struct Argument_match {
+  unsigned OpI;
+  Opnd_t Val;
+
+  Argument_match(unsigned OpIdx, const Opnd_t &V) : OpI(OpIdx), Val(V) {}
+
+  template <typename OpTy> bool match(OpTy *V) const {
+    if (const auto *R = dyn_cast<VPWidenIntrinsicRecipe>(V))
+      return Val.match(R->getOperand(OpI));
+    if (const auto *R = dyn_cast<VPReplicateRecipe>(V))
+      if (isa<CallInst>(R->getUnderlyingInstr()))
+        return Val.match(R->getOperand(OpI + 1));
+    return false;
+  }
+};
+
+/// Match an argument.
+template <unsigned OpI, typename Opnd_t>
+inline Argument_match<Opnd_t> m_Argument(const Opnd_t &Op) {
+  return Argument_match<Opnd_t>(OpI, Op);
+}
+
+/// Intrinsic matchers.
+struct IntrinsicID_match {
+  unsigned ID;
+
+  IntrinsicID_match(Intrinsic::ID IntrID) : ID(IntrID) {}
+
+  template <typename OpTy> bool match(OpTy *V) const {
+    if (const auto *R = dyn_cast<VPWidenIntrinsicRecipe>(V))
+      return R->getVectorIntrinsicID() == ID;
+    if (const auto *R = dyn_cast<VPReplicateRecipe>(V))
+      if (const auto *CI = dyn_cast<CallInst>(R->getUnderlyingInstr()))
+        if (const auto *F = CI->getCalledFunction())
+          return F->getIntrinsicID() == ID;
+    return false;
+  }
+};
+
+/// Intrinsic matches are combinations of ID matchers, and argument
+/// matchers. Higher arity matchers are defined recursively in terms of and-ing
+/// them with lower arity matchers. Here's some convenient typedefs for up to
+/// several arguments, and more can be added as needed
+template <typename T0 = void, typename T1 = void, typename T2 = void,
+          typename T3 = void, typename T4 = void, typename T5 = void,
+          typename T6 = void, typename T7 = void, typename T8 = void,
+          typename T9 = void, typename T10 = void>
+struct m_Intrinsic_Ty;
+template <typename T0> struct m_Intrinsic_Ty<T0> {
+  using Ty = match_combine_and<IntrinsicID_match, Argument_match<T0>>;
+};
+template <typename T0, typename T1> struct m_Intrinsic_Ty<T0, T1> {
+  using Ty =
+      match_combine_and<typename m_Intrinsic_Ty<T0>::Ty, Argument_match<T1>>;
+};
+template <typename T0, typename T1, typename T2>
+struct m_Intrinsic_Ty<T0, T1, T2> {
+  using Ty = match_combine_and<typename m_Intrinsic_Ty<T0, T1>::Ty,
+                               Argument_match<T2>>;
+};
+template <typename T0, typename T1, typename T2, typename T3>
+struct m_Intrinsic_Ty<T0, T1, T2, T3> {
+  using Ty = match_combine_and<typename m_Intrinsic_Ty<T0, T1, T2>::Ty,
+                               Argument_match<T3>>;
+};
+
+/// Match intrinsic calls like this:
+/// m_Intrinsic<Intrinsic::fabs>(m_VPValue(X))
+template <Intrinsic::ID IntrID> inline IntrinsicID_match m_Intrinsic() {
+  return IntrinsicID_match(IntrID);
+}
+
+template <Intrinsic::ID IntrID, typename T0>
+inline typename m_Intrinsic_Ty<T0>::Ty m_Intrinsic(const T0 &Op0) {
+  return m_CombineAnd(m_Intrinsic<IntrID>(), m_Argument<0>(Op0));
+}
+
+template <Intrinsic::ID IntrID, typename T0, typename T1>
+inline typename m_Intrinsic_Ty<T0, T1>::Ty m_Intrinsic(const T0 &Op0,
+                                                       const T1 &Op1) {
+  return m_CombineAnd(m_Intrinsic<IntrID>(Op0), m_Argument<1>(Op1));
+}
+
+template <Intrinsic::ID IntrID, typename T0, typename T1, typename T2>
+inline typename m_Intrinsic_Ty<T0, T1, T2>::Ty
+m_Intrinsic(const T0 &Op0, const T1 &Op1, const T2 &Op2) {
+  return m_CombineAnd(m_Intrinsic<IntrID>(Op0, Op1), m_Argument<2>(Op2));
+}
 
-template <Intrinsic::ID IntrinsicID, typename Op0_t, typename Op1_t,
-          typename Op2_t, typename Op3_t>
-inline VPWidenIntrinsicMatch<IntrinsicID, Op0_t, Op1_t, Op2_t, Op3_t>
-m_WidenIntrinsic(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2,
-                 const Op3_t &Op3) {
-  return VPWidenIntrinsicMatch<IntrinsicID, Op0_t, Op1_t, Op2_t, Op3_t>(
-      {Op0, Op1, Op2, Op3});
+template <Intrinsic::ID IntrID, typename T0, typename T1, typename T2,
+          typename T3>
+inline typename m_Intrinsic_Ty<T0, T1, T2, T3>::Ty
+m_Intrinsic(const T0 &Op0, const T1 &Op1, const T2 &Op2, const T3 &Op3) {
+  return m_CombineAnd(m_Intrinsic<IntrID>(Op0, Op1, Op2), m_Argument<3>(Op3));
 }
 
 } // namespace VPlanPatternMatch
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 28b08d495f6ff..2de46d6aa1e6c 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1014,7 +1014,7 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
 
   // For i1 vp.merges produced by AnyOf reductions:
   // vp.merge true, (or x, y), x, evl -> vp.merge y, true, x, evl
-  if (match(&R, m_WidenIntrinsic<Intrinsic::vp_merge>(
+  if (match(&R, m_Intrinsic<Intrinsic::vp_merge>(
                     m_True(), m_c_BinaryOr(m_VPValue(X), m_VPValue(Y)),
                     m_Deferred(X), m_VPValue())) &&
       TypeInfo.inferScalarType(R.getVPSingleValue())->isIntegerTy(1)) {

>From 38bcc68681e123a4e70d891527b9c2841b7a0c15 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Thu, 10 Apr 2025 11:34:46 +0100
Subject: [PATCH 4/7] Fix commutative match

---
 llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 2de46d6aa1e6c..2050b2299d5f9 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1014,9 +1014,9 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
 
   // For i1 vp.merges produced by AnyOf reductions:
   // vp.merge true, (or x, y), x, evl -> vp.merge y, true, x, evl
-  if (match(&R, m_Intrinsic<Intrinsic::vp_merge>(
-                    m_True(), m_c_BinaryOr(m_VPValue(X), m_VPValue(Y)),
-                    m_Deferred(X), m_VPValue())) &&
+  if (match(&R, m_Intrinsic<Intrinsic::vp_merge>(m_True(), m_VPValue(A),
+                                                 m_VPValue(X), m_VPValue())) &&
+      match(A, m_c_BinaryOr(m_Specific(X), m_VPValue(Y))) &&
       TypeInfo.inferScalarType(R.getVPSingleValue())->isIntegerTy(1)) {
     R.setOperand(1, R.getOperand(0));
     R.setOperand(0, Y);

>From f2cc512e7354acf80e51af363bdffe3df3d3e954 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Fri, 11 Apr 2025 15:09:24 +0100
Subject: [PATCH 5/7] Support VPWidenCallRecipe, remove unused template
 arguments, simplify match_combine_and::match

---
 llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index 634f59da4cac3..3ed763a4a082f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -143,10 +143,7 @@ template <typename LTy, typename RTy> struct match_combine_and {
   match_combine_and(const LTy &Left, const RTy &Right) : L(Left), R(Right) {}
 
   template <typename ITy> bool match(ITy *V) const {
-    if (L.match(V))
-      if (R.match(V))
-        return true;
-    return false;
+    return L.match(V) && R.match(V);
   }
 };
 
@@ -469,6 +466,8 @@ template <typename Opnd_t> struct Argument_match {
   template <typename OpTy> bool match(OpTy *V) const {
     if (const auto *R = dyn_cast<VPWidenIntrinsicRecipe>(V))
       return Val.match(R->getOperand(OpI));
+    if (const auto *R = dyn_cast<VPWidenCallRecipe>(V))
+      return Val.match(R->getOperand(OpI));
     if (const auto *R = dyn_cast<VPReplicateRecipe>(V))
       if (isa<CallInst>(R->getUnderlyingInstr()))
         return Val.match(R->getOperand(OpI + 1));
@@ -491,6 +490,8 @@ struct IntrinsicID_match {
   template <typename OpTy> bool match(OpTy *V) const {
     if (const auto *R = dyn_cast<VPWidenIntrinsicRecipe>(V))
       return R->getVectorIntrinsicID() == ID;
+    if (const auto *R = dyn_cast<VPWidenCallRecipe>(V))
+      return R->getCalledScalarFunction()->getIntrinsicID() == ID;
     if (const auto *R = dyn_cast<VPReplicateRecipe>(V))
       if (const auto *CI = dyn_cast<CallInst>(R->getUnderlyingInstr()))
         if (const auto *F = CI->getCalledFunction())
@@ -504,9 +505,7 @@ struct IntrinsicID_match {
 /// them with lower arity matchers. Here's some convenient typedefs for up to
 /// several arguments, and more can be added as needed
 template <typename T0 = void, typename T1 = void, typename T2 = void,
-          typename T3 = void, typename T4 = void, typename T5 = void,
-          typename T6 = void, typename T7 = void, typename T8 = void,
-          typename T9 = void, typename T10 = void>
+          typename T3 = void>
 struct m_Intrinsic_Ty;
 template <typename T0> struct m_Intrinsic_Ty<T0> {
   using Ty = match_combine_and<IntrinsicID_match, Argument_match<T0>>;
@@ -521,7 +520,7 @@ struct m_Intrinsic_Ty<T0, T1, T2> {
                                Argument_match<T2>>;
 };
 template <typename T0, typename T1, typename T2, typename T3>
-struct m_Intrinsic_Ty<T0, T1, T2, T3> {
+struct m_Intrinsic_Ty {
   using Ty = match_combine_and<typename m_Intrinsic_Ty<T0, T1, T2>::Ty,
                                Argument_match<T3>>;
 };

>From f21f6315d6de02ce04820211cd5ec510db57674c Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Sun, 13 Apr 2025 21:03:13 +0100
Subject: [PATCH 6/7] Update comments

---
 llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index 3ed763a4a082f..bea4b9f2e15e0 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -147,8 +147,8 @@ template <typename LTy, typename RTy> struct match_combine_and {
   }
 };
 
-template <typename LTy, typename RTy>
 /// Combine two pattern matchers matching L || R
+template <typename LTy, typename RTy>
 inline match_combine_or<LTy, RTy> m_CombineOr(const LTy &L, const RTy &R) {
   return match_combine_or<LTy, RTy>(L, R);
 }
@@ -457,7 +457,9 @@ m_DerivedIV(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2) {
   return VPDerivedIV_match<Op0_t, Op1_t, Op2_t>({Op0, Op1, Op2});
 }
 
+/// Match a call argument at a given argument index.
 template <typename Opnd_t> struct Argument_match {
+  /// Call argument index to match.
   unsigned OpI;
   Opnd_t Val;
 
@@ -475,7 +477,7 @@ template <typename Opnd_t> struct Argument_match {
   }
 };
 
-/// Match an argument.
+/// Match call argument.
 template <unsigned OpI, typename Opnd_t>
 inline Argument_match<Opnd_t> m_Argument(const Opnd_t &Op) {
   return Argument_match<Opnd_t>(OpI, Op);
@@ -526,7 +528,7 @@ struct m_Intrinsic_Ty {
 };
 
 /// Match intrinsic calls like this:
-/// m_Intrinsic<Intrinsic::fabs>(m_VPValue(X))
+/// m_Intrinsic<Intrinsic::fabs>(m_VPValue(X), ...)
 template <Intrinsic::ID IntrID> inline IntrinsicID_match m_Intrinsic() {
   return IntrinsicID_match(IntrID);
 }

>From 205bc770e81ca014af2808112b359fd53e8590fb Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Sun, 13 Apr 2025 21:05:14 +0100
Subject: [PATCH 7/7] Fix typo

---
 llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index bea4b9f2e15e0..92cc158dc0c34 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -477,7 +477,7 @@ template <typename Opnd_t> struct Argument_match {
   }
 };
 
-/// Match call argument.
+/// Match a call argument.
 template <unsigned OpI, typename Opnd_t>
 inline Argument_match<Opnd_t> m_Argument(const Opnd_t &Op) {
   return Argument_match<Opnd_t>(OpI, Op);
