[llvm] [VPlan] Simplify the computation of the block entry mask. (PR #173265)
Mel Chen via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 12 04:56:40 PDT 2026
https://github.com/Mel-Chen updated https://github.com/llvm/llvm-project/pull/173265
>From 721b61c81036010374e76d983e1c320c7525b564 Mon Sep 17 00:00:00 2001
From: Mel Chen <mel.chen at sifive.com>
Date: Thu, 18 Dec 2025 01:32:12 -0800
Subject: [PATCH 1/2] use post-dom tree to prove bb is control-flow-equivalent
to header
---
.../Transforms/Vectorize/VPlanDominatorTree.h | 9 ++++
.../Transforms/Vectorize/VPlanPredicator.cpp | 19 ++++++++-
.../RISCV/tail-folding-complex-mask.ll | 16 +++-----
.../LoopVectorize/VPlan/predicator.ll | 41 ++++++++-----------
...predicated-loads-with-predicated-stores.ll | 12 +++---
.../LoopVectorize/if-pred-stores.ll | 20 ++++-----
6 files changed, 66 insertions(+), 51 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanDominatorTree.h b/llvm/lib/Transforms/Vectorize/VPlanDominatorTree.h
index 44506f5ac3e81..2864670f44913 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanDominatorTree.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanDominatorTree.h
@@ -46,6 +46,15 @@ class VPDominatorTree : public DominatorTreeBase<VPBlockBase, false> {
bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B);
};
+/// Template specialization of the standard LLVM post-dominator tree utility for
+/// VPBlockBases.
+class VPPostDominatorTree : public PostDomTreeBase<VPBlockBase> {
+ using Base = PostDomTreeBase<VPBlockBase>;
+
+public:
+ explicit VPPostDominatorTree(VPlan &Plan) { recalculate(Plan); }
+};
+
using VPDomTreeNode = DomTreeNodeBase<VPBlockBase>;
/// Template specializations of GraphTraits for VPDomTreeNode.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
index f22a33fa8eec3..4cbebcfa9472e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
@@ -14,6 +14,7 @@
#include "VPRecipeBuilder.h"
#include "VPlan.h"
#include "VPlanCFG.h"
+#include "VPlanDominatorTree.h"
#include "VPlanPatternMatch.h"
#include "VPlanTransforms.h"
#include "VPlanUtils.h"
@@ -27,6 +28,9 @@ class VPPredicator {
/// Builder to construct recipes to compute masks.
VPBuilder Builder;
+ /// Post-dominator tree for the VPlan.
+ VPPostDominatorTree VPPDT;
+
/// When we if-convert we need to create edge masks. We have to cache values
/// so that we don't end up with exponential recursion/IR.
using EdgeMaskCacheTy =
@@ -63,6 +67,8 @@ class VPPredicator {
}
public:
+ VPPredicator(VPlan &Plan) : VPPDT(Plan) {}
+
/// Returns the *entry* mask for \p VPBB.
VPValue *getBlockInMask(const VPBasicBlock *VPBB) const {
return BlockMaskCache.lookup(VPBB);
@@ -126,6 +132,15 @@ VPValue *VPPredicator::createEdgeMask(const VPBasicBlock *Src,
void VPPredicator::createBlockInMask(VPBasicBlock *VPBB) {
// Start inserting after the block's phis, which be replaced by blends later.
Builder.setInsertPoint(VPBB, VPBB->getFirstNonPhi());
+
+ // Reuse the mask of the header if the VPBB post-dominates the header.
+ // TODO: Generalize to reuse mask of immediate dominator.
+ VPBasicBlock *Header =
+ VPBB->getPlan()->getVectorLoopRegion()->getEntryBasicBlock();
+ if (VPPDT.properlyDominates(VPBB, Header)) {
+ setBlockInMask(VPBB, getBlockInMask(Header));
+ return;
+ }
// All-one mask is modelled as no-mask following the convention for masked
// load/store/gather/scatter. Initialize BlockMask to no-mask.
VPValue *BlockMask = nullptr;
@@ -229,7 +244,7 @@ void VPPredicator::convertPhisToBlends(VPBasicBlock *VPBB) {
SmallVector<VPValue *, 2> OperandsWithMask;
for (const auto &[InVPV, InVPBB] : PhiR->incoming_values_and_blocks()) {
OperandsWithMask.push_back(InVPV);
- OperandsWithMask.push_back(getEdgeMask(InVPBB, VPBB));
+ OperandsWithMask.push_back(createEdgeMask(InVPBB, VPBB));
}
PHINode *IRPhi = cast_or_null<PHINode>(PhiR->getUnderlyingValue());
auto *Blend =
@@ -247,7 +262,7 @@ void VPlanTransforms::introduceMasksAndLinearize(VPlan &Plan) {
VPBasicBlock *Header = LoopRegion->getEntryBasicBlock();
ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
Header);
- VPPredicator Predicator;
+ VPPredicator Predicator(Plan);
for (VPBlockBase *VPB : RPOT) {
// Non-outer regions with VPBBs only are supported at the moment.
auto *VPBB = cast<VPBasicBlock>(VPB);
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-complex-mask.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-complex-mask.ll
index 1aa53e1ef95a0..a41a74bbb20b4 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-complex-mask.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-complex-mask.ll
@@ -73,31 +73,27 @@ define void @test(i64 %n, ptr noalias %src0, ptr noalias %src1, ptr noalias %src
; NO-VP-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; NO-VP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i1> poison, i1 [[C3]], i64 0
-; NO-VP-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i1> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
; NO-VP-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i1> poison, i1 [[C1]], i64 0
; NO-VP-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x i1> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
; NO-VP-NEXT: [[TMP6:%.*]] = xor <vscale x 4 x i1> [[BROADCAST_SPLAT2]], splat (i1 true)
; NO-VP-NEXT: [[TMP4:%.*]] = xor i1 [[C2]], true
; NO-VP-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 4 x i1> poison, i1 [[TMP4]], i64 0
; NO-VP-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 4 x i1> [[BROADCAST_SPLATINSERT3]], <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
-; NO-VP-NEXT: [[TMP5:%.*]] = or <vscale x 4 x i1> [[BROADCAST_SPLAT2]], [[BROADCAST_SPLAT4]]
-; NO-VP-NEXT: [[TMP7:%.*]] = select <vscale x 4 x i1> [[TMP6]], <vscale x 4 x i1> [[TMP5]], <vscale x 4 x i1> zeroinitializer
-; NO-VP-NEXT: [[TMP8:%.*]] = or <vscale x 4 x i1> [[BROADCAST_SPLAT2]], [[TMP7]]
-; NO-VP-NEXT: [[TMP9:%.*]] = xor <vscale x 4 x i1> [[TMP5]], splat (i1 true)
+; NO-VP-NEXT: [[TMP9:%.*]] = or <vscale x 4 x i1> [[BROADCAST_SPLAT2]], [[BROADCAST_SPLAT4]]
; NO-VP-NEXT: [[TMP10:%.*]] = select <vscale x 4 x i1> [[TMP6]], <vscale x 4 x i1> [[TMP9]], <vscale x 4 x i1> zeroinitializer
-; NO-VP-NEXT: [[TMP11:%.*]] = or <vscale x 4 x i1> [[TMP8]], [[TMP10]]
-; NO-VP-NEXT: [[TMP12:%.*]] = select <vscale x 4 x i1> [[TMP11]], <vscale x 4 x i1> [[BROADCAST_SPLAT]], <vscale x 4 x i1> zeroinitializer
+; NO-VP-NEXT: [[TMP8:%.*]] = or <vscale x 4 x i1> [[BROADCAST_SPLAT2]], [[TMP10]]
+; NO-VP-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <vscale x 4 x i1> poison, i1 [[C3]], i64 0
+; NO-VP-NEXT: [[TMP12:%.*]] = shufflevector <vscale x 4 x i1> [[BROADCAST_SPLATINSERT4]], <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
; NO-VP: [[VECTOR_BODY]]:
; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; NO-VP-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[SRC0]], i64 [[INDEX]]
; NO-VP-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> [[BROADCAST_SPLAT2]], <vscale x 4 x i32> poison)
-; NO-VP-NEXT: [[PREDPHI:%.*]] = select <vscale x 4 x i1> [[TMP7]], <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> [[WIDE_MASKED_LOAD]]
+; NO-VP-NEXT: [[PREDPHI:%.*]] = select <vscale x 4 x i1> [[TMP10]], <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> [[WIDE_MASKED_LOAD]]
; NO-VP-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[SRC1]], i64 [[INDEX]]
; NO-VP-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr align 4 [[TMP14]], <vscale x 4 x i1> [[TMP8]], <vscale x 4 x i32> poison)
; NO-VP-NEXT: [[TMP15:%.*]] = add <vscale x 4 x i32> [[WIDE_MASKED_LOAD5]], [[PREDPHI]]
-; NO-VP-NEXT: [[PREDPHI6:%.*]] = select <vscale x 4 x i1> [[TMP10]], <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> [[TMP15]]
+; NO-VP-NEXT: [[PREDPHI6:%.*]] = select <vscale x 4 x i1> [[TMP8]], <vscale x 4 x i32> [[TMP15]], <vscale x 4 x i32> zeroinitializer
; NO-VP-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[SRC2]], i64 [[INDEX]]
; NO-VP-NEXT: [[WIDE_MASKED_LOAD7:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr align 4 [[TMP16]], <vscale x 4 x i1> [[TMP12]], <vscale x 4 x i32> poison)
; NO-VP-NEXT: [[TMP17:%.*]] = add <vscale x 4 x i32> [[WIDE_MASKED_LOAD7]], [[PREDPHI6]]
diff --git a/llvm/test/Transforms/LoopVectorize/VPlan/predicator.ll b/llvm/test/Transforms/LoopVectorize/VPlan/predicator.ll
index ac12dd5f98bfe..91a3d8f27a191 100644
--- a/llvm/test/Transforms/LoopVectorize/VPlan/predicator.ll
+++ b/llvm/test/Transforms/LoopVectorize/VPlan/predicator.ll
@@ -21,11 +21,10 @@ define void @diamond_phi(ptr %a) {
; CHECK-NEXT: Successor(s): bb4
; CHECK-EMPTY:
; CHECK-NEXT: bb4:
-; CHECK-NEXT: EMIT vp<[[VP5:%[0-9]+]]> = or vp<[[VP4]]>, ir<%c0>
; CHECK-NEXT: BLEND ir<%phi4> = ir<%add2>/vp<[[VP4]]> ir<%add1>/ir<%c0>
-; CHECK-NEXT: EMIT store ir<%phi4>, ir<%gep>, vp<[[VP5]]>
-; CHECK-NEXT: EMIT ir<%iv.next> = add nuw nsw ir<%iv>, ir<1>, vp<[[VP5]]>
-; CHECK-NEXT: EMIT ir<%ec> = icmp eq ir<%iv.next>, ir<128>, vp<[[VP5]]>
+; CHECK-NEXT: EMIT store ir<%phi4>, ir<%gep>
+; CHECK-NEXT: EMIT ir<%iv.next> = add nuw nsw ir<%iv>, ir<1>
+; CHECK-NEXT: EMIT ir<%ec> = icmp eq ir<%iv.next>, ir<128>
; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1:%[0-9]+]]>
; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2:%[0-9]+]]>
; CHECK-NEXT: No successors
@@ -41,7 +40,7 @@ bb0:
; bb1 bb2
; \ /
; bb4
-; TODO: bb4 should be unmasked.
+; Verify that bb4 is unmasked.
%iv = phi i64 [0, %entry], [%iv.next, %bb4]
%gep = getelementptr i64, ptr %a, i64 %iv
%c0 = icmp sle i64 %iv, 0
@@ -97,11 +96,10 @@ define void @mask_reuse(ptr %a) {
; CHECK-EMPTY:
; CHECK-NEXT: bb4:
; CHECK-NEXT: EMIT vp<[[VP8:%[0-9]+]]> = not ir<%c0>
-; CHECK-NEXT: EMIT vp<[[VP9:%[0-9]+]]> = or vp<[[VP7]]>, vp<[[VP8]]>
; CHECK-NEXT: BLEND ir<%phi4> = ir<%add3>/vp<[[VP7]]> ir<%iv>/vp<[[VP8]]>
-; CHECK-NEXT: EMIT store ir<%phi4>, ir<%gep>, vp<[[VP9]]>
-; CHECK-NEXT: EMIT ir<%iv.next> = add nuw nsw ir<%iv>, ir<1>, vp<[[VP9]]>
-; CHECK-NEXT: EMIT ir<%ec> = icmp eq ir<%iv.next>, ir<128>, vp<[[VP9]]>
+; CHECK-NEXT: EMIT store ir<%phi4>, ir<%gep>
+; CHECK-NEXT: EMIT ir<%iv.next> = add nuw nsw ir<%iv>, ir<1>
+; CHECK-NEXT: EMIT ir<%ec> = icmp eq ir<%iv.next>, ir<128>
; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1:%[0-9]+]]>
; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2:%[0-9]+]]>
; CHECK-NEXT: No successors
@@ -121,7 +119,8 @@ bb0:
; bb3 /
; \ /
; bb4
-; TODO: bb3 can reuse bb1's mask and bb4 should be unmasked.
+; TODO: bb3 can reuse bb1's mask.
+; Verify that bb4 is unmasked.
%iv = phi i64 [0, %entry], [%iv.next, %bb4]
%gep = getelementptr i64, ptr %a, i64 %iv
%c0 = icmp sle i64 %iv, 0
@@ -206,11 +205,10 @@ define void @optimized_mask(ptr %a) {
; CHECK-NEXT: bb7:
; CHECK-NEXT: EMIT vp<[[VP15:%[0-9]+]]> = not ir<%c6>
; CHECK-NEXT: EMIT vp<[[VP16:%[0-9]+]]> = logical-and vp<[[VP4]]>, vp<[[VP15]]>
-; CHECK-NEXT: EMIT vp<[[VP17:%[0-9]+]]> = or vp<[[VP16]]>, vp<[[VP14]]>
; CHECK-NEXT: BLEND ir<%phi7> = ir<%add6>/vp<[[VP16]]> ir<%add5>/vp<[[VP14]]>
-; CHECK-NEXT: EMIT store ir<%phi7>, ir<%gep>, vp<[[VP17]]>
-; CHECK-NEXT: EMIT ir<%iv.next> = add nuw nsw ir<%iv>, ir<1>, vp<[[VP17]]>
-; CHECK-NEXT: EMIT ir<%ec> = icmp eq ir<%iv.next>, ir<128>, vp<[[VP17]]>
+; CHECK-NEXT: EMIT store ir<%phi7>, ir<%gep>
+; CHECK-NEXT: EMIT ir<%iv.next> = add nuw nsw ir<%iv>, ir<1>
+; CHECK-NEXT: EMIT ir<%ec> = icmp eq ir<%iv.next>, ir<128>
; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1:%[0-9]+]]>
; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2:%[0-9]+]]>
; CHECK-NEXT: No successors
@@ -321,15 +319,12 @@ define void @switch(ptr %a) {
; CHECK-NEXT: Successor(s): bb5
; CHECK-EMPTY:
; CHECK-NEXT: bb5:
-; CHECK-NEXT: EMIT vp<[[VP16:%[0-9]+]]> = or vp<[[VP9]]>, vp<[[VP15]]>
-; CHECK-NEXT: EMIT vp<[[VP17:%[0-9]+]]> = not ir<%c2>
-; CHECK-NEXT: EMIT vp<[[VP18:%[0-9]+]]> = logical-and vp<[[VP4]]>, vp<[[VP17]]>
-; CHECK-NEXT: EMIT vp<[[VP19:%[0-9]+]]> = or vp<[[VP16]]>, vp<[[VP18]]>
-; CHECK-NEXT: EMIT vp<[[VP20:%[0-9]+]]> = or vp<[[VP19]]>, vp<[[VP14]]>
-; CHECK-NEXT: BLEND ir<%phi5> = ir<%add4>/vp<[[VP9]]> ir<%add3>/vp<[[VP15]]> ir<%add2>/vp<[[VP18]]> ir<%add1>/vp<[[VP14]]>
-; CHECK-NEXT: EMIT store ir<%phi5>, ir<%gep>, vp<[[VP20]]>
-; CHECK-NEXT: EMIT ir<%iv.next> = add nuw nsw ir<%iv>, ir<1>, vp<[[VP20]]>
-; CHECK-NEXT: EMIT ir<%ec> = icmp eq ir<%iv.next>, ir<128>, vp<[[VP20]]>
+; CHECK-NEXT: EMIT vp<[[VP16:%[0-9]+]]> = not ir<%c2>
+; CHECK-NEXT: EMIT vp<[[VP17:%[0-9]+]]> = logical-and vp<[[VP4]]>, vp<[[VP16]]>
+; CHECK-NEXT: BLEND ir<%phi5> = ir<%add4>/vp<[[VP9]]> ir<%add3>/vp<[[VP15]]> ir<%add2>/vp<[[VP17]]> ir<%add1>/vp<[[VP14]]>
+; CHECK-NEXT: EMIT store ir<%phi5>, ir<%gep>
+; CHECK-NEXT: EMIT ir<%iv.next> = add nuw nsw ir<%iv>, ir<1>
+; CHECK-NEXT: EMIT ir<%ec> = icmp eq ir<%iv.next>, ir<128>
; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[VP3]]>, vp<[[VP1:%[0-9]+]]>
; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<[[VP2:%[0-9]+]]>
; CHECK-NEXT: No successors
diff --git a/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll b/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll
index 7f05452b94741..38527baffd2e1 100644
--- a/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll
+++ b/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll
@@ -25,7 +25,7 @@ define void @test_stores_noalias_via_rt_checks_after_loads(ptr %dst, ptr %src, p
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META0:![0-9]+]]
-; CHECK-NEXT: [[TMP7:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11)
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <2 x i32> [[WIDE_LOAD]], splat (i32 11)
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]]
; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP10]], align 4, !alias.scope [[META3:![0-9]+]]
@@ -36,7 +36,7 @@ define void @test_stores_noalias_via_rt_checks_after_loads(ptr %dst, ptr %src, p
; CHECK-NEXT: [[TMP36:%.*]] = add <2 x i32> [[TMP17]], splat (i32 10)
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]]
-; CHECK-NEXT: [[TMP14:%.*]] = select <2 x i1> [[TMP7]], <2 x i32> [[TMP36]], <2 x i32> [[TMP19]]
+; CHECK-NEXT: [[TMP14:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> [[TMP19]], <2 x i32> [[TMP36]]
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x i32> [[TMP14]], i32 0
; CHECK-NEXT: store i32 [[TMP18]], ptr [[TMP21]], align 4, !alias.scope [[META5:![0-9]+]], !noalias [[META7:![0-9]+]]
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x i32> [[TMP14]], i32 1
@@ -667,9 +667,9 @@ define void @test_stores_not_sunk_aliasing_load_between(ptr %dst, ptr %mid, ptr
; CHECK: [[PRED_STORE_IF]]:
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
; CHECK-NEXT: store i32 20, ptr [[TMP6]], align 4, !alias.scope [[META71:![0-9]+]], !noalias [[META73:![0-9]+]]
-; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !alias.scope [[META71]], !noalias [[META73]]
+; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4, !alias.scope [[META71]], !noalias [[META73]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[MID]], i32 [[INDEX]]
-; CHECK-NEXT: store i32 [[TMP7]], ptr [[TMP8]], align 4, !alias.scope [[META75:![0-9]+]], !noalias [[META68]]
+; CHECK-NEXT: store i32 [[TMP11]], ptr [[TMP8]], align 4, !alias.scope [[META75:![0-9]+]], !noalias [[META68]]
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
; CHECK: [[PRED_STORE_CONTINUE]]:
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1
@@ -677,9 +677,9 @@ define void @test_stores_not_sunk_aliasing_load_between(ptr %dst, ptr %mid, ptr
; CHECK: [[PRED_STORE_IF10]]:
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP1]]
; CHECK-NEXT: store i32 20, ptr [[TMP10]], align 4, !alias.scope [[META71]], !noalias [[META73]]
-; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !alias.scope [[META71]], !noalias [[META73]]
+; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP10]], align 4, !alias.scope [[META71]], !noalias [[META73]]
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[MID]], i32 [[TMP1]]
-; CHECK-NEXT: store i32 [[TMP11]], ptr [[TMP12]], align 4, !alias.scope [[META75]], !noalias [[META68]]
+; CHECK-NEXT: store i32 [[TMP20]], ptr [[TMP12]], align 4, !alias.scope [[META75]], !noalias [[META68]]
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE11]]
; CHECK: [[PRED_STORE_CONTINUE11]]:
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0
diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
index 9a4f5ecd93a38..8cade4f3ea078 100644
--- a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
+++ b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
@@ -736,10 +736,10 @@ define void @sinkable_predicated_store(ptr %A, ptr %B) {
; UNROLL-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[B]], i64 [[TMP0]]
; UNROLL-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4, !alias.scope [[META8:![0-9]+]]
; UNROLL-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 4, !alias.scope [[META8]]
-; UNROLL-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], 0
-; UNROLL-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP6]], 0
-; UNROLL-NEXT: [[TMP9:%.*]] = select i1 [[TMP7]], i32 0, i32 1
-; UNROLL-NEXT: [[TMP10:%.*]] = select i1 [[TMP8]], i32 0, i32 1
+; UNROLL-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP5]], 0
+; UNROLL-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP6]], 0
+; UNROLL-NEXT: [[TMP9:%.*]] = select i1 [[TMP7]], i32 1, i32 0
+; UNROLL-NEXT: [[TMP10:%.*]] = select i1 [[TMP8]], i32 1, i32 0
; UNROLL-NEXT: store i32 [[TMP9]], ptr [[TMP1]], align 4, !alias.scope [[META11:![0-9]+]], !noalias [[META8]]
; UNROLL-NEXT: store i32 [[TMP10]], ptr [[TMP2]], align 4, !alias.scope [[META11]], !noalias [[META8]]
; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
@@ -789,10 +789,10 @@ define void @sinkable_predicated_store(ptr %A, ptr %B) {
; UNROLL-NOSIMPLIFY-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[B]], i64 [[TMP0]]
; UNROLL-NOSIMPLIFY-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4, !alias.scope [[META9:![0-9]+]]
; UNROLL-NOSIMPLIFY-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 4, !alias.scope [[META9]]
-; UNROLL-NOSIMPLIFY-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], 0
-; UNROLL-NOSIMPLIFY-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP6]], 0
-; UNROLL-NOSIMPLIFY-NEXT: [[TMP9:%.*]] = select i1 [[TMP7]], i32 0, i32 1
-; UNROLL-NOSIMPLIFY-NEXT: [[TMP10:%.*]] = select i1 [[TMP8]], i32 0, i32 1
+; UNROLL-NOSIMPLIFY-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP5]], 0
+; UNROLL-NOSIMPLIFY-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP6]], 0
+; UNROLL-NOSIMPLIFY-NEXT: [[TMP9:%.*]] = select i1 [[TMP7]], i32 1, i32 0
+; UNROLL-NOSIMPLIFY-NEXT: [[TMP10:%.*]] = select i1 [[TMP8]], i32 1, i32 0
; UNROLL-NOSIMPLIFY-NEXT: store i32 [[TMP9]], ptr [[TMP1]], align 4, !alias.scope [[META12:![0-9]+]], !noalias [[META9]]
; UNROLL-NOSIMPLIFY-NEXT: store i32 [[TMP10]], ptr [[TMP2]], align 4, !alias.scope [[META12]], !noalias [[META9]]
; UNROLL-NOSIMPLIFY-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
@@ -838,8 +838,8 @@ define void @sinkable_predicated_store(ptr %A, ptr %B) {
; VEC-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP1]]
; VEC-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[B]], i64 [[INDEX]]
; VEC-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP4]], align 4, !alias.scope [[META8:![0-9]+]]
-; VEC-NEXT: [[TMP5:%.*]] = icmp eq <2 x i32> [[WIDE_LOAD]], zeroinitializer
-; VEC-NEXT: [[TMP6:%.*]] = select <2 x i1> [[TMP5]], <2 x i32> zeroinitializer, <2 x i32> splat (i32 1)
+; VEC-NEXT: [[TMP5:%.*]] = icmp ne <2 x i32> [[WIDE_LOAD]], zeroinitializer
+; VEC-NEXT: [[TMP6:%.*]] = select <2 x i1> [[TMP5]], <2 x i32> splat (i32 1), <2 x i32> zeroinitializer
; VEC-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP6]], i32 0
; VEC-NEXT: store i32 [[TMP7]], ptr [[TMP2]], align 4, !alias.scope [[META11:![0-9]+]], !noalias [[META8]]
; VEC-NEXT: [[TMP8:%.*]] = extractelement <2 x i32> [[TMP6]], i32 1
>From 0839f797b755aba645fdf902b34481141df275ca Mon Sep 17 00:00:00 2001
From: Mel Chen <mel.chen at sifive.com>
Date: Mon, 9 Mar 2026 23:57:40 -0700
Subject: [PATCH 2/2] reuse VPPostDominatorTree
---
.../Transforms/Vectorize/VPPostDomFrontierTest.cpp | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/llvm/unittests/Transforms/Vectorize/VPPostDomFrontierTest.cpp b/llvm/unittests/Transforms/Vectorize/VPPostDomFrontierTest.cpp
index f15f078c3a419..b225910d6c7fc 100644
--- a/llvm/unittests/Transforms/Vectorize/VPPostDomFrontierTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPPostDomFrontierTest.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "../lib/Transforms/Vectorize/VPlan.h"
+#include "../lib/Transforms/Vectorize/VPlanDominatorTree.h"
#include "VPlanTestBase.h"
#include "llvm/Analysis/DominanceFrontier.h"
@@ -49,8 +50,7 @@ TEST_F(VPPostDomFrontierTest, SingleExitTest) {
VPBlockUtils::connectBlocks(VPBB5, VPBB6);
VPBlockUtils::connectBlocks(VPBB6, VPBB7);
- PostDomTreeBase<VPBlockBase> VPPDT;
- VPPDT.recalculate(Plan);
+ VPPostDominatorTree VPPDT(Plan);
DominanceFrontierBase<VPBlockBase, true> VPPDF;
VPPDF.analyze(VPPDT);
@@ -116,8 +116,7 @@ TEST_F(VPPostDomFrontierTest, MultipleExitsTest) {
VPBlockUtils::connectBlocks(VPBB4, VPBB6);
VPBlockUtils::connectBlocks(VPBB5, VPBB6);
- PostDomTreeBase<VPBlockBase> VPPDT;
- VPPDT.recalculate(Plan);
+ VPPostDominatorTree VPPDT(Plan);
DominanceFrontierBase<VPBlockBase, true> VPPDF;
VPPDF.analyze(VPPDT);
More information about the llvm-commits
mailing list