[llvm] [VPlan] Introduce CSE pass (PR #151872)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 4 13:32:14 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-risc-v
@llvm/pr-subscribers-backend-powerpc
Author: Ramkumar Ramachandra (artagnon)
<details>
<summary>Changes</summary>
Introduce a simple and limited common-subexpression-elimination (CSE) pass at the VPlan level, running late, after recipes are executed. The long-term vision is to get rid of the legacy non-VPlan-based CSE routine in the LoopVectorize (LV) pass, but this patch doesn't yet fully subsume it.
---
Patch is 207.23 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/151872.diff
56 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+1)
- (modified) llvm/lib/Transforms/Vectorize/VPlan.h (+5)
- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp (+70)
- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.h (+4)
- (modified) llvm/lib/Transforms/Vectorize/VPlanUtils.h (+17)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll (+63-69)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/drop-poison-generating-flags.ll (+2-4)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll (+2-3)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll (+71-34)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll (+1-2)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll (+3-6)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-gaps.ll (+4-8)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/licm-calls.ll (+1-2)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll (+4-11)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll (+9-12)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll (+66-58)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll (-12)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll (+2-4)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll (+1-2)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll (+16-16)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-vscale-based-trip-counts.ll (+28-28)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll (+32-10)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops.ll (+1-2)
- (modified) llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll (+5-10)
- (modified) llvm/test/Transforms/LoopVectorize/PowerPC/vectorize-bswap.ll (+2-4)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll (+2-4)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll (+2-3)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll (+4-6)
- (modified) llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll (+1-2)
- (modified) llvm/test/Transforms/LoopVectorize/X86/cost-model.ll (-7)
- (modified) llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll (+2-8)
- (modified) llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll (+6-33)
- (modified) llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll (+66-57)
- (modified) llvm/test/Transforms/LoopVectorize/X86/replicate-uniform-call.ll (+1-2)
- (modified) llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll (+6-12)
- (modified) llvm/test/Transforms/LoopVectorize/assume.ll (+104-16)
- (modified) llvm/test/Transforms/LoopVectorize/dead_instructions.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll (+3-6)
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll (+2-4)
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll (+1-2)
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll (+15-27)
- (modified) llvm/test/Transforms/LoopVectorize/induction.ll (+2-4)
- (modified) llvm/test/Transforms/LoopVectorize/interleave-with-i65-induction.ll (+1-2)
- (modified) llvm/test/Transforms/LoopVectorize/interleaved-accesses-different-insert-position.ll (+1-2)
- (modified) llvm/test/Transforms/LoopVectorize/opaque-ptr.ll (+6-6)
- (modified) llvm/test/Transforms/LoopVectorize/outer_loop_test1.ll (+36-29)
- (modified) llvm/test/Transforms/LoopVectorize/pr36983-multiple-lcssa.ll (+1-2)
- (modified) llvm/test/Transforms/LoopVectorize/pr59319-loop-access-info-invalidation.ll (+12-13)
- (modified) llvm/test/Transforms/LoopVectorize/pseudoprobe.ll (+2-3)
- (modified) llvm/test/Transforms/LoopVectorize/reverse_induction.ll (+3-6)
- (modified) llvm/test/Transforms/LoopVectorize/scalable-assume.ll (+148-17)
- (modified) llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll (+5-7)
- (modified) llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll (+9-10)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index d04317bd8822d..b78017027dbf1 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7309,6 +7309,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
VPlanTransforms::narrowInterleaveGroups(
BestVPlan, BestVF,
TTI.getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector));
+ VPlanTransforms::cse(BestVPlan, *Legal->getWidestInductionType());
VPlanTransforms::removeDeadRecipes(BestVPlan);
VPlanTransforms::convertToConcreteRecipes(BestVPlan,
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 39f5e3651e9bb..7929d30a1c9f5 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -897,6 +897,11 @@ struct VPRecipeWithIRFlags : public VPSingleDefRecipe, public VPIRFlags {
return R && classof(R);
}
+ static inline bool classof(const VPSingleDefRecipe *U) {
+ auto *R = dyn_cast<VPRecipeBase>(U);
+ return R && classof(R);
+ }
+
void execute(VPTransformState &State) override = 0;
};
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 3ecffc7593d49..5e7815f26e9c0 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1753,6 +1753,76 @@ void VPlanTransforms::clearReductionWrapFlags(VPlan &Plan) {
}
}
+/// Hash the underlying data of a VPSingleDefRecipe pointer, instead of hashing
+/// the pointer itself.
+namespace {
+struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
+ static bool isSentinel(const VPSingleDefRecipe *Def) {
+ return Def == getEmptyKey() || Def == getTombstoneKey();
+ }
+
+ static bool canHandle(const VPSingleDefRecipe *Def) {
+ return isa<VPInstruction, VPWidenRecipe, VPWidenCastRecipe,
+ VPWidenSelectRecipe, VPHistogramRecipe, VPPartialReductionRecipe,
+ VPReplicateRecipe, VPWidenIntrinsicRecipe>(Def);
+ }
+
+ static unsigned getHashValue(const VPSingleDefRecipe *Def) {
+ return hash_combine(Def->getVPDefID(), vputils::getOpcode(*Def),
+ vputils::isSingleScalar(Def),
+ hash_combine_range(Def->operands()));
+ }
+
+ static bool isEqual(const VPSingleDefRecipe *L, const VPSingleDefRecipe *R) {
+ if (isSentinel(L) || isSentinel(R))
+ return L == R;
+ bool Result = L->getVPDefID() == R->getVPDefID() &&
+ vputils::getOpcode(*L) == vputils::getOpcode(*R) &&
+ vputils::isSingleScalar(L) == vputils::isSingleScalar(R) &&
+ equal(L->operands(), R->operands());
+ assert(!Result || getHashValue(L) == getHashValue(R));
+ return Result;
+ }
+};
+} // end anonymous namespace
+
+/// Perform a common-subexpression-elimination of VPSingleDefRecipes on the \p
+/// Plan.
+void VPlanTransforms::cse(VPlan &Plan, Type &CanonicalIVTy) {
+ VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
+ if (!LoopRegion)
+ return;
+ auto VPBBsOutsideLoopRegion = VPBlockUtils::blocksOnly<VPBasicBlock>(
+ vp_depth_first_shallow(Plan.getEntry()));
+ auto VPBBsInsideLoopRegion = VPBlockUtils::blocksOnly<VPBasicBlock>(
+ vp_depth_first_shallow(LoopRegion->getEntry()));
+
+ // There is existing logic to sink instructions into replicate regions, and
+ // we'd be undoing that work if we went through replicate regions. Hence,
+ // don't CSE in replicate regions.
+ DenseMap<VPSingleDefRecipe *, VPSingleDefRecipe *, VPCSEDenseMapInfo> CSEMap;
+ VPTypeAnalysis TypeInfo(&CanonicalIVTy);
+ for (VPBasicBlock *VPBB :
+ concat<VPBasicBlock *>(VPBBsOutsideLoopRegion, VPBBsInsideLoopRegion)) {
+ for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
+ auto *Def = dyn_cast<VPSingleDefRecipe>(&R);
+ if (!Def || !VPCSEDenseMapInfo::canHandle(Def))
+ continue;
+ if (VPSingleDefRecipe *V = CSEMap.lookup(Def)) {
+ if (TypeInfo.inferScalarType(Def) != TypeInfo.inferScalarType(V))
+ continue;
+ // Drop poison-generating flags when reusing a value.
+ if (auto *RFlags = dyn_cast<VPRecipeWithIRFlags>(V))
+ RFlags->dropPoisonGeneratingFlags();
+ Def->replaceAllUsesWith(V);
+ Def->eraseFromParent();
+ continue;
+ }
+ CSEMap[Def] = Def;
+ }
+ }
+}
+
/// Move loop-invariant recipes out of the vector loop region in \p Plan.
static void licm(VPlan &Plan) {
VPBasicBlock *Preheader = Plan.getVectorPreheader();
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 5943684e17a76..9e99c781022d7 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -240,6 +240,10 @@ struct VPlanTransforms {
/// removing dead edges to their successors.
static void removeBranchOnConst(VPlan &Plan);
+ /// Perform common-subexpression-elimination, which is best done after the \p
+ /// Plan is executed.
+ static void cse(VPlan &Plan, Type &CanonicalIVType);
+
/// If there's a single exit block, optimize its phi recipes that use exiting
/// IV values by feeding them precomputed end values instead, possibly taken
/// one step backwards.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
index 8dcd57f1b3598..f0a6540a91915 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
@@ -10,6 +10,7 @@
#define LLVM_TRANSFORMS_VECTORIZE_VPLANUTILS_H
#include "VPlan.h"
+#include "llvm/ADT/TypeSwitch.h"
namespace llvm {
class ScalarEvolution;
@@ -37,6 +38,22 @@ VPValue *getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr,
/// SCEV expression could be constructed.
const SCEV *getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE);
+/// Get any instruction opcode data embedded in recipe \p R. Returns an optional
+/// pair, where the first element indicates whether it is an intrinsic ID.
+inline std::optional<std::pair<bool, unsigned>>
+getOpcode(const VPRecipeBase &R) {
+ return TypeSwitch<const VPRecipeBase *,
+ std::optional<std::pair<bool, unsigned>>>(&R)
+ .Case<VPInstruction, VPWidenRecipe, VPWidenCastRecipe,
+ VPWidenSelectRecipe, VPHistogramRecipe, VPPartialReductionRecipe,
+ VPReplicateRecipe>(
+ [](auto *I) { return std::make_pair(false, I->getOpcode()); })
+ .Case<VPWidenIntrinsicRecipe>([](auto *I) {
+ return std::make_pair(true, I->getVectorIntrinsicID());
+ })
+ .Default([](auto *) { return std::nullopt; });
+}
+
/// Returns true if \p VPV is a single scalar, either because it produces the
/// same value for all lanes or only has its first lane used.
inline bool isSingleScalar(const VPValue *VPV) {
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
index 0232d88347d0a..cefb191f74c3e 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
@@ -1032,8 +1032,8 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
; DEFAULT-NEXT: br label %[[VECTOR_BODY:.*]]
; DEFAULT: [[VECTOR_BODY]]:
-; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE27:.*]] ]
-; DEFAULT-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE27]] ]
+; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE25:.*]] ]
+; DEFAULT-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE25]] ]
; DEFAULT-NEXT: [[TMP15:%.*]] = load float, ptr [[SRC_1]], align 4
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <8 x float> poison, float [[TMP15]], i64 0
; DEFAULT-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <8 x float> [[BROADCAST_SPLATINSERT8]], <8 x float> poison, <8 x i32> zeroinitializer
@@ -1046,10 +1046,7 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT10:%.*]] = insertelement <8 x float> poison, float [[TMP19]], i64 0
; DEFAULT-NEXT: [[BROADCAST_SPLAT11:%.*]] = shufflevector <8 x float> [[BROADCAST_SPLATINSERT10]], <8 x float> poison, <8 x i32> zeroinitializer
; DEFAULT-NEXT: [[TMP20:%.*]] = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> [[BROADCAST_SPLAT11]], <8 x float> zeroinitializer, <8 x float> [[TMP18]])
-; DEFAULT-NEXT: [[TMP21:%.*]] = load float, ptr [[SRC_3]], align 4
-; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT12:%.*]] = insertelement <8 x float> poison, float [[TMP21]], i64 0
-; DEFAULT-NEXT: [[BROADCAST_SPLAT13:%.*]] = shufflevector <8 x float> [[BROADCAST_SPLATINSERT12]], <8 x float> poison, <8 x i32> zeroinitializer
-; DEFAULT-NEXT: [[TMP22:%.*]] = fcmp ogt <8 x float> [[TMP20]], [[BROADCAST_SPLAT13]]
+; DEFAULT-NEXT: [[TMP22:%.*]] = fcmp ogt <8 x float> [[TMP20]], [[BROADCAST_SPLAT11]]
; DEFAULT-NEXT: [[TMP23:%.*]] = getelementptr { [4 x float] }, ptr [[DST]], <8 x i64> [[VEC_IND]]
; DEFAULT-NEXT: [[TMP24:%.*]] = extractelement <8 x i1> [[TMP22]], i32 0
; DEFAULT-NEXT: br i1 [[TMP24]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
@@ -1067,8 +1064,8 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE]]
; DEFAULT: [[PRED_STORE_CONTINUE]]:
; DEFAULT-NEXT: [[TMP31:%.*]] = extractelement <8 x i1> [[TMP22]], i32 1
-; DEFAULT-NEXT: br i1 [[TMP31]], label %[[PRED_STORE_IF14:.*]], label %[[PRED_STORE_CONTINUE15:.*]]
-; DEFAULT: [[PRED_STORE_IF14]]:
+; DEFAULT-NEXT: br i1 [[TMP31]], label %[[PRED_STORE_IF12:.*]], label %[[PRED_STORE_CONTINUE13:.*]]
+; DEFAULT: [[PRED_STORE_IF12]]:
; DEFAULT-NEXT: [[TMP32:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 1
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP32]], align 4
; DEFAULT-NEXT: [[TMP33:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 1
@@ -1079,11 +1076,11 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP36]], align 4
; DEFAULT-NEXT: [[TMP37:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 1
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP37]], align 4
-; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE15]]
-; DEFAULT: [[PRED_STORE_CONTINUE15]]:
+; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE13]]
+; DEFAULT: [[PRED_STORE_CONTINUE13]]:
; DEFAULT-NEXT: [[TMP38:%.*]] = extractelement <8 x i1> [[TMP22]], i32 2
-; DEFAULT-NEXT: br i1 [[TMP38]], label %[[PRED_STORE_IF16:.*]], label %[[PRED_STORE_CONTINUE17:.*]]
-; DEFAULT: [[PRED_STORE_IF16]]:
+; DEFAULT-NEXT: br i1 [[TMP38]], label %[[PRED_STORE_IF14:.*]], label %[[PRED_STORE_CONTINUE15:.*]]
+; DEFAULT: [[PRED_STORE_IF14]]:
; DEFAULT-NEXT: [[TMP39:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 2
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP39]], align 4
; DEFAULT-NEXT: [[TMP40:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 2
@@ -1094,11 +1091,11 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP43]], align 4
; DEFAULT-NEXT: [[TMP44:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 2
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP44]], align 4
-; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE17]]
-; DEFAULT: [[PRED_STORE_CONTINUE17]]:
+; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE15]]
+; DEFAULT: [[PRED_STORE_CONTINUE15]]:
; DEFAULT-NEXT: [[TMP45:%.*]] = extractelement <8 x i1> [[TMP22]], i32 3
-; DEFAULT-NEXT: br i1 [[TMP45]], label %[[PRED_STORE_IF18:.*]], label %[[PRED_STORE_CONTINUE19:.*]]
-; DEFAULT: [[PRED_STORE_IF18]]:
+; DEFAULT-NEXT: br i1 [[TMP45]], label %[[PRED_STORE_IF16:.*]], label %[[PRED_STORE_CONTINUE17:.*]]
+; DEFAULT: [[PRED_STORE_IF16]]:
; DEFAULT-NEXT: [[TMP46:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 3
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP46]], align 4
; DEFAULT-NEXT: [[TMP47:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 3
@@ -1109,11 +1106,11 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP50]], align 4
; DEFAULT-NEXT: [[TMP51:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 3
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP51]], align 4
-; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE19]]
-; DEFAULT: [[PRED_STORE_CONTINUE19]]:
+; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE17]]
+; DEFAULT: [[PRED_STORE_CONTINUE17]]:
; DEFAULT-NEXT: [[TMP52:%.*]] = extractelement <8 x i1> [[TMP22]], i32 4
-; DEFAULT-NEXT: br i1 [[TMP52]], label %[[PRED_STORE_IF20:.*]], label %[[PRED_STORE_CONTINUE21:.*]]
-; DEFAULT: [[PRED_STORE_IF20]]:
+; DEFAULT-NEXT: br i1 [[TMP52]], label %[[PRED_STORE_IF18:.*]], label %[[PRED_STORE_CONTINUE19:.*]]
+; DEFAULT: [[PRED_STORE_IF18]]:
; DEFAULT-NEXT: [[TMP53:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 4
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP53]], align 4
; DEFAULT-NEXT: [[TMP54:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 4
@@ -1124,11 +1121,11 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP57]], align 4
; DEFAULT-NEXT: [[TMP58:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 4
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP58]], align 4
-; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE21]]
-; DEFAULT: [[PRED_STORE_CONTINUE21]]:
+; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE19]]
+; DEFAULT: [[PRED_STORE_CONTINUE19]]:
; DEFAULT-NEXT: [[TMP59:%.*]] = extractelement <8 x i1> [[TMP22]], i32 5
-; DEFAULT-NEXT: br i1 [[TMP59]], label %[[PRED_STORE_IF22:.*]], label %[[PRED_STORE_CONTINUE23:.*]]
-; DEFAULT: [[PRED_STORE_IF22]]:
+; DEFAULT-NEXT: br i1 [[TMP59]], label %[[PRED_STORE_IF20:.*]], label %[[PRED_STORE_CONTINUE21:.*]]
+; DEFAULT: [[PRED_STORE_IF20]]:
; DEFAULT-NEXT: [[TMP60:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 5
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP60]], align 4
; DEFAULT-NEXT: [[TMP61:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 5
@@ -1139,11 +1136,11 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP64]], align 4
; DEFAULT-NEXT: [[TMP65:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 5
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP65]], align 4
-; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE23]]
-; DEFAULT: [[PRED_STORE_CONTINUE23]]:
+; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE21]]
+; DEFAULT: [[PRED_STORE_CONTINUE21]]:
; DEFAULT-NEXT: [[TMP66:%.*]] = extractelement <8 x i1> [[TMP22]], i32 6
-; DEFAULT-NEXT: br i1 [[TMP66]], label %[[PRED_STORE_IF24:.*]], label %[[PRED_STORE_CONTINUE25:.*]]
-; DEFAULT: [[PRED_STORE_IF24]]:
+; DEFAULT-NEXT: br i1 [[TMP66]], label %[[PRED_STORE_IF22:.*]], label %[[PRED_STORE_CONTINUE23:.*]]
+; DEFAULT: [[PRED_STORE_IF22]]:
; DEFAULT-NEXT: [[TMP67:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 6
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP67]], align 4
; DEFAULT-NEXT: [[TMP68:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 6
@@ -1154,11 +1151,11 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP71]], align 4
; DEFAULT-NEXT: [[TMP72:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 6
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP72]], align 4
-; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE25]]
-; DEFAULT: [[PRED_STORE_CONTINUE25]]:
+; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE23]]
+; DEFAULT: [[PRED_STORE_CONTINUE23]]:
; DEFAULT-NEXT: [[TMP73:%.*]] = extractelement <8 x i1> [[TMP22]], i32 7
-; DEFAULT-NEXT: br i1 [[TMP73]], label %[[PRED_STORE_IF26:.*]], label %[[PRED_STORE_CONTINUE27]]
-; DEFAULT: [[PRED_STORE_IF26]]:
+; DEFAULT-NEXT: br i1 [[TMP73]], label %[[PRED_STORE_IF24:.*]], label %[[PRED_STORE_CONTINUE25]]
+; DEFAULT: [[PRED_STORE_IF24]]:
; DEFAULT-NEXT: [[TMP74:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 7
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP74]], align 4
; DEFAULT-NEXT: [[TMP75:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 7
@@ -1169,8 +1166,8 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP78]], align 4
; DEFAULT-NEXT: [[TMP79:%.*]] = extractelement <8 x ptr> [[TMP23]], i32 7
; DEFAULT-NEXT: store float 0.000000e+00, ptr [[TMP79]], align 4
-; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE27]]
-; DEFAULT: [[PRED_STORE_CONTINUE27]]:
+; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE25]]
+; DEFAULT: [[PRED_STORE_CONTINUE25]]:
; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
; DEFAULT-NEXT: [[TMP80:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -1251,9 +1248,9 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
; PRED-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 0, i64 [[TMP0]])
; PRED-NEXT: br label %[[VECTOR_BODY:.*]]
; PRED: [[VECTOR_BODY]]:
-; PRED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE27:.*]] ]
-; PRED-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[PRED_STORE_CONTINUE27]] ]
-; PRED-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE27]] ]
+; PRED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE25:.*]] ]
+; PRED-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <8 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[PRED_STORE_CONTINUE25]] ]
+; PRED-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE25]] ]
; PRED-NEXT: [[TMP18:%.*]] = load float, ptr [[SRC_1]], align 4
; PRED-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <8 x float> poison, float [[TMP18]], i64 0
; PRED-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <8 x float> [[BROADCAST_SPLATINSERT8]], <8 x float> poison, <8 x i32> zeroinitializer
@@ -1266,10 +1263,7 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
; PRED-NEXT: [[BROADCAST_SPLATINSERT10:%.*]] = insertelement <8 x float> poison, float [[TMP22]], i64 0
; PRED-NEXT: [[BROADCAST_SPLAT11:%.*]] = shufflevector <8 x float> [[BROADCAST_SPLATINSERT10]], <8 x float> poison, <8 x i32> zeroinitializer
; PRED-NEXT: [[TMP23:%.*]] = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> [[BROADCAST_SPLAT11]], <8 x float> zeroinitializer, <8 x float> [[TMP21]])
-; PRED-NEXT: [[TMP24:%.*]] = load float, ptr [[SRC_3]], align 4
-; PRED-NEXT: [[BROADCAST_SPLATINSERT12:%.*]] = insertelement <8 x float> poison, float [[TMP24]], i64 0
-; PRED-NEXT: [[BROADCAST_SPLAT13:%.*]] = shufflevector <8 x float> [[BROADCAST_SPLATINSERT12]], <8 x float> poison, <8 x i32> zeroinitializer
-; PRED-NEXT: [[...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/151872
More information about the llvm-commits mailing list