[llvm-branch-commits] [llvm] [NFC][VPlan] Split `makeMemOpWideningDecisions` into subpasses (PR #182593)
Andrei Elovikov via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Feb 25 09:20:39 PST 2026
https://github.com/eas updated https://github.com/llvm/llvm-project/pull/182593
>From 2258dcc87a8e6602aaee190b6b6ae70b51a9bdf9 Mon Sep 17 00:00:00 2001
From: Andrei Elovikov <andrei.elovikov at sifive.com>
Date: Thu, 19 Feb 2026 11:28:54 -0800
Subject: [PATCH] [NFC][VPlan] Split `makeMemOpWideningDecisions` into
subpasses
The idea is to have handling of strided memory operations (either from
https://github.com/llvm/llvm-project/pull/147297 or for VPlan-based
multiversioning for unit-strided accesses) done after some mandatory
processing has been performed (e.g., some types **must** be scalarized)
but before legacy CM's decision to widen (gather/scatter) or scalarize
has been committed.
And in the longer term, we can uplift all other memory widening decisions to
be done here directly at the VPlan level. I expect this structure would also
be beneficial for that.
---
.../Transforms/Vectorize/LoopVectorize.cpp | 10 --
.../Transforms/Vectorize/VPlanTransforms.cpp | 124 ++++++++++++------
.../Transforms/Vectorize/VPlanTransforms.h | 10 ++
.../VPlan/vplan-print-after-all.ll | 3 +
4 files changed, 97 insertions(+), 50 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 8877ae5a7b6ba..183a2680fe191 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -422,16 +422,6 @@ static cl::opt<bool> ConsiderRegPressure(
// after prolog. See `emitIterationCountCheck`.
static constexpr uint32_t MinItersBypassWeights[] = {1, 127};
-/// A helper function that returns true if the given type is irregular. The
-/// type is irregular if its allocated size doesn't equal the store size of an
-/// element of the corresponding vector type.
-static bool hasIrregularType(Type *Ty, const DataLayout &DL) {
- // Determine if an array of N elements of type Ty is "bitcast compatible"
- // with a <N x Ty> vector.
- // This is only true if there is no padding between the array elements.
- return DL.getTypeAllocSizeInBits(Ty) != DL.getTypeSizeInBits(Ty);
-}
-
/// A version of ScalarEvolution::getSmallConstantTripCount that returns an
/// ElementCount to include loops whose trip count is a function of vscale.
static ElementCount getSmallConstantTripCount(ScalarEvolution *SE,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 904dbb0192062..f1fde9f01351e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -6225,49 +6225,93 @@ void VPlanTransforms::makeMemOpWideningDecisions(
}
}
- auto *MiddleVPBB = Plan.getMiddleBlock();
- VPBasicBlock::iterator MBIP = MiddleVPBB->getFirstNonPhi();
+ // Few helpers to process different kinds of memory operations.
- for (VPInstruction *VPI : MemOps) {
- Instruction *Instr = cast<Instruction>(VPI->getUnderlyingValue());
- RecipeBuilder.getVPBuilder().setInsertPoint(VPI);
-
- auto ReplaceWith = [&](VPRecipeBase *New) {
- RecipeBuilder.setRecipe(Instr, New);
- RecipeBuilder.getVPBuilder().insert(New);
- if (VPI->getOpcode() == Instruction::Load)
- VPI->replaceAllUsesWith(New->getVPSingleValue());
- VPI->eraseFromParent();
- };
-
- // The stores with invariant address inside the loop will be deleted, and
- // in the exit block, a uniform store recipe will be created for the final
- // invariant store of the reduction.
- StoreInst *SI;
- if ((SI = dyn_cast<StoreInst>(Instr)) &&
- Legal.isInvariantAddressOfReduction(SI->getPointerOperand())) {
- // Only create recipe for the final invariant store of the reduction.
- if (Legal.isInvariantStoreOfReduction(SI)) {
- auto *Recipe = new VPReplicateRecipe(
- SI, VPI->operandsWithoutMask(), true /* IsUniform */,
- nullptr /*Mask*/, *VPI, *VPI, VPI->getDebugLoc());
- Recipe->insertBefore(*MiddleVPBB, MBIP);
- }
- VPI->eraseFromParent();
- continue;
- }
+ // To be used as argument to `VPlanTransforms::runPass` which explicitly
+ // specified pass name, hence `VPlan &` parameter.
+ auto ProcessSubset = [&](VPlan &, auto ProcessVPInst) {
+ SmallVector<VPInstruction *> RemainingMemOps;
+ for (VPInstruction *VPI : MemOps) {
+ RecipeBuilder.getVPBuilder().setInsertPoint(VPI);
- if (VPI->getOpcode() == Instruction::Store) {
- if (auto HistInfo = Legal.getHistogramInfo(cast<StoreInst>(Instr))) {
- ReplaceWith(RecipeBuilder.tryToWidenHistogram(*HistInfo, VPI));
- continue;
- }
+ if (!ProcessVPInst(VPI))
+ RemainingMemOps.push_back(VPI);
}
- VPRecipeBase *Recipe = RecipeBuilder.tryToWidenMemory(VPI, Range);
- if (!Recipe)
- Recipe = RecipeBuilder.handleReplication(cast<VPInstruction>(VPI), Range);
+ MemOps.clear();
+ std::swap(MemOps, RemainingMemOps);
+ };
+
+ auto ReplaceWith = [&](VPInstruction *VPI, VPRecipeBase *New) {
+ Instruction *Instr = cast<Instruction>(VPI->getUnderlyingValue());
+ RecipeBuilder.setRecipe(Instr, New);
+ RecipeBuilder.getVPBuilder().insert(New);
+ if (VPI->getOpcode() == Instruction::Load)
+ VPI->replaceAllUsesWith(New->getVPSingleValue());
+ VPI->eraseFromParent();
- ReplaceWith(Recipe);
- }
+ return true;
+ };
+
+ auto Scalarize = [&](VPInstruction *VPI) {
+ return ReplaceWith(VPI, RecipeBuilder.handleReplication(VPI, Range));
+ };
+
+ VPlanTransforms::runPass(
+ "lowerMemoryIdioms", ProcessSubset, Plan,
+ // Reduction stores need to happen in the same order, so MBIP shares state
+ // between iterations, hence mutable lambda.
+ [&, MBIP = Plan.getMiddleBlock()->getFirstNonPhi()](
+ VPInstruction *VPI) mutable {
+ Instruction *Instr = cast<Instruction>(VPI->getUnderlyingValue());
+
+ // The stores with invariant address inside the loop will be deleted,
+ // and in the exit block, a uniform store recipe will be created for
+ // the final invariant store of the reduction.
+ StoreInst *SI = dyn_cast<StoreInst>(Instr);
+ if (!SI)
+ return false;
+ if (Legal.isInvariantAddressOfReduction(SI->getPointerOperand())) {
+ // Only create recipe for the final invariant store of the
+ // reduction.
+ if (Legal.isInvariantStoreOfReduction(SI)) {
+ auto *Recipe = new VPReplicateRecipe(
+ SI, VPI->operandsWithoutMask(), true /* IsUniform */,
+ nullptr /*Mask*/, *VPI, *VPI, VPI->getDebugLoc());
+ Recipe->insertBefore(*Plan.getMiddleBlock(), MBIP);
+ }
+ VPI->eraseFromParent();
+ return true;
+ }
+
+ if (auto HistInfo = Legal.getHistogramInfo(cast<StoreInst>(Instr))) {
+ return ReplaceWith(VPI,
+ RecipeBuilder.tryToWidenHistogram(*HistInfo, VPI));
+ }
+
+ return false;
+ });
+
+ // If the instruction's allocated size doesn't equal it's type size, it
+ // requires padding and will be scalarized.
+ VPlanTransforms::runPass(
+ "scalarizeMemOpsWithIrregularTypes", ProcessSubset, Plan,
+ [&](VPInstruction *VPI) {
+ Instruction *I = VPI->getUnderlyingInstr();
+ if (hasIrregularType(getLoadStoreType(I), I->getDataLayout()))
+ return Scalarize(VPI);
+
+ return false;
+ });
+
+ VPlanTransforms::runPass("delegateMemOpWideningToLegacyCM", ProcessSubset,
+ Plan, [&](VPInstruction *VPI) {
+ VPRecipeBase *Recipe =
+ RecipeBuilder.tryToWidenMemory(VPI, Range);
+ if (!Recipe)
+ Recipe = RecipeBuilder.handleReplication(
+ cast<VPInstruction>(VPI), Range);
+
+ return ReplaceWith(VPI, Recipe);
+ });
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 3f3327f111d65..b98cec4bc915f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -493,6 +493,16 @@ struct VPlanTransforms {
LoopVectorizationLegality &Legal);
};
+/// A helper function that returns true if the given type is irregular. The
+/// type is irregular if its allocated size doesn't equal the store size of an
+/// element of the corresponding vector type.
+inline bool hasIrregularType(Type *Ty, const DataLayout &DL) {
+ // Determine if an array of N elements of type Ty is "bitcast compatible"
+ // with a <N x Ty> vector.
+ // This is only true if there is no padding between the array elements.
+ return DL.getTypeAllocSizeInBits(Ty) != DL.getTypeSizeInBits(Ty);
+}
+
} // namespace llvm
#endif // LLVM_TRANSFORMS_VECTORIZE_VPLANTRANSFORMS_H
diff --git a/llvm/test/Transforms/LoopVectorize/VPlan/vplan-print-after-all.ll b/llvm/test/Transforms/LoopVectorize/VPlan/vplan-print-after-all.ll
index 8617788c90584..706d91260a70e 100644
--- a/llvm/test/Transforms/LoopVectorize/VPlan/vplan-print-after-all.ll
+++ b/llvm/test/Transforms/LoopVectorize/VPlan/vplan-print-after-all.ll
@@ -5,6 +5,9 @@
; CHECK: VPlan for loop in 'foo' after printAfterInitialConstruction
; CHECK: VPlan for loop in 'foo' after VPlanTransforms::introduceMasksAndLinearize
+; CHECK: VPlan for loop in 'foo' after lowerMemoryIdioms
+; CHECK: VPlan for loop in 'foo' after scalarizeMemOpsWithIrregularTypes
+; CHECK: VPlan for loop in 'foo' after delegateMemOpWideningToLegacyCM
; CHECK: VPlan for loop in 'foo' after VPlanTransforms::makeMemOpWideningDecisions
; CHECK: VPlan for loop in 'foo' after VPlanTransforms::clearReductionWrapFlags
; CHECK: VPlan for loop in 'foo' after VPlanTransforms::optimizeFindIVReductions
More information about the llvm-branch-commits
mailing list