[llvm] [LV] Add a flag to conservatively choose a larger vector factor when maximizing bandwidth (PR #156012)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 29 05:16:59 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-llvm-transforms
Author: Yuta Mukai (ytmukai)
Changes:
Add a flag -vectorizer-maximize-bandwidth-conservatively that makes the vectorizer choose a larger vector factor conservatively when it considers candidates up to the factor determined by the smallest type in the loop. When the vector factor is large, pack/unpack instructions for vector registers may be required, and the vector-calculation pipeline can become a bottleneck and degrade performance even though the overall number of instructions is reduced. With this flag enabled, a larger factor is chosen only if it is superior not only in overall cost but also when the comparison is restricted to the cost of the vector calculations alone.
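Not part of the patch itself, but to illustrate how a target could opt in by default, here is a minimal sketch of overriding the new TTI hook, assuming a hypothetical `MyTargetTTIImpl`; only the hook name and the `RegisterKind` enumerators come from the patch, the target class and policy choice are illustrative.

```c++
// Illustrative sketch only: a hypothetical target (MyTargetTTIImpl) opting in
// to the conservative comparison for scalable vectors. The hook signature and
// the RegisterKind enumerators match the patch; the target class and the
// policy chosen here are assumptions made for this example.
bool MyTargetTTIImpl::shouldMaximizeVectorBandwidthConservatively(
    TargetTransformInfo::RegisterKind K) const {
  // Be conservative only when maximizing bandwidth for scalable vectors;
  // fixed-width vectors keep the default (non-conservative) selection.
  return K == TargetTransformInfo::RGK_ScalableVector;
}
```

Alternatively, the behavior can be forced from the command line, as exercised by the new test below: `opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -vectorizer-maximize-bandwidth-conservatively ...`.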
---
Patch is 20.71 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/156012.diff
9 Files Affected:
- (modified) llvm/include/llvm/Analysis/TargetTransformInfo.h (+7)
- (modified) llvm/include/llvm/Analysis/TargetTransformInfoImpl.h (+5)
- (modified) llvm/lib/Analysis/TargetTransformInfo.cpp (+5)
- (modified) llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp (+6-1)
- (modified) llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h (+2-1)
- (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+63-14)
- (modified) llvm/lib/Transforms/Vectorize/VPlan.cpp (+41-13)
- (modified) llvm/lib/Transforms/Vectorize/VPlan.h (+8-4)
- (added) llvm/test/Transforms/LoopVectorize/AArch64/maximize-bandwidth-conservatively.ll (+58)
``````````diff
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index c4ba8e9857dc4..abf087281fe41 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1231,6 +1231,13 @@ class TargetTransformInfo {
LLVM_ABI bool
shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const;
+ /// \return True if vectorization factors wider than those matching the
+ /// largest element type should be chosen conservatively. This only makes
+ /// sense when shouldMaximizeVectorBandwidth returns true.
+ /// \p K Register Kind for vectorization.
+ LLVM_ABI bool shouldMaximizeVectorBandwidthConservatively(
+ TargetTransformInfo::RegisterKind K) const;
+
/// \return The minimum vectorization factor for types of given element
/// bit width, or 0 if there is no minimum VF. The returned value only
/// applies when shouldMaximizeVectorBandwidth returns true.
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 43813d2f3acb5..6651505be9b86 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -597,6 +597,11 @@ class TargetTransformInfoImplBase {
return false;
}
+ virtual bool shouldMaximizeVectorBandwidthConservatively(
+ TargetTransformInfo::RegisterKind K) const {
+ return false;
+ }
+
virtual ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const {
return ElementCount::get(0, IsScalable);
}
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 4ac8f03e6dbf5..0485581b8006c 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -803,6 +803,11 @@ bool TargetTransformInfo::shouldMaximizeVectorBandwidth(
return TTIImpl->shouldMaximizeVectorBandwidth(K);
}
+bool TargetTransformInfo::shouldMaximizeVectorBandwidthConservatively(
+ TargetTransformInfo::RegisterKind K) const {
+ return TTIImpl->shouldMaximizeVectorBandwidthConservatively(K);
+}
+
ElementCount TargetTransformInfo::getMinimumVF(unsigned ElemWidth,
bool IsScalable) const {
return TTIImpl->getMinimumVF(ElemWidth, IsScalable);
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 490f6391c15a0..ac75a8a1727e9 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -76,6 +76,9 @@ static cl::opt<unsigned> DMBLookaheadThreshold(
"dmb-lookahead-threshold", cl::init(10), cl::Hidden,
cl::desc("The number of instructions to search for a redundant dmb"));
+static cl::opt<bool> EnableSVEMaximizeVecBW("enable-sve-maximize-vec-bw",
+ cl::init(false), cl::Hidden);
+
namespace {
class TailFoldingOption {
// These bitfields will only ever be set to something non-zero in operator=,
@@ -370,7 +373,9 @@ bool AArch64TTIImpl::shouldMaximizeVectorBandwidth(
TargetTransformInfo::RegisterKind K) const {
assert(K != TargetTransformInfo::RGK_Scalar);
return (K == TargetTransformInfo::RGK_FixedWidthVector &&
- ST->isNeonAvailable());
+ ST->isNeonAvailable()) ||
+ (EnableSVEMaximizeVecBW &&
+ K == TargetTransformInfo::RGK_ScalableVector && ST->isSVEAvailable());
}
/// Calculate the cost of materializing a 64-bit value. This helper
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index 838476dcae661..c747920b0a318 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -474,7 +474,8 @@ class LoopVectorizationPlanner {
///
/// TODO: Move to VPlan::cost once the use of LoopVectorizationLegality has
/// been retired.
- InstructionCost cost(VPlan &Plan, ElementCount VF) const;
+ InstructionCost cost(VPlan &Plan, ElementCount VF,
+ bool CountsVecCalcOnly = false) const;
/// Precompute costs for certain instructions using the legacy cost model. The
/// function is used to bring up the VPlan-based cost model to initially avoid
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index a0f306c12754f..a70c21353139d 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -263,6 +263,11 @@ static cl::opt<bool> MaximizeBandwidth(
cl::desc("Maximize bandwidth when selecting vectorization factor which "
"will be determined by the smallest type in loop."));
+static cl::opt<bool> MaximizeBandwidthConservatively(
+ "vectorizer-maximize-bandwidth-conservatively", cl::init(false), cl::Hidden,
+ cl::desc("When MaximizeBandwidth is enabled, a larger vector factor is "
+ "chosen conservatively."));
+
static cl::opt<bool> EnableInterleavedMemAccesses(
"enable-interleaved-mem-accesses", cl::init(false), cl::Hidden,
cl::desc("Enable vectorization on interleaved memory accesses in a loop"));
@@ -962,9 +967,16 @@ class LoopVectorizationCostModel {
/// user options, for the given register kind.
bool useMaxBandwidth(TargetTransformInfo::RegisterKind RegKind);
+ /// \return True if maximizing vector bandwidth should be applied
+ /// conservatively by the target or user options, for the given register kind.
+ /// This only makes sense when useMaxBandwidth returns true.
+ bool useMaxBandwidthConservatively(TargetTransformInfo::RegisterKind RegKind);
+
/// \return True if register pressure should be calculated for the given VF.
bool shouldCalculateRegPressureForVF(ElementCount VF);
+ bool isVFForMaxBandwidth(ElementCount VF);
+
/// \return The size (in bits) of the smallest and widest types in the code
/// that needs to be vectorized. We ignore values that remain scalar such as
/// 64 bit loop indices.
@@ -3812,11 +3824,15 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
bool LoopVectorizationCostModel::shouldCalculateRegPressureForVF(
ElementCount VF) {
+ // Only calculate register pressure for VFs enabled by MaxBandwidth.
+ return isVFForMaxBandwidth(VF);
+}
+
+bool LoopVectorizationCostModel::isVFForMaxBandwidth(ElementCount VF) {
if (!useMaxBandwidth(VF.isScalable()
? TargetTransformInfo::RGK_ScalableVector
: TargetTransformInfo::RGK_FixedWidthVector))
return false;
- // Only calculate register pressure for VFs enabled by MaxBandwidth.
return ElementCount::isKnownGT(
VF, VF.isScalable() ? MaxPermissibleVFWithoutMaxBW.ScalableVF
: MaxPermissibleVFWithoutMaxBW.FixedVF);
@@ -3830,6 +3846,13 @@ bool LoopVectorizationCostModel::useMaxBandwidth(
Legal->hasVectorCallVariants())));
}
+bool LoopVectorizationCostModel::useMaxBandwidthConservatively(
+ TargetTransformInfo::RegisterKind RegKind) {
+ return MaximizeBandwidthConservatively ||
+ (MaximizeBandwidthConservatively.getNumOccurrences() == 0 &&
+ TTI.shouldMaximizeVectorBandwidthConservatively(RegKind));
+}
+
ElementCount LoopVectorizationCostModel::clampVFByMaxTripCount(
ElementCount VF, unsigned MaxTripCount, bool FoldTailByMasking) const {
unsigned EstimatedVF = VF.getKnownMinValue();
@@ -6923,13 +6946,16 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF,
return Cost;
}
-InstructionCost LoopVectorizationPlanner::cost(VPlan &Plan,
- ElementCount VF) const {
+InstructionCost LoopVectorizationPlanner::cost(VPlan &Plan, ElementCount VF,
+ bool CountsVecCalcOnly) const {
VPCostContext CostCtx(CM.TTI, *CM.TLI, Plan, CM, CM.CostKind);
- InstructionCost Cost = precomputeCosts(Plan, VF, CostCtx);
+ InstructionCost Cost;
+
+ if (!CountsVecCalcOnly)
+ Cost += precomputeCosts(Plan, VF, CostCtx);
// Now compute and add the VPlan-based cost.
- Cost += Plan.cost(VF, CostCtx);
+ Cost += Plan.cost(VF, CostCtx, CountsVecCalcOnly);
#ifndef NDEBUG
unsigned EstimatedWidth = estimateElementCount(VF, CM.getVScaleForTuning());
LLVM_DEBUG(dbgs() << "Cost for VF " << VF << ": " << Cost
@@ -7105,8 +7131,25 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() {
continue;
}
- if (isMoreProfitable(CurrentFactor, BestFactor, P->hasScalarTail()))
- BestFactor = CurrentFactor;
+ if (isMoreProfitable(CurrentFactor, BestFactor, P->hasScalarTail())) {
+ if (CM.isVFForMaxBandwidth(VF) &&
+ CM.useMaxBandwidthConservatively(
+ VF.isScalable() ? TargetTransformInfo::RGK_ScalableVector
+ : TargetTransformInfo::RGK_FixedWidthVector)) {
+ if (ElementCount::isKnownLT(BestFactor.Width, VF) &&
+ llvm::find(VFs, BestFactor.Width)) {
+ VectorizationFactor BestFactorVecCalc(
+ BestFactor.Width, cost(*P, BestFactor.Width, true), ScalarCost);
+ VectorizationFactor CurrentFactorVecCalc(VF, cost(*P, VF, true),
+ ScalarCost);
+ if (isMoreProfitable(CurrentFactorVecCalc, BestFactorVecCalc,
+ P->hasScalarTail()))
+ BestFactor = CurrentFactor;
+ }
+ } else {
+ BestFactor = CurrentFactor;
+ }
+ }
// If profitable add it to ProfitableVF list.
if (isMoreProfitable(CurrentFactor, ScalarFactor, P->hasScalarTail()))
@@ -7131,13 +7174,19 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() {
// Verify that the VPlan-based and legacy cost models agree, except for VPlans
// with early exits and plans with additional VPlan simplifications. The
// legacy cost model doesn't properly model costs for such loops.
- assert((BestFactor.Width == LegacyVF.Width || BestPlan.hasEarlyExit() ||
- planContainsAdditionalSimplifications(getPlanFor(BestFactor.Width),
- CostCtx, OrigLoop,
- BestFactor.Width) ||
- planContainsAdditionalSimplifications(
- getPlanFor(LegacyVF.Width), CostCtx, OrigLoop, LegacyVF.Width)) &&
- " VPlan cost model and legacy cost model disagreed");
+ if (!CM.isVFForMaxBandwidth(LegacyVF.Width) ||
+ !CM.useMaxBandwidthConservatively(
+ LegacyVF.Width.isScalable()
+ ? TargetTransformInfo::RGK_ScalableVector
+ : TargetTransformInfo::RGK_FixedWidthVector))
+ assert((BestFactor.Width == LegacyVF.Width || BestPlan.hasEarlyExit() ||
+ planContainsAdditionalSimplifications(getPlanFor(BestFactor.Width),
+ CostCtx, OrigLoop,
+ BestFactor.Width) ||
+ planContainsAdditionalSimplifications(getPlanFor(LegacyVF.Width),
+ CostCtx, OrigLoop,
+ LegacyVF.Width)) &&
+ " VPlan cost model and legacy cost model disagreed");
assert((BestFactor.Width.isScalar() || BestFactor.ScalarCost > 0) &&
"when vectorizing, the scalar cost must be computed.");
#endif
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index f972efa07eb7e..3470de8e56871 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -800,10 +800,34 @@ void VPRegionBlock::execute(VPTransformState *State) {
State->Lane.reset();
}
-InstructionCost VPBasicBlock::cost(ElementCount VF, VPCostContext &Ctx) {
+InstructionCost VPBasicBlock::cost(ElementCount VF, VPCostContext &Ctx,
+ bool CountsVecCalcOnly) {
InstructionCost Cost = 0;
- for (VPRecipeBase &R : Recipes)
- Cost += R.cost(VF, Ctx);
+ for (VPRecipeBase &R : Recipes) {
+ if (!CountsVecCalcOnly)
+ Cost += R.cost(VF, Ctx);
+ else {
+ switch (R.getVPDefID()) {
+ case VPDef::VPActiveLaneMaskPHISC:
+ case VPDef::VPBlendSC:
+ case VPDef::VPFirstOrderRecurrencePHISC:
+ case VPDef::VPPartialReductionSC:
+ case VPDef::VPReductionPHISC:
+ case VPDef::VPReductionSC:
+ case VPDef::VPWidenCallSC:
+ case VPDef::VPWidenCanonicalIVSC:
+ case VPDef::VPWidenCastSC:
+ case VPDef::VPWidenGEPSC:
+ case VPDef::VPWidenIntOrFpInductionSC:
+ case VPDef::VPWidenIntrinsicSC:
+ case VPDef::VPWidenPHISC:
+ case VPDef::VPWidenPointerInductionSC:
+ case VPDef::VPWidenSC:
+ case VPDef::VPWidenSelectSC:
+ Cost += R.cost(VF, Ctx);
+ }
+ }
+ }
return Cost;
}
@@ -826,11 +850,12 @@ const VPBasicBlock *VPBasicBlock::getCFGPredecessor(unsigned Idx) const {
return Pred->getExitingBasicBlock();
}
-InstructionCost VPRegionBlock::cost(ElementCount VF, VPCostContext &Ctx) {
+InstructionCost VPRegionBlock::cost(ElementCount VF, VPCostContext &Ctx,
+ bool CountsVecCalcOnly) {
if (!isReplicator()) {
InstructionCost Cost = 0;
for (VPBlockBase *Block : vp_depth_first_shallow(getEntry()))
- Cost += Block->cost(VF, Ctx);
+ Cost += Block->cost(VF, Ctx, CountsVecCalcOnly);
InstructionCost BackedgeCost =
ForceTargetInstructionCost.getNumOccurrences()
? InstructionCost(ForceTargetInstructionCost.getNumOccurrences())
@@ -853,7 +878,7 @@ InstructionCost VPRegionBlock::cost(ElementCount VF, VPCostContext &Ctx) {
// uniform condition.
using namespace llvm::VPlanPatternMatch;
VPBasicBlock *Then = cast<VPBasicBlock>(getEntry()->getSuccessors()[0]);
- InstructionCost ThenCost = Then->cost(VF, Ctx);
+ InstructionCost ThenCost = Then->cost(VF, Ctx, CountsVecCalcOnly);
// For the scalar case, we may not always execute the original predicated
// block, Thus, scale the block's cost by the probability of executing it.
@@ -1016,19 +1041,22 @@ void VPlan::execute(VPTransformState *State) {
}
}
-InstructionCost VPlan::cost(ElementCount VF, VPCostContext &Ctx) {
+InstructionCost VPlan::cost(ElementCount VF, VPCostContext &Ctx,
+ bool CountsVecCalcOnly) {
// For now only return the cost of the vector loop region, ignoring any other
// blocks, like the preheader or middle blocks, expect for checking them for
// recipes with invalid costs.
- InstructionCost Cost = getVectorLoopRegion()->cost(VF, Ctx);
+ InstructionCost Cost =
+ getVectorLoopRegion()->cost(VF, Ctx, CountsVecCalcOnly);
// If the cost of the loop region is invalid or any recipe in the skeleton
// outside loop regions are invalid return an invalid cost.
- if (!Cost.isValid() || any_of(VPBlockUtils::blocksOnly<VPBasicBlock>(
- vp_depth_first_shallow(getEntry())),
- [&VF, &Ctx](VPBasicBlock *VPBB) {
- return !VPBB->cost(VF, Ctx).isValid();
- }))
+ if (!Cost.isValid() ||
+ any_of(VPBlockUtils::blocksOnly<VPBasicBlock>(
+ vp_depth_first_shallow(getEntry())),
+ [&VF, &Ctx, &CountsVecCalcOnly](VPBasicBlock *VPBB) {
+ return !VPBB->cost(VF, Ctx, CountsVecCalcOnly).isValid();
+ }))
return InstructionCost::getInvalid();
return Cost;
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index d6bc462a0dfab..88f4f5dd24eaa 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -340,7 +340,8 @@ class LLVM_ABI_FOR_TEST VPBlockBase {
virtual void execute(VPTransformState *State) = 0;
/// Return the cost of the block.
- virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx) = 0;
+ virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx,
+ bool CountsVecCalcOnly = false) = 0;
/// Return true if it is legal to hoist instructions into this block.
bool isLegalToHoistInto() {
@@ -3716,7 +3717,8 @@ class LLVM_ABI_FOR_TEST VPBasicBlock : public VPBlockBase {
void execute(VPTransformState *State) override;
/// Return the cost of this VPBasicBlock.
- InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
+ InstructionCost cost(ElementCount VF, VPCostContext &Ctx,
+ bool CountsVecCalcOnly) override;
/// Return the position of the first non-phi node recipe in the block.
iterator getFirstNonPhi();
@@ -3897,7 +3899,8 @@ class LLVM_ABI_FOR_TEST VPRegionBlock : public VPBlockBase {
void execute(VPTransformState *State) override;
// Return the cost of this region.
- InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
+ InstructionCost cost(ElementCount VF, VPCostContext &Ctx,
+ bool CountsVecCalcOnly) override;
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print this VPRegionBlock to \p O (recursively), prefixing all lines with
@@ -4022,7 +4025,8 @@ class VPlan {
void execute(VPTransformState *State);
/// Return the cost of this plan.
- InstructionCost cost(ElementCount VF, VPCostContext &Ctx);
+ InstructionCost cost(ElementCount VF, VPCostContext &Ctx,
+ bool CountsVecCalcOnly = false);
VPBasicBlock *getEntry() { return Entry; }
const VPBasicBlock *getEntry() const { return Entry; }
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/maximize-bandwidth-conservatively.ll b/llvm/test/Transforms/LoopVectorize/AArch64/maximize-bandwidth-conservatively.ll
new file mode 100644
index 0000000000000..441669c5f6dc6
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/maximize-bandwidth-conservatively.ll
@@ -0,0 +1,58 @@
+; REQUIRES: asserts
+; RUN: opt < %s -mtriple aarch64-linux-gnu -mattr=+sve -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -debug-only=loop-vectorize 2>&1 | FileCheck %s
+; RUN: opt < %s -mtriple aarch64-linux-gnu -mattr=+sve -passes=loop-vectorize -vectorizer-maximize-bandwidth -vectorizer-maximize-bandwidth-conservatively -S -debug-only=loop-vectorize 2>&1 | FileCheck %s --check-prefix=CHECK-CONS
+
+define void @f(i32 %n, ptr noalias %a, ptr %b, ptr %c) {
+; The following loop is an example where choosing a larger vector width reduces
+; the number of instructions but may lead to performance degradation due to the
+; FP pipeline becoming a bottleneck.
+;
+; void f(int n, short *restrict a, long *b, double *c) {
+; for (int i = 0; i < n; i++) {
+; a[i] = b[i] + c[i];
+; }
+; }
+
+; In the usual cost model, vscale x 8 is chosen.
+; CHECK: Cost for VF vscale x 2: 8 (Estimated cost per lane: 4.0)
+; CHECK: Cost for VF vscale x 4: 14 (Estimated cost per lane: 3.5)
+; CHECK: Cost for VF vscale x 8: 26 (Estimated cost per lane: 3.2)
+; CHECK: LV: Selecting VF: vscale x 8.
+
+; In a conservative cost model, a larger vector width is chosen only if it is
+; superior when compared solely based on the cost of the FP pipeline, in
+; addition to the usual model.
+; CHECK-CONS: Cost for VF vscale x 2: 3 (Estimated cost per lane: 1.5)
+; CHECK-CONS: Cost for VF vscale x 4: 7 (Estimated cost per lane: 1.8)
+; CHECK-CONS: Cost for VF vscale x 8: 15 (Estimated cost per lane: 1.9)
+; CHECK-CONS: LV: Selecting VF: vscale x 2.
+
+entry:
+ %cmp10 = icmp sgt i32 %n, 0
+ br i1 %cmp10, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader: ; preds = %entry
+ %wide.trip.count = zext nneg i32 %n to i64
+ br label %for.body
+
+for.cond.cleanup.loopexit: ; preds = %for.body
+ br label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
+ ret void
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars....
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/156012