[llvm] [VPlan] Track VPValues instead of VPRecipes in calculateRegisterUsage. (PR #155301)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 25 14:23:16 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-powerpc
Author: Florian Hahn (fhahn)
<details>
<summary>Changes</summary>
Update calculateRegisterUsageForPlan to track live-ness of VPValues instead of recipes. This gives slightly more accurate results for recipes that define multiple values (i.e. VPInterleaveRecipe).
When tracking the live-ness of recipes, all VPValues defined by an VPInterleaveRecipe are considered alive until the last use of any of them. When tracking the live-ness of individual VPValues, we can accurately track the individual values until their last use.
Note the changes in large-loop-rdx.ll and pr47437.ll. This patch restores the original behavior before introducing VPlan-based liveness tracking.
---
Patch is 56.31 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/155301.diff
5 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp (+4-1)
- (modified) llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp (+38-36)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/reg-usage.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/PowerPC/large-loop-rdx.ll (+207-55)
- (modified) llvm/test/Transforms/LoopVectorize/X86/pr47437.ll (+92-16)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 490f6391c15a0..defeb80118337 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -4891,7 +4891,9 @@ static bool shouldUnrollMultiExitLoop(Loop *L, ScalarEvolution &SE,
// a symbolic expression. Multi-exit loops with small known trip counts will
// likely be unrolled anyway.
const SCEV *BTC = SE.getSymbolicMaxBackedgeTakenCount(L);
- if (isa<SCEVConstant>(BTC) || isa<SCEVCouldNotCompute>(BTC))
+ if ((isa<SCEVConstant>(BTC) &&
+ !isa<SCEVConstant>(SE.getExitCount(L, L->getLoopLatch()))) ||
+ isa<SCEVCouldNotCompute>(BTC))
return false;
// It might not be worth unrolling loops with low max trip counts. Restrict
@@ -5111,6 +5113,7 @@ void AArch64TTIImpl::getUnrollingPreferences(
// Allow slightly more costly trip-count expansion to catch search loops
// with pointer inductions.
UP.SCEVExpansionBudget = 5;
+ UP.Partial = isa<SCEVConstant>(SE.getExitCount(L, L->getLoopLatch()));
return;
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index 747c6623aa22a..8e5997fa8befe 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -10,6 +10,7 @@
#include "VPlan.h"
#include "VPlanCFG.h"
#include "VPlanDominatorTree.h"
+#include "VPlanHelpers.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Analysis/ScalarEvolution.h"
@@ -396,7 +397,7 @@ bool VPDominatorTree::properlyDominates(const VPRecipeBase *A,
/// Get the VF scaling factor applied to the recipe's output, if the recipe has
/// one.
-static unsigned getVFScaleFactor(VPRecipeBase *R) {
+static unsigned getVFScaleFactor(VPValue *R) {
if (auto *RR = dyn_cast<VPReductionPHIRecipe>(R))
return RR->getVFScaleFactor();
if (auto *RR = dyn_cast<VPPartialReductionRecipe>(R))
@@ -422,15 +423,15 @@ SmallVector<VPRegisterUsage, 8> llvm::calculateRegisterUsageForPlan(
const SmallPtrSetImpl<const Value *> &ValuesToIgnore) {
// Each 'key' in the map opens a new interval. The values
// of the map are the index of the 'last seen' usage of the
- // recipe that is the key.
- using IntervalMap = SmallDenseMap<VPRecipeBase *, unsigned, 16>;
+ // VPValue that is the key.
+ using IntervalMap = SmallDenseMap<VPValue *, unsigned, 16>;
// Maps indices to recipes.
SmallVector<VPRecipeBase *, 64> Idx2Recipe;
// Marks the end of each interval.
IntervalMap EndPoint;
- // Saves the list of recipe indices that are used in the loop.
- SmallPtrSet<VPRecipeBase *, 8> Ends;
+ // Saves the list of VPValues that are used in the loop.
+ SmallPtrSet<VPValue *, 8> Ends;
// Saves the list of values that are used in the loop but are defined outside
// the loop (not including non-recipe values such as arguments and
// constants).
@@ -441,7 +442,7 @@ SmallVector<VPRegisterUsage, 8> llvm::calculateRegisterUsageForPlan(
// each recipe. We use RPO to ensure that defs are met before their users. We
// assume that each recipe that has in-loop users starts an interval. We
// record every time that an in-loop value is used, so we have a list of the
- // first and last occurrences of each recipe.
+ // first occurences of each recipe and last occurrence of each VPValue.
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
LoopRegion);
@@ -470,32 +471,32 @@ SmallVector<VPRegisterUsage, 8> llvm::calculateRegisterUsageForPlan(
}
// Overwrite previous end points.
- EndPoint[DefR] = Idx2Recipe.size();
- Ends.insert(DefR);
+ EndPoint[U] = Idx2Recipe.size();
+ Ends.insert(U);
}
}
if (VPBB == LoopRegion->getExiting()) {
// VPWidenIntOrFpInductionRecipes are used implicitly at the end of the
// exiting block, where their increment will get materialized eventually.
for (auto &R : LoopRegion->getEntryBasicBlock()->phis()) {
- if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
- EndPoint[&R] = Idx2Recipe.size();
- Ends.insert(&R);
+ if (auto *WideIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R)) {
+ EndPoint[WideIV] = Idx2Recipe.size();
+ Ends.insert(WideIV);
}
}
}
}
// Saves the list of intervals that end with the index in 'key'.
- using RecipeList = SmallVector<VPRecipeBase *, 2>;
- SmallDenseMap<unsigned, RecipeList, 16> TransposeEnds;
+ using VPValueList = SmallVector<VPValue *, 2>;
+ SmallDenseMap<unsigned, VPValueList, 16> TransposeEnds;
// Next, we transpose the EndPoints into a multi map that holds the list of
// intervals that *end* at a specific location.
for (auto &Interval : EndPoint)
TransposeEnds[Interval.second].push_back(Interval.first);
- SmallPtrSet<VPRecipeBase *, 8> OpenIntervals;
+ SmallPtrSet<VPValue *, 8> OpenIntervals;
SmallVector<VPRegisterUsage, 8> RUs(VFs.size());
SmallVector<SmallMapVector<unsigned, unsigned, 4>, 8> MaxUsages(VFs.size());
@@ -519,14 +520,16 @@ SmallVector<VPRegisterUsage, 8> llvm::calculateRegisterUsageForPlan(
for (unsigned int Idx = 0, Sz = Idx2Recipe.size(); Idx < Sz; ++Idx) {
VPRecipeBase *R = Idx2Recipe[Idx];
- // Remove all of the recipes that end at this location.
- RecipeList &List = TransposeEnds[Idx];
- for (VPRecipeBase *ToRemove : List)
+ // Remove all of the VPValues that end at this location.
+ VPValueList &List = TransposeEnds[Idx];
+ for (VPValue *ToRemove : List)
OpenIntervals.erase(ToRemove);
// Ignore recipes that are never used within the loop and do not have side
// effects.
- if (!Ends.count(R) && !R->mayHaveSideEffects())
+ if (all_of(R->definedValues(),
+ [&Ends](VPValue *Def) { return !Ends.count(Def); }) &&
+ !R->mayHaveSideEffects())
continue;
// Skip recipes for ignored values.
@@ -546,41 +549,38 @@ SmallVector<VPRegisterUsage, 8> llvm::calculateRegisterUsageForPlan(
// there is no previous entry for ClassID.
SmallMapVector<unsigned, unsigned, 4> RegUsage;
- for (auto *R : OpenIntervals) {
- // Skip recipes that weren't present in the original loop.
+ for (auto *VPV : OpenIntervals) {
+ // Skip values that weren't present in the original loop.
// TODO: Remove after removing the legacy
// LoopVectorizationCostModel::calculateRegisterUsage
if (isa<VPVectorPointerRecipe, VPVectorEndPointerRecipe,
- VPBranchOnMaskRecipe>(R))
+ VPBranchOnMaskRecipe>(VPV))
continue;
if (VFs[J].isScalar() ||
isa<VPCanonicalIVPHIRecipe, VPReplicateRecipe, VPDerivedIVRecipe,
- VPEVLBasedIVPHIRecipe, VPScalarIVStepsRecipe>(R) ||
- (isa<VPInstruction>(R) &&
- vputils::onlyScalarValuesUsed(cast<VPSingleDefRecipe>(R))) ||
- (isa<VPReductionPHIRecipe>(R) &&
- (cast<VPReductionPHIRecipe>(R))->isInLoop())) {
- unsigned ClassID = TTI.getRegisterClassForType(
- false, TypeInfo.inferScalarType(R->getVPSingleValue()));
+ VPEVLBasedIVPHIRecipe, VPScalarIVStepsRecipe>(VPV) ||
+ (isa<VPInstruction>(VPV) && vputils::onlyScalarValuesUsed(VPV)) ||
+ (isa<VPReductionPHIRecipe>(VPV) &&
+ (cast<VPReductionPHIRecipe>(VPV))->isInLoop())) {
+ unsigned ClassID =
+ TTI.getRegisterClassForType(false, TypeInfo.inferScalarType(VPV));
// FIXME: The target might use more than one register for the type
// even in the scalar case.
RegUsage[ClassID] += 1;
} else {
// The output from scaled phis and scaled reductions actually has
// fewer lanes than the VF.
- unsigned ScaleFactor = getVFScaleFactor(R);
+ unsigned ScaleFactor = getVFScaleFactor(VPV);
ElementCount VF = VFs[J].divideCoefficientBy(ScaleFactor);
LLVM_DEBUG(if (VF != VFs[J]) {
dbgs() << "LV(REG): Scaled down VF from " << VFs[J] << " to " << VF
<< " for " << *R << "\n";
});
- for (VPValue *DefV : R->definedValues()) {
- Type *ScalarTy = TypeInfo.inferScalarType(DefV);
- unsigned ClassID = TTI.getRegisterClassForType(true, ScalarTy);
- RegUsage[ClassID] += GetRegUsage(ScalarTy, VF);
- }
+ Type *ScalarTy = TypeInfo.inferScalarType(VPV);
+ unsigned ClassID = TTI.getRegisterClassForType(true, ScalarTy);
+ RegUsage[ClassID] += GetRegUsage(ScalarTy, VF);
}
}
@@ -593,8 +593,10 @@ SmallVector<VPRegisterUsage, 8> llvm::calculateRegisterUsageForPlan(
LLVM_DEBUG(dbgs() << "LV(REG): At #" << Idx << " Interval # "
<< OpenIntervals.size() << '\n');
- // Add the current recipe to the list of open intervals.
- OpenIntervals.insert(R);
+ // Add the VPValues defined by the current recipe to the list of open
+ // intervals.
+ for (VPValue *DefV : R->definedValues())
+ OpenIntervals.insert(DefV);
}
// We also search for instructions that are defined outside the loop, but are
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/reg-usage.ll b/llvm/test/Transforms/LoopVectorize/AArch64/reg-usage.ll
index e51a925040a49..23cc7e367776a 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/reg-usage.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/reg-usage.ll
@@ -74,7 +74,7 @@ define dso_local void @dotp_high_register_pressure(ptr %a, ptr %b, ptr %sum, i32
; CHECK: LV(REG): VF = 16
; CHECK-NEXT: LV(REG): Found max usage: 2 item
; CHECK-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 3 registers
-; CHECK-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 48 registers
+; CHECK-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 47 registers
; CHECK-NEXT: LV(REG): Found invariant usage: 1 item
entry:
%cmp100 = icmp sgt i32 %n, 0
diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/large-loop-rdx.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/large-loop-rdx.ll
index 0b23206134bc0..43cce8005bbf6 100644
--- a/llvm/test/Transforms/LoopVectorize/PowerPC/large-loop-rdx.ll
+++ b/llvm/test/Transforms/LoopVectorize/PowerPC/large-loop-rdx.ll
@@ -10,28 +10,43 @@ define void @QLA_F3_r_veq_norm2_V(ptr noalias %r, ptr noalias %a, i32 %n) {
; CHECK-SAME: ptr noalias [[R:%.*]], ptr noalias [[A:%.*]], i32 [[N:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[CMP24:%.*]] = icmp sgt i32 [[N]], 0
-; CHECK-NEXT: br i1 [[CMP24]], label %[[FOR_COND1_PREHEADER_PREHEADER:.*]], label %[[FOR_END13:.*]]
-; CHECK: [[FOR_COND1_PREHEADER_PREHEADER]]:
+; CHECK-NEXT: br i1 [[CMP24]], label %[[ITER_CHECK:.*]], label %[[FOR_END13:.*]]
+; CHECK: [[ITER_CHECK]]:
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64
-; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 8
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 2
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
+; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
+; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP0]], 16
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 8
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 16
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP69:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP65:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <2 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP66:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP67:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <2 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP65:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP66:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <2 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP131:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP25:%.*]] = phi <2 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP132:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP26:%.*]] = phi <2 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP133:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP27:%.*]] = phi <2 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP134:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP28:%.*]] = phi <2 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP135:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 6
+; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 8
+; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 10
+; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 12
+; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 14
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x { float, float }], ptr [[A]], i64 [[INDEX]], i64 0, i32 0
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x { float, float }], ptr [[A]], i64 [[TMP1]], i64 0, i32 0
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x { float, float }], ptr [[A]], i64 [[TMP2]], i64 0, i32 0
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x { float, float }], ptr [[A]], i64 [[TMP3]], i64 0, i32 0
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x { float, float }], ptr [[A]], i64 [[TMP4]], i64 0, i32 0
+; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [3 x { float, float }], ptr [[A]], i64 [[TMP5]], i64 0, i32 0
+; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x { float, float }], ptr [[A]], i64 [[TMP6]], i64 0, i32 0
+; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [3 x { float, float }], ptr [[A]], i64 [[TMP7]], i64 0, i32 0
; CHECK-NEXT: [[WIDE_VEC35:%.*]] = load <12 x float>, ptr [[TMP13]], align 8
; CHECK-NEXT: [[STRIDED_VEC36:%.*]] = shufflevector <12 x float> [[WIDE_VEC35]], <12 x float> poison, <2 x i32> <i32 0, i32 6>
; CHECK-NEXT: [[STRIDED_VEC37:%.*]] = shufflevector <12 x float> [[WIDE_VEC35]], <12 x float> poison, <2 x i32> <i32 1, i32 7>
@@ -60,116 +75,252 @@ define void @QLA_F3_r_veq_norm2_V(ptr noalias %r, ptr noalias %a, i32 %n) {
; CHECK-NEXT: [[STRIDED_VEC60:%.*]] = shufflevector <12 x float> [[WIDE_VEC56]], <12 x float> poison, <2 x i32> <i32 3, i32 9>
; CHECK-NEXT: [[STRIDED_VEC61:%.*]] = shufflevector <12 x float> [[WIDE_VEC56]], <12 x float> poison, <2 x i32> <i32 4, i32 10>
; CHECK-NEXT: [[STRIDED_VEC62:%.*]] = shufflevector <12 x float> [[WIDE_VEC56]], <12 x float> poison, <2 x i32> <i32 5, i32 11>
+; CHECK-NEXT: [[WIDE_VEC36:%.*]] = load <12 x float>, ptr [[TMP12]], align 8
+; CHECK-NEXT: [[STRIDED_VEC42:%.*]] = shufflevector <12 x float> [[WIDE_VEC36]], <12 x float> poison, <2 x i32> <i32 0, i32 6>
+; CHECK-NEXT: [[STRIDED_VEC49:%.*]] = shufflevector <12 x float> [[WIDE_VEC36]], <12 x float> poison, <2 x i32> <i32 1, i32 7>
+; CHECK-NEXT: [[STRIDED_VEC56:%.*]] = shufflevector <12 x float> [[WIDE_VEC36]], <12 x float> poison, <2 x i32> <i32 2, i32 8>
+; CHECK-NEXT: [[STRIDED_VEC63:%.*]] = shufflevector <12 x float> [[WIDE_VEC36]], <12 x float> poison, <2 x i32> <i32 3, i32 9>
+; CHECK-NEXT: [[STRIDED_VEC64:%.*]] = shufflevector <12 x float> [[WIDE_VEC36]], <12 x float> poison, <2 x i32> <i32 4, i32 10>
+; CHECK-NEXT: [[STRIDED_VEC65:%.*]] = shufflevector <12 x float> [[WIDE_VEC36]], <12 x float> poison, <2 x i32> <i32 5, i32 11>
+; CHECK-NEXT: [[WIDE_VEC43:%.*]] = load <12 x float>, ptr [[TMP17]], align 8
+; CHECK-NEXT: [[STRIDED_VEC66:%.*]] = shufflevector <12 x float> [[WIDE_VEC43]], <12 x float> poison, <2 x i32> <i32 0, i32 6>
+; CHECK-NEXT: [[STRIDED_VEC67:%.*]] = shufflevector <12 x float> [[WIDE_VEC43]], <12 x float> poison, <2 x i32> <i32 1, i32 7>
+; CHECK-NEXT: [[STRIDED_VEC68:%.*]] = shufflevector <12 x float> [[WIDE_VEC43]], <12 x float> poison, <2 x i32> <i32 2, i32 8>
+; CHECK-NEXT: [[STRIDED_VEC69:%.*]] = shufflevector <12 x float> [[WIDE_VEC43]], <12 x float> poison, <2 x i32> <i32 3, i32 9>
+; CHECK-NEXT: [[STRIDED_VEC70:%.*]] = shufflevector <12 x float> [[WIDE_VEC43]], <12 x float> poison, <2 x i32> <i32 4, i32 10>
+; CHECK-NEXT: [[STRIDED_VEC71:%.*]] = shufflevector <12 x float> [[WIDE_VEC43]], <12 x float> poison, <2 x i32> <i32 5, i32 11>
+; CHECK-NEXT: [[WIDE_VEC50:%.*]] = load <12 x float>, ptr [[TMP18]], align 8
+; CHECK-NEXT: [[STRIDED_VEC72:%.*]] = shufflevector <12 x float> [[WIDE_VEC50]], <12 x float> poison, <2 x i32> <i32 0, i32 6>
+; CHECK-NEXT: [[STRIDED_VEC73:%.*]] = shufflevector <12 x float> [[WIDE_VEC50]], <12 x float> poison, <2 x i32> <i32 1, i32 7>
+; CHECK-NEXT: [[STRIDED_VEC80:%.*]] = shufflevector <12 x float> [[WIDE_VEC50]], <12 x float> poison, <2 x i32> <i32 2, i32 8>
+; CHECK-NEXT: [[STRIDED_VEC81:%.*]] = shufflevector <12 x float> [[WIDE_VEC50]], <12 x float> poison, <2 x i32> <i32 3, i32 9>
+; CHECK-NEXT: [[STRIDED_VEC82:%.*]] = shufflevector <12 x float> [[WIDE_VEC50]], <12 x float> poison, <2 x i32> <i32 4, i32 10>
+; CHECK-NEXT: [[STRIDED_VEC83:%.*]] = shufflevector <12 x float> [[WIDE_VEC50]], <12 x float> poison, <2 x i32> <i32 5, i32 11>
+; CHECK-NEXT: [[WIDE_VEC57:%.*]] = load <12 x float>, ptr [[TMP19]], align 8
+; CHECK-NEXT: [[STRIDED_VEC84:%.*]] = shufflevector <12 x float> [[WIDE_VEC57]], <12 x float> poison, <2 x i32> <i32 0, i32 6>
+; CHECK-NEXT: [[STRIDED_VEC85:%.*]] = shufflevector <12 x float> [[WIDE_VEC57]], <12 x float> poison, <2 x i32> <i32 1, i32 7>
+; CHECK-NEXT: [[STRIDED_VEC86:%.*]] = shufflevector <12 x float> [[WIDE_VEC57]], <12 x float> poison, <2 x i32> <i32 2, i32 8>
+; CHECK-NEXT: [[STRIDED_VEC87:%.*]] = shufflevector <12 x float> [[WIDE_VEC57]], <12 x float> poison, <2 x i32> <i32 3, i32 9>
+; CHECK-NEXT: [[STRIDED_VEC88:%.*]] = shufflevector <12 x float> [[WIDE_VEC57]], <12 x float> poison, <2 x i32> <i32 4, i32 10>
+; CHECK-NEXT: [[STRIDED_VEC89:%.*]] = shufflevector <12 x float> [[WIDE_VEC57]], <12 x float> poison, <2 x i32> <i32 5, i32 11>
; CHECK-NEXT: [[TMP64:%.*]] = fmul fast <2 x float> [[STRIDED_VEC36]], [[STRIDED_VEC36]]
; CHECK-NEXT: [[TMP97:%.*]] = fmul fast <2 x float> [[STRIDED_VEC43]], [[STRIDED_VEC43]]
; CHECK-NEXT: [[TMP98:%.*]] = fmul fast <2 x float> [[STRIDED_VEC50]], [[STRIDED_VEC50]]
; CHECK-NEXT: [[TMP99:%.*]] = fmul fast <2 x float> [[STRIDED_VEC57]], [[STRIDED_VEC57]]
+; CHECK-NEXT: [[TMP100:%.*]] = fmul fast <2 x float> [[STRIDED_VEC42]], [[STRIDED_VEC42]]
+; CHECK-NEXT: [[TMP101:%.*]] = fmul fast <2 x float> [[STRIDED_VEC66]], [[STRIDED_VEC66]]
+; CHECK-NEXT: [[TMP102:%.*]] = fmul fast <2 x float> [[STRIDED_VEC72]], [[STRIDED_VEC72]]
+; CHECK-NEXT: [[TMP103:%.*]] = fmul fast <2 x float> [[STRIDED_VEC84]], [[STRIDED_VEC84]]
; CHECK-NEXT: [[TMP72:%.*]] = fmul fast <2 x float> [[STRIDED_VEC37]], [[STRIDED_VEC37]]
; CHECK-NEXT: [[TMP105:%.*]] = fmul fast <2 x float> [[STRIDED_VEC44]], [[STRIDED_VEC44]]
; CHECK-NEXT: [[TMP106:%.*]] = fmul fast <2 x float> [[STRIDED_VEC51]], [[STRIDED_VEC51]]
; CHECK-NEXT: [[TMP107:%.*]] = fmul fast <2 x float> [[STRIDED_VEC58]], [[STRIDED_VEC58]]
+; CHECK-NEXT: [[TMP108:%.*]] = fmul fast <2 x float> [[STRIDED_VEC49]], [[STRIDED_VEC49]]
+; CHECK-NEXT: [[TMP109:%.*]] = fmul fa...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/155301
More information about the llvm-commits
mailing list