[llvm] 4ee45ab - [LV] Invalidate cost model decisions along with interleave groups.
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Sat Apr 18 02:24:29 PDT 2020
Author: Florian Hahn
Date: 2020-04-18T10:23:49+01:00
New Revision: 4ee45ab60f8639375296f8b7b96e2eb5e8a2c9d3
URL: https://github.com/llvm/llvm-project/commit/4ee45ab60f8639375296f8b7b96e2eb5e8a2c9d3
DIFF: https://github.com/llvm/llvm-project/commit/4ee45ab60f8639375296f8b7b96e2eb5e8a2c9d3.diff
LOG: [LV] Invalidate cost model decisions along with interleave groups.
Cost-modeling decisions are tied to the computed interleave groups
(widening decisions, scalar and uniform values). When invalidating the
interleave groups, those decisions also need to be invalidated.
Otherwise there is a mismatch during VPlan construction.
VPWidenMemoryRecipes created initially are left around w/o converting them
into VPInterleave recipes. Such a conversion indeed should not take place,
and these gather/scatter recipes may in fact be right. The crux is leaving around
obsolete CM_Interleave (and dependent) markings of instructions along with
their costs, instead of recalculating decisions, costs, and recipes.
As an alternative to forcing a complete recompute later on, we could try
to selectively invalidate the decisions connected to the interleave
groups. But we would likely need to run the uniform/scalar value
detection parts again anyway, and the extra complexity is probably not
worth it.
Fixes PR45572.
Reviewers: gilr, rengolin, Ayal, hsaito
Reviewed By: Ayal
Differential Revision: https://reviews.llvm.org/D78298
Added:
llvm/test/Transforms/LoopVectorize/Hexagon/invalidate-cm-after-invalidating-interleavegroups.ll
Modified:
llvm/include/llvm/Analysis/VectorUtils.h
llvm/lib/Analysis/VectorUtils.cpp
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h
index 2b680e8131c4..89a2ea292209 100644
--- a/llvm/include/llvm/Analysis/VectorUtils.h
+++ b/llvm/include/llvm/Analysis/VectorUtils.h
@@ -698,7 +698,7 @@ class InterleavedAccessInfo {
const LoopAccessInfo *LAI)
: PSE(PSE), TheLoop(L), DT(DT), LI(LI), LAI(LAI) {}
- ~InterleavedAccessInfo() { reset(); }
+ ~InterleavedAccessInfo() { invalidateGroups(); }
/// Analyze the interleaved accesses and collect them in interleave
/// groups. Substitute symbolic strides using \p Strides.
@@ -709,16 +709,24 @@ class InterleavedAccessInfo {
/// Invalidate groups, e.g., in case all blocks in loop will be predicated
/// contrary to original assumption. Although we currently prevent group
/// formation for predicated accesses, we may be able to relax this limitation
- /// in the future once we handle more complicated blocks.
- void reset() {
+ /// in the future once we handle more complicated blocks. Returns true if any
+ /// groups were invalidated.
+ bool invalidateGroups() {
+ if (InterleaveGroups.empty()) {
+ assert(
+ !RequiresScalarEpilogue &&
+ "RequiresScalarEpilog should not be set without interleave groups");
+ return false;
+ }
+
InterleaveGroupMap.clear();
for (auto *Ptr : InterleaveGroups)
delete Ptr;
InterleaveGroups.clear();
RequiresScalarEpilogue = false;
+ return true;
}
-
/// Check if \p Instr belongs to any interleave group.
bool isInterleaved(Instruction *Instr) const {
return InterleaveGroupMap.find(Instr) != InterleaveGroupMap.end();
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index 0d411485ddd9..7cbcb17a9a3d 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -1243,6 +1243,8 @@ void InterleavedAccessInfo::invalidateGroupsRequiringScalarEpilogue() {
if (Group->requiresScalarEpilogue())
DelSet.insert(Group);
}
+ assert(!DelSet.empty() && "At least one group must be invalidated, as a "
+ "scalar epilogue was required");
for (auto *Ptr : DelSet) {
LLVM_DEBUG(
dbgs()
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 7eaab8b0b739..27e3c93f7af8 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1309,6 +1309,13 @@ class LoopVectorizationCostModel {
/// i.e. either vector version isn't available, or is too expensive.
unsigned getVectorCallCost(CallInst *CI, unsigned VF, bool &NeedToScalarize);
+ /// Invalidates decisions already taken by the cost model.
+ void invalidateCostModelingDecisions() {
+ WideningDecisions.clear();
+ Uniforms.clear();
+ Scalars.clear();
+ }
+
private:
unsigned NumPredStores = 0;
@@ -4977,8 +4984,13 @@ Optional<unsigned> LoopVectorizationCostModel::computeMaxVF() {
// Invalidate interleave groups that require an epilogue if we can't mask
// the interleave-group.
- if (!useMaskedInterleavedAccesses(TTI))
+ if (!useMaskedInterleavedAccesses(TTI)) {
+ assert(WideningDecisions.empty() && Uniforms.empty() && Scalars.empty() &&
+ "No decisions should have been taken at this point");
+ // Note: There is no need to invalidate any cost modeling decisions here, as
+ // none were taken so far.
InterleaveInfo.invalidateGroupsRequiringScalarEpilogue();
+ }
unsigned MaxVF = computeFeasibleMaxVF(TC);
if (TC > 0 && TC % MaxVF == 0) {
@@ -6517,7 +6529,11 @@ Optional<VectorizationFactor> LoopVectorizationPlanner::plan(unsigned UserVF) {
dbgs()
<< "LV: Invalidate all interleaved groups due to fold-tail by masking "
"which requires masked-interleaved support.\n");
- CM.InterleaveInfo.reset();
+ if (CM.InterleaveInfo.invalidateGroups())
+ // Invalidating interleave groups also requires invalidating all decisions
+ // based on them, which includes widening decisions and uniform and scalar
+ // values.
+ CM.invalidateCostModelingDecisions();
}
if (UserVF) {
diff --git a/llvm/test/Transforms/LoopVectorize/Hexagon/invalidate-cm-after-invalidating-interleavegroups.ll b/llvm/test/Transforms/LoopVectorize/Hexagon/invalidate-cm-after-invalidating-interleavegroups.ll
new file mode 100644
index 000000000000..5ce7eab25156
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/Hexagon/invalidate-cm-after-invalidating-interleavegroups.ll
@@ -0,0 +1,96 @@
+; RUN: opt -loop-vectorize -hexagon-autohvx=1 -force-vector-width=64 -prefer-predicate-over-epilog -S %s | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
+target triple = "hexagon"
+
+; Test for PR45572.
+
+; Check that interleave groups and decisions based on them are correctly
+; invalidated with tail-folding on platforms where masked interleaved accesses
+; are disabled.
+
+; Make sure a vector body has been created, 64 element vectors are used and a block predicate has been computed.
+; Also make sure the loads are not widened.
+
+; CHECK-LABEL: @test1
+; CHECK: vector.body:
+; CHECK: %induction = add <64 x i32>
+; CHECK: icmp ule <64 x i32> %induction
+; CHECK-NOT: load <{{.*}} x i32>
+
+
+define void @test1(i32* %arg, i32 %N) #0 {
+entry:
+ %tmp = alloca i8
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %iv = phi i32 [ %iv.next, %loop], [ 0, %entry ]
+ %idx.mul = mul nuw nsw i32 %iv, 7
+ %idx.start = add nuw nsw i32 %idx.mul, 1
+ %tmp6 = getelementptr inbounds i32, i32* %arg, i32 %idx.start
+ %tmp7 = load i32, i32* %tmp6, align 4
+ %tmp8 = add nuw nsw i32 %idx.start, 1
+ %tmp9 = getelementptr inbounds i32, i32* %arg, i32 %tmp8
+ %tmp10 = load i32, i32* %tmp9, align 4
+ %tmp11 = add nuw nsw i32 %idx.start, 2
+ %tmp12 = getelementptr inbounds i32, i32* %arg, i32 %tmp11
+ %tmp13 = load i32, i32* %tmp12, align 4
+ %tmp14 = add nuw nsw i32 %idx.start, 3
+ %tmp15 = getelementptr inbounds i32, i32* %arg, i32 %tmp14
+ %tmp16 = load i32, i32* %tmp15, align 4
+ %tmp18 = add nuw nsw i32 %idx.start, 4
+ %tmp19 = getelementptr inbounds i32, i32* %arg, i32 %tmp18
+ %tmp20 = load i32, i32* %tmp19, align 4
+ %tmp21 = add nuw nsw i32 %idx.start, 5
+ %tmp22 = getelementptr inbounds i32, i32* %arg, i32 %tmp21
+ %tmp23 = load i32, i32* %tmp22, align 4
+ %tmp25 = add nuw nsw i32 %idx.start, 6
+ %tmp26 = getelementptr inbounds i32, i32* %arg, i32 %tmp25
+ %tmp27 = load i32, i32* %tmp26, align 4
+ store i8 0, i8* %tmp, align 1
+ %iv.next= add nuw nsw i32 %iv, 1
+ %exit.cond = icmp eq i32 %iv.next, %N
+ br i1 %exit.cond, label %exit, label %loop
+
+exit: ; preds = %loop
+ ret void
+}
+
+; The loop below only requires tail folding due to interleave groups with gaps.
+; Make sure the loads are not widened.
+
+; CHECK-LABEL: @test2
+; CHECK: vector.body:
+; CHECK-NOT: load <{{.*}} x i32>
+define void @test2(i32* %arg) #1 {
+entry:
+ %tmp = alloca i8
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %iv = phi i32 [ %iv.next, %loop], [ 0, %entry ]
+ %idx.start = mul nuw nsw i32 %iv, 5
+ %tmp6 = getelementptr inbounds i32, i32* %arg, i32 %idx.start
+ %tmp7 = load i32, i32* %tmp6, align 4
+ %tmp8 = add nuw nsw i32 %idx.start, 1
+ %tmp9 = getelementptr inbounds i32, i32* %arg, i32 %tmp8
+ %tmp10 = load i32, i32* %tmp9, align 4
+ %tmp11 = add nuw nsw i32 %idx.start, 2
+ %tmp12 = getelementptr inbounds i32, i32* %arg, i32 %tmp11
+ %tmp13 = load i32, i32* %tmp12, align 4
+ %tmp14 = add nuw nsw i32 %idx.start, 3
+ %tmp15 = getelementptr inbounds i32, i32* %arg, i32 %tmp14
+ %tmp16 = load i32, i32* %tmp15, align 4
+ store i8 0, i8* %tmp, align 1
+ %iv.next= add nuw nsw i32 %iv, 1
+ %exit.cond = icmp eq i32 %iv.next, 128
+ br i1 %exit.cond, label %exit, label %loop
+
+exit: ; preds = %loop
+ ret void
+}
+
+
+attributes #0 = { "target-features"="+hvx,+hvx-length128b" }
+attributes #1 = { optsize "target-features"="+hvx,+hvx-length128b" }
More information about the llvm-commits
mailing list