[llvm-branch-commits] [llvm] d945a2c - [AArch64][LoopVectorize] Introduce trip count minimal value threshold to ignore tail-folding.
Tobias Hieta via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu Aug 11 23:37:02 PDT 2022
Author: Dinar Temirbulatov
Date: 2022-08-12T08:35:41+02:00
New Revision: d945a2c9efda8f0a2d8a03cf48d5b5dc4076769d
URL: https://github.com/llvm/llvm-project/commit/d945a2c9efda8f0a2d8a03cf48d5b5dc4076769d
DIFF: https://github.com/llvm/llvm-project/commit/d945a2c9efda8f0a2d8a03cf48d5b5dc4076769d.diff
LOG: [AArch64][LoopVectorize] Introduce trip count minimal value threshold to ignore tail-folding.
After D121595 was commited, I noticed regressions assosicated with small trip
count numbersvectorisation by tail folding with scalable vectors. As a solution
for those issues I propose to introduce the minimal trip count threshold value.
Differential Revision: https://reviews.llvm.org/D130755
(cherry picked from commit cab6cd68340255be241b7cf169c67a1899ced115)
Added:
Modified:
llvm/include/llvm/Analysis/TargetTransformInfo.h
llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
llvm/lib/Analysis/TargetTransformInfo.cpp
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 6ea6d2361eba7..102b069ac722d 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1440,6 +1440,10 @@ class TargetTransformInfo {
/// to a stack reload.
unsigned getGISelRematGlobalCost() const;
+ /// \returns the lower bound of a trip count to decide on vectorization
+ /// while tail-folding.
+ unsigned getMinTripCountTailFoldingThreshold() const;
+
/// \returns True if the target supports scalable vectors.
bool supportsScalableVectors() const;
@@ -1830,6 +1834,7 @@ class TargetTransformInfo::Concept {
ReductionFlags) const = 0;
virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
virtual unsigned getGISelRematGlobalCost() const = 0;
+ virtual unsigned getMinTripCountTailFoldingThreshold() const = 0;
virtual bool enableScalableVectorization() const = 0;
virtual bool supportsScalableVectors() const = 0;
virtual bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
@@ -2453,6 +2458,10 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
return Impl.getGISelRematGlobalCost();
}
+ unsigned getMinTripCountTailFoldingThreshold() const override {
+ return Impl.getMinTripCountTailFoldingThreshold();
+ }
+
bool supportsScalableVectors() const override {
return Impl.supportsScalableVectors();
}
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 1a75cb35549e4..da1f53aa33cb0 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -803,6 +803,8 @@ class TargetTransformInfoImplBase {
unsigned getGISelRematGlobalCost() const { return 1; }
+ unsigned getMinTripCountTailFoldingThreshold() const { return 0; }
+
bool supportsScalableVectors() const { return false; }
bool enableScalableVectorization() const { return false; }
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index cfa6e3a976264..143f03ccac391 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1108,6 +1108,10 @@ unsigned TargetTransformInfo::getGISelRematGlobalCost() const {
return TTIImpl->getGISelRematGlobalCost();
}
+unsigned TargetTransformInfo::getMinTripCountTailFoldingThreshold() const {
+ return TTIImpl->getMinTripCountTailFoldingThreshold();
+}
+
bool TargetTransformInfo::supportsScalableVectors() const {
return TTIImpl->supportsScalableVectors();
}
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index 2231f8705998d..0c5eadeffcdbd 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -334,6 +334,10 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
return 2;
}
+ unsigned getMinTripCountTailFoldingThreshold() const {
+ return ST->hasSVE() ? 5 : 0;
+ }
+
PredicationStyle emitGetActiveLaneMask() const {
if (ST->hasSVE())
return PredicationStyle::DataAndControlFlow;
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 238b074089aa1..91bc7dbad1d04 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -10109,8 +10109,19 @@ bool LoopVectorizePass::processLoop(Loop *L) {
if (Hints.getForce() == LoopVectorizeHints::FK_Enabled)
LLVM_DEBUG(dbgs() << " But vectorizing was explicitly forced.\n");
else {
- LLVM_DEBUG(dbgs() << "\n");
- SEL = CM_ScalarEpilogueNotAllowedLowTripLoop;
+ if (*ExpectedTC > TTI->getMinTripCountTailFoldingThreshold()) {
+ LLVM_DEBUG(dbgs() << "\n");
+ SEL = CM_ScalarEpilogueNotAllowedLowTripLoop;
+ } else {
+ LLVM_DEBUG(dbgs() << " But the target considers the trip count too "
+ "small to consider vectorizing.\n");
+ reportVectorizationFailure(
+ "The trip count is below the minial threshold value.",
+ "loop trip count is too low, avoiding vectorization",
+ "LowTripCount", ORE, L);
+ Hints.emitRemarkWithHints();
+ return false;
+ }
}
}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll
index a97782aac2845..bc3e1c2513a11 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll
@@ -40,18 +40,22 @@ for.end: ; preds = %for.body
define void @trip5_i8(i8* noalias nocapture noundef %dst, i8* noalias nocapture noundef readonly %src) #0 {
; CHECK-LABEL: @trip5_i8(
-; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
-; CHECK: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 16 x i1> [ {{%.*}}, %vector.ph ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %vector.body ]
-; CHECK: {{%.*}} = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* {{%.*}}, i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK]], <vscale x 16 x i8> poison)
-; CHECK: {{%.*}} = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* {{%.*}}, i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK]], <vscale x 16 x i8> poison)
-; CHECK: call void @llvm.masked.store.nxv16i8.p0nxv16i8(<vscale x 16 x i8> {{%.*}}, <vscale x 16 x i8>* {{%.*}}, i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK]])
-; CHECK: [[VSCALE:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[VF:%.*]] = mul i64 [[VSCALE]], 16
-; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[VF]]
-; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX_NEXT]], i64 5)
-; CHECK-NEXT: [[ACTIVE_LANE_MASK_NOT:%.*]] = xor <vscale x 16 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i32 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer)
-; CHECK-NEXT: br i1 true, label %middle.block, label %vector.body
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[I_08:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[SRC:%.*]], i64 [[I_08]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[MUL:%.*]] = shl i8 [[TMP0]], 1
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[DST:%.*]], i64 [[I_08]]
+; CHECK-NEXT: [[TMP1:%.*]] = load i8, i8* [[ARRAYIDX1]], align 1
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[MUL]], [[TMP1]]
+; CHECK-NEXT: store i8 [[ADD]], i8* [[ARRAYIDX1]], align 1
+; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 5
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+; CHECK: for.end:
+; CHECK-NEXT: ret void
;
entry:
br label %for.body
More information about the llvm-branch-commits
mailing list