[llvm] cab6cd6 - [AArch64][LoopVectorize] Introduce trip count minimal value threshold to ignore tail-folding.
Dinar Temirbulatov via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 9 14:11:12 PDT 2022
Author: Dinar Temirbulatov
Date: 2022-08-09T22:10:17+01:00
New Revision: cab6cd68340255be241b7cf169c67a1899ced115
URL: https://github.com/llvm/llvm-project/commit/cab6cd68340255be241b7cf169c67a1899ced115
DIFF: https://github.com/llvm/llvm-project/commit/cab6cd68340255be241b7cf169c67a1899ced115.diff
LOG: [AArch64][LoopVectorize] Introduce trip count minimal value threshold to ignore tail-folding.
After D121595 was committed, I noticed regressions associated with the
vectorisation of small trip count loops by tail folding with scalable vectors. As a solution
for those issues I propose to introduce the minimal trip count threshold value.
Differential Revision: https://reviews.llvm.org/D130755
Added:
Modified:
llvm/include/llvm/Analysis/TargetTransformInfo.h
llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
llvm/lib/Analysis/TargetTransformInfo.cpp
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 43c2fce688452..c838df90aeb98 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1465,6 +1465,10 @@ class TargetTransformInfo {
/// to a stack reload.
unsigned getGISelRematGlobalCost() const;
+ /// \returns the lower bound of a trip count to decide on vectorization
+ /// while tail-folding.
+ unsigned getMinTripCountTailFoldingThreshold() const;
+
/// \returns True if the target supports scalable vectors.
bool supportsScalableVectors() const;
@@ -1863,6 +1867,7 @@ class TargetTransformInfo::Concept {
ReductionFlags) const = 0;
virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
virtual unsigned getGISelRematGlobalCost() const = 0;
+ virtual unsigned getMinTripCountTailFoldingThreshold() const = 0;
virtual bool enableScalableVectorization() const = 0;
virtual bool supportsScalableVectors() const = 0;
virtual bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
@@ -2502,6 +2507,10 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
return Impl.getGISelRematGlobalCost();
}
+ unsigned getMinTripCountTailFoldingThreshold() const override {
+ return Impl.getMinTripCountTailFoldingThreshold();
+ }
+
bool supportsScalableVectors() const override {
return Impl.supportsScalableVectors();
}
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index cf0995a9b0fe0..abc9116a5a2a4 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -817,6 +817,8 @@ class TargetTransformInfoImplBase {
unsigned getGISelRematGlobalCost() const { return 1; }
+ unsigned getMinTripCountTailFoldingThreshold() const { return 0; }
+
bool supportsScalableVectors() const { return false; }
bool enableScalableVectorization() const { return false; }
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 74b1083397bef..eba58a0dcce99 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1132,6 +1132,10 @@ unsigned TargetTransformInfo::getGISelRematGlobalCost() const {
return TTIImpl->getGISelRematGlobalCost();
}
+unsigned TargetTransformInfo::getMinTripCountTailFoldingThreshold() const {
+ return TTIImpl->getMinTripCountTailFoldingThreshold();
+}
+
bool TargetTransformInfo::supportsScalableVectors() const {
return TTIImpl->supportsScalableVectors();
}
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index c4e7135d8b57a..acd243d3f1736 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -335,6 +335,10 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
return 2;
}
+ unsigned getMinTripCountTailFoldingThreshold() const {
+ return ST->hasSVE() ? 5 : 0;
+ }
+
PredicationStyle emitGetActiveLaneMask() const {
if (ST->hasSVE())
return PredicationStyle::DataAndControlFlow;
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 368ab0078c0b9..c1d5677d16f78 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -10145,8 +10145,19 @@ bool LoopVectorizePass::processLoop(Loop *L) {
if (Hints.getForce() == LoopVectorizeHints::FK_Enabled)
LLVM_DEBUG(dbgs() << " But vectorizing was explicitly forced.\n");
else {
- LLVM_DEBUG(dbgs() << "\n");
- SEL = CM_ScalarEpilogueNotAllowedLowTripLoop;
+ if (*ExpectedTC > TTI->getMinTripCountTailFoldingThreshold()) {
+ LLVM_DEBUG(dbgs() << "\n");
+ SEL = CM_ScalarEpilogueNotAllowedLowTripLoop;
+ } else {
+ LLVM_DEBUG(dbgs() << " But the target considers the trip count too "
+ "small to consider vectorizing.\n");
+ reportVectorizationFailure(
+ "The trip count is below the minial threshold value.",
+ "loop trip count is too low, avoiding vectorization",
+ "LowTripCount", ORE, L);
+ Hints.emitRemarkWithHints();
+ return false;
+ }
}
}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll
index a97782aac2845..bc3e1c2513a11 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll
@@ -40,18 +40,22 @@ for.end: ; preds = %for.body
define void @trip5_i8(i8* noalias nocapture noundef %dst, i8* noalias nocapture noundef readonly %src) #0 {
; CHECK-LABEL: @trip5_i8(
-; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
-; CHECK: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 16 x i1> [ {{%.*}}, %vector.ph ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %vector.body ]
-; CHECK: {{%.*}} = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* {{%.*}}, i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK]], <vscale x 16 x i8> poison)
-; CHECK: {{%.*}} = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* {{%.*}}, i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK]], <vscale x 16 x i8> poison)
-; CHECK: call void @llvm.masked.store.nxv16i8.p0nxv16i8(<vscale x 16 x i8> {{%.*}}, <vscale x 16 x i8>* {{%.*}}, i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK]])
-; CHECK: [[VSCALE:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[VF:%.*]] = mul i64 [[VSCALE]], 16
-; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[VF]]
-; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX_NEXT]], i64 5)
-; CHECK-NEXT: [[ACTIVE_LANE_MASK_NOT:%.*]] = xor <vscale x 16 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i32 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer)
-; CHECK-NEXT: br i1 true, label %middle.block, label %vector.body
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[I_08:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[SRC:%.*]], i64 [[I_08]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[MUL:%.*]] = shl i8 [[TMP0]], 1
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[DST:%.*]], i64 [[I_08]]
+; CHECK-NEXT: [[TMP1:%.*]] = load i8, i8* [[ARRAYIDX1]], align 1
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[MUL]], [[TMP1]]
+; CHECK-NEXT: store i8 [[ADD]], i8* [[ARRAYIDX1]], align 1
+; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 5
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+; CHECK: for.end:
+; CHECK-NEXT: ret void
;
entry:
br label %for.body
More information about the llvm-commits
mailing list