[llvm] efaf309 - [LV] Print remark when loop cannot be vectorized due to invalid costs.
Sander de Smalen via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 14 09:11:50 PDT 2021
Author: Sander de Smalen
Date: 2021-07-14T17:11:33+01:00
New Revision: efaf3099c8cec1954831ee28a2f75a72096f50eb
URL: https://github.com/llvm/llvm-project/commit/efaf3099c8cec1954831ee28a2f75a72096f50eb
DIFF: https://github.com/llvm/llvm-project/commit/efaf3099c8cec1954831ee28a2f75a72096f50eb.diff
LOG: [LV] Print remark when loop cannot be vectorized due to invalid costs.
This patch emits remarks for instructions that have invalid costs for
a given set of vectorization factors. Some example output:
t.c:4:19: remark: Instruction with invalid costs prevented vectorization at VF=(vscale x 1): load
dst[i] = sinf(src[i]);
^
t.c:4:14: remark: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2, vscale x 4): call to llvm.sin.f32
dst[i] = sinf(src[i]);
^
t.c:4:12: remark: Instruction with invalid costs prevented vectorization at VF=(vscale x 1): store
dst[i] = sinf(src[i]);
^
Reviewed By: fhahn, kmclaughlin
Differential Revision: https://reviews.llvm.org/D105806
Added:
Modified:
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 71f16a86a0bf3..e70c7f1e71950 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1676,8 +1676,13 @@ class LoopVectorizationCostModel {
/// Returns the expected execution cost. The unit of the cost does
/// not matter because we use the 'cost' units to compare different
/// vector widths. The cost that is returned is *not* normalized by
- /// the factor width.
- VectorizationCostTy expectedCost(ElementCount VF);
+ /// the factor width. If \p Invalid is not nullptr, this function
+ /// will add a pair(Instruction*, ElementCount) to \p Invalid for
+ /// each instruction that has an Invalid cost for the given VF.
+ using InstructionVFPair = std::pair<Instruction *, ElementCount>;
+ VectorizationCostTy
+ expectedCost(ElementCount VF,
+ SmallVectorImpl<InstructionVFPair> *Invalid = nullptr);
/// Returns the execution time cost of an instruction for a given vector
/// width. Vector width of one means scalar.
@@ -6075,12 +6080,13 @@ VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor(
ChosenFactor.Cost = InstructionCost::getMax();
}
+ SmallVector<InstructionVFPair> InvalidCosts;
for (const auto &i : VFCandidates) {
// The cost for scalar VF=1 is already calculated, so ignore it.
if (i.isScalar())
continue;
- VectorizationCostTy C = expectedCost(i);
+ VectorizationCostTy C = expectedCost(i, &InvalidCosts);
VectorizationFactor Candidate(i, C.first);
LLVM_DEBUG(
dbgs() << "LV: Vector loop of width " << i << " costs: "
@@ -6103,6 +6109,55 @@ VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor(
ChosenFactor = Candidate;
}
+ // Emit a report of VFs with invalid costs in the loop.
+ if (!InvalidCosts.empty()) {
+ // Sort/group per instruction
+ llvm::sort(InvalidCosts, [](InstructionVFPair &A, InstructionVFPair &B) {
+ ElementCountComparator ECC;
+ return A.first->comesBefore(B.first) || ECC(A.second, B.second);
+ });
+
+ // For a list of ordered instruction-vf pairs:
+ // [(load, vf1), (load, vf2), (store, vf1)]
+ // Group the instructions together to emit separate remarks for:
+ // load (vf1, vf2)
+ // store (vf1)
+ auto Tail = ArrayRef<InstructionVFPair>(InvalidCosts);
+ auto Subset = ArrayRef<InstructionVFPair>();
+ do {
+ if (Subset.empty())
+ Subset = Tail.take_front(1);
+
+ Instruction *I = Subset.front().first;
+
+ // If the next instruction is different, or if there are no other pairs,
+ // emit a remark for the collated subset. e.g.
+ // [(load, vf1), (load, vf2))]
+ // to emit:
+ // remark: invalid costs for 'load' at VF=(vf, vf2)
+ if (Subset == Tail || Tail[Subset.size()].first != I) {
+ std::string OutString;
+ raw_string_ostream OS(OutString);
+ assert(!Subset.empty() && "Unexpected empty range");
+ OS << "Instruction with invalid costs prevented vectorization at VF=(";
+ for (auto &Pair : Subset)
+ OS << (Pair.second == Subset.front().second ? "" : ", ")
+ << Pair.second;
+ OS << "):";
+ if (auto *CI = dyn_cast<CallInst>(I))
+ OS << " call to " << CI->getCalledFunction()->getName();
+ else
+ OS << " " << I->getOpcodeName();
+ OS.flush();
+ reportVectorizationInfo(OutString, "InvalidCost", ORE, TheLoop, I);
+ Tail = Tail.drop_front(Subset.size());
+ Subset = {};
+ } else
+ // Grow the subset by one element
+ Subset = Tail.take_front(Subset.size() + 1);
+ } while (!Tail.empty());
+ }
+
if (!EnableCondStoresVectorization && NumPredStores) {
reportVectorizationFailure("There are conditional stores.",
"store that is conditionally executed prevents vectorization",
@@ -6884,7 +6939,8 @@ int LoopVectorizationCostModel::computePredInstDiscount(
}
LoopVectorizationCostModel::VectorizationCostTy
-LoopVectorizationCostModel::expectedCost(ElementCount VF) {
+LoopVectorizationCostModel::expectedCost(
+ ElementCount VF, SmallVectorImpl<InstructionVFPair> *Invalid) {
VectorizationCostTy Cost;
// For each block.
@@ -6904,6 +6960,10 @@ LoopVectorizationCostModel::expectedCost(ElementCount VF) {
if (ForceTargetInstructionCost.getNumOccurrences() > 0)
C.first = InstructionCost(ForceTargetInstructionCost);
+ // Keep a list of instructions with invalid costs.
+ if (Invalid && !C.first.isValid())
+ Invalid->emplace_back(&I, VF);
+
BlockCost.first += C.first;
BlockCost.second |= C.second;
LLVM_DEBUG(dbgs() << "LV: Found an estimated cost of " << C.first
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll
index 767aac9a31abd..e118e98d44af6 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll
@@ -1,4 +1,6 @@
-; RUN: opt -S -loop-vectorize -force-vector-interleave=1 -instcombine -mattr=+sve -mtriple aarch64-unknown-linux-gnu -scalable-vectorization=on < %s | FileCheck %s
+; RUN: opt -S -loop-vectorize -force-vector-interleave=1 -instcombine -mattr=+sve -mtriple aarch64-unknown-linux-gnu -scalable-vectorization=on \
+; RUN: -pass-remarks-missed=loop-vectorize < %s 2>%t | FileCheck %s
+; RUN: cat %t | FileCheck %s --check-prefix=CHECK-REMARKS
define void @vec_load(i64 %N, double* nocapture %a, double* nocapture readonly %b) {
; CHECK-LABEL: @vec_load
@@ -95,6 +97,10 @@ for.end:
ret void
}
+; CHECK-REMARKS: UserVF ignored because of invalid costs.
+; CHECK-REMARKS-NEXT: t.c:3:10: Instruction with invalid costs prevented vectorization at VF=(vscale x 1): load
+; CHECK-REMARKS-NEXT: t.c:3:20: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2): call to llvm.sin.f32
+; CHECK-REMARKS-NEXT: t.c:3:30: Instruction with invalid costs prevented vectorization at VF=(vscale x 1): store
define void @vec_sin_no_mapping(float* noalias nocapture %dst, float* noalias nocapture readonly %src, i64 %n) {
; CHECK: @vec_sin_no_mapping
; CHECK: call fast <2 x float> @llvm.sin.v2f32
@@ -105,10 +111,10 @@ entry:
for.body: ; preds = %entry, %for.body
%i.07 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds float, float* %src, i64 %i.07
- %0 = load float, float* %arrayidx, align 4
- %1 = tail call fast float @llvm.sin.f32(float %0)
+ %0 = load float, float* %arrayidx, align 4, !dbg !11
+ %1 = tail call fast float @llvm.sin.f32(float %0), !dbg !12
%arrayidx1 = getelementptr inbounds float, float* %dst, i64 %i.07
- store float %1, float* %arrayidx1, align 4
+ store float %1, float* %arrayidx1, align 4, !dbg !13
%inc = add nuw nsw i64 %i.07, 1
%exitcond.not = icmp eq i64 %inc, %n
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !1
@@ -117,6 +123,10 @@ for.cond.cleanup: ; preds = %for.body
ret void
}
+; CHECK-REMARKS: UserVF ignored because of invalid costs.
+; CHECK-REMARKS-NEXT: t.c:3:10: Instruction with invalid costs prevented vectorization at VF=(vscale x 1): load
+; CHECK-REMARKS-NEXT: t.c:3:20: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2): call to llvm.sin.f32
+; CHECK-REMARKS-NEXT: t.c:3:30: Instruction with invalid costs prevented vectorization at VF=(vscale x 1): store
define void @vec_sin_fixed_mapping(float* noalias nocapture %dst, float* noalias nocapture readonly %src, i64 %n) {
; CHECK: @vec_sin_fixed_mapping
; CHECK: call fast <2 x float> @llvm.sin.v2f32
@@ -127,10 +137,10 @@ entry:
for.body: ; preds = %entry, %for.body
%i.07 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds float, float* %src, i64 %i.07
- %0 = load float, float* %arrayidx, align 4
- %1 = tail call fast float @llvm.sin.f32(float %0) #3
+ %0 = load float, float* %arrayidx, align 4, !dbg !11
+ %1 = tail call fast float @llvm.sin.f32(float %0) #3, !dbg !12
%arrayidx1 = getelementptr inbounds float, float* %dst, i64 %i.07
- store float %1, float* %arrayidx1, align 4
+ store float %1, float* %arrayidx1, align 4, !dbg !13
%inc = add nuw nsw i64 %i.07, 1
%exitcond.not = icmp eq i64 %inc, %n
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !1
@@ -183,3 +193,18 @@ attributes #3 = { "vector-function-abi-variant"="_ZGV_LLVM_N2v_llvm.sin.f64(sin_
!1 = distinct !{!1, !2, !3}
!2 = !{!"llvm.loop.vectorize.width", i32 2}
!3 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
+
+!llvm.dbg.cu = !{!4}
+!llvm.module.flags = !{!7}
+!llvm.ident = !{!8}
+
+!4 = distinct !DICompileUnit(language: DW_LANG_C99, file: !5, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug, enums: !6, splitDebugInlining: false, nameTableKind: None)
+!5 = !DIFile(filename: "t.c", directory: "somedir")
+!6 = !{}
+!7 = !{i32 2, !"Debug Info Version", i32 3}
+!8 = !{!"clang"}
+!9 = distinct !DISubprogram(name: "foo", scope: !5, file: !5, line: 2, type: !10, scopeLine: 2, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !6)
+!10 = !DISubroutineType(types: !6)
+!11 = !DILocation(line: 3, column: 10, scope: !9)
+!12 = !DILocation(line: 3, column: 20, scope: !9)
+!13 = !DILocation(line: 3, column: 30, scope: !9)
More information about the llvm-commits mailing list