[llvm] 239d01f - Reland "[LV] Print remark when loop cannot be vectorized due to invalid costs."
Sander de Smalen via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 16 02:53:18 PDT 2021
Author: Sander de Smalen
Date: 2021-07-16T10:52:01+01:00
New Revision: 239d01fa884d8707ece2f2dbf0eafcbbf8714aa4
URL: https://github.com/llvm/llvm-project/commit/239d01fa884d8707ece2f2dbf0eafcbbf8714aa4
DIFF: https://github.com/llvm/llvm-project/commit/239d01fa884d8707ece2f2dbf0eafcbbf8714aa4.diff
LOG: Reland "[LV] Print remark when loop cannot be vectorized due to invalid costs."
The original patch was:
https://reviews.llvm.org/D105806
There were some issues with non-deterministic behaviour of the sorting
function, which led to scalable-call.ll passing and/or failing. This
patch fixes the issue by numbering all instructions in the array first,
and using that number as the order, which should provide a consistent
ordering.
This reverts commit a607f64118240f70bf1b14ec121b65f49d63800d.
Added:
Modified:
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 71f16a86a0bf3..64dce08eacd2e 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1676,8 +1676,13 @@ class LoopVectorizationCostModel {
/// Returns the expected execution cost. The unit of the cost does
/// not matter because we use the 'cost' units to compare different
/// vector widths. The cost that is returned is *not* normalized by
- /// the factor width.
- VectorizationCostTy expectedCost(ElementCount VF);
+ /// the factor width. If \p Invalid is not nullptr, this function
+ /// will add a pair(Instruction*, ElementCount) to \p Invalid for
+ /// each instruction that has an Invalid cost for the given VF.
+ using InstructionVFPair = std::pair<Instruction *, ElementCount>;
+ VectorizationCostTy
+ expectedCost(ElementCount VF,
+ SmallVectorImpl<InstructionVFPair> *Invalid = nullptr);
/// Returns the execution time cost of an instruction for a given vector
/// width. Vector width of one means scalar.
@@ -6075,12 +6080,13 @@ VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor(
ChosenFactor.Cost = InstructionCost::getMax();
}
+ SmallVector<InstructionVFPair> InvalidCosts;
for (const auto &i : VFCandidates) {
// The cost for scalar VF=1 is already calculated, so ignore it.
if (i.isScalar())
continue;
- VectorizationCostTy C = expectedCost(i);
+ VectorizationCostTy C = expectedCost(i, &InvalidCosts);
VectorizationFactor Candidate(i, C.first);
LLVM_DEBUG(
dbgs() << "LV: Vector loop of width " << i << " costs: "
@@ -6103,6 +6109,66 @@ VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor(
ChosenFactor = Candidate;
}
+ // Emit a report of VFs with invalid costs in the loop.
+ if (!InvalidCosts.empty()) {
+ // Group the remarks per instruction, keeping the instruction order from
+ // InvalidCosts.
+ std::map<Instruction *, unsigned> Numbering;
+ unsigned I = 0;
+ for (auto &Pair : InvalidCosts)
+ if (!Numbering.count(Pair.first))
+ Numbering[Pair.first] = I++;
+
+ // Sort the list, first on instruction(number) then on VF.
+ llvm::sort(InvalidCosts,
+ [&Numbering](InstructionVFPair &A, InstructionVFPair &B) {
+ if (Numbering[A.first] != Numbering[B.first])
+ return Numbering[A.first] < Numbering[B.first];
+ ElementCountComparator ECC;
+ return ECC(A.second, B.second);
+ });
+
+ // For a list of ordered instruction-vf pairs:
+ // [(load, vf1), (load, vf2), (store, vf1)]
+ // Group the instructions together to emit separate remarks for:
+ // load (vf1, vf2)
+ // store (vf1)
+ auto Tail = ArrayRef<InstructionVFPair>(InvalidCosts);
+ auto Subset = ArrayRef<InstructionVFPair>();
+ do {
+ if (Subset.empty())
+ Subset = Tail.take_front(1);
+
+ Instruction *I = Subset.front().first;
+
+ // If the next instruction is different, or if there are no other pairs,
+ // emit a remark for the collated subset. e.g.
+ // [(load, vf1), (load, vf2))]
+ // to emit:
+ // remark: invalid costs for 'load' at VF=(vf, vf2)
+ if (Subset == Tail || Tail[Subset.size()].first != I) {
+ std::string OutString;
+ raw_string_ostream OS(OutString);
+ assert(!Subset.empty() && "Unexpected empty range");
+ OS << "Instruction with invalid costs prevented vectorization at VF=(";
+ for (auto &Pair : Subset)
+ OS << (Pair.second == Subset.front().second ? "" : ", ")
+ << Pair.second;
+ OS << "):";
+ if (auto *CI = dyn_cast<CallInst>(I))
+ OS << " call to " << CI->getCalledFunction()->getName();
+ else
+ OS << " " << I->getOpcodeName();
+ OS.flush();
+ reportVectorizationInfo(OutString, "InvalidCost", ORE, TheLoop, I);
+ Tail = Tail.drop_front(Subset.size());
+ Subset = {};
+ } else
+ // Grow the subset by one element
+ Subset = Tail.take_front(Subset.size() + 1);
+ } while (!Tail.empty());
+ }
+
if (!EnableCondStoresVectorization && NumPredStores) {
reportVectorizationFailure("There are conditional stores.",
"store that is conditionally executed prevents vectorization",
@@ -6884,7 +6950,8 @@ int LoopVectorizationCostModel::computePredInstDiscount(
}
LoopVectorizationCostModel::VectorizationCostTy
-LoopVectorizationCostModel::expectedCost(ElementCount VF) {
+LoopVectorizationCostModel::expectedCost(
+ ElementCount VF, SmallVectorImpl<InstructionVFPair> *Invalid) {
VectorizationCostTy Cost;
// For each block.
@@ -6904,6 +6971,10 @@ LoopVectorizationCostModel::expectedCost(ElementCount VF) {
if (ForceTargetInstructionCost.getNumOccurrences() > 0)
C.first = InstructionCost(ForceTargetInstructionCost);
+ // Keep a list of instructions with invalid costs.
+ if (Invalid && !C.first.isValid())
+ Invalid->emplace_back(&I, VF);
+
BlockCost.first += C.first;
BlockCost.second |= C.second;
LLVM_DEBUG(dbgs() << "LV: Found an estimated cost of " << C.first
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll
index 767aac9a31abd..d7a3f719e151e 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll
@@ -1,4 +1,6 @@
-; RUN: opt -S -loop-vectorize -force-vector-interleave=1 -instcombine -mattr=+sve -mtriple aarch64-unknown-linux-gnu -scalable-vectorization=on < %s | FileCheck %s
+; RUN: opt -S -loop-vectorize -force-vector-interleave=1 -instcombine -mattr=+sve -mtriple aarch64-unknown-linux-gnu -scalable-vectorization=on \
+; RUN: -pass-remarks-missed=loop-vectorize < %s 2>%t | FileCheck %s
+; RUN: cat %t | FileCheck %s --check-prefix=CHECK-REMARKS
define void @vec_load(i64 %N, double* nocapture %a, double* nocapture readonly %b) {
; CHECK-LABEL: @vec_load
@@ -95,6 +97,10 @@ for.end:
ret void
}
+; CHECK-REMARKS: UserVF ignored because of invalid costs.
+; CHECK-REMARKS-NEXT: t.c:3:10: Instruction with invalid costs prevented vectorization at VF=(vscale x 1): load
+; CHECK-REMARKS-NEXT: t.c:3:20: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2): call to llvm.sin.f32
+; CHECK-REMARKS-NEXT: t.c:3:30: Instruction with invalid costs prevented vectorization at VF=(vscale x 1): store
define void @vec_sin_no_mapping(float* noalias nocapture %dst, float* noalias nocapture readonly %src, i64 %n) {
; CHECK: @vec_sin_no_mapping
; CHECK: call fast <2 x float> @llvm.sin.v2f32
@@ -105,10 +111,10 @@ entry:
for.body: ; preds = %entry, %for.body
%i.07 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds float, float* %src, i64 %i.07
- %0 = load float, float* %arrayidx, align 4
- %1 = tail call fast float @llvm.sin.f32(float %0)
+ %0 = load float, float* %arrayidx, align 4, !dbg !11
+ %1 = tail call fast float @llvm.sin.f32(float %0), !dbg !12
%arrayidx1 = getelementptr inbounds float, float* %dst, i64 %i.07
- store float %1, float* %arrayidx1, align 4
+ store float %1, float* %arrayidx1, align 4, !dbg !13
%inc = add nuw nsw i64 %i.07, 1
%exitcond.not = icmp eq i64 %inc, %n
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !1
@@ -117,6 +123,46 @@ for.cond.cleanup: ; preds = %for.body
ret void
}
+; CHECK-REMARKS: UserVF ignored because of invalid costs.
+; CHECK-REMARKS-NEXT: t.c:3:10: Instruction with invalid costs prevented vectorization at VF=(vscale x 1): load
+; CHECK-REMARKS-NEXT: t.c:3:30: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2): call to llvm.sin.f32
+; CHECK-REMARKS-NEXT: t.c:3:20: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2): call to llvm.sin.f32
+; CHECK-REMARKS-NEXT: t.c:3:40: Instruction with invalid costs prevented vectorization at VF=(vscale x 1): store
+define void @vec_sin_no_mapping_ite(float* noalias nocapture %dst, float* noalias nocapture readonly %src, i64 %n) {
+; CHECK: @vec_sin_no_mapping_ite
+; CHECK-NOT: <vscale x
+; CHECK: ret
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %if.end
+ %i.07 = phi i64 [ %inc, %if.end ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds float, float* %src, i64 %i.07
+ %0 = load float, float* %arrayidx, align 4, !dbg !11
+ %cmp = fcmp ugt float %0, 0.0000
+ br i1 %cmp, label %if.then, label %if.else
+if.then:
+ %1 = tail call fast float @llvm.sin.f32(float %0), !dbg !12
+ br label %if.end
+if.else:
+ %2 = tail call fast float @llvm.sin.f32(float 0.0), !dbg !13
+ br label %if.end
+if.end:
+ %3 = phi float [%1, %if.then], [%2, %if.else]
+ %arrayidx1 = getelementptr inbounds float, float* %dst, i64 %i.07
+ store float %3, float* %arrayidx1, align 4, !dbg !14
+ %inc = add nuw nsw i64 %i.07, 1
+ %exitcond.not = icmp eq i64 %inc, %n
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !1
+
+for.cond.cleanup: ; preds = %for.body
+ ret void
+}
+
+; CHECK-REMARKS: UserVF ignored because of invalid costs.
+; CHECK-REMARKS-NEXT: t.c:3:10: Instruction with invalid costs prevented vectorization at VF=(vscale x 1): load
+; CHECK-REMARKS-NEXT: t.c:3:20: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2): call to llvm.sin.f32
+; CHECK-REMARKS-NEXT: t.c:3:30: Instruction with invalid costs prevented vectorization at VF=(vscale x 1): store
define void @vec_sin_fixed_mapping(float* noalias nocapture %dst, float* noalias nocapture readonly %src, i64 %n) {
; CHECK: @vec_sin_fixed_mapping
; CHECK: call fast <2 x float> @llvm.sin.v2f32
@@ -127,10 +173,10 @@ entry:
for.body: ; preds = %entry, %for.body
%i.07 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds float, float* %src, i64 %i.07
- %0 = load float, float* %arrayidx, align 4
- %1 = tail call fast float @llvm.sin.f32(float %0) #3
+ %0 = load float, float* %arrayidx, align 4, !dbg !11
+ %1 = tail call fast float @llvm.sin.f32(float %0) #3, !dbg !12
%arrayidx1 = getelementptr inbounds float, float* %dst, i64 %i.07
- store float %1, float* %arrayidx1, align 4
+ store float %1, float* %arrayidx1, align 4, !dbg !13
%inc = add nuw nsw i64 %i.07, 1
%exitcond.not = icmp eq i64 %inc, %n
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !1
@@ -183,3 +229,19 @@ attributes #3 = { "vector-function-abi-variant"="_ZGV_LLVM_N2v_llvm.sin.f64(sin_
!1 = distinct !{!1, !2, !3}
!2 = !{!"llvm.loop.vectorize.width", i32 2}
!3 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
+
+!llvm.dbg.cu = !{!4}
+!llvm.module.flags = !{!7}
+!llvm.ident = !{!8}
+
+!4 = distinct !DICompileUnit(language: DW_LANG_C99, file: !5, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug, enums: !6, splitDebugInlining: false, nameTableKind: None)
+!5 = !DIFile(filename: "t.c", directory: "somedir")
+!6 = !{}
+!7 = !{i32 2, !"Debug Info Version", i32 3}
+!8 = !{!"clang"}
+!9 = distinct !DISubprogram(name: "foo", scope: !5, file: !5, line: 2, type: !10, scopeLine: 2, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !6)
+!10 = !DISubroutineType(types: !6)
+!11 = !DILocation(line: 3, column: 10, scope: !9)
+!12 = !DILocation(line: 3, column: 20, scope: !9)
+!13 = !DILocation(line: 3, column: 30, scope: !9)
+!14 = !DILocation(line: 3, column: 40, scope: !9)
More information about the llvm-commits
mailing list