[llvm] a607f64 - Revert "[LV] Print remark when loop cannot be vectorized due to invalid costs."
Sander de Smalen via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 15 07:22:51 PDT 2021
Author: Sander de Smalen
Date: 2021-07-15T15:21:57+01:00
New Revision: a607f64118240f70bf1b14ec121b65f49d63800d
URL: https://github.com/llvm/llvm-project/commit/a607f64118240f70bf1b14ec121b65f49d63800d
DIFF: https://github.com/llvm/llvm-project/commit/a607f64118240f70bf1b14ec121b65f49d63800d.diff
LOG: Revert "[LV] Print remark when loop cannot be vectorized due to invalid costs."
This reverts commit efaf3099c8cec1954831ee28a2f75a72096f50eb.
This reverts commit dc7bdc1e7121693df112f2fdb11cc6b88580ba4b.
Reverting patches due to buildbot failures.
Added:
Modified:
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index e263a5d2038a2..71f16a86a0bf3 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1676,13 +1676,8 @@ class LoopVectorizationCostModel {
/// Returns the expected execution cost. The unit of the cost does
/// not matter because we use the 'cost' units to compare different
/// vector widths. The cost that is returned is *not* normalized by
- /// the factor width. If \p Invalid is not nullptr, this function
- /// will add a pair(Instruction*, ElementCount) to \p Invalid for
- /// each instruction that has an Invalid cost for the given VF.
- using InstructionVFPair = std::pair<Instruction *, ElementCount>;
- VectorizationCostTy
- expectedCost(ElementCount VF,
- SmallVectorImpl<InstructionVFPair> *Invalid = nullptr);
+ /// the factor width.
+ VectorizationCostTy expectedCost(ElementCount VF);
/// Returns the execution time cost of an instruction for a given vector
/// width. Vector width of one means scalar.
@@ -6080,13 +6075,12 @@ VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor(
ChosenFactor.Cost = InstructionCost::getMax();
}
- SmallVector<InstructionVFPair> InvalidCosts;
for (const auto &i : VFCandidates) {
// The cost for scalar VF=1 is already calculated, so ignore it.
if (i.isScalar())
continue;
- VectorizationCostTy C = expectedCost(i, &InvalidCosts);
+ VectorizationCostTy C = expectedCost(i);
VectorizationFactor Candidate(i, C.first);
LLVM_DEBUG(
dbgs() << "LV: Vector loop of width " << i << " costs: "
@@ -6109,60 +6103,6 @@ VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor(
ChosenFactor = Candidate;
}
- // Emit a report of VFs with invalid costs in the loop.
- if (!InvalidCosts.empty()) {
- // Sort/group per instruction (lexicographically within basic blocks).
- llvm::sort(InvalidCosts, [](InstructionVFPair &A, InstructionVFPair &B) {
- const Instruction *AI = A.first, *BI = B.first;
- if (AI->getParent() != BI->getParent())
- return AI->getParent() < BI->getParent();
- ElementCountComparator ECC;
- if (AI != BI)
- return AI->comesBefore(BI);
- return ECC(A.second, B.second);
- });
-
- // For a list of ordered instruction-vf pairs:
- // [(load, vf1), (load, vf2), (store, vf1)]
- // Group the instructions together to emit separate remarks for:
- // load (vf1, vf2)
- // store (vf1)
- auto Tail = ArrayRef<InstructionVFPair>(InvalidCosts);
- auto Subset = ArrayRef<InstructionVFPair>();
- do {
- if (Subset.empty())
- Subset = Tail.take_front(1);
-
- Instruction *I = Subset.front().first;
-
- // If the next instruction is different, or if there are no other pairs,
- // emit a remark for the collated subset. e.g.
- // [(load, vf1), (load, vf2))]
- // to emit:
- // remark: invalid costs for 'load' at VF=(vf, vf2)
- if (Subset == Tail || Tail[Subset.size()].first != I) {
- std::string OutString;
- raw_string_ostream OS(OutString);
- assert(!Subset.empty() && "Unexpected empty range");
- OS << "Instruction with invalid costs prevented vectorization at VF=(";
- for (auto &Pair : Subset)
- OS << (Pair.second == Subset.front().second ? "" : ", ")
- << Pair.second;
- OS << "):";
- if (auto *CI = dyn_cast<CallInst>(I))
- OS << " call to " << CI->getCalledFunction()->getName();
- else
- OS << " " << I->getOpcodeName();
- OS.flush();
- reportVectorizationInfo(OutString, "InvalidCost", ORE, TheLoop, I);
- Tail = Tail.drop_front(Subset.size());
- Subset = {};
- } else
- // Grow the subset by one element
- Subset = Tail.take_front(Subset.size() + 1);
- } while (!Tail.empty());
- }
-
if (!EnableCondStoresVectorization && NumPredStores) {
reportVectorizationFailure("There are conditional stores.",
"store that is conditionally executed prevents vectorization",
@@ -6944,8 +6884,7 @@ int LoopVectorizationCostModel::computePredInstDiscount(
}
LoopVectorizationCostModel::VectorizationCostTy
-LoopVectorizationCostModel::expectedCost(
- ElementCount VF, SmallVectorImpl<InstructionVFPair> *Invalid) {
+LoopVectorizationCostModel::expectedCost(ElementCount VF) {
VectorizationCostTy Cost;
// For each block.
@@ -6965,10 +6904,6 @@ LoopVectorizationCostModel::expectedCost(
if (ForceTargetInstructionCost.getNumOccurrences() > 0)
C.first = InstructionCost(ForceTargetInstructionCost);
- // Keep a list of instructions with invalid costs.
- if (Invalid && !C.first.isValid())
- Invalid->emplace_back(&I, VF);
-
BlockCost.first += C.first;
BlockCost.second |= C.second;
LLVM_DEBUG(dbgs() << "LV: Found an estimated cost of " << C.first
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll
index 28270e89193a3..767aac9a31abd 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll
@@ -1,6 +1,4 @@
-; RUN: opt -S -loop-vectorize -force-vector-interleave=1 -instcombine -mattr=+sve -mtriple aarch64-unknown-linux-gnu -scalable-vectorization=on \
-; RUN: -pass-remarks-missed=loop-vectorize < %s 2>%t | FileCheck %s
-; RUN: cat %t | FileCheck %s --check-prefix=CHECK-REMARKS
+; RUN: opt -S -loop-vectorize -force-vector-interleave=1 -instcombine -mattr=+sve -mtriple aarch64-unknown-linux-gnu -scalable-vectorization=on < %s | FileCheck %s
define void @vec_load(i64 %N, double* nocapture %a, double* nocapture readonly %b) {
; CHECK-LABEL: @vec_load
@@ -97,10 +95,6 @@ for.end:
ret void
}
-; CHECK-REMARKS: UserVF ignored because of invalid costs.
-; CHECK-REMARKS-NEXT: t.c:3:10: Instruction with invalid costs prevented vectorization at VF=(vscale x 1): load
-; CHECK-REMARKS-NEXT: t.c:3:20: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2): call to llvm.sin.f32
-; CHECK-REMARKS-NEXT: t.c:3:30: Instruction with invalid costs prevented vectorization at VF=(vscale x 1): store
define void @vec_sin_no_mapping(float* noalias nocapture %dst, float* noalias nocapture readonly %src, i64 %n) {
; CHECK: @vec_sin_no_mapping
; CHECK: call fast <2 x float> @llvm.sin.v2f32
@@ -111,46 +105,10 @@ entry:
for.body: ; preds = %entry, %for.body
%i.07 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds float, float* %src, i64 %i.07
- %0 = load float, float* %arrayidx, align 4, !dbg !11
- %1 = tail call fast float @llvm.sin.f32(float %0), !dbg !12
- %arrayidx1 = getelementptr inbounds float, float* %dst, i64 %i.07
- store float %1, float* %arrayidx1, align 4, !dbg !13
- %inc = add nuw nsw i64 %i.07, 1
- %exitcond.not = icmp eq i64 %inc, %n
- br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !1
-
-for.cond.cleanup: ; preds = %for.body
- ret void
-}
-
-; CHECK-REMARKS: UserVF ignored because of invalid costs.
-; CHECK-REMARKS-NEXT: t.c:3:10: Instruction with invalid costs prevented vectorization at VF=(vscale x 1): load
-; CHECK-REMARKS-NEXT: t.c:3:40: Instruction with invalid costs prevented vectorization at VF=(vscale x 1): store
-; CHECK-REMARKS-NEXT: t.c:3:20: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2): call to llvm.sin.f32
-; CHECK-REMARKS-NEXT: t.c:3:30: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2): call to llvm.sin.f32
-define void @vec_sin_no_mapping_ite(float* noalias nocapture %dst, float* noalias nocapture readonly %src, i64 %n) {
-; CHECK: @vec_sin_no_mapping_ite
-; CHECK-NOT: <vscale x
-; CHECK: ret
-entry:
- br label %for.body
-
-for.body: ; preds = %entry, %if.end
- %i.07 = phi i64 [ %inc, %if.end ], [ 0, %entry ]
- %arrayidx = getelementptr inbounds float, float* %src, i64 %i.07
- %0 = load float, float* %arrayidx, align 4, !dbg !11
- %cmp = fcmp ugt float %0, 0.0000
- br i1 %cmp, label %if.then, label %if.else
-if.then:
- %1 = tail call fast float @llvm.sin.f32(float %0), !dbg !12
- br label %if.end
-if.else:
- %2 = tail call fast float @llvm.sin.f32(float 0.0), !dbg !13
- br label %if.end
-if.end:
- %3 = phi float [%1, %if.then], [%2, %if.else]
+ %0 = load float, float* %arrayidx, align 4
+ %1 = tail call fast float @llvm.sin.f32(float %0)
%arrayidx1 = getelementptr inbounds float, float* %dst, i64 %i.07
- store float %3, float* %arrayidx1, align 4, !dbg !14
+ store float %1, float* %arrayidx1, align 4
%inc = add nuw nsw i64 %i.07, 1
%exitcond.not = icmp eq i64 %inc, %n
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !1
@@ -159,10 +117,6 @@ for.cond.cleanup: ; preds = %for.body
ret void
}
-; CHECK-REMARKS: UserVF ignored because of invalid costs.
-; CHECK-REMARKS-NEXT: t.c:3:10: Instruction with invalid costs prevented vectorization at VF=(vscale x 1): load
-; CHECK-REMARKS-NEXT: t.c:3:20: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2): call to llvm.sin.f32
-; CHECK-REMARKS-NEXT: t.c:3:30: Instruction with invalid costs prevented vectorization at VF=(vscale x 1): store
define void @vec_sin_fixed_mapping(float* noalias nocapture %dst, float* noalias nocapture readonly %src, i64 %n) {
; CHECK: @vec_sin_fixed_mapping
; CHECK: call fast <2 x float> @llvm.sin.v2f32
@@ -173,10 +127,10 @@ entry:
for.body: ; preds = %entry, %for.body
%i.07 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds float, float* %src, i64 %i.07
- %0 = load float, float* %arrayidx, align 4, !dbg !11
- %1 = tail call fast float @llvm.sin.f32(float %0) #3, !dbg !12
+ %0 = load float, float* %arrayidx, align 4
+ %1 = tail call fast float @llvm.sin.f32(float %0) #3
%arrayidx1 = getelementptr inbounds float, float* %dst, i64 %i.07
- store float %1, float* %arrayidx1, align 4, !dbg !13
+ store float %1, float* %arrayidx1, align 4
%inc = add nuw nsw i64 %i.07, 1
%exitcond.not = icmp eq i64 %inc, %n
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !1
@@ -229,19 +183,3 @@ attributes #3 = { "vector-function-abi-variant"="_ZGV_LLVM_N2v_llvm.sin.f64(sin_
!1 = distinct !{!1, !2, !3}
!2 = !{!"llvm.loop.vectorize.width", i32 2}
!3 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
-
-!llvm.dbg.cu = !{!4}
-!llvm.module.flags = !{!7}
-!llvm.ident = !{!8}
-
-!4 = distinct !DICompileUnit(language: DW_LANG_C99, file: !5, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug, enums: !6, splitDebugInlining: false, nameTableKind: None)
-!5 = !DIFile(filename: "t.c", directory: "somedir")
-!6 = !{}
-!7 = !{i32 2, !"Debug Info Version", i32 3}
-!8 = !{!"clang"}
-!9 = distinct !DISubprogram(name: "foo", scope: !5, file: !5, line: 2, type: !10, scopeLine: 2, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !6)
-!10 = !DISubroutineType(types: !6)
-!11 = !DILocation(line: 3, column: 10, scope: !9)
-!12 = !DILocation(line: 3, column: 20, scope: !9)
-!13 = !DILocation(line: 3, column: 30, scope: !9)
-!14 = !DILocation(line: 3, column: 40, scope: !9)
More information about the llvm-commits
mailing list