[llvm] a607f64 - Revert "[LV] Print remark when loop cannot be vectorized due to invalid costs."

Thu Jul 15 07:22:51 PDT 2021

Author: Sander de Smalen
Date: 2021-07-15T15:21:57+01:00
New Revision: a607f64118240f70bf1b14ec121b65f49d63800d

URL: https://github.com/llvm/llvm-project/commit/a607f64118240f70bf1b14ec121b65f49d63800d
DIFF: https://github.com/llvm/llvm-project/commit/a607f64118240f70bf1b14ec121b65f49d63800d.diff

LOG: Revert "[LV] Print remark when loop cannot be vectorized due to invalid costs."

This reverts commit efaf3099c8cec1954831ee28a2f75a72096f50eb.
This reverts commit dc7bdc1e7121693df112f2fdb11cc6b88580ba4b.

Reverting patches due to buildbot failures.

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
    llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index e263a5d2038a2..71f16a86a0bf3 100644

--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1676,13 +1676,8 @@ class LoopVectorizationCostModel {
   /// Returns the expected execution cost. The unit of the cost does
   /// not matter because we use the 'cost' units to compare different
   /// vector widths. The cost that is returned is *not* normalized by
-  /// the factor width. If \p Invalid is not nullptr, this function
-  /// will add a pair(Instruction*, ElementCount) to \p Invalid for
-  /// each instruction that has an Invalid cost for the given VF.
-  using InstructionVFPair = std::pair<Instruction *, ElementCount>;
-  VectorizationCostTy
-  expectedCost(ElementCount VF,
-               SmallVectorImpl<InstructionVFPair> *Invalid = nullptr);
+  /// the factor width.
+  VectorizationCostTy expectedCost(ElementCount VF);
 
   /// Returns the execution time cost of an instruction for a given vector
   /// width. Vector width of one means scalar.
@@ -6080,13 +6075,12 @@ VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor(
     ChosenFactor.Cost = InstructionCost::getMax();
   }
 
-  SmallVector<InstructionVFPair> InvalidCosts;
   for (const auto &i : VFCandidates) {
     // The cost for scalar VF=1 is already calculated, so ignore it.
     if (i.isScalar())
       continue;
 
-    VectorizationCostTy C = expectedCost(i, &InvalidCosts);
+    VectorizationCostTy C = expectedCost(i);
     VectorizationFactor Candidate(i, C.first);
     LLVM_DEBUG(
         dbgs() << "LV: Vector loop of width " << i << " costs: "
@@ -6109,60 +6103,6 @@ VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor(
       ChosenFactor = Candidate;
   }
 
-  // Emit a report of VFs with invalid costs in the loop.
-  if (!InvalidCosts.empty()) {
-    // Sort/group per instruction (lexicographically within basic blocks).
-    llvm::sort(InvalidCosts, [](InstructionVFPair &A, InstructionVFPair &B) {
-      const Instruction *AI = A.first, *BI = B.first;
-      if (AI->getParent() != BI->getParent())
-        return AI->getParent() < BI->getParent();
-      ElementCountComparator ECC;
-      if (AI != BI)
-        return AI->comesBefore(BI);
-      return ECC(A.second, B.second);
-    });
-
-    // For a list of ordered instruction-vf pairs:
-    //   [(load, vf1), (load, vf2), (store, vf1)]
-    // Group the instructions together to emit separate remarks for:
-    //   load  (vf1, vf2)
-    //   store (vf1)
-    auto Tail = ArrayRef<InstructionVFPair>(InvalidCosts);
-    auto Subset = ArrayRef<InstructionVFPair>();
-    do {
-      if (Subset.empty())
-        Subset = Tail.take_front(1);
-
-      Instruction *I = Subset.front().first;
-
-      // If the next instruction is different, or if there are no other pairs,
-      // emit a remark for the collated subset. e.g.
-      //   [(load, vf1), (load, vf2))]
-      // to emit:
-      //  remark: invalid costs for 'load' at VF=(vf, vf2)
-      if (Subset == Tail || Tail[Subset.size()].first != I) {
-        std::string OutString;
-        raw_string_ostream OS(OutString);
-        assert(!Subset.empty() && "Unexpected empty range");
-        OS << "Instruction with invalid costs prevented vectorization at VF=(";
-        for (auto &Pair : Subset)
-          OS << (Pair.second == Subset.front().second ? "" : ", ")
-             << Pair.second;
-        OS << "):";
-        if (auto *CI = dyn_cast<CallInst>(I))
-          OS << " call to " << CI->getCalledFunction()->getName();
-        else
-          OS << " " << I->getOpcodeName();
-        OS.flush();
-        reportVectorizationInfo(OutString, "InvalidCost", ORE, TheLoop, I);
-        Tail = Tail.drop_front(Subset.size());
-        Subset = {};
-      } else
-        // Grow the subset by one element
-        Subset = Tail.take_front(Subset.size() + 1);
-    } while (!Tail.empty());
-  }
-
   if (!EnableCondStoresVectorization && NumPredStores) {
     reportVectorizationFailure("There are conditional stores.",
         "store that is conditionally executed prevents vectorization",
@@ -6944,8 +6884,7 @@ int LoopVectorizationCostModel::computePredInstDiscount(
 }
 
 LoopVectorizationCostModel::VectorizationCostTy
-LoopVectorizationCostModel::expectedCost(
-    ElementCount VF, SmallVectorImpl<InstructionVFPair> *Invalid) {
+LoopVectorizationCostModel::expectedCost(ElementCount VF) {
   VectorizationCostTy Cost;
 
   // For each block.
@@ -6965,10 +6904,6 @@ LoopVectorizationCostModel::expectedCost(
       if (ForceTargetInstructionCost.getNumOccurrences() > 0)
         C.first = InstructionCost(ForceTargetInstructionCost);
 
-      // Keep a list of instructions with invalid costs.
-      if (Invalid && !C.first.isValid())
-        Invalid->emplace_back(&I, VF);
-
       BlockCost.first += C.first;
       BlockCost.second |= C.second;
       LLVM_DEBUG(dbgs() << "LV: Found an estimated cost of " << C.first

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll
index 28270e89193a3..767aac9a31abd 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll
@@ -1,6 +1,4 @@
-; RUN: opt -S -loop-vectorize -force-vector-interleave=1 -instcombine -mattr=+sve -mtriple aarch64-unknown-linux-gnu -scalable-vectorization=on \
-; RUN:     -pass-remarks-missed=loop-vectorize < %s 2>%t | FileCheck %s
-; RUN: cat %t | FileCheck %s --check-prefix=CHECK-REMARKS
+; RUN: opt -S -loop-vectorize -force-vector-interleave=1 -instcombine -mattr=+sve -mtriple aarch64-unknown-linux-gnu -scalable-vectorization=on < %s | FileCheck %s
 
 define void @vec_load(i64 %N, double* nocapture %a, double* nocapture readonly %b) {
 ; CHECK-LABEL: @vec_load
@@ -97,10 +95,6 @@ for.end:
   ret void
 }
 
-; CHECK-REMARKS: UserVF ignored because of invalid costs.
-; CHECK-REMARKS-NEXT: t.c:3:10: Instruction with invalid costs prevented vectorization at VF=(vscale x 1): load
-; CHECK-REMARKS-NEXT: t.c:3:20: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2): call to llvm.sin.f32
-; CHECK-REMARKS-NEXT: t.c:3:30: Instruction with invalid costs prevented vectorization at VF=(vscale x 1): store
 define void @vec_sin_no_mapping(float* noalias nocapture %dst, float* noalias nocapture readonly %src, i64 %n) {
 ; CHECK: @vec_sin_no_mapping
 ; CHECK: call fast <2 x float> @llvm.sin.v2f32
@@ -111,46 +105,10 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %i.07 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds float, float* %src, i64 %i.07
-  %0 = load float, float* %arrayidx, align 4, !dbg !11
-  %1 = tail call fast float @llvm.sin.f32(float %0), !dbg !12
-  %arrayidx1 = getelementptr inbounds float, float* %dst, i64 %i.07
-  store float %1, float* %arrayidx1, align 4, !dbg !13
-  %inc = add nuw nsw i64 %i.07, 1
-  %exitcond.not = icmp eq i64 %inc, %n
-  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !1
-
-for.cond.cleanup:                                 ; preds = %for.body
-  ret void
-}
-
-; CHECK-REMARKS: UserVF ignored because of invalid costs.
-; CHECK-REMARKS-NEXT: t.c:3:10: Instruction with invalid costs prevented vectorization at VF=(vscale x 1): load
-; CHECK-REMARKS-NEXT: t.c:3:40: Instruction with invalid costs prevented vectorization at VF=(vscale x 1): store
-; CHECK-REMARKS-NEXT: t.c:3:20: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2): call to llvm.sin.f32
-; CHECK-REMARKS-NEXT: t.c:3:30: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2): call to llvm.sin.f32
-define void @vec_sin_no_mapping_ite(float* noalias nocapture %dst, float* noalias nocapture readonly %src, i64 %n) {
-; CHECK: @vec_sin_no_mapping_ite
-; CHECK-NOT: <vscale x
-; CHECK: ret
-entry:
-  br label %for.body
-
-for.body:                                         ; preds = %entry, %if.end
-  %i.07 = phi i64 [ %inc, %if.end ], [ 0, %entry ]
-  %arrayidx = getelementptr inbounds float, float* %src, i64 %i.07
-  %0 = load float, float* %arrayidx, align 4, !dbg !11
-  %cmp = fcmp ugt float %0, 0.0000
-  br i1 %cmp, label %if.then, label %if.else
-if.then:
-  %1 = tail call fast float @llvm.sin.f32(float %0), !dbg !12
-  br label %if.end
-if.else:
-  %2 = tail call fast float @llvm.sin.f32(float 0.0), !dbg !13
-  br label %if.end
-if.end:
-  %3 = phi float [%1, %if.then], [%2, %if.else]
+  %0 = load float, float* %arrayidx, align 4
+  %1 = tail call fast float @llvm.sin.f32(float %0)
   %arrayidx1 = getelementptr inbounds float, float* %dst, i64 %i.07
-  store float %3, float* %arrayidx1, align 4, !dbg !14
+  store float %1, float* %arrayidx1, align 4
   %inc = add nuw nsw i64 %i.07, 1
   %exitcond.not = icmp eq i64 %inc, %n
   br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !1
@@ -159,10 +117,6 @@ for.cond.cleanup:                                 ; preds = %for.body
   ret void
 }
 
-; CHECK-REMARKS: UserVF ignored because of invalid costs.
-; CHECK-REMARKS-NEXT: t.c:3:10: Instruction with invalid costs prevented vectorization at VF=(vscale x 1): load
-; CHECK-REMARKS-NEXT: t.c:3:20: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2): call to llvm.sin.f32
-; CHECK-REMARKS-NEXT: t.c:3:30: Instruction with invalid costs prevented vectorization at VF=(vscale x 1): store
 define void @vec_sin_fixed_mapping(float* noalias nocapture %dst, float* noalias nocapture readonly %src, i64 %n) {
 ; CHECK: @vec_sin_fixed_mapping
 ; CHECK: call fast <2 x float> @llvm.sin.v2f32
@@ -173,10 +127,10 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %i.07 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds float, float* %src, i64 %i.07
-  %0 = load float, float* %arrayidx, align 4, !dbg !11
-  %1 = tail call fast float @llvm.sin.f32(float %0) #3, !dbg !12
+  %0 = load float, float* %arrayidx, align 4
+  %1 = tail call fast float @llvm.sin.f32(float %0) #3
   %arrayidx1 = getelementptr inbounds float, float* %dst, i64 %i.07
-  store float %1, float* %arrayidx1, align 4, !dbg !13
+  store float %1, float* %arrayidx1, align 4
   %inc = add nuw nsw i64 %i.07, 1
   %exitcond.not = icmp eq i64 %inc, %n
   br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !1
@@ -229,19 +183,3 @@ attributes #3 = { "vector-function-abi-variant"="_ZGV_LLVM_N2v_llvm.sin.f64(sin_
 !1 = distinct !{!1, !2, !3}
 !2 = !{!"llvm.loop.vectorize.width", i32 2}
 !3 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
-
-!llvm.dbg.cu = !{!4}
-!llvm.module.flags = !{!7}
-!llvm.ident = !{!8}
-
-!4 = distinct !DICompileUnit(language: DW_LANG_C99, file: !5, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug, enums: !6, splitDebugInlining: false, nameTableKind: None)
-!5 = !DIFile(filename: "t.c", directory: "somedir")
-!6 = !{}
-!7 = !{i32 2, !"Debug Info Version", i32 3}
-!8 = !{!"clang"}
-!9 = distinct !DISubprogram(name: "foo", scope: !5, file: !5, line: 2, type: !10, scopeLine: 2, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !6)
-!10 = !DISubroutineType(types: !6)
-!11 = !DILocation(line: 3, column: 10, scope: !9)
-!12 = !DILocation(line: 3, column: 20, scope: !9)
-!13 = !DILocation(line: 3, column: 30, scope: !9)
-!14 = !DILocation(line: 3, column: 40, scope: !9)