[PATCH] D26873: [LV] Do not vectorize loops with a low dynamic tripcount, as determined by profile information

Michael Kuperstein via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 18 15:02:24 PST 2016


mkuper created this revision.
mkuper added reviewers: mssimpso, gilr, danielcdh, davidxl.
mkuper added a subscriber: llvm-commits.
Herald added a subscriber: mzolotukhin.

This patch is somewhat limited at this point; there are two known sources of inaccuracy in the estimated trip count:

1. We still don't have a code duplication factor, so, for sampling-based FDO, we'll get the wrong trip count if the loop was vectorized in the sampled binary.
2. Loops that are dynamically dead in the profile will still be vectorized, since getLoopEstimatedTripCount() cannot yet distinguish "loop was never entered" from "no information".

Both of these will need to be fixed on the "estimate trip count" side.
Dehao, David, do you think it's worth holding this patch back until we have the duplication factors?


https://reviews.llvm.org/D26873

Files:
  lib/Transforms/Vectorize/LoopVectorize.cpp
  test/Transforms/LoopVectorize/X86/runtime-trip-count.ll


Index: test/Transforms/LoopVectorize/X86/runtime-trip-count.ll
===================================================================
--- test/Transforms/LoopVectorize/X86/runtime-trip-count.ll
+++ test/Transforms/LoopVectorize/X86/runtime-trip-count.ll
@@ -0,0 +1,57 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S -debug-only=loop-vectorize 2>&1 | FileCheck %s
+; REQUIRES: asserts
+
+; CHECK: Checking a loop in "high_dynamic"
+; CHECK: We can vectorize this loop
+; CHECK: Checking a loop in "low_dynamic"
+; CHECK: Found a loop with a very small trip count.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK-LABEL: @high_dynamic
+; CHECK: fadd <4 x float>
+define void @high_dynamic(float* nocapture %a, i32 %k) !prof !0 {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds float, float* %a, i64 %indvars.iv
+  %0 = load float, float* %arrayidx, align 4
+  %add = fadd float %0, 1.000000e+00
+  store float %add, float* %arrayidx, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %k
+  br i1 %exitcond, label %for.end, label %for.body, !prof !1
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+; CHECK-LABEL: @low_dynamic
+; CHECK-NOT: <4 x float>
+define void @low_dynamic(float* nocapture %a, i32 %k) !prof !0 {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds float, float* %a, i64 %indvars.iv
+  %0 = load float, float* %arrayidx, align 4
+  %add = fadd float %0, 1.000000e+00
+  store float %add, float* %arrayidx, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %k
+  br i1 %exitcond, label %for.end, label %for.body, !prof !2
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+
+!0 = !{!"function_entry_count", i64 1}
+!1 = !{!"branch_weights", i32 1001, i32 400001}
+!2 = !{!"branch_weights", i32 1001, i32 4001}
Index: lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- lib/Transforms/Vectorize/LoopVectorize.cpp
+++ lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7179,8 +7179,21 @@
 
   // Check the loop for a trip count threshold:
   // do not vectorize loops with a tiny trip count.
-  const unsigned TC = SE->getSmallConstantTripCount(L);
-  if (TC > 0u && TC < TinyTripCountVectorThreshold) {
+  bool KnownTC = false;
+  unsigned TC = SE->getSmallConstantTripCount(L);
+  if (TC) {
+    KnownTC = true;
+  } else if (F->getEntryCount()) {
+    // If the tripcount is unknown, but profile information is available,
+    // use a profile-based estimate.
+    auto EstimatedTC = getLoopEstimatedTripCount(L);
+    if (EstimatedTC) {
+      TC = *EstimatedTC;
+      KnownTC = true;
+    }
+  }
+
+  if (KnownTC && TC < TinyTripCountVectorThreshold) {
     DEBUG(dbgs() << "LV: Found a loop with a very small trip count. "
                  << "This loop is not worth vectorizing.");
     if (Hints.getForce() == LoopVectorizeHints::FK_Enabled)


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D26873.78587.patch
Type: text/x-patch
Size: 3658 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20161118/115498ea/attachment.bin>


More information about the llvm-commits mailing list