[llvm] 8cda227 - [LV] Fix crash when computing max VF too early

Mon Feb 1 04:15:19 PST 2021

Author: Cullen Rhodes
Date: 2021-02-01T12:14:59Z
New Revision: 8cda227432f1c9ceb63b88802ed8136da97274f1

URL: https://github.com/llvm/llvm-project/commit/8cda227432f1c9ceb63b88802ed8136da97274f1
DIFF: https://github.com/llvm/llvm-project/commit/8cda227432f1c9ceb63b88802ed8136da97274f1.diff

LOG: [LV] Fix crash when computing max VF too early

D90687 introduced a crash:

  llvm::LoopVectorizationCostModel::computeMaxVF(llvm::ElementCount, unsigned int):
    Assertion `WideningDecisions.empty() && Uniforms.empty() && Scalars.empty() &&
    "No decisions should have been taken at this point"' failed.

when compiling the following C code:

  typedef struct {
  char a;
  } b;

  b *c;
  int d, e;

  int f() {
    int g = 0;
    for (; d; d++) {
      e = 0;
      for (; e < c[d].a; e++)
        g++;
    }
    return g;
  }

with:

  clang -Os -target hexagon -mhvx -fvectorize -mv67 testcase.c -S -o -

This occurred since prior to D90687 computeFeasibleMaxVF would only be
called in computeMaxVF when a scalar epilogue was allowed, but now it's
always called. This causes the assert above since computeFeasibleMaxVF
collects all viable VFs larger than the default MaxVF, and for each VF
calculates the register usage which results in analysis being done the
assert above guards against. This can occur in computeFeasibleMaxVF if
TTI.shouldMaximizeVectorBandwidth and this target hook is implemented in
the hexagon backend to always return true.

Reported by @iajbar.

Reviewed By: fhahn

Differential Revision: https://reviews.llvm.org/D94869

Added: 
    llvm/test/Transforms/LoopVectorize/Hexagon/maximum-vf-crash.ll

Modified: 
    llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index b1da80bc5c3e..f770cb6bb2b1 100644

--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5505,11 +5505,9 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
     return None;
   }
 
-  ElementCount MaxVF = computeFeasibleMaxVF(TC, UserVF);
-
   switch (ScalarEpilogueStatus) {
   case CM_ScalarEpilogueAllowed:
-    return MaxVF;
+    return computeFeasibleMaxVF(TC, UserVF);
   case CM_ScalarEpilogueNotAllowedUsePredicate:
     LLVM_FALLTHROUGH;
   case CM_ScalarEpilogueNotNeededUsePredicate:
@@ -5547,7 +5545,7 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
       LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking: vectorize with a "
                            "scalar epilogue instead.\n");
       ScalarEpilogueStatus = CM_ScalarEpilogueAllowed;
-      return MaxVF;
+      return computeFeasibleMaxVF(TC, UserVF);
     }
     return None;
   }
@@ -5564,6 +5562,7 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
     InterleaveInfo.invalidateGroupsRequiringScalarEpilogue();
   }
 
+  ElementCount MaxVF = computeFeasibleMaxVF(TC, UserVF);
   assert(!MaxVF.isScalable() &&
          "Scalable vectors do not yet support tail folding");
   assert((UserVF.isNonZero() || isPowerOf2_32(MaxVF.getFixedValue())) &&

diff  --git a/llvm/test/Transforms/LoopVectorize/Hexagon/maximum-vf-crash.ll b/llvm/test/Transforms/LoopVectorize/Hexagon/maximum-vf-crash.ll
new file mode 100644
index 000000000000..5f8c5d329edf
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/Hexagon/maximum-vf-crash.ll
@@ -0,0 +1,29 @@
+; RUN: opt -march=hexagon -hexagon-autohvx -loop-vectorize -S < %s 2>&1 | FileCheck %s
+
+; Check that we don't crash.
+
+; CHECK-LABEL: @f
+; CHECK: vector.body
+
+target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
+target triple = "hexagon"
+
+; Function Attrs: optsize
+define i32 @f() #0 {
+entry:
+  br label %loop
+
+loop:
+  %g.016 = phi i32 [ 0, %entry ], [ %g.1.lcssa, %loop ]
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+  %0 = load i8, i8* undef, align 1
+  %g.1.lcssa = add i32 %g.016, undef
+  %iv.next = add nsw i32 %iv, 1
+  %exitcond = icmp eq i32 %iv.next, 0
+  br i1 %exitcond, label %exit, label %loop
+
+exit:
+  ret i32 %g.1.lcssa
+}
+
+attributes #0 = { optsize "target-features"="+hvx-length128b" }