[llvm] fc1f205 - [SLPVectorizer][SVE] Bail out early for scalable vector.

Fri Mar 13 11:23:48 PDT 2020

Author: Huihui Zhang
Date: 2020-03-13T11:23:31-07:00
New Revision: fc1f205745003eeeabd4ee356dea7e5c7fa4fd64

URL: https://github.com/llvm/llvm-project/commit/fc1f205745003eeeabd4ee356dea7e5c7fa4fd64
DIFF: https://github.com/llvm/llvm-project/commit/fc1f205745003eeeabd4ee356dea7e5c7fa4fd64.diff

LOG: [SLPVectorizer][SVE] Bail out early for scalable vector.

Summary:
SLPVectorizer try to vectorize list of scalar instructions of the same type,
instructions already vectorized are rejected through isValidElementType().

Without this patch, tryToVectorizeList() will first try to determine vectorization
factor of a list of Instructions before checking whether each instruction has unsupported
type or not. For instructions already vectorized for SVE, it will crash at getVectorElementSize(),
where it try to return a fixed size.

This patch make sure invalid element types are rejected before trying to get vectorization
factor. This make sure we are not trying to vectorize instructions already vectorized.

Reviewers: sdesmalen, efriedma, spatel, RKSimon, ABataev, apazos, rengolin

Reviewed By: efriedma

Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D76017

Added: 
    llvm/test/Transforms/SLPVectorizer/AArch64/invalid_type.ll
    llvm/test/Transforms/SLPVectorizer/AArch64/scalable-vector.ll

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 68d063d191e2..e411c4ece83d 100644

--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -5899,25 +5899,15 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
   LLVM_DEBUG(dbgs() << "SLP: Trying to vectorize a list of length = "
                     << VL.size() << ".\n");
 
-  // Check that all of the parts are scalar instructions of the same type,
+  // Check that all of the parts are instructions of the same type,
   // we permit an alternate opcode via InstructionsState.
   InstructionsState S = getSameOpcode(VL);
   if (!S.getOpcode())
     return false;
 
   Instruction *I0 = cast<Instruction>(S.OpValue);
-  unsigned Sz = R.getVectorElementSize(I0);
-  unsigned MinVF = std::max(2U, R.getMinVecRegSize() / Sz);
-  unsigned MaxVF = std::max<unsigned>(PowerOf2Floor(VL.size()), MinVF);
-  if (MaxVF < 2) {
-    R.getORE()->emit([&]() {
-      return OptimizationRemarkMissed(SV_NAME, "SmallVF", I0)
-             << "Cannot SLP vectorize list: vectorization factor "
-             << "less than 2 is not supported";
-    });
-    return false;
-  }
-
+  // Make sure invalid types (including vector type) are rejected before
+  // determining vectorization factor for scalar instructions.
   for (Value *V : VL) {
     Type *Ty = V->getType();
     if (!isValidElementType(Ty)) {
@@ -5935,6 +5925,18 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
     }
   }
 
+  unsigned Sz = R.getVectorElementSize(I0);
+  unsigned MinVF = std::max(2U, R.getMinVecRegSize() / Sz);
+  unsigned MaxVF = std::max<unsigned>(PowerOf2Floor(VL.size()), MinVF);
+  if (MaxVF < 2) {
+    R.getORE()->emit([&]() {
+      return OptimizationRemarkMissed(SV_NAME, "SmallVF", I0)
+             << "Cannot SLP vectorize list: vectorization factor "
+             << "less than 2 is not supported";
+    });
+    return false;
+  }
+
   bool Changed = false;
   bool CandidateFound = false;
   int MinCost = SLPCostThreshold;

diff  --git a/llvm/test/Transforms/SLPVectorizer/AArch64/invalid_type.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/invalid_type.ll
new file mode 100644
index 000000000000..cfd2463f9d85
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/invalid_type.ll
@@ -0,0 +1,18 @@
+; RUN: opt < %s -slp-vectorizer -S -pass-remarks-missed=slp-vectorizer 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+; This test check that slp vectorizer is not trying to vectorize instructions already vectorized.
+; CHECK: remark: <unknown>:0:0: Cannot SLP vectorize list: type <16 x i8> is unsupported by vectorizer
+
+define void @vector() {
+  %load0 = tail call <16 x i8> @vector.load(<16 x i8> *undef, i32 1)
+  %load1 = tail call <16 x i8> @vector.load(<16 x i8> *undef, i32 2)
+  %add = add <16 x i8> %load1, %load0
+  tail call void @vector.store(<16 x i8> %add, <16 x i8>* undef, i32 1)
+  ret void
+}
+
+declare <16 x i8> @vector.load(<16 x i8>*, i32)
+declare void @vector.store(<16 x i8>, <16 x i8>*, i32)

diff  --git a/llvm/test/Transforms/SLPVectorizer/AArch64/scalable-vector.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/scalable-vector.ll
new file mode 100644
index 000000000000..70ce0dc4d7ba
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/scalable-vector.ll
@@ -0,0 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -slp-vectorizer -S | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+; This test check that we are not crashing or changing the code.
+
+define void @test() {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:    [[LOAD0:%.*]] = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
+; CHECK-NEXT:    [[LOAD1:%.*]] = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
+; CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 16 x i8> [[LOAD1]], [[LOAD0]]
+; CHECK-NEXT:    tail call void @llvm.masked.store.nxv16i8.p0nxv16i8(<vscale x 16 x i8> [[ADD]], <vscale x 16 x i8>* undef, i32 1, <vscale x 16 x i1> undef)
+; CHECK-NEXT:    ret void
+;
+  %load0 = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8> *undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
+  %load1 = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8> *undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
+  %add = add <vscale x 16 x i8> %load1, %load0
+  tail call void @llvm.masked.store.nxv16i8.p0nxv16i8(<vscale x 16 x i8> %add, <vscale x 16 x i8>* undef, i32 1, <vscale x 16 x i1> undef)
+  ret void
+}
+
+declare <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>*, i32 immarg, <vscale x 16 x i1>, <vscale x 16 x i8>)
+declare void @llvm.masked.store.nxv16i8.p0nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>*, i32 immarg, <vscale x 16 x i1>)