[llvm] ab124bb - [SLP]Fix PR49898: Infinite loop in SLP vectorizer.

Thu Apr 8 14:19:13 PDT 2021

Author: Alexey Bataev
Date: 2021-04-08T14:18:06-07:00
New Revision: ab124bbe2a7c59cf23da5728dc239aba6f1efabe

URL: https://github.com/llvm/llvm-project/commit/ab124bbe2a7c59cf23da5728dc239aba6f1efabe
DIFF: https://github.com/llvm/llvm-project/commit/ab124bbe2a7c59cf23da5728dc239aba6f1efabe.diff

LOG: [SLP]Fix PR49898: Infinite loop in SLP vectorizer.

We should not retry the search for a consecutive store chain if that
search was already attempted before.

Differential Revision: https://reviews.llvm.org/D100131

Added: 
    llvm/test/Transforms/SLPVectorizer/X86/several_store_chains.ll

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 76bfdcc97e639..431d0d2e4d669 100644

--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6214,6 +6214,9 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
         break;
   }
 
+  // Tracks if we tried to vectorize stores starting from the given tail
+  // already.
+  SmallBitVector TriedTails(E, false);
   // For stores that start but don't end a link in the chain:
   for (int Cnt = E; Cnt > 0; --Cnt) {
     int I = Cnt - 1;
@@ -6230,8 +6233,9 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
         // Mark the new end in the chain and go back, if required. It might be
         // required if the original stores come in reversed order, for example.
         if (ConsecutiveChain[I].first != E &&
-            Tails.test(ConsecutiveChain[I].first) &&
+            Tails.test(ConsecutiveChain[I].first) && !TriedTails.test(I) &&
             !VectorizedStores.count(Stores[ConsecutiveChain[I].first])) {
+          TriedTails.set(I);
           Tails.reset(ConsecutiveChain[I].first);
           if (Cnt < ConsecutiveChain[I].first + 2)
             Cnt = ConsecutiveChain[I].first + 2;

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/several_store_chains.ll b/llvm/test/Transforms/SLPVectorizer/X86/several_store_chains.ll
new file mode 100644
index 0000000000000..4dfe694c80d52
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/several_store_chains.ll
@@ -0,0 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -S -mtriple=x86_64-unknown -slp-vectorizer | FileCheck %s
+
+; Test for PR49898.
+define void @fusion_1506(i8* %temp_buf1) local_unnamed_addr {
+; CHECK-LABEL: @fusion_1506(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[TEMP_BUF1:%.*]], i64 5621415936
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[TEMP_BUF1]], i64 7278166016
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[TEMP_BUF1]], i64 5097127936
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to float*
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8* [[TMP1]] to float*
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, float* [[TMP4]], i64 undef
+; CHECK-NEXT:    store float undef, float* [[TMP5]], align 16
+; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i8* [[TMP0]] to float*
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, float* [[TMP6]], i64 undef
+; CHECK-NEXT:    store float undef, float* [[TMP7]], align 16
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, float* [[TMP6]], i64 undef
+; CHECK-NEXT:    store float undef, float* [[TMP8]], align 4
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, float* [[TMP3]], i64 undef
+; CHECK-NEXT:    store float undef, float* [[TMP9]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = getelementptr inbounds i8, i8* %temp_buf1, i64 5621415936
+  %1 = getelementptr inbounds i8, i8* %temp_buf1, i64 7278166016
+  %2 = getelementptr inbounds i8, i8* %temp_buf1, i64 5097127936
+  %3 = bitcast i8* %2 to float*
+  %4 = bitcast i8* %1 to float*
+  %5 = getelementptr inbounds float, float* %4, i64 undef
+  store float undef, float* %5, align 16
+  %6 = bitcast i8* %0 to float*
+  %7 = getelementptr inbounds float, float* %6, i64 undef
+  store float undef, float* %7, align 16
+  %8 = getelementptr inbounds float, float* %6, i64 undef
+  store float undef, float* %8, align 4
+  %9 = getelementptr inbounds float, float* %3, i64 undef
+  store float undef, float* %9, align 4
+  ret void
+}