[PATCH] D149338: [SLP] Add TreeEntry into PostponedGathers if it depends on another previously postponed TreeEntry

Evgeniy via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 27 05:40:42 PDT 2023


ebrevnov created this revision.
Herald added subscribers: vporpo, hiraditya.
Herald added a project: All.
ebrevnov requested review of this revision.
Herald added subscribers: llvm-commits, pcwang-thead.
Herald added a project: LLVM.

The change https://reviews.llvm.org/D144958 introduced PostponedGathers into the SLP Vectorizer. As far as I understand, it allows temporary fake loads to be generated instead of real instructions, with the postponed entries being updated later, at the end of vectorization. However, during this update an instruction can be generated that uses a fake load which has already been replaced. That leads to an incorrect dependency and to the assertion failure "trying to erase instruction with users" in the BoUpSLP destructor.

This change avoids that scenario by adding to PostponedGathers every node that depends on a node already present in PostponedGathers.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D149338

Files:
  llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
  llvm/test/Transforms/SLPVectorizer/X86/postponed_gathers.ll


Index: llvm/test/Transforms/SLPVectorizer/X86/postponed_gathers.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/SLPVectorizer/X86/postponed_gathers.ll
@@ -0,0 +1,77 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt < %s -passes=slp-vectorizer -slp-threshold=-10 -mtriple=x86_64-unknown-linux-gnu -S | FileCheck %s
+
+define void @"foo"() {
+; CHECK-LABEL: define void @foo() {
+; CHECK-NEXT:  bci_0:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(1) null, align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    br label [[BCI_252:%.*]]
+; CHECK:       bci_252:
+; CHECK-NEXT:    [[TMP3:%.*]] = phi <2 x i32> [ zeroinitializer, [[BCI_0:%.*]] ], [ [[TMP16:%.*]], [[BCI_252_1:%.*]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = mul <2 x i32> zeroinitializer, [[TMP1]]
+; CHECK-NEXT:    [[TMP5:%.*]] = or <2 x i32> [[TMP3]], zeroinitializer
+; CHECK-NEXT:    [[TMP6:%.*]] = or <2 x i32> [[TMP2]], [[TMP4]]
+; CHECK-NEXT:    [[TMP7:%.*]] = or <2 x i32> [[TMP6]], zeroinitializer
+; CHECK-NEXT:    [[TMP8:%.*]] = or <2 x i32> [[TMP5]], [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = or <2 x i32> [[TMP8]], zeroinitializer
+; CHECK-NEXT:    br i1 false, label [[NOT_ZERO70:%.*]], label [[BCI_252_1]]
+; CHECK:       bci_252.1:
+; CHECK-NEXT:    [[TMP10:%.*]] = or <2 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT:    [[TMP11:%.*]] = or <2 x i32> [[TMP9]], zeroinitializer
+; CHECK-NEXT:    [[TMP12:%.*]] = mul <2 x i32> [[TMP10]], zeroinitializer
+; CHECK-NEXT:    [[TMP13:%.*]] = or <2 x i32> [[TMP2]], [[TMP12]]
+; CHECK-NEXT:    [[TMP14:%.*]] = or <2 x i32> [[TMP13]], zeroinitializer
+; CHECK-NEXT:    [[TMP15:%.*]] = or <2 x i32> [[TMP11]], [[TMP14]]
+; CHECK-NEXT:    [[TMP16]] = or <2 x i32> [[TMP15]], zeroinitializer
+; CHECK-NEXT:    br label [[BCI_252]]
+; CHECK:       not_zero70:
+; CHECK-NEXT:    [[TMP17:%.*]] = phi <2 x i32> [ [[TMP9]], [[BCI_252]] ]
+; CHECK-NEXT:    ret void
+;
+bci_0:
+  %0 = load i32, ptr addrspace(1) null, align 8
+  br label %bci_252
+
+bci_252:
+  %1 = phi i32 [ 0, %bci_0 ], [ %20, %bci_252.1 ]
+  %2 = phi i32 [ 0, %bci_0 ], [ %15, %bci_252.1 ]
+  %3 = mul i32 %0, 0
+  %4 = or i32 %0, %3
+  %5 = or i32 %4, 0
+  %.neg91.neg = or i32 %2, 0
+  %.neg446 = or i32 %.neg91.neg, %5
+  %6 = or i32 %.neg446, 0
+  %7 = mul i32 0, 0
+  %8 = or i32 %0, %7
+  %9 = or i32 %8, 0
+  %.neg91.1.neg = or i32 %1, 0
+  %.neg448 = or i32 %.neg91.1.neg, %9
+  %10 = or i32 %.neg448, 0
+  br i1 false, label %not_zero70, label %bci_252.1
+
+bci_252.1:
+  %11 = or i32 %0, 0
+  %12 = mul i32 %11, 0
+  %13 = or i32 %0, %12
+  %14 = or i32 %13, 0
+  %.neg91.neg.1 = or i32 %6, 0
+  %.neg446.1 = or i32 %.neg91.neg.1, %14
+  %15 = or i32 %.neg446.1, 0
+  %16 = or i32 %0, 0
+  %17 = mul i32 %16, 0
+  %18 = or i32 %0, %17
+  %19 = or i32 %18, 0
+  %.neg91.1.neg.1 = or i32 %10, 0
+  %.neg448.1 = or i32 %.neg91.1.neg.1, %19
+  %20 = or i32 %.neg448.1, 0
+  br label %bci_252
+
+not_zero70:
+  %.lcssa546 = phi i32 [ %6, %bci_252 ]
+  %.lcssa545 = phi i32 [ %10, %bci_252 ]
+  ret void
+}
+
+
Index: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -9590,6 +9590,10 @@
         // process to keep correct order.
         return Delayed;
       }
+      if (any_of(Entries,
+                 [&](const TreeEntry *E) { return PostponedGathers.count(E); }))
+        PostponedGathers.insert(E);
+
       assert((Entries.size() == 1 || Entries.size() == 2) &&
              "Expected shuffle of 1 or 2 entries.");
       if (*GatherShuffle == TTI::SK_PermuteSingleSrc &&


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D149338.517527.patch
Type: text/x-patch
Size: 3954 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230427/30ff816c/attachment.bin>


More information about the llvm-commits mailing list