[llvm] 7152bf3 - [SLP]Do not create new vector node if scalars fully overlap with the existing one

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 28 07:02:37 PDT 2024


Author: Alexey Bataev
Date: 2024-10-28T06:59:41-07:00
New Revision: 7152bf3bc805b8d9b1873058ab0a084d7b6079d6

URL: https://github.com/llvm/llvm-project/commit/7152bf3bc805b8d9b1873058ab0a084d7b6079d6
DIFF: https://github.com/llvm/llvm-project/commit/7152bf3bc805b8d9b1873058ab0a084d7b6079d6.diff

LOG: [SLP]Do not create new vector node if scalars fully overlap with the existing one

If the list of scalars is already vectorized as part of the same vector node,
there is no need to generate the vector node again; it will be handled as part
of overlapping matching.

Fixes #113810

Added: 
    llvm/test/Transforms/SLPVectorizer/full-overlap-non-schedulable.ll

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 2afd02dae3a8b8..268546fe99e138 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -7947,8 +7947,13 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
           Nodes.insert(E);
         SmallPtrSet<Value *, 8> Values(VL.begin(), VL.end());
         if (any_of(Nodes, [&](const TreeEntry *E) {
-              return all_of(E->Scalars,
-                            [&](Value *V) { return Values.contains(V); });
+              if (all_of(E->Scalars,
+                         [&](Value *V) { return Values.contains(V); }))
+                return true;
+              SmallPtrSet<Value *, 8> EValues(E->Scalars.begin(),
+                                              E->Scalars.end());
+              return (
+                  all_of(VL, [&](Value *V) { return EValues.contains(V); }));
             })) {
           LLVM_DEBUG(dbgs() << "SLP: Gathering due to full overlap.\n");
           if (TryToFindDuplicates(S))

diff  --git a/llvm/test/Transforms/SLPVectorizer/full-overlap-non-schedulable.ll b/llvm/test/Transforms/SLPVectorizer/full-overlap-non-schedulable.ll
new file mode 100644
index 00000000000000..dbd91199c24ecd
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/full-overlap-non-schedulable.ll
@@ -0,0 +1,93 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=slp-vectorizer < %s | FileCheck %s
+
+define void @test(ptr %p1, ptr %0, i32 %1, i1 %c1, ptr %p2) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: ptr [[P1:%.*]], ptr [[TMP0:%.*]], i32 [[TMP1:%.*]], i1 [[C1:%.*]], ptr [[P2:%.*]]) {
+; CHECK-NEXT:  [[TOP:.*:]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 8
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x ptr> [[TMP3]], <4 x ptr> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, <4 x ptr> [[TMP4]], <4 x i64> <i64 8, i64 12, i64 16, i64 20>
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x ptr> [[TMP5]], i32 2
+; CHECK-NEXT:    br i1 [[C1]], label %[[L42:.*]], label %[[L41:.*]]
+; CHECK:       [[L41]]:
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq <4 x ptr> [[TMP5]], zeroinitializer
+; CHECK-NEXT:    [[TMP8:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP9:%.*]] = select <4 x i1> [[TMP7]], <4 x i32> zeroinitializer, <4 x i32> [[TMP8]]
+; CHECK-NEXT:    br label %[[L112:.*]]
+; CHECK:       [[L42]]:
+; CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[TMP2]], align 4
+; CHECK-NEXT:    [[DOTNOT280:%.*]] = icmp eq i32 [[TMP10]], 0
+; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, i32 [[TMP1]], i32 2
+; CHECK-NEXT:    br i1 [[DOTNOT280]], label %[[L112]], label %[[L47:.*]]
+; CHECK:       [[L47]]:
+; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x ptr> [[TMP5]], i32 1
+; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
+; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <4 x ptr> [[TMP5]], <4 x ptr> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT:    [[TMP15:%.*]] = icmp eq <2 x ptr> [[TMP14]], zeroinitializer
+; CHECK-NEXT:    [[TMP16:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4
+; CHECK-NEXT:    [[TMP17:%.*]] = select <2 x i1> [[TMP15]], <2 x i32> zeroinitializer, <2 x i32> [[TMP16]]
+; CHECK-NEXT:    [[TMP18:%.*]] = insertelement <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>, i32 [[TMP13]], i32 1
+; CHECK-NEXT:    [[TMP19:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> [[TMP18]], <2 x i32> [[TMP17]], i64 2)
+; CHECK-NEXT:    br label %[[L112]]
+; CHECK:       [[L112]]:
+; CHECK-NEXT:    [[TMP20:%.*]] = phi <4 x i32> [ [[TMP19]], %[[L47]] ], [ [[TMP9]], %[[L41]] ], [ [[TMP11]], %[[L42]] ]
+; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <4 x i32> [[TMP20]], i32 0
+; CHECK-NEXT:    store i32 [[TMP21]], ptr [[P2]], align 4
+; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <4 x i32> [[TMP20]], i32 1
+; CHECK-NEXT:    store i32 [[TMP22]], ptr [[P1]], align 4
+; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <4 x i32> [[TMP20]], i32 2
+; CHECK-NEXT:    store i32 [[TMP23]], ptr [[P2]], align 4
+; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <4 x i32> [[TMP20]], i32 3
+; CHECK-NEXT:    store i32 [[TMP24]], ptr [[P1]], align 4
+; CHECK-NEXT:    ret void
+;
+top:
+  %2 = getelementptr i8, ptr %0, i64 8
+  %3 = getelementptr i8, ptr %0, i64 12
+  %4 = getelementptr i8, ptr %0, i64 16
+  %5 = getelementptr i8, ptr %0, i64 20
+  br i1 %c1, label %L42, label %L41
+
+L41:
+  %.not276 = icmp eq ptr %2, null
+  %6 = load i32, ptr %2, align 4
+  %7 = select i1 %.not276, i32 0, i32 %6
+  %.not277 = icmp eq ptr %3, null
+  %8 = load i32, ptr %3, align 4
+  %9 = select i1 %.not277, i32 0, i32 %8
+  %.not278 = icmp eq ptr %4, null
+  %10 = load i32, ptr %4, align 4
+  %11 = select i1 %.not278, i32 0, i32 %10
+  %.not279 = icmp eq ptr %5, null
+  %12 = load i32, ptr %5, align 4
+  %13 = select i1 %.not279, i32 0, i32 %12
+  br label %L112
+
+L42:
+  %14 = load i32, ptr %2, align 4
+  %.not280 = icmp eq i32 %14, 0
+  br i1 %.not280, label %L112, label %L47
+
+L47:
+  %15 = load i32, ptr %3, align 4
+  %.not282 = icmp eq ptr %4, null
+  %16 = load i32, ptr %4, align 4
+  %17 = select i1 %.not282, i32 0, i32 %16
+  %.not283 = icmp eq ptr %5, null
+  %18 = load i32, ptr %5, align 4
+  %19 = select i1 %.not283, i32 0, i32 %18
+  br label %L112
+
+L112:
+  %value_phi13336 = phi i32 [ %19, %L47 ], [ %13, %L41 ], [ 0, %L42 ]
+  %value_phi12335 = phi i32 [ %17, %L47 ], [ %11, %L41 ], [ %1, %L42 ]
+  %value_phi11334 = phi i32 [ %15, %L47 ], [ %9, %L41 ], [ 0, %L42 ]
+  %value_phi10333 = phi i32 [ 0, %L47 ], [ %7, %L41 ], [ 0, %L42 ]
+  store i32 %value_phi10333, ptr %p2, align 4
+  store i32 %value_phi11334, ptr %p1, align 4
+  store i32 %value_phi12335, ptr %p2, align 4
+  store i32 %value_phi13336, ptr %p1, align 4
+  ret void
+}


        


More information about the llvm-commits mailing list