[PATCH] D138819: [SLP]Fix PR59230: Use actual vector factor when sorting entries.

Alexey Bataev via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Mon Nov 28 10:08:29 PST 2022


ABataev updated this revision to Diff 478276.
ABataev added a comment.

Fixed another use of Scalars.size(), replacing it with getVectorFactor().


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D138819/new/

https://reviews.llvm.org/D138819

Files:
  llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
  llvm/test/Transforms/SLPVectorizer/X86/reorder-vf-to-resize.ll


Index: llvm/test/Transforms/SLPVectorizer/X86/reorder-vf-to-resize.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/SLPVectorizer/X86/reorder-vf-to-resize.ll
@@ -0,0 +1,43 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=slp-vectorizer -mcpu=skx -mtriple=x86_64-unknown-linux-gnu -S < %s | FileCheck %s
+
+define void @main(ptr %0) {
+; CHECK-LABEL: @main(
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[TMP0:%.*]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = fsub <2 x double> zeroinitializer, [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = fadd <2 x double> zeroinitializer, [[TMP2]]
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP4]], <4 x i32> <i32 1, i32 2, i32 1, i32 2>
+; CHECK-NEXT:    [[TMP6:%.*]] = fmul <4 x double> [[TMP5]], zeroinitializer
+; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x double> @llvm.fabs.v4f64(<4 x double> [[TMP6]])
+; CHECK-NEXT:    [[TMP8:%.*]] = fcmp oeq <4 x double> [[TMP7]], zeroinitializer
+; CHECK-NEXT:    [[TMP9:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP8]])
+; CHECK-NEXT:    [[TMP10:%.*]] = select i1 [[TMP9]], double 0.000000e+00, double 0.000000e+00
+; CHECK-NEXT:    store double [[TMP10]], ptr null, align 8
+; CHECK-NEXT:    ret void
+;
+  %.unpack = load double, ptr %0, align 8
+  %.elt1 = getelementptr { double, double }, ptr %0, i64 0, i32 1
+  %.unpack2 = load double, ptr %.elt1, align 8
+  %2 = fadd double %.unpack, 0.000000e+00
+  %3 = fsub double 0.000000e+00, %.unpack2
+  %4 = fmul double %2, 0.000000e+00
+  %5 = call double @llvm.fabs.f64(double %4)
+  %6 = fmul double %3, 0.000000e+00
+  %7 = call double @llvm.fabs.f64(double %6)
+  %8 = fmul double %3, 0.000000e+00
+  %9 = call double @llvm.fabs.f64(double %8)
+  %10 = fmul double %2, 0.000000e+00
+  %11 = call double @llvm.fabs.f64(double %10)
+  %12 = fcmp oeq double %5, 0.000000e+00
+  %13 = fcmp oeq double %7, 0.000000e+00
+  %14 = or i1 %12, %13
+  %15 = fcmp oeq double %11, 0.000000e+00
+  %16 = or i1 %14, %15
+  %17 = fcmp oeq double %9, 0.000000e+00
+  %18 = or i1 %16, %17
+  %19 = select i1 %18, double 0.000000e+00, double 0.000000e+00
+  store double %19, ptr null, align 8
+  ret void
+}
+
+declare double @llvm.fabs.f64(double)
Index: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -4083,7 +4083,7 @@
     SmallVector<OrdersType, 1> ExternalUserReorderIndices =
         findExternalStoreUsersReorderIndices(TE.get());
     if (!ExternalUserReorderIndices.empty()) {
-      VFToOrderedEntries[TE->Scalars.size()].insert(TE.get());
+      VFToOrderedEntries[TE->getVectorFactor()].insert(TE.get());
       ExternalUserReorderMap.try_emplace(TE.get(),
                                          std::move(ExternalUserReorderIndices));
     }
@@ -4103,7 +4103,7 @@
           OpcodeMask.set(Lane);
       // If this pattern is supported by the target then we consider the order.
       if (TTIRef.isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask)) {
-        VFToOrderedEntries[TE->Scalars.size()].insert(TE.get());
+        VFToOrderedEntries[TE->getVectorFactor()].insert(TE.get());
         AltShufflesToOrders.try_emplace(TE.get(), OrdersType());
       }
       // TODO: Check the reverse order too.
@@ -4141,7 +4141,7 @@
   });
 
   // Reorder the graph nodes according to their vectorization factor.
-  for (unsigned VF = VectorizableTree.front()->Scalars.size(); VF > 1;
+  for (unsigned VF = VectorizableTree.front()->getVectorFactor(); VF > 1;
        VF /= 2) {
     auto It = VFToOrderedEntries.find(VF);
     if (It == VFToOrderedEntries.end())


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D138819.478276.patch
Type: text/x-patch
Size: 3839 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20221128/b400cb94/attachment.bin>


More information about the llvm-commits mailing list