[PATCH] D138819: [SLP]Fix PR59230: Use actual vector factor when sorting entries.
Alexey Bataev via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 29 06:47:56 PST 2022
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG0cc15050a423: [SLP]Fix PR59230: Use actual vector factor when sorting entries. (authored by ABataev).
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D138819/new/
https://reviews.llvm.org/D138819
Files:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Transforms/SLPVectorizer/X86/reorder-vf-to-resize.ll
Index: llvm/test/Transforms/SLPVectorizer/X86/reorder-vf-to-resize.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/SLPVectorizer/X86/reorder-vf-to-resize.ll
@@ -0,0 +1,43 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=slp-vectorizer -mcpu=skx -mtriple=x86_64-unknown-linux-gnu -S < %s | FileCheck %s
+
+define void @main(ptr %0) {
+; CHECK-LABEL: @main(
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[TMP0:%.*]], align 8
+; CHECK-NEXT: [[TMP3:%.*]] = fsub <2 x double> zeroinitializer, [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> zeroinitializer, [[TMP2]]
+; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP4]], <4 x i32> <i32 1, i32 2, i32 1, i32 2>
+; CHECK-NEXT: [[TMP6:%.*]] = fmul <4 x double> [[TMP5]], zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = call <4 x double> @llvm.fabs.v4f64(<4 x double> [[TMP6]])
+; CHECK-NEXT: [[TMP8:%.*]] = fcmp oeq <4 x double> [[TMP7]], zeroinitializer
+; CHECK-NEXT: [[TMP9:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP8]])
+; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], double 0.000000e+00, double 0.000000e+00
+; CHECK-NEXT: store double [[TMP10]], ptr null, align 8
+; CHECK-NEXT: ret void
+;
+ %.unpack = load double, ptr %0, align 8
+ %.elt1 = getelementptr { double, double }, ptr %0, i64 0, i32 1
+ %.unpack2 = load double, ptr %.elt1, align 8
+ %2 = fadd double %.unpack, 0.000000e+00
+ %3 = fsub double 0.000000e+00, %.unpack2
+ %4 = fmul double %2, 0.000000e+00
+ %5 = call double @llvm.fabs.f64(double %4)
+ %6 = fmul double %3, 0.000000e+00
+ %7 = call double @llvm.fabs.f64(double %6)
+ %8 = fmul double %3, 0.000000e+00
+ %9 = call double @llvm.fabs.f64(double %8)
+ %10 = fmul double %2, 0.000000e+00
+ %11 = call double @llvm.fabs.f64(double %10)
+ %12 = fcmp oeq double %5, 0.000000e+00
+ %13 = fcmp oeq double %7, 0.000000e+00
+ %14 = or i1 %12, %13
+ %15 = fcmp oeq double %11, 0.000000e+00
+ %16 = or i1 %14, %15
+ %17 = fcmp oeq double %9, 0.000000e+00
+ %18 = or i1 %16, %17
+ %19 = select i1 %18, double 0.000000e+00, double 0.000000e+00
+ store double %19, ptr null, align 8
+ ret void
+}
+
+declare double @llvm.fabs.f64(double)
Index: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -4083,7 +4083,7 @@
SmallVector<OrdersType, 1> ExternalUserReorderIndices =
findExternalStoreUsersReorderIndices(TE.get());
if (!ExternalUserReorderIndices.empty()) {
- VFToOrderedEntries[TE->Scalars.size()].insert(TE.get());
+ VFToOrderedEntries[TE->getVectorFactor()].insert(TE.get());
ExternalUserReorderMap.try_emplace(TE.get(),
std::move(ExternalUserReorderIndices));
}
@@ -4103,7 +4103,7 @@
OpcodeMask.set(Lane);
// If this pattern is supported by the target then we consider the order.
if (TTIRef.isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask)) {
- VFToOrderedEntries[TE->Scalars.size()].insert(TE.get());
+ VFToOrderedEntries[TE->getVectorFactor()].insert(TE.get());
AltShufflesToOrders.try_emplace(TE.get(), OrdersType());
}
// TODO: Check the reverse order too.
@@ -4141,7 +4141,7 @@
});
// Reorder the graph nodes according to their vectorization factor.
- for (unsigned VF = VectorizableTree.front()->Scalars.size(); VF > 1;
+ for (unsigned VF = VectorizableTree.front()->getVectorFactor(); VF > 1;
VF /= 2) {
auto It = VFToOrderedEntries.find(VF);
if (It == VFToOrderedEntries.end())
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D138819.478571.patch
Type: text/x-patch
Size: 3839 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20221129/e731b327/attachment.bin>
More information about the llvm-commits mailing list