[llvm] fd72733 - [SLP] Do not ignore ordering for root node when it has in-tree uses.
Valery N Dmitriev via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 10 10:13:13 PST 2023
Author: Valery N Dmitriev
Date: 2023-01-10T10:12:51-08:00
New Revision: fd7273359a33647b79df93d82a12a3ac6d589348
URL: https://github.com/llvm/llvm-project/commit/fd7273359a33647b79df93d82a12a3ac6d589348
DIFF: https://github.com/llvm/llvm-project/commit/fd7273359a33647b79df93d82a12a3ac6d589348.diff
LOG: [SLP] Do not ignore ordering for root node when it has in-tree uses.
When rooted with PHIs, a vectorization tree may have another node with PHIs
which have the roots as their operands. We cannot ignore the ordering
information for the root in such a case.
Differential Revision: https://reviews.llvm.org/D141309
Added:
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Transforms/SLPVectorizer/X86/reorder-phi-operand.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 49ad18cd61826..746a2488c6996 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1037,6 +1037,12 @@ class BoUpSLP {
return VectorizableTree.front()->getMainOp();
}
+ /// Returns whether the root node has in-tree uses, i.e. the tree is non-empty and the front node's UserTreeIndices is non-empty.
+ bool doesRootHaveInTreeUses() const {
+ return !VectorizableTree.empty() &&
+ !VectorizableTree.front()->UserTreeIndices.empty();
+ }
+
/// Builds external uses of the vectorized scalars, i.e. the list of
/// vectorized scalars to be extracted, their lanes and their scalar users. \p
/// ExternallyUsedValues contains additional list of external uses to handle
@@ -11487,7 +11493,9 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
if (R.isTreeTinyAndNotFullyVectorizable())
continue;
R.reorderTopToBottom();
- R.reorderBottomToTop(!isa<InsertElementInst>(Ops.front()));
+ R.reorderBottomToTop(
+ /*IgnoreReorder=*/!isa<InsertElementInst>(Ops.front()) &&
+ !R.doesRootHaveInTreeUses());
R.buildExternalUses();
R.computeMinimumValueSizes();
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder-phi-operand.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder-phi-operand.ll
index 66c3f69306af8..787bd39759dc7 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reorder-phi-operand.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder-phi-operand.ll
@@ -82,3 +82,99 @@ return:
store float %add2.i.sink, ptr %d.i.i.i, align 4
ret void
}
+
+; Here PHIs have mutual uses of each other. Reordering one requires reordering the other.
+define void @test2(ptr %p1, ptr %p2) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A1:%.*]] = getelementptr inbounds ptr, ptr [[P1:%.*]], i32 0
+; CHECK-NEXT: [[B1:%.*]] = getelementptr inbounds ptr, ptr [[P1]], i32 4
+; CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A1]], align 8
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[B1]], align 8
+; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <2 x double> <double 1.000000e+01, double 1.000000e-01>, [[TMP0]]
+; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <2 x double> <double 1.100000e+01, double 1.100000e+00>, [[TMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = fsub fast <2 x double> <double 1.000000e+01, double 1.000000e-01>, [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = fmul fast <2 x double> [[TMP4]], <double 2.000000e+00, double 2.100000e+00>
+; CHECK-NEXT: [[TMP6:%.*]] = fadd fast <2 x double> [[TMP2]], <double 1.000000e+01, double 1.000000e-01>
+; CHECK-NEXT: [[TMP7:%.*]] = fadd fast <2 x double> [[TMP6]], [[TMP5]]
+; CHECK-NEXT: [[TMP8:%.*]] = fmul fast <2 x double> [[TMP7]], <double 3.000000e+00, double 3.100000e+00>
+; CHECK-NEXT: br label [[BB1:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: [[TMP9:%.*]] = fadd fast <2 x double> <double 4.000000e+00, double 4.100000e+00>, [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = fadd fast <2 x double> [[TMP9]], <double 2.000000e+00, double 2.100000e+00>
+; CHECK-NEXT: [[TMP11:%.*]] = fadd fast <2 x double> [[TMP10]], <double 3.000000e+00, double 3.100000e+00>
+; CHECK-NEXT: br label [[BB2:%.*]]
+; CHECK: bb2:
+; CHECK-NEXT: [[TMP12:%.*]] = phi <2 x double> [ [[TMP11]], [[BB1]] ], [ [[TMP16:%.*]], [[BB6:%.*]] ]
+; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x double> [[TMP12]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT: [[X0:%.*]] = getelementptr inbounds double, ptr [[P2:%.*]], i32 0
+; CHECK-NEXT: [[TMP14:%.*]] = load <2 x double>, ptr [[X0]], align 8
+; CHECK-NEXT: br i1 poison, label [[BB3:%.*]], label [[BB6]]
+; CHECK: bb3:
+; CHECK-NEXT: br i1 poison, label [[BB5:%.*]], label [[BB4:%.*]]
+; CHECK: bb4:
+; CHECK-NEXT: br label [[BB6]]
+; CHECK: bb5:
+; CHECK-NEXT: br label [[BB6]]
+; CHECK: bb6:
+; CHECK-NEXT: [[TMP15:%.*]] = phi <2 x double> [ [[TMP13]], [[BB2]] ], [ [[TMP14]], [[BB4]] ], [ [[TMP14]], [[BB5]] ]
+; CHECK-NEXT: [[TMP16]] = shufflevector <2 x double> [[TMP15]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT: br label [[BB2]]
+;
+entry:
+ %a1 = getelementptr inbounds ptr, ptr %p1, i32 0
+ %a2 = getelementptr inbounds ptr, ptr %p1, i32 1
+ %b2 = getelementptr inbounds ptr, ptr %p1, i32 5
+ %lda1 = load double, ptr %a1, align 8
+ %lda2 = load double, ptr %a2, align 8
+ %b1 = getelementptr inbounds ptr, ptr %p1, i32 4
+ %ldb1 = load double, ptr %b1, align 8
+ %ldb2 = load double, ptr %b2, align 8
+ %mul0.1 = fmul fast double 0.1, %lda2
+ %mul1.1 = fmul fast double 1.1, %ldb2
+ %sub0.1 = fsub fast double 0.1, %mul1.1
+ %mul2.1 = fmul fast double %sub0.1, 2.1
+ %add0.1 = fadd fast double %mul0.1, 0.1
+ %add1.1 = fadd fast double %add0.1, %mul2.1
+ %mul3.1 = fmul fast double %add1.1, 3.1
+ %mul0.0 = fmul fast double 10.0, %lda1
+ %mul1.0 = fmul fast double 11.0, %ldb1
+ %sub0.0 = fsub fast double 10.0, %mul1.0
+ %mul2.0 = fmul fast double %sub0.0, 2.0
+ %add0.0 = fadd fast double %mul0.0, 10.0
+ %add1.0 = fadd fast double %add0.0, %mul2.0
+ %mul3.0 = fmul fast double 3.0, %add1.0
+ br label %bb1
+
+bb1:
+ %add4.1 = fadd fast double 4.1, %mul3.1
+ %add2.1 = fadd fast double %add4.1, 2.1
+ %add3.1 = fadd fast double %add2.1, 3.1
+ %add4.0 = fadd fast double 4.0, %mul3.0
+ %add2.0 = fadd fast double %add4.0, 2.0
+ %add3.0 = fadd fast double %add2.0, 3.0
+ br label %bb2
+
+bb2: ; preds = %bb6, %bb1
+ %phi0.0 = phi double [ %add3.1, %bb1 ], [ %phi1.0, %bb6 ]
+ %phi0.1 = phi double [ %add3.0, %bb1 ], [ %phi1.1, %bb6 ]
+ %x0 = getelementptr inbounds double, ptr %p2, i32 0
+ %i0 = load double, ptr %x0, align 8
+ %x1 = getelementptr inbounds double, ptr %p2, i32 1
+ %i1 = load double, ptr %x1, align 8
+ br i1 poison, label %bb3, label %bb6
+
+bb3: ; preds = %bb2
+ br i1 poison, label %bb5, label %bb4
+
+bb4: ; preds = %bb3
+ br label %bb6
+
+bb5: ; preds = %bb3
+ br label %bb6
+
+bb6: ; preds = %bb5, %bb4, %bb2
+ %phi1.0 = phi double [ %phi0.0, %bb2 ], [ %i0, %bb4 ], [ %i0, %bb5 ]
+ %phi1.1 = phi double [ %phi0.1, %bb2 ], [ %i1, %bb4 ], [ %i1, %bb5 ]
+ br label %bb2
+}
More information about the llvm-commits
mailing list