[llvm] [SLP]Do not require external uses for roots and single use for other instructions in computeMinimumValueSizes. (PR #72679)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 17 09:09:04 PST 2023
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Alexey Bataev (alexey-bataev)
<details>
<summary>Changes</summary>
After the changes that do not require support from InstCombine, we can
drop some extra requirements for the values to be demoted. There is no
need to check for external uses for roots or for a single use for other
instructions; it is enough to check that there is no non-vectorized
insertelement instruction, which may require widening.
---
Full diff: https://github.com/llvm/llvm-project/pull/72679.diff
2 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (+6-8)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/root-trunc-extract-reuse.ll (+10-12)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index bff3e49b14bc6bc..42e4540eb8ecbd7 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -13078,10 +13078,13 @@ bool BoUpSLP::collectValuesToDemote(Value *V,
return true;
}
- // If the value is not a vectorized instruction in the expression with only
- // one use, it cannot be demoted.
+ // If the value is not a vectorized instruction in the expression and not the
+ // used by the insertelement instruction, it cannot be demoted.
auto *I = dyn_cast<Instruction>(V);
- if (!I || !I->hasOneUse() || !getTreeEntry(I) || !Visited.insert(I).second)
+ if (!I || !getTreeEntry(I) || !Visited.insert(I).second ||
+ all_of(I->users(), [&](User *U) {
+ return isa<InsertElementInst>(U) && !getTreeEntry(U);
+ }))
return false;
switch (I->getOpcode()) {
@@ -13140,11 +13143,6 @@ bool BoUpSLP::collectValuesToDemote(Value *V,
}
void BoUpSLP::computeMinimumValueSizes() {
- // If there are no external uses, the expression tree must be rooted by a
- // store. We can't demote in-memory values, so there is nothing to do here.
- if (ExternalUses.empty())
- return;
-
// We only attempt to truncate integer expressions.
auto &TreeRoot = VectorizableTree[0]->Scalars;
auto *TreeRootIT = dyn_cast<IntegerType>(TreeRoot[0]->getType());
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/root-trunc-extract-reuse.ll b/llvm/test/Transforms/SLPVectorizer/X86/root-trunc-extract-reuse.ll
index f48528e502b8cf1..af46b4f576234b2 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/root-trunc-extract-reuse.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/root-trunc-extract-reuse.ll
@@ -8,20 +8,18 @@ define i1 @test() {
; CHECK: then:
; CHECK-NEXT: br label [[ELSE]]
; CHECK: else:
-; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i1> [ zeroinitializer, [[THEN]] ], [ zeroinitializer, [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = zext <2 x i1> [[TMP0]] to <2 x i32>
-; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
-; CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
-; CHECK-NEXT: [[BF_CAST162:%.*]] = and i32 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> zeroinitializer, <2 x i32> [[TMP1]], <2 x i32> <i32 3, i32 1>
-; CHECK-NEXT: [[T13:%.*]] = and <2 x i32> [[TMP4]], zeroinitializer
+; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ zeroinitializer, [[THEN]] ], [ zeroinitializer, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[TMP0]], i32 0
+; CHECK-NEXT: [[BF_CAST162:%.*]] = and i32 [[TMP1]], 0
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> zeroinitializer, <2 x i32> [[TMP0]], <2 x i32> <i32 3, i32 1>
+; CHECK-NEXT: [[T13:%.*]] = and <2 x i32> [[TMP2]], zeroinitializer
; CHECK-NEXT: br label [[ELSE1:%.*]]
; CHECK: else1:
-; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[T13]], <2 x i32> poison, <2 x i32> <i32 poison, i32 0>
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[BF_CAST162]], i32 0
-; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt <2 x i32> [[TMP6]], zeroinitializer
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1
-; CHECK-NEXT: ret i1 [[TMP8]]
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[T13]], <2 x i32> poison, <2 x i32> <i32 poison, i32 0>
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[BF_CAST162]], i32 0
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt <2 x i32> [[TMP4]], zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1
+; CHECK-NEXT: ret i1 [[TMP6]]
;
entry:
br i1 false, label %then, label %else
``````````
</details>
https://github.com/llvm/llvm-project/pull/72679
More information about the llvm-commits
mailing list