[PATCH] D44868: [SLP] Distinguish "demanded and shrinkable" from "demanded and not shrinkable" values when determining the minimum bitwidth

Haicheng Wu via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 26 12:17:19 PDT 2018


haicheng updated this revision to Diff 139833.
haicheng marked an inline comment as done.
haicheng added a comment.

Rebase the test case.


Repository:
  rL LLVM

https://reviews.llvm.org/D44868

Files:
  lib/Transforms/Vectorize/SLPVectorizer.cpp
  test/Transforms/SLPVectorizer/AArch64/ext-trunc.ll


Index: test/Transforms/SLPVectorizer/AArch64/ext-trunc.ll
===================================================================
--- test/Transforms/SLPVectorizer/AArch64/ext-trunc.ll
+++ test/Transforms/SLPVectorizer/AArch64/ext-trunc.ll
@@ -9,20 +9,24 @@
 ; Make sure types of sub and its sources are not extended.
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[Z0:%.*]] = zext <4 x i16> [[A:%.*]] to <4 x i64>
-; CHECK-NEXT:    [[Z1:%.*]] = zext <4 x i16> [[B:%.*]] to <4 x i64>
-; CHECK-NEXT:    [[SUB0:%.*]] = sub nsw <4 x i64> [[Z0]], [[Z1]]
-; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x i64> [[SUB0]], i32 0
-; CHECK-NEXT:    [[GEP0:%.*]] = getelementptr inbounds i64, i64* [[P:%.*]], i64 [[TMP0]]
+; CHECK-NEXT:    [[Z0:%.*]] = zext <4 x i16> [[A:%.*]] to <4 x i32>
+; CHECK-NEXT:    [[Z1:%.*]] = zext <4 x i16> [[B:%.*]] to <4 x i32>
+; CHECK-NEXT:    [[SUB0:%.*]] = sub nsw <4 x i32> [[Z0]], [[Z1]]
+; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x i32> [[SUB0]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = sext i32 [[TMP0]] to i64
+; CHECK-NEXT:    [[GEP0:%.*]] = getelementptr inbounds i64, i64* [[P:%.*]], i64 [[TMP1]]
 ; CHECK-NEXT:    [[LOAD0:%.*]] = load i64, i64* [[GEP0]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i64> [[SUB0]], i32 1
-; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> [[SUB0]], i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = sext i32 [[TMP2]] to i64
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 [[TMP3]]
 ; CHECK-NEXT:    [[LOAD1:%.*]] = load i64, i64* [[GEP1]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i64> [[SUB0]], i32 2
-; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i32> [[SUB0]], i32 2
+; CHECK-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP4]] to i64
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 [[TMP5]]
 ; CHECK-NEXT:    [[LOAD2:%.*]] = load i64, i64* [[GEP2]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i64> [[SUB0]], i32 3
-; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 [[TMP3]]
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i32> [[SUB0]], i32 3
+; CHECK-NEXT:    [[TMP7:%.*]] = sext i32 [[TMP6]] to i64
+; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 [[TMP7]]
 ; CHECK-NEXT:    [[LOAD3:%.*]] = load i64, i64* [[GEP3]], align 4
 ; CHECK-NEXT:    call void @foo(i64 [[LOAD0]], i64 [[LOAD1]], i64 [[LOAD2]], i64 [[LOAD3]])
 ; CHECK-NEXT:    ret void
Index: lib/Transforms/Vectorize/SLPVectorizer.cpp
===================================================================
--- lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -4301,24 +4301,9 @@
   // additional roots that require investigating in Roots.
   SmallVector<Value *, 32> ToDemote;
   SmallVector<Value *, 4> Roots;
-  for (auto *Root : TreeRoot) {
-    // Do not include top zext/sext/trunc operations to those to be demoted, it
-    // produces noise cast<vect>, trunc <vect>, exctract <vect>, cast <extract>
-    // sequence.
-    if (isa<Constant>(Root))
-      continue;
-    auto *I = dyn_cast<Instruction>(Root);
-    if (!I || !I->hasOneUse() || !Expr.count(I))
-      return;
-    if (isa<ZExtInst>(I) || isa<SExtInst>(I))
-      continue;
-    if (auto *TI = dyn_cast<TruncInst>(I)) {
-      Roots.push_back(TI->getOperand(0));
-      continue;
-    }
+  for (auto *Root : TreeRoot)
     if (!collectValuesToDemote(Root, Expr, ToDemote, Roots))
       return;
-  }
 
   // The maximum bit width required to represent all the values that can be
   // demoted without loss of precision. It would be safe to truncate the roots
@@ -4347,7 +4332,11 @@
   // We start by looking at each entry that can be demoted. We compute the
   // maximum bit width required to store the scalar by using ValueTracking to
   // compute the number of high-order bits we can truncate.
-  if (MaxBitWidth == DL->getTypeSizeInBits(TreeRoot[0]->getType())) {
+  if (MaxBitWidth == DL->getTypeSizeInBits(TreeRoot[0]->getType()) &&
+      llvm::all_of(TreeRoot, [](Value *R) {
+        assert(R->hasOneUse() && "Root should have only one use!");
+        return isa<GetElementPtrInst>(R->user_back());
+      })) {
     MaxBitWidth = 8u;
 
     // Determine if the sign bit of all the roots is known to be zero. If not,


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D44868.139833.patch
Type: text/x-patch
Size: 4530 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20180326/e6d0cbc9/attachment.bin>


More information about the llvm-commits mailing list