[llvm] 76782a6 - [SLP]Use original vector if need to shuffle truncated root.

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 16 10:41:37 PDT 2022


Author: Alexey Bataev
Date: 2022-06-16T10:41:18-07:00
New Revision: 76782a65ee86b865eb748e0417eca28fa50a0f5e

URL: https://github.com/llvm/llvm-project/commit/76782a65ee86b865eb748e0417eca28fa50a0f5e
DIFF: https://github.com/llvm/llvm-project/commit/76782a65ee86b865eb748e0417eca28fa50a0f5e.diff

LOG: [SLP]Use original vector if need to shuffle truncated root.

If the root scalar is mapped to the smallest bit width, the vector is
truncated and the types of the original buildvector and the extracted
value mismatch. For extracts, we emit sext/zext instructions; for shuffles,
we can reuse the original vector instead of the truncated one.

Differential Revision: https://reviews.llvm.org/D127974

Added: 
    llvm/test/Transforms/SLPVectorizer/X86/root-trunc-extract-reuse.ll

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 132e15cc2c58..b1e187a9e843 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -8472,6 +8472,10 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
         if (auto *FTy = dyn_cast<FixedVectorType>(User->getType())) {
           Optional<unsigned> InsertIdx = getInsertIndex(VU);
           if (InsertIdx) {
+            // Need to use original vector, if the root is truncated.
+            if (MinBWs.count(Scalar) &&
+                VectorizableTree[0]->VectorizedValue == Vec)
+              Vec = VectorRoot;
             auto *It =
                 find_if(ShuffledInserts, [VU](const ShuffledInsertData &Data) {
                   // Checks if 2 insertelements are from the same buildvector.

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/root-trunc-extract-reuse.ll b/llvm/test/Transforms/SLPVectorizer/X86/root-trunc-extract-reuse.ll
new file mode 100644
index 000000000000..57e633b376a9
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/root-trunc-extract-reuse.ll
@@ -0,0 +1,46 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -slp-vectorizer -S -slp-threshold=-100 -mtriple=x86_64 < %s | FileCheck %s
+
+define i1 @test() {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 false, label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    br label [[ELSE]]
+; CHECK:       else:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi <2 x i32> [ zeroinitializer, [[THEN]] ], [ zeroinitializer, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc <2 x i32> [[TMP0]] to <2 x i8>
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i8> [[TMP1]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i8 [[TMP2]] to i32
+; CHECK-NEXT:    [[BF_CAST162:%.*]] = and i32 [[TMP3]], 0
+; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <2 x i32> zeroinitializer, <2 x i32> [[TMP0]], <2 x i32> <i32 3, i32 1>
+; CHECK-NEXT:    [[T13:%.*]] = and <2 x i32> [[TMP4]], zeroinitializer
+; CHECK-NEXT:    br label [[ELSE1:%.*]]
+; CHECK:       else1:
+; CHECK-NEXT:    [[T20:%.*]] = extractelement <2 x i32> [[T13]], i64 0
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x i32> poison, i32 [[BF_CAST162]], i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[T20]], i32 1
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp ugt <2 x i32> [[TMP6]], zeroinitializer
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1
+; CHECK-NEXT:    ret i1 [[TMP8]]
+;
+entry:
+  br i1 false, label %then, label %else
+
+then:
+  br label %else
+
+else:
+  %bf.load.off43 = phi i32 [ 0, %then ], [ 0, %entry ]
+  %bf.load.off44 = phi i32 [ 0, %then ], [ 0, %entry ]
+  %bf.cast162 = and i32 %bf.load.off43, 0
+  %t12 = insertelement <2 x i32> zeroinitializer, i32 %bf.load.off44, i64 0
+  %t13 = and <2 x i32> %t12, zeroinitializer
+  br label %else1
+
+else1:
+  %cmp40 = icmp ugt i32 %bf.cast162, 0
+  %t20 = extractelement <2 x i32> %t13, i64 0
+  %cmp50 = icmp ugt i32 %t20, 0
+  ret i1 %cmp50
+}


        


More information about the llvm-commits mailing list