[llvm] 85f6b15 - [SLP]Do not look for buildvector sequence, if the index is reused.

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Fri May 13 13:57:35 PDT 2022


Author: Alexey Bataev
Date: 2022-05-13T13:56:02-07:00
New Revision: 85f6b15ee50feb316047f52d4bd6ddc639e3c5c1

URL: https://github.com/llvm/llvm-project/commit/85f6b15ee50feb316047f52d4bd6ddc639e3c5c1
DIFF: https://github.com/llvm/llvm-project/commit/85f6b15ee50feb316047f52d4bd6ddc639e3c5c1.diff

LOG: [SLP]Do not look for buildvector sequence, if the index is reused.

If the insert indes was used already or is not constant, we should stop
looking for unique buildvector sequence, it mustbe splitted to
2 different buildvectors.

Added: 
    llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 7f7e9e3d0c463..b9f86cd10498b 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -737,11 +737,11 @@ static void inversePermutation(ArrayRef<unsigned> Indices,
 
 /// \returns inserting index of InsertElement or InsertValue instruction,
 /// using Offset as base offset for index.
-static Optional<unsigned> getInsertIndex(Value *InsertInst,
+static Optional<unsigned> getInsertIndex(const Value *InsertInst,
                                          unsigned Offset = 0) {
   int Index = Offset;
-  if (auto *IE = dyn_cast<InsertElementInst>(InsertInst)) {
-    if (auto *CI = dyn_cast<ConstantInt>(IE->getOperand(2))) {
+  if (const auto *IE = dyn_cast<InsertElementInst>(InsertInst)) {
+    if (const auto *CI = dyn_cast<ConstantInt>(IE->getOperand(2))) {
       auto *VT = cast<FixedVectorType>(IE->getType());
       if (CI->getValue().uge(VT->getNumElements()))
         return None;
@@ -752,13 +752,13 @@ static Optional<unsigned> getInsertIndex(Value *InsertInst,
     return None;
   }
 
-  auto *IV = cast<InsertValueInst>(InsertInst);
+  const auto *IV = cast<InsertValueInst>(InsertInst);
   Type *CurrentType = IV->getType();
   for (unsigned I : IV->indices()) {
-    if (auto *ST = dyn_cast<StructType>(CurrentType)) {
+    if (const auto *ST = dyn_cast<StructType>(CurrentType)) {
       Index *= ST->getNumElements();
       CurrentType = ST->getElementType(I);
-    } else if (auto *AT = dyn_cast<ArrayType>(CurrentType)) {
+    } else if (const auto *AT = dyn_cast<ArrayType>(CurrentType)) {
       Index *= AT->getNumElements();
       CurrentType = AT->getElementType();
     } else {
@@ -6556,6 +6556,8 @@ static bool areTwoInsertFromSameBuildVector(InsertElementInst *VU,
     return false;
   auto *IE1 = VU;
   auto *IE2 = V;
+  unsigned Idx1 = *getInsertIndex(IE1);
+  unsigned Idx2 = *getInsertIndex(IE2);
   // Go through the vector operand of insertelement instructions trying to find
   // either VU as the original vector for IE2 or V as the original vector for
   // IE1.
@@ -6563,13 +6565,15 @@ static bool areTwoInsertFromSameBuildVector(InsertElementInst *VU,
     if (IE2 == VU || IE1 == V)
       return true;
     if (IE1) {
-      if (IE1 != VU && !IE1->hasOneUse())
+      if ((IE1 != VU && !IE1->hasOneUse()) ||
+          getInsertIndex(IE1).getValueOr(Idx2) == Idx2)
         IE1 = nullptr;
       else
         IE1 = dyn_cast<InsertElementInst>(IE1->getOperand(0));
     }
     if (IE2) {
-      if (IE2 != V && !IE2->hasOneUse())
+      if ((IE2 != V && !IE2->hasOneUse()) ||
+          getInsertIndex(IE2).getValueOr(Idx1) == Idx1)
         IE2 = nullptr;
       else
         IE2 = dyn_cast<InsertElementInst>(IE2->getOperand(0));
@@ -6586,6 +6590,8 @@ static bool isFirstInsertElement(const InsertElementInst *IE1,
   const auto *I2 = IE2;
   const InsertElementInst *PrevI1;
   const InsertElementInst *PrevI2;
+  unsigned Idx1 = *getInsertIndex(IE1);
+  unsigned Idx2 = *getInsertIndex(IE2);
   do {
     if (I2 == IE1)
       return true;
@@ -6593,9 +6599,11 @@ static bool isFirstInsertElement(const InsertElementInst *IE1,
       return false;
     PrevI1 = I1;
     PrevI2 = I2;
-    if (I1 && (I1 == IE1 || I1->hasOneUse()))
+    if (I1 && (I1 == IE1 || I1->hasOneUse()) &&
+        getInsertIndex(I1).getValueOr(Idx2) != Idx2)
       I1 = dyn_cast<InsertElementInst>(I1->getOperand(0));
-    if (I2 && (I2 == IE2 || I2->hasOneUse()))
+    if (I2 && ((I2 == IE2 || I2->hasOneUse())) &&
+        getInsertIndex(I2).getValueOr(Idx1) != Idx1)
       I2 = dyn_cast<InsertElementInst>(I2->getOperand(0));
   } while ((I1 && PrevI1 != I1) || (I2 && PrevI2 != I2));
   llvm_unreachable("Two 
diff erent buildvectors not expected.");
@@ -6764,7 +6772,9 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
             // Find the insertvector, vectorized in tree, if any.
             Value *Base = VU;
             while (auto *IEBase = dyn_cast<InsertElementInst>(Base)) {
-              if (IEBase != EU.User && !IEBase->hasOneUse())
+              if (IEBase != EU.User &&
+                  (!IEBase->hasOneUse() ||
+                   getInsertIndex(IEBase).getValueOr(*InsertIdx) == *InsertIdx))
                 break;
               // Build the mask for the vectorized insertelement instructions.
               if (const TreeEntry *E = getTreeEntry(IEBase)) {

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll
new file mode 100644
index 0000000000000..0d3c7809e868e
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+;RUN: opt -S -slp-vectorizer -mtriple=x86_64-unknown-linux-android23 < %s | FileCheck %s
+
+define void @test() {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr undef, i32 2
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x float>, ptr [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x float>, ptr undef, align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = fsub <2 x float> [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1
+; CHECK-NEXT:    [[TMP7:%.*]] = fcmp olt float [[TMP6]], [[TMP5]]
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x float> [[TMP3]], i32 0
+; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <2 x float> undef, float [[TMP8]], i64 0
+; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <2 x float> zeroinitializer, float 0.000000e+00, i64 0
+; CHECK-NEXT:    store <2 x float> zeroinitializer, ptr null, align 4
+; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
+; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP11]], i64 0
+; CHECK-NEXT:    store <2 x float> zeroinitializer, ptr null, align 4
+; CHECK-NEXT:    ret void
+;
+  %1 = getelementptr inbounds float, ptr undef, i32 2
+  %2 = load float, ptr %1, align 4
+  %3 = load float, ptr undef, align 4
+  %4 = fsub float %2, %3
+  %5 = getelementptr inbounds float, ptr undef, i32 3
+  %6 = load float, ptr %5, align 4
+  %7 = getelementptr inbounds float, ptr undef, i32 1
+  %8 = load float, ptr %7, align 4
+  %9 = fsub float %6, %8
+  %10 = fcmp olt float %9, %4
+  %11 = insertelement <2 x float> undef, float %3, i64 0
+  %12 = insertelement <2 x float> zeroinitializer, float 0.000000e+00, i64 0
+  store <2 x float> zeroinitializer, ptr null, align 4
+  %13 = insertelement <2 x float> %11, float %6, i64 0
+  store <2 x float> zeroinitializer, ptr null, align 4
+  ret void
+}


        


More information about the llvm-commits mailing list