[llvm] b4a0fd4 - [SLP]Fix PR89635: do not try to vectorize single-gather alternate node.

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 23 06:46:32 PDT 2024


Author: Alexey Bataev
Date: 2024-04-23T06:45:43-07:00
New Revision: b4a0fd40f1b94eac571d29ee7695b492934d9bfc

URL: https://github.com/llvm/llvm-project/commit/b4a0fd40f1b94eac571d29ee7695b492934d9bfc
DIFF: https://github.com/llvm/llvm-project/commit/b4a0fd40f1b94eac571d29ee7695b492934d9bfc.diff

LOG: [SLP]Fix PR89635: do not try to vectorize single-gather alternate node.

There is no need to try to vectorize a graph that consists of a single
gather/buildvector node with an alternate opcode; it is not profitable.
In other cases, the last instruction must be used as the insertion point
for the vectorized code.

Added: 
    llvm/test/Transforms/SLPVectorizer/X86/alternate-opcode-sindle-bv.ll

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 6ac380a6ab6c6c..a1a28076881cb5 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -9640,6 +9640,7 @@ bool BoUpSLP::isTreeTinyAndNotFullyVectorizable(bool ForReduction) const {
   bool IsAllowedSingleBVNode =
       VectorizableTree.size() > 1 ||
       (VectorizableTree.size() == 1 && VectorizableTree.front()->getOpcode() &&
+       !VectorizableTree.front()->isAltShuffle() &&
        VectorizableTree.front()->getOpcode() != Instruction::PHI &&
        VectorizableTree.front()->getOpcode() != Instruction::GetElementPtr &&
        allSameBlock(VectorizableTree.front()->Scalars));
@@ -11032,7 +11033,10 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
                         isUsedOutsideBlock(V);
                }) ||
         (E->State == TreeEntry::NeedToGather && E->Idx == 0 &&
-         all_of(E->Scalars, IsaPred<ExtractElementInst, UndefValue>)))
+         all_of(E->Scalars, [](Value *V) {
+           return isa<ExtractElementInst, UndefValue>(V) ||
+                  areAllOperandsNonInsts(V);
+         })))
       Res.second = FindLastInst();
     else
       Res.second = FindFirstInst();

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-opcode-sindle-bv.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-opcode-sindle-bv.ll
new file mode 100644
index 00000000000000..89268837c9d8e0
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-opcode-sindle-bv.ll
@@ -0,0 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+
+define <2 x i32> @test(i32 %arg) {
+; CHECK-LABEL: define <2 x i32> @test(
+; CHECK-SAME: i32 [[ARG:%.*]]) {
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[OR:%.*]] = or i32 [[ARG]], 0
+; CHECK-NEXT:    [[MUL:%.*]] = mul i32 0, 1
+; CHECK-NEXT:    [[MUL1:%.*]] = mul i32 [[OR]], [[MUL]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 0, [[MUL1]]
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[OR]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[MUL]], i32 1
+; CHECK-NEXT:    ret <2 x i32> [[TMP1]]
+;
+bb:
+  %or = or i32 %arg, 0
+  %mul = mul i32 0, 1
+  %mul1 = mul i32 %or, %mul
+  %cmp = icmp ugt i32 0, %mul1
+  %0 = insertelement <2 x i32> poison, i32 %or, i32 0
+  %1 = insertelement <2 x i32> %0, i32 %mul, i32 1
+  ret <2 x i32> %1
+}
+


        


More information about the llvm-commits mailing list