[llvm] 9cb7dff - [SLP]Fix PR80027: handle case when ext is not reduced but its operand is.

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 2 09:35:34 PDT 2024


Author: Alexey Bataev
Date: 2024-04-02T09:32:25-07:00
New Revision: 9cb7dffa88190c36db0dc49a7dd2b67e0a44f97e

URL: https://github.com/llvm/llvm-project/commit/9cb7dffa88190c36db0dc49a7dd2b67e0a44f97e
DIFF: https://github.com/llvm/llvm-project/commit/9cb7dffa88190c36db0dc49a7dd2b67e0a44f97e.diff

LOG: [SLP]Fix PR80027: handle case when ext is not reduced but its operand is.

Need to handle the case where the resize operation itself is not
reduced but its operand is. In this case, the extra bitwidth analysis
must be taken from the operand, not from the instruction itself.
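
To illustrate the change, here is a minimal, self-contained sketch of the
opcode-selection order after the patch. pickVecOpcode, Entry, and MinBWInfo
are hypothetical stand-ins for BoUpSLP's TreeEntry and MinBWs map, written
only to show the branch order; they are not the actual LLVM API.

#include <map>
#include <utility>

enum class Opcode { BitCast, Trunc, SExt, ZExt, Keep };

struct Entry { int Id; };

// Maps a tree-entry id to (minimized bitwidth, IsSigned); a stand-in for
// the MinBWs analysis results.
using MinBWInfo = std::map<int, std::pair<unsigned, bool>>;

Opcode pickVecOpcode(const MinBWInfo &MinBWs, const Entry &E,
                     const Entry &SrcE, unsigned BWSz, unsigned SrcBWSz) {
  auto It = MinBWs.find(E.Id);       // was the cast itself reduced?
  auto SrcIt = MinBWs.find(SrcE.Id); // was its operand reduced?
  if (BWSz == SrcBWSz)
    return Opcode::BitCast; // widths match after minimization
  if (BWSz < SrcBWSz)
    return Opcode::Trunc;   // narrowing cast
  if (It != MinBWs.end())   // the ext itself was reduced
    return It->second.second ? Opcode::SExt : Opcode::ZExt;
  // New case: the ext is not reduced, but its operand is; take the
  // signedness from the operand's analysis instead.
  if (SrcIt != MinBWs.end())
    return SrcIt->second.second ? Opcode::SExt : Opcode::ZExt;
  return Opcode::Keep; // neither reduced: keep the original cast opcode
}

The test added below builds this pattern around an or reduction, where the
extension's operand gets a minimized-bitwidth entry while the extension
itself does not.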

Added: 
    llvm/test/Transforms/SLPVectorizer/SystemZ/ext-not-resized-op-resized.ll

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 
    


################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 2cc42964da285c..3ba0336db365fa 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -8791,6 +8791,10 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
       } else if (It != MinBWs.end()) {
         assert(BWSz > SrcBWSz && "Invalid cast!");
         VecOpcode = It->second.second ? Instruction::SExt : Instruction::ZExt;
+      } else if (SrcIt != MinBWs.end()) {
+        assert(BWSz > SrcBWSz && "Invalid cast!");
+        VecOpcode =
+            SrcIt->second.second ? Instruction::SExt : Instruction::ZExt;
       }
     }
     auto GetScalarCost = [&](unsigned Idx) -> InstructionCost {
@@ -12142,6 +12146,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
           VecOpcode = Instruction::BitCast;
         } else if (BWSz < SrcBWSz) {
           VecOpcode = Instruction::Trunc;
+        } else if (It != MinBWs.end()) {
+          assert(BWSz > SrcBWSz && "Invalid cast!");
+          VecOpcode = It->second.second ? Instruction::SExt : Instruction::ZExt;
         } else if (SrcIt != MinBWs.end()) {
           assert(BWSz > SrcBWSz && "Invalid cast!");
           VecOpcode =

diff --git a/llvm/test/Transforms/SLPVectorizer/SystemZ/ext-not-resized-op-resized.ll b/llvm/test/Transforms/SLPVectorizer/SystemZ/ext-not-resized-op-resized.ll
new file mode 100644
index 00000000000000..a7bb272b44dc4b
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/SystemZ/ext-not-resized-op-resized.ll
@@ -0,0 +1,48 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S --passes=slp-vectorizer -mtriple=systemz-unknown -mcpu=z13 < %s | FileCheck %s
+
+define void @test(i64 %0, i1 %.cmp.i.2, i1 %1, ptr %a) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: i64 [[TMP0:%.*]], i1 [[DOTCMP_I_2:%.*]], i1 [[TMP1:%.*]], ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i64> poison, i64 [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i64> [[TMP3]], <4 x i64> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP5:%.*]] = lshr <4 x i64> [[TMP4]], <i64 63, i64 63, i64 63, i64 63>
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x i1> poison, i1 [[DOTCMP_I_2]], i32 0
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i1> [[TMP6]], i1 [[TMP1]], i32 1
+; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <4 x i1> [[TMP7]], <4 x i1> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 0>
+; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <4 x i1> [[TMP7]], <4 x i1> poison, <4 x i32> <i32 1, i32 1, i32 0, i32 1>
+; CHECK-NEXT:    [[TMP10:%.*]] = trunc <4 x i64> [[TMP5]] to <4 x i1>
+; CHECK-NEXT:    [[TMP11:%.*]] = select <4 x i1> [[TMP9]], <4 x i1> [[TMP10]], <4 x i1> [[TMP8]]
+; CHECK-NEXT:    [[TMP12:%.*]] = zext <4 x i1> [[TMP11]] to <4 x i32>
+; CHECK-NEXT:    [[TMP13:%.*]] = xor <4 x i32> [[TMP12]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP14:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP13]])
+; CHECK-NEXT:    store i32 [[TMP14]], ptr [[A]], align 4
+; CHECK-NEXT:    ret void
+;
+  %.lobit.i.2 = lshr i64 %0, 63
+  %3 = zext i1 %.cmp.i.2 to i64
+  %4 = select i1 %1, i64 %.lobit.i.2, i64 %3
+  %5 = trunc i64 %4 to i32
+  %6 = xor i32 %5, 1
+  %.lobit.i.3 = lshr i64 %0, 63
+  %7 = zext i1 %.cmp.i.2 to i64
+  %8 = select i1 %1, i64 %.lobit.i.3, i64 %7
+  %9 = trunc i64 %8 to i32
+  %10 = xor i32 %9, 1
+  %11 = or i32 %10, %6
+  %.lobit.i.4 = lshr i64 %0, 63
+  %12 = zext i1 %1 to i64
+  %13 = select i1 %.cmp.i.2, i64 %.lobit.i.4, i64 %12
+  %14 = trunc i64 %13 to i32
+  %15 = xor i32 %14, 1
+  %16 = or i32 %15, %11
+  %.lobit.i.5 = lshr i64 %0, 63
+  %17 = zext i1 %.cmp.i.2 to i64
+  %18 = select i1 %1, i64 %.lobit.i.5, i64 %17
+  %19 = trunc i64 %18 to i32
+  %20 = xor i32 %19, 1
+  %21 = or i32 %20, %16
+  store i32 %21, ptr %a, align 4
+  ret void
+}
+
