[llvm] 6b7b18a - [SLP]Fix PR87329: crash on alternate cast vectorization.

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 2 08:27:06 PDT 2024


Author: Alexey Bataev
Date: 2024-04-02T08:19:29-07:00
New Revision: 6b7b18a1a7fd37cea5f5d51569309e7ec954cefb

URL: https://github.com/llvm/llvm-project/commit/6b7b18a1a7fd37cea5f5d51569309e7ec954cefb
DIFF: https://github.com/llvm/llvm-project/commit/6b7b18a1a7fd37cea5f5d51569309e7ec954cefb.diff

LOG: [SLP]Fix PR87329: crash on alternate cast vectorization.

The analysis for alternate instructions that are based on integer
extension operations needs to be fixed. If the alternate extension node
is resized but its operand is not, the node must be resized and the final
result must not be shuffled; we end up with only a trunc instruction.

Added: 
    llvm/test/Transforms/SLPVectorizer/X86/ext-int-reduced-not-operand.ll

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index b1940e3feed958..2cc42964da285c 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -9061,6 +9061,17 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
         Type *Src1SclTy = E->getAltOp()->getOperand(0)->getType();
         auto *Src0Ty = FixedVectorType::get(Src0SclTy, VL.size());
         auto *Src1Ty = FixedVectorType::get(Src1SclTy, VL.size());
+        if (It != MinBWs.end()) {
+          if (!MinBWs.contains(getOperandEntry(E, 0)))
+            VecCost =
+                TTIRef.getCastInstrCost(Instruction::Trunc, VecTy, Src0Ty,
+                                        TTI::CastContextHint::None, CostKind);
+          LLVM_DEBUG({
+            dbgs() << "SLP: alternate extension, which should be truncated.\n";
+            E->dump();
+          });
+          return VecCost;
+        }
         VecCost = TTIRef.getCastInstrCost(E->getOpcode(), VecTy, Src0Ty,
                                           TTI::CastContextHint::None, CostKind);
         VecCost +=
@@ -12571,6 +12582,16 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
         CmpInst::Predicate AltPred = AltCI->getPredicate();
         V1 = Builder.CreateCmp(AltPred, LHS, RHS);
       } else {
+        if (It != MinBWs.end()) {
+          if (!MinBWs.contains(getOperandEntry(E, 0)))
+            LHS = Builder.CreateIntCast(LHS, VecTy, It->second.first);
+          assert(LHS->getType() == VecTy && "Expected same type as operand.");
+          if (auto *I = dyn_cast<Instruction>(LHS))
+            LHS = propagateMetadata(I, E->Scalars);
+          E->VectorizedValue = LHS;
+          ++NumVectorInstructions;
+          return LHS;
+        }
         V0 = Builder.CreateCast(
             static_cast<Instruction::CastOps>(E->getOpcode()), LHS, VecTy);
         V1 = Builder.CreateCast(

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/ext-int-reduced-not-operand.ll b/llvm/test/Transforms/SLPVectorizer/X86/ext-int-reduced-not-operand.ll
new file mode 100644
index 00000000000000..05534fa961ee46
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/ext-int-reduced-not-operand.ll
@@ -0,0 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-99999 < %s | FileCheck %s
+
+define i64 @wombat() {
+; CHECK-LABEL: define i64 @wombat() {
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    br label [[BB2:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    br label [[BB2]]
+; CHECK:       bb2:
+; CHECK-NEXT:    [[PHI:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ 0, [[BB1:%.*]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[PHI]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = trunc <2 x i32> [[TMP1]] to <2 x i1>
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = zext i1 [[TMP3]] to i64
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1
+; CHECK-NEXT:    [[TMP6:%.*]] = zext i1 [[TMP5]] to i64
+; CHECK-NEXT:    [[OR:%.*]] = or i64 [[TMP4]], [[TMP6]]
+; CHECK-NEXT:    ret i64 [[OR]]
+;
+bb:
+  br label %bb2
+
+bb1:
+  br label %bb2
+
+bb2:
+  %phi = phi i32 [ 0, %bb ], [ 0, %bb1 ]
+  %zext = zext i32 %phi to i64
+  %sext = sext i32 %phi to i64
+  %or = or i64 %zext, %sext
+  ret i64 %or
+}


        


More information about the llvm-commits mailing list