[llvm] b868a2d - [SLPVectorizer] Fix crash in vectorizeChainsInBlock for scalable vector.

Caroline Concatto via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 5 04:46:38 PDT 2021


Author: Caroline Concatto
Date: 2021-07-05T12:43:41+01:00
New Revision: b868a2d2c69c20ba0f906354986354b84540e6c0

URL: https://github.com/llvm/llvm-project/commit/b868a2d2c69c20ba0f906354986354b84540e6c0
DIFF: https://github.com/llvm/llvm-project/commit/b868a2d2c69c20ba0f906354986354b84540e6c0.diff

LOG: [SLPVectorizer] Fix crash in vectorizeChainsInBlock for scalable vector.

The function vectorizeChainsInBlock does not support scalable vectors,
because functions such as canReuseExtract and isCommutative in its code
path assert when given scalable vectors.

This patch avoids vectorizing blocks that have extract instructions with scalable
vectors.

Differential Revision: https://reviews.llvm.org/D104809

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
    llvm/test/Transforms/SLPVectorizer/AArch64/scalable-vector.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index e655227c2de4..f687cce8d0e2 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2675,6 +2675,15 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
     return;
   }
 
+  // Don't handle scalable vectors
+  if (S.getOpcode() == Instruction::ExtractElement &&
+      isa<ScalableVectorType>(
+          cast<ExtractElementInst>(S.OpValue)->getVectorOperandType())) {
+    LLVM_DEBUG(dbgs() << "SLP: Gathering due to scalable vector type.\n");
+    newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
+    return;
+  }
+
   // Don't handle vectors.
   if (S.OpValue->getType()->isVectorTy() &&
       !isa<InsertElementInst>(S.OpValue)) {
@@ -3739,7 +3748,9 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
                                  0);
     }
     if (E->getOpcode() == Instruction::ExtractElement && allSameType(VL) &&
-        allSameBlock(VL)) {
+        allSameBlock(VL) &&
+        !isa<ScalableVectorType>(
+            cast<ExtractElementInst>(E->getMainOp())->getVectorOperandType())) {
       // Check that gather of extractelements can be represented as just a
       // shuffle of a single/two vectors the scalars are extracted from.
       SmallVector<int> Mask;

diff  --git a/llvm/test/Transforms/SLPVectorizer/AArch64/scalable-vector.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/scalable-vector.ll
index 9f82f4329e41..d47da3039975 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/scalable-vector.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/scalable-vector.ll
@@ -44,5 +44,99 @@ end:
   ret <vscale x 4 x i32> %retval
 }
 
+define void @scalable_phi1() {
+; CHECK-LABEL: @scalable_phi1(
+; CHECK-NEXT:  middle.block:
+; CHECK-NEXT:    [[EXTRACT1:%.*]] = extractelement <vscale x 8 x i16> undef, i32 undef
+; CHECK-NEXT:    [[EXTRACT2:%.*]] = extractelement <vscale x 8 x i16> undef, i32 undef
+; CHECK-NEXT:    br label [[FOR_BODY_I:%.*]]
+; CHECK:       for.body.i:
+; CHECK-NEXT:    [[RECUR1:%.*]] = phi i16 [ [[EXTRACT1]], [[MIDDLE_BLOCK:%.*]] ], [ undef, [[FOR_BODY_I]] ]
+; CHECK-NEXT:    [[RECUR2:%.*]] = phi i16 [ [[EXTRACT2]], [[MIDDLE_BLOCK]] ], [ undef, [[FOR_BODY_I]] ]
+; CHECK-NEXT:    br label [[FOR_BODY_I]]
+;
+middle.block:
+  %extract1 = extractelement <vscale x 8 x i16> undef, i32 undef
+  %extract2 = extractelement <vscale x 8 x i16> undef, i32 undef
+  br label %for.body.i
+
+for.body.i:                                       ; preds = %for.body.i, %middle.block
+  %recur1 = phi i16 [ %extract1, %middle.block ], [ undef, %for.body.i ]
+  %recur2 = phi i16 [ %extract2, %middle.block ], [ undef, %for.body.i ]
+  br label %for.body.i
+}
+
+define void @scalable_phi2() {
+; CHECK-LABEL: @scalable_phi2(
+; CHECK-NEXT:  middle.block:
+; CHECK-NEXT:    [[EXTRACT1:%.*]] = extractelement <vscale x 8 x i16> undef, i32 undef
+; CHECK-NEXT:    [[EXTRACT2:%.*]] = extractelement <vscale x 8 x i16> undef, i32 undef
+; CHECK-NEXT:    br label [[FOR_BODY_I:%.*]]
+; CHECK:       for.body.i:
+; CHECK-NEXT:    [[RECUR1:%.*]] = phi i16 [ undef, [[FOR_BODY_I]] ], [ [[EXTRACT1]], [[MIDDLE_BLOCK:%.*]] ]
+; CHECK-NEXT:    [[RECUR2:%.*]] = phi i16 [ undef, [[FOR_BODY_I]] ], [ [[EXTRACT2]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    br label [[FOR_BODY_I]]
+;
+middle.block:
+  %extract1 = extractelement <vscale x 8 x i16> undef, i32 undef
+  %extract2 = extractelement <vscale x 8 x i16> undef, i32 undef
+  br label %for.body.i
+
+for.body.i:                                       ; preds = %for.body.i, %middle.block
+  %recur1 = phi i16 [ undef, %for.body.i ], [ %extract1, %middle.block ]
+  %recur2 = phi i16 [ undef, %for.body.i ], [ %extract2, %middle.block ]
+  br label %for.body.i
+}
+
+define <vscale x 4 x i32> @build_vec_v4i32_reuse_0(<vscale x 2 x i32> %v0) {
+; CHECK-LABEL: @build_vec_v4i32_reuse_0(
+; CHECK-NEXT:    [[V0_0:%.*]] = extractelement <vscale x 2 x i32> [[V0:%.*]], i32 0
+; CHECK-NEXT:    [[V0_1:%.*]] = extractelement <vscale x 2 x i32> [[V0]], i32 1
+; CHECK-NEXT:    [[TMP0_0:%.*]] = add i32 [[V0_0]], [[V0_0]]
+; CHECK-NEXT:    [[TMP1_0:%.*]] = sub i32 [[V0_0]], [[V0_1]]
+; CHECK-NEXT:    [[TMP2_0:%.*]] = add i32 [[TMP0_0]], [[TMP1_0]]
+; CHECK-NEXT:    [[TMP3_0:%.*]] = insertelement <vscale x 4 x i32> undef, i32 [[TMP2_0]], i32 0
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3_0]]
+;
+  %v0.0 = extractelement <vscale x 2 x i32> %v0, i32 0
+  %v0.1 = extractelement <vscale x 2 x i32> %v0, i32 1
+  %tmp0.0 = add i32 %v0.0, %v0.0
+  %tmp1.0 = sub i32 %v0.0, %v0.1
+  %tmp2.0 = add i32 %tmp0.0, %tmp1.0
+  %tmp3.0 = insertelement <vscale x 4 x i32> undef, i32 %tmp2.0, i32 0
+  ret <vscale x 4 x i32> %tmp3.0
+}
+
+define <vscale x 4 x i8> @shuffle(<4 x i8> %x, <4 x i8> %y) {
+; CHECK-LABEL: @shuffle(
+; CHECK-NEXT:    [[X0:%.*]] = extractelement <4 x i8> [[X:%.*]], i32 0
+; CHECK-NEXT:    [[X3:%.*]] = extractelement <4 x i8> [[X]], i32 3
+; CHECK-NEXT:    [[Y1:%.*]] = extractelement <4 x i8> [[Y:%.*]], i32 1
+; CHECK-NEXT:    [[Y2:%.*]] = extractelement <4 x i8> [[Y]], i32 2
+; CHECK-NEXT:    [[X0X0:%.*]] = mul i8 [[X0]], [[X0]]
+; CHECK-NEXT:    [[X3X3:%.*]] = mul i8 [[X3]], [[X3]]
+; CHECK-NEXT:    [[Y1Y1:%.*]] = mul i8 [[Y1]], [[Y1]]
+; CHECK-NEXT:    [[Y2Y2:%.*]] = mul i8 [[Y2]], [[Y2]]
+; CHECK-NEXT:    [[INS1:%.*]] = insertelement <vscale x 4 x i8> poison, i8 [[X0X0]], i32 0
+; CHECK-NEXT:    [[INS2:%.*]] = insertelement <vscale x 4 x i8> [[INS1]], i8 [[X3X3]], i32 1
+; CHECK-NEXT:    [[INS3:%.*]] = insertelement <vscale x 4 x i8> [[INS2]], i8 [[Y1Y1]], i32 2
+; CHECK-NEXT:    [[INS4:%.*]] = insertelement <vscale x 4 x i8> [[INS3]], i8 [[Y2Y2]], i32 3
+; CHECK-NEXT:    ret <vscale x 4 x i8> [[INS4]]
+;
+  %x0 = extractelement <4 x i8> %x, i32 0
+  %x3 = extractelement <4 x i8> %x, i32 3
+  %y1 = extractelement <4 x i8> %y, i32 1
+  %y2 = extractelement <4 x i8> %y, i32 2
+  %x0x0 = mul i8 %x0, %x0
+  %x3x3 = mul i8 %x3, %x3
+  %y1y1 = mul i8 %y1, %y1
+  %y2y2 = mul i8 %y2, %y2
+  %ins1 = insertelement <vscale x 4 x i8> poison, i8 %x0x0, i32 0
+  %ins2 = insertelement <vscale x 4 x i8> %ins1, i8 %x3x3, i32 1
+  %ins3 = insertelement <vscale x 4 x i8> %ins2, i8 %y1y1, i32 2
+  %ins4 = insertelement <vscale x 4 x i8> %ins3, i8 %y2y2, i32 3
+  ret  <vscale x 4 x i8> %ins4
+}
+
 declare <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>*, i32 immarg, <vscale x 16 x i1>, <vscale x 16 x i8>)
 declare void @llvm.masked.store.nxv16i8.p0nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>*, i32 immarg, <vscale x 16 x i1>)


        


More information about the llvm-commits mailing list