[llvm] c1d46d3 - [SLPVectorizer] Fix crash in isShuffle with scalable vectors

Kerry McLaughlin via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 1 02:58:10 PDT 2021


Author: Kerry McLaughlin
Date: 2021-10-01T10:56:44+01:00
New Revision: c1d46d34619bea0d9906874baafb5218e15e3b2a

URL: https://github.com/llvm/llvm-project/commit/c1d46d34619bea0d9906874baafb5218e15e3b2a
DIFF: https://github.com/llvm/llvm-project/commit/c1d46d34619bea0d9906874baafb5218e15e3b2a.diff

LOG: [SLPVectorizer] Fix crash in isShuffle with scalable vectors

D104809 changed `buildTree_rec` to check for extract element instructions
with scalable types. However, if the extract is extended or truncated,
these changes do not apply and we assert later on in isShuffle(), which
attempts to cast the type of the extract to FixedVectorType.

Reviewed By: ABataev

Differential Revision: https://reviews.llvm.org/D110640

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
    llvm/test/Transforms/SLPVectorizer/AArch64/scalable-vector.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 69889263c0763..212623fc7350c 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -317,8 +317,10 @@ static bool isCommutative(Instruction *I) {
 /// TODO: Can we split off and reuse the shuffle mask detection from
 /// TargetTransformInfo::getInstructionThroughput?
 static Optional<TargetTransformInfo::ShuffleKind>
-isShuffle(ArrayRef<Value *> VL, SmallVectorImpl<int> &Mask) {
+isFixedVectorShuffle(ArrayRef<Value *> VL, SmallVectorImpl<int> &Mask) {
   auto *EI0 = cast<ExtractElementInst>(VL[0]);
+  if (isa<ScalableVectorType>(EI0->getVectorOperandType()))
+    return None;
   unsigned Size =
       cast<FixedVectorType>(EI0->getVectorOperandType())->getNumElements();
   Value *Vec1 = nullptr;
@@ -4320,7 +4322,7 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
       // shuffle of a single/two vectors the scalars are extracted from.
       SmallVector<int> Mask;
       Optional<TargetTransformInfo::ShuffleKind> ShuffleKind =
-          isShuffle(VL, Mask);
+          isFixedVectorShuffle(VL, Mask);
       if (ShuffleKind.hasValue()) {
         // Found the bunch of extractelement instructions that must be gathered
         // into a vector and can be represented as a permutation elements in a
@@ -4892,7 +4894,7 @@ bool BoUpSLP::isFullyVectorizableTinyTree() const {
             VectorizableTree[0]->Scalars.size()) ||
        (VectorizableTree[1]->State == TreeEntry::NeedToGather &&
         VectorizableTree[1]->getOpcode() == Instruction::ExtractElement &&
-        isShuffle(VectorizableTree[1]->Scalars, Mask))))
+        isFixedVectorShuffle(VectorizableTree[1]->Scalars, Mask))))
     return true;
 
   // Gathering cost would be too much for tiny trees.
@@ -8912,7 +8914,7 @@ bool SLPVectorizerPass::vectorizeInsertElementInst(InsertElementInst *IEI,
   if (!findBuildAggregate(IEI, TTI, BuildVectorOpds, BuildVectorInsts) ||
       (llvm::all_of(BuildVectorOpds,
                     [](Value *V) { return isa<ExtractElementInst>(V); }) &&
-       isShuffle(BuildVectorOpds, Mask)))
+       isFixedVectorShuffle(BuildVectorOpds, Mask)))
     return false;
 
   LLVM_DEBUG(dbgs() << "SLP: array mappable to vector: " << *IEI << "\n");

diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/scalable-vector.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/scalable-vector.ll
index d47da30399755..a5cfc7ba3034d 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/scalable-vector.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/scalable-vector.ll
@@ -138,5 +138,62 @@ define <vscale x 4 x i8> @shuffle(<4 x i8> %x, <4 x i8> %y) {
   ret  <vscale x 4 x i8> %ins4
 }
 
+define void @sext_scalable_extractelement() {
+; CHECK-LABEL: @sext_scalable_extractelement(
+; CHECK-NEXT:    [[X0:%.*]] = extractelement <vscale x 2 x i32> undef, i32 undef
+; CHECK-NEXT:    [[TMP1:%.*]] = sext i32 [[X0]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, i64* undef, i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <vscale x 2 x i32> undef, i32 undef
+; CHECK-NEXT:    [[TMP4:%.*]] = sext i32 [[TMP3]] to i64
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, i64* undef, i64 [[TMP4]]
+; CHECK-NEXT:    ret void
+;
+  %x0 = extractelement <vscale x 2 x i32> undef, i32 undef
+  %1 = sext i32 %x0 to i64
+  %2 = getelementptr inbounds i64, i64* undef, i64 %1
+  %3 = extractelement <vscale x 2 x i32> undef, i32 undef
+  %4 = sext i32 %3 to i64
+  %5 = getelementptr inbounds i64, i64* undef, i64 %4
+  ret void
+}
+
+define void @zext_scalable_extractelement() {
+; CHECK-LABEL: @zext_scalable_extractelement(
+; CHECK-NEXT:    [[X0:%.*]] = extractelement <vscale x 2 x i32> undef, i32 undef
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[X0]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, i64* undef, i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <vscale x 2 x i32> undef, i32 undef
+; CHECK-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, i64* undef, i64 [[TMP4]]
+; CHECK-NEXT:    ret void
+;
+  %x0 = extractelement <vscale x 2 x i32> undef, i32 undef
+  %1 = zext i32 %x0 to i64
+  %2 = getelementptr inbounds i64, i64* undef, i64 %1
+  %3 = extractelement <vscale x 2 x i32> undef, i32 undef
+  %4 = zext i32 %3 to i64
+  %5 = getelementptr inbounds i64, i64* undef, i64 %4
+  ret void
+}
+
+define void @trunc_scalable_extractelement() {
+; CHECK-LABEL: @trunc_scalable_extractelement(
+; CHECK-NEXT:    [[X0:%.*]] = extractelement <vscale x 2 x i64> undef, i32 undef
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[X0]] to i32
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* undef, i32 [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <vscale x 2 x i64> undef, i32 undef
+; CHECK-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* undef, i32 [[TMP4]]
+; CHECK-NEXT:    ret void
+;
+  %x0 = extractelement <vscale x 2 x i64> undef, i32 undef
+  %1 = trunc i64 %x0 to i32
+  %2 = getelementptr inbounds i32, i32* undef, i32 %1
+  %3 = extractelement <vscale x 2 x i64> undef, i32 undef
+  %4 = trunc i64 %3 to i32
+  %5 = getelementptr inbounds i32, i32* undef, i32 %4
+  ret void
+}
+
 declare <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>*, i32 immarg, <vscale x 16 x i1>, <vscale x 16 x i8>)
 declare void @llvm.masked.store.nxv16i8.p0nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>*, i32 immarg, <vscale x 16 x i1>)


        


More information about the llvm-commits mailing list