[llvm] 08c9c13 - [InstCombine][SVE] Fix visitInsertElementInst for scalable type.

Thu May 7 12:45:31 PDT 2020

Author: Huihui Zhang
Date: 2020-05-07T12:44:52-07:00
New Revision: 08c9c13749aebd03bec55442301442670fa0b72c

URL: https://github.com/llvm/llvm-project/commit/08c9c13749aebd03bec55442301442670fa0b72c
DIFF: https://github.com/llvm/llvm-project/commit/08c9c13749aebd03bec55442301442670fa0b72c.diff

LOG: [InstCombine][SVE] Fix visitInsertElementInst for scalable type.

Summary:
This patch fixes the following issues in visitInsertElementInst:

      1. Bail out for scalable types when the analysis requires a fixed number of vector elements.
      2. Use cast<FixedVectorType> to get the number of vector elements. This ensures that an
          assertion fires on a scalable vector type.
      3. For scalable type, avoid folding a chain of insertelement into splat:
            insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ...
              ->
            shufflevector(insertelt(X, %k, 0), undef, zero)
          The length of a scalable vector is unknown at compile time, so we cannot know whether
          a given insertelement sequence forms a valid splat.

Reviewers: sdesmalen, efriedma, spatel, nikic

Reviewed By: sdesmalen, efriedma

Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D78895

Added: 
    llvm/test/Transforms/InstCombine/vscale_insertelement.ll

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
    llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 543f76891c3a..b9ee985402c8 100644

--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -1190,7 +1190,12 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
                                                 APInt &UndefElts,
                                                 unsigned Depth,
                                                 bool AllowMultipleUsers) {
-  unsigned VWidth = cast<VectorType>(V->getType())->getNumElements();
+  // Cannot analyze scalable type. The number of vector elements is not a
+  // compile-time constant.
+  if (isa<ScalableVectorType>(V->getType()))
+    return nullptr;
+
+  unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();
   APInt EltMask(APInt::getAllOnesValue(VWidth));
   assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!");
 

diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index bf362e1a15e2..feb618383e74 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -611,7 +611,7 @@ static ShuffleOps collectShuffleElements(Value *V, SmallVectorImpl<int> &Mask,
                                          Value *PermittedRHS,
                                          InstCombiner &IC) {
   assert(V->getType()->isVectorTy() && "Invalid shuffle!");
-  unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
+  unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements();
 
   if (isa<UndefValue>(V)) {
     Mask.assign(NumElts, -1);
@@ -723,9 +723,14 @@ Instruction *InstCombiner::visitInsertValueInst(InsertValueInst &I) {
 }
 
 static bool isShuffleEquivalentToSelect(ShuffleVectorInst &Shuf) {
+  // Can not analyze scalable type, the number of elements is not a compile-time
+  // constant.
+  if (isa<ScalableVectorType>(Shuf.getOperand(0)->getType()))
+    return false;
+
   int MaskSize = Shuf.getShuffleMask().size();
   int VecSize =
-      cast<VectorType>(Shuf.getOperand(0)->getType())->getNumElements();
+      cast<FixedVectorType>(Shuf.getOperand(0)->getType())->getNumElements();
 
   // A vector select does not change the size of the operands.
   if (MaskSize != VecSize)
@@ -751,8 +756,12 @@ static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) {
   if (InsElt.hasOneUse() && isa<InsertElementInst>(InsElt.user_back()))
     return nullptr;
 
-  auto *VecTy = cast<VectorType>(InsElt.getType());
-  unsigned NumElements = VecTy->getNumElements();
+  VectorType *VecTy = InsElt.getType();
+  // Can not handle scalable type, the number of elements is not a compile-time
+  // constant.
+  if (isa<ScalableVectorType>(VecTy))
+    return nullptr;
+  unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();
 
   // Do not try to do this for a one-element vector, since that's a nop,
   // and will cause an inf-loop.
@@ -820,6 +829,11 @@ static Instruction *foldInsEltIntoSplat(InsertElementInst &InsElt) {
   if (!Shuf || !Shuf->isZeroEltSplat())
     return nullptr;
 
+  // Bail out early if shuffle is scalable type. The number of elements in
+  // shuffle mask is unknown at compile-time.
+  if (isa<ScalableVectorType>(Shuf->getType()))
+    return nullptr;
+
   // Check for a constant insertion index.
   uint64_t IdxC;
   if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
@@ -852,6 +866,11 @@ static Instruction *foldInsEltIntoIdentityShuffle(InsertElementInst &InsElt) {
       !(Shuf->isIdentityWithExtract() || Shuf->isIdentityWithPadding()))
     return nullptr;
 
+  // Bail out early if shuffle is scalable type. The number of elements in
+  // shuffle mask is unknown at compile-time.
+  if (isa<ScalableVectorType>(Shuf->getType()))
+    return nullptr;
+
   // Check for a constant insertion index.
   uint64_t IdxC;
   if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
@@ -975,7 +994,12 @@ static Instruction *foldConstantInsEltIntoShuffle(InsertElementInst &InsElt) {
   } else if (auto *IEI = dyn_cast<InsertElementInst>(Inst)) {
     // Transform sequences of insertelements ops with constant data/indexes into
     // a single shuffle op.
-    unsigned NumElts = InsElt.getType()->getNumElements();
+    // Can not handle scalable type, the number of elements needed to create
+    // shuffle mask is not a compile-time constant.
+    if (isa<ScalableVectorType>(InsElt.getType()))
+      return nullptr;
+    unsigned NumElts =
+        cast<FixedVectorType>(InsElt.getType())->getNumElements();
 
     uint64_t InsertIdx[2];
     Constant *Val[2];
@@ -1036,14 +1060,19 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
     return new BitCastInst(NewInsElt, IE.getType());
   }
 
-  // If the inserted element was extracted from some other vector and both
-  // indexes are valid constants, try to turn this into a shuffle.
+  // If the inserted element was extracted from some other fixed-length vector
+  // and both indexes are valid constants, try to turn this into a shuffle.
+  // Can not handle scalable vector type, the number of elements needed to
+  // create shuffle mask is not a compile-time constant.
   uint64_t InsertedIdx, ExtractedIdx;
   Value *ExtVecOp;
-  if (match(IdxOp, m_ConstantInt(InsertedIdx)) &&
+  if (isa<FixedVectorType>(IE.getType()) &&
+      match(IdxOp, m_ConstantInt(InsertedIdx)) &&
       match(ScalarOp,
             m_ExtractElement(m_Value(ExtVecOp), m_ConstantInt(ExtractedIdx))) &&
-      ExtractedIdx < cast<VectorType>(ExtVecOp->getType())->getNumElements()) {
+      isa<FixedVectorType>(ExtVecOp->getType()) &&
+      ExtractedIdx <
+          cast<FixedVectorType>(ExtVecOp->getType())->getNumElements()) {
     // TODO: Looking at the user(s) to determine if this insert is a
     // fold-to-shuffle opportunity does not match the usual instcombine
     // constraints. We should decide if the transform is worthy based only
@@ -1083,13 +1112,15 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
     }
   }
 
-  unsigned VWidth = cast<VectorType>(VecOp->getType())->getNumElements();
-  APInt UndefElts(VWidth, 0);
-  APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
-  if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) {
-    if (V != &IE)
-      return replaceInstUsesWith(IE, V);
-    return &IE;
+  if (auto VecTy = dyn_cast<FixedVectorType>(VecOp->getType())) {
+    unsigned VWidth = VecTy->getNumElements();
+    APInt UndefElts(VWidth, 0);
+    APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
+    if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) {
+      if (V != &IE)
+        return replaceInstUsesWith(IE, V);
+      return &IE;
+    }
   }
 
   if (Instruction *Shuf = foldConstantInsEltIntoShuffle(IE))

diff  --git a/llvm/test/Transforms/InstCombine/vscale_insertelement.ll b/llvm/test/Transforms/InstCombine/vscale_insertelement.ll
new file mode 100644
index 000000000000..e1962d731bf1
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/vscale_insertelement.ll
@@ -0,0 +1,85 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+; This test checks that bitcast is moved after insertelement when both vector and scalar are
+; bitcast from the same element type.
+; inselt (bitcast VecSrc), (bitcast ScalarSrc), IdxOp
+;  --> bitcast (inselt VecSrc, ScalarSrc, IdxOp)
+define <vscale x 4 x float> @insertelement_bitcast(<vscale x 4 x i32> %vec, i32 %x) {
+; CHECK-LABEL: @insertelement_bitcast(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <vscale x 4 x i32> [[VEC:%.*]], i32 [[X:%.*]], i32 0
+; CHECK-NEXT:    [[R:%.*]] = bitcast <vscale x 4 x i32> [[TMP1]] to <vscale x 4 x float>
+; CHECK-NEXT:    ret <vscale x 4 x float> [[R]]
+;
+  %x_cast = bitcast i32 %x to float
+  %vec_cast = bitcast <vscale x 4 x i32> %vec to <vscale x 4 x float>
+  %r = insertelement <vscale x 4 x float> %vec_cast, float %x_cast, i32 0
+  ret <vscale x 4 x float> %r
+}
+
+; This test checks that code-path "Try to form a shuffle from a chain of extract-insert ops" is
+; not taken when both extract and insert are scalable type.
+; For scalable types, the vector length needed to create a shuffle mask is not a compile-time constant.
+; Moreover, shuffle masks for scalable types only support splat and undef in the current code base.
+; Otherwise we crash at:
+; "Assertion `isValidOperands(V1, V2, Mask) && "Invalid shuffle vector instruction operands!"' failed."
+define <vscale x 4 x i32> @insertelement_extractelement(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: @insertelement_extractelement(
+; CHECK-NEXT:    [[T0:%.*]] = extractelement <vscale x 4 x i32> [[A:%.*]], i32 1
+; CHECK-NEXT:    [[T1:%.*]] = insertelement <vscale x 4 x i32> [[B:%.*]], i32 [[T0]], i32 0
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[T1]]
+;
+  %t0 = extractelement <vscale x 4 x i32> %a, i32 1
+  %t1 = insertelement <vscale x 4 x i32> %b, i32 %t0, i32 0
+  ret <vscale x 4 x i32> %t1
+}
+
+; This test checks that we are not attempting to create a shuffle from extract/insert chain,
+; when extract is from a scalable type, and the insert vector is fixed-length.
+define <4 x i32> @insertelement_extractelement_fixed_vec_extract_from_scalable(<vscale x 4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: @insertelement_extractelement_fixed_vec_extract_from_scalable(
+; CHECK-NEXT:    [[T0:%.*]] = extractelement <vscale x 4 x i32> [[A:%.*]], i32 1
+; CHECK-NEXT:    [[T1:%.*]] = insertelement <4 x i32> [[B:%.*]], i32 [[T0]], i32 0
+; CHECK-NEXT:    ret <4 x i32> [[T1]]
+;
+  %t0 = extractelement <vscale x 4 x i32> %a, i32 1
+  %t1 = insertelement <4 x i32> %b, i32 %t0, i32 0
+  ret <4 x i32> %t1
+}
+
+; This test checks that the optimization "foldConstantInsEltIntoShuffle" is not taken for scalable types.
+; Particularly the fold:
+; insertelt (insertelt X, C1, CIndex1), C, CIndex
+;  --> shufflevector X, CVec', Mask'
+; For scalable types, the vector length needed to create a shuffle mask is not a compile-time constant.
+; Moreover, shuffle masks for scalable types only support splat and undef in the current code base.
+; Otherwise we crash at:
+; "Assertion `isValidOperands(V1, V2, Mask) && "Invalid shuffle vector instruction operands!"' failed."
+define <vscale x 4 x i32> @insertelement_insertelement(<vscale x 4 x i32> %vec) {
+; CHECK-LABEL: @insertelement_insertelement(
+; CHECK-NEXT:    [[T0:%.*]] = insertelement <vscale x 4 x i32> [[VEC:%.*]], i32 1, i32 1
+; CHECK-NEXT:    [[T1:%.*]] = insertelement <vscale x 4 x i32> [[T0]], i32 2, i32 2
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[T1]]
+;
+  %t0 = insertelement <vscale x 4 x i32> %vec, i32 1, i32 1
+  %t1 = insertelement <vscale x 4 x i32> %t0, i32 2, i32 2
+  ret <vscale x 4 x i32> %t1
+}
+
+; This test checks that the following insertelement sequence is not folded into shuffle splat.
+; The length of scalable vector is unknown at compile-time. Therefore the following insertelements
+; may not form a valid splat.
+define <vscale x 4 x float> @insertelement_sequene_may_not_be_splat(float %x) {
+; CHECK-LABEL: @insertelement_sequene_may_not_be_splat(
+; CHECK-NEXT:    [[T0:%.*]] = insertelement <vscale x 4 x float> undef, float [[X:%.*]], i32 0
+; CHECK-NEXT:    [[T1:%.*]] = insertelement <vscale x 4 x float> [[T0]], float [[X]], i32 1
+; CHECK-NEXT:    [[T2:%.*]] = insertelement <vscale x 4 x float> [[T1]], float [[X]], i32 2
+; CHECK-NEXT:    [[T3:%.*]] = insertelement <vscale x 4 x float> [[T2]], float [[X]], i32 3
+; CHECK-NEXT:    ret <vscale x 4 x float> [[T3]]
+;
+  %t0 = insertelement <vscale x 4 x float> undef, float %x, i32 0
+  %t1 = insertelement <vscale x 4 x float> %t0, float %x, i32 1
+  %t2 = insertelement <vscale x 4 x float> %t1, float %x, i32 2
+  %t3 = insertelement <vscale x 4 x float> %t2, float %x, i32 3
+  ret <vscale x 4 x float> %t3
+}