[llvm] r345607 - [InstCombine] try to turn shuffle into insertelement

Sanjay Patel via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 30 08:26:39 PDT 2018


Author: spatel
Date: Tue Oct 30 08:26:39 2018
New Revision: 345607

URL: http://llvm.org/viewvc/llvm-project?rev=345607&view=rev
Log:
[InstCombine] try to turn shuffle into insertelement

shuffle (insert ?, Scalar, IndexC), V1, Mask --> insert V1, Scalar, IndexC'

The motivating case is at least a couple of steps away: I noticed that
SLPVectorizer does not analyze shuffles as well as sequences of 
insert/extract in PR34724:
https://bugs.llvm.org/show_bug.cgi?id=34724
...so SLP may fail to vectorize when source code has shuffles to start 
with or instcombine has converted insert/extract to shuffles.

Independent of that, an insertelement is always a simpler op for IR
analysis than a shuffle, so we should transform to insert when possible.

I don't think there's any codegen concern here - if a target can't insert
a scalar directly into some fixed element of a vector (x86?), then this
should get expanded back to the insert+shuffle that we started with.

Differential Revision: https://reviews.llvm.org/D53507

Modified:
    llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
    llvm/trunk/test/Transforms/InstCombine/insert-extract-shuffle.ll

Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp?rev=345607&r1=345606&r2=345607&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp Tue Oct 30 08:26:39 2018
@@ -1531,6 +1531,71 @@ static Instruction *foldIdentityExtractS
   return new ShuffleVectorInst(X, Y, ConstantVector::get(NewMask));
 }
 
+/// Try to replace a shuffle with an insertelement.
+static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf) {
+  Value *V0 = Shuf.getOperand(0), *V1 = Shuf.getOperand(1);
+  SmallVector<int, 16> Mask = Shuf.getShuffleMask();
+
+  // The shuffle must not change vector sizes.
+  // TODO: This restriction could be removed if the insert has only one use
+  //       (because the transform would require a new length-changing shuffle).
+  int NumElts = Mask.size();
+  if (NumElts != (int)(V0->getType()->getVectorNumElements()))
+    return nullptr;
+
+  // shuffle (insert ?, Scalar, IndexC), V1, Mask --> insert V1, Scalar, IndexC'
+  auto isShufflingScalarIntoOp1 = [&](Value *&Scalar, ConstantInt *&IndexC) {
+    // We need an insertelement with a constant index.
+    if (!match(V0, m_InsertElement(m_Value(), m_Value(Scalar),
+                                   m_ConstantInt(IndexC))))
+      return false;
+
+    // Test the shuffle mask to see if it splices the inserted scalar into the
+    // operand 1 vector of the shuffle.
+    int NewInsIndex = -1;
+    for (int i = 0; i != NumElts; ++i) {
+      // Ignore undef mask elements.
+      if (Mask[i] == -1)
+        continue;
+
+      // The shuffle takes elements of operand 1 without lane changes.
+      if (Mask[i] == NumElts + i)
+        continue;
+
+      // The shuffle must choose the inserted scalar exactly once.
+      if (NewInsIndex != -1 || Mask[i] != IndexC->getSExtValue())
+        return false;
+
+      // The shuffle is placing the inserted scalar into element i.
+      NewInsIndex = i;
+    }
+
+    assert(NewInsIndex != -1 && "Did not fold shuffle with unused operand?");
+
+    // Index is updated to the potentially translated insertion lane.
+    IndexC = ConstantInt::get(IndexC->getType(), NewInsIndex);
+    return true;
+  };
+
+  // If the shuffle is unnecessary, insert the scalar operand directly into
+  // operand 1 of the shuffle. Example:
+  // shuffle (insert ?, S, 1), V1, <1, 5, 6, 7> --> insert V1, S, 0
+  Value *Scalar;
+  ConstantInt *IndexC;
+  if (isShufflingScalarIntoOp1(Scalar, IndexC))
+    return InsertElementInst::Create(V1, Scalar, IndexC);
+
+  // Try again after commuting shuffle. Example:
+  // shuffle V0, (insert ?, S, 0), <0, 1, 2, 4> -->
+  // shuffle (insert ?, S, 0), V0, <4, 5, 6, 0> --> insert V0, S, 3
+  std::swap(V0, V1);
+  ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
+  if (isShufflingScalarIntoOp1(Scalar, IndexC))
+    return InsertElementInst::Create(V1, Scalar, IndexC);
+
+  return nullptr;
+}
+
 Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
   Value *LHS = SVI.getOperand(0);
   Value *RHS = SVI.getOperand(1);
@@ -1556,6 +1621,11 @@ Instruction *InstCombiner::visitShuffleV
   if (Instruction *I = foldIdentityExtractShuffle(SVI))
     return I;
 
+  // This transform has the potential to lose undef knowledge, so it is
+  // intentionally placed after SimplifyDemandedVectorElts().
+  if (Instruction *I = foldShuffleWithInsert(SVI))
+    return I;
+
   SmallVector<int, 16> Mask = SVI.getShuffleMask();
   Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
   unsigned LHSWidth = LHS->getType()->getVectorNumElements();

Modified: llvm/trunk/test/Transforms/InstCombine/insert-extract-shuffle.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/insert-extract-shuffle.ll?rev=345607&r1=345606&r2=345607&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/insert-extract-shuffle.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/insert-extract-shuffle.ll Tue Oct 30 08:26:39 2018
@@ -303,12 +303,11 @@ define <4 x float> @collectShuffleElts(<
   ret <4 x float> %v3
 }
 
-; TODO: Simplest case - insert scalar into undef, then shuffle that value in place into another vector.
+; Simplest case - insert scalar into undef, then shuffle that value in place into another vector.
 
 define <4 x float> @insert_shuffle(float %x, <4 x float> %y) {
 ; CHECK-LABEL: @insert_shuffle(
-; CHECK-NEXT:    [[XV:%.*]] = insertelement <4 x float> undef, float [[X:%.*]], i32 0
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> [[Y:%.*]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[R:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[X:%.*]], i32 0
 ; CHECK-NEXT:    ret <4 x float> [[R]]
 ;
   %xv = insertelement <4 x float> undef, float %x, i32 0
@@ -316,12 +315,11 @@ define <4 x float> @insert_shuffle(float
   ret <4 x float> %r
 }
 
-; TODO: Insert scalar into some element of a dummy vector, then move it to a different element in another vector.
+; Insert scalar into some element of a dummy vector, then move it to a different element in another vector.
 
 define <4 x float> @insert_shuffle_translate(float %x, <4 x float> %y) {
 ; CHECK-LABEL: @insert_shuffle_translate(
-; CHECK-NEXT:    [[XV:%.*]] = insertelement <4 x float> undef, float [[X:%.*]], i32 0
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> [[Y:%.*]], <4 x i32> <i32 4, i32 0, i32 6, i32 7>
+; CHECK-NEXT:    [[R:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[X:%.*]], i32 1
 ; CHECK-NEXT:    ret <4 x float> [[R]]
 ;
   %xv = insertelement <4 x float> undef, float %x, i32 0
@@ -329,12 +327,11 @@ define <4 x float> @insert_shuffle_trans
   ret <4 x float> %r
 }
 
-; TODO: The vector operand of the insert is irrelevant.
+; The vector operand of the insert is irrelevant.
 
 define <4 x float> @insert_not_undef_shuffle_translate(float %x, <4 x float> %y, <4 x float> %q) {
 ; CHECK-LABEL: @insert_not_undef_shuffle_translate(
-; CHECK-NEXT:    [[XV:%.*]] = insertelement <4 x float> undef, float [[X:%.*]], i32 3
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> [[Y:%.*]], <4 x i32> <i32 4, i32 5, i32 3, i32 7>
+; CHECK-NEXT:    [[R:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[X:%.*]], i32 2
 ; CHECK-NEXT:    ret <4 x float> [[R]]
 ;
   %xv = insertelement <4 x float> %q, float %x, i32 3
@@ -342,12 +339,11 @@ define <4 x float> @insert_not_undef_shu
   ret <4 x float> %r
 }
 
-; TODO: The insert may be the 2nd operand of the shuffle. The shuffle mask can include undef elements.
+; The insert may be the 2nd operand of the shuffle. The shuffle mask can include undef elements.
 
 define <4 x float> @insert_not_undef_shuffle_translate_commute(float %x, <4 x float> %y, <4 x float> %q) {
 ; CHECK-LABEL: @insert_not_undef_shuffle_translate_commute(
-; CHECK-NEXT:    [[XV:%.*]] = insertelement <4 x float> undef, float [[X:%.*]], i32 2
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[XV]], <4 x i32> <i32 0, i32 6, i32 2, i32 undef>
+; CHECK-NEXT:    [[R:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[X:%.*]], i32 1
 ; CHECK-NEXT:    ret <4 x float> [[R]]
 ;
   %xv = insertelement <4 x float> %q, float %x, i32 2
@@ -355,13 +351,12 @@ define <4 x float> @insert_not_undef_shu
   ret <4 x float> %r
 }
 
-; TODO: Both shuffle operands may be inserts - choose the correct side.
+; Both shuffle operands may be inserts - choose the correct side.
 
 define <4 x float> @insert_insert_shuffle_translate(float %x1, float %x2, <4 x float> %q) {
 ; CHECK-LABEL: @insert_insert_shuffle_translate(
-; CHECK-NEXT:    [[XV1:%.*]] = insertelement <4 x float> undef, float [[X1:%.*]], i32 0
 ; CHECK-NEXT:    [[XV2:%.*]] = insertelement <4 x float> [[Q:%.*]], float [[X2:%.*]], i32 2
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[XV1]], <4 x float> [[XV2]], <4 x i32> <i32 4, i32 0, i32 6, i32 7>
+; CHECK-NEXT:    [[R:%.*]] = insertelement <4 x float> [[XV2]], float [[X1:%.*]], i32 1
 ; CHECK-NEXT:    ret <4 x float> [[R]]
 ;
   %xv1 = insertelement <4 x float> %q, float %x1, i32 0
@@ -370,13 +365,12 @@ define <4 x float> @insert_insert_shuffl
   ret <4 x float> %r
 }
 
-; TODO: Both shuffle operands may be inserts - choose the correct side.
+; Both shuffle operands may be inserts - choose the correct side.
 
 define <4 x float> @insert_insert_shuffle_translate_commute(float %x1, float %x2, <4 x float> %q) {
 ; CHECK-LABEL: @insert_insert_shuffle_translate_commute(
 ; CHECK-NEXT:    [[XV1:%.*]] = insertelement <4 x float> [[Q:%.*]], float [[X1:%.*]], i32 0
-; CHECK-NEXT:    [[XV2:%.*]] = insertelement <4 x float> undef, float [[X2:%.*]], i32 2
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[XV1]], <4 x float> [[XV2]], <4 x i32> <i32 0, i32 6, i32 2, i32 3>
+; CHECK-NEXT:    [[R:%.*]] = insertelement <4 x float> [[XV1]], float [[X2:%.*]], i32 1
 ; CHECK-NEXT:    ret <4 x float> [[R]]
 ;
   %xv1 = insertelement <4 x float> %q, float %x1, i32 0
@@ -385,6 +379,9 @@ define <4 x float> @insert_insert_shuffl
   ret <4 x float> %r
 }
 
+; Negative test - this only works if the shuffle is choosing exactly 1 element from 1 of the inputs.
+; TODO: But this could be a special-case because we're inserting into the same base vector.
+
 define <4 x float> @insert_insert_shuffle_translate_wrong_mask(float %x1, float %x2, <4 x float> %q) {
 ; CHECK-LABEL: @insert_insert_shuffle_translate_wrong_mask(
 ; CHECK-NEXT:    [[XV1:%.*]] = insertelement <4 x float> [[Q:%.*]], float [[X1:%.*]], i32 0
@@ -398,7 +395,7 @@ define <4 x float> @insert_insert_shuffl
   ret <4 x float> %r
 }
 
-; TODO: The insert may have other uses.
+; The insert may have other uses.
 
 declare void @use(<4 x float>)
 
@@ -406,7 +403,7 @@ define <4 x float> @insert_not_undef_shu
 ; CHECK-LABEL: @insert_not_undef_shuffle_translate_commute_uses(
 ; CHECK-NEXT:    [[XV:%.*]] = insertelement <4 x float> [[Q:%.*]], float [[X:%.*]], i32 2
 ; CHECK-NEXT:    call void @use(<4 x float> [[XV]])
-; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[XV]], <4 x i32> <i32 6, i32 undef, i32 2, i32 3>
+; CHECK-NEXT:    [[R:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[X]], i32 0
 ; CHECK-NEXT:    ret <4 x float> [[R]]
 ;
   %xv = insertelement <4 x float> %q, float %x, i32 2
@@ -415,6 +412,8 @@ define <4 x float> @insert_not_undef_shu
   ret <4 x float> %r
 }
 
+; Negative test - size-changing shuffle.
+
 define <5 x float> @insert_not_undef_shuffle_translate_commute_lengthen(float %x, <4 x float> %y, <4 x float> %q) {
 ; CHECK-LABEL: @insert_not_undef_shuffle_translate_commute_lengthen(
 ; CHECK-NEXT:    [[XV:%.*]] = insertelement <4 x float> undef, float [[X:%.*]], i32 2




More information about the llvm-commits mailing list