[llvm] r280504 - [InsttCombine] fold insertelement of constant into shuffle with constant operand (PR29126)

Sanjay Patel via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 2 10:05:43 PDT 2016


Author: spatel
Date: Fri Sep  2 12:05:43 2016
New Revision: 280504

URL: http://llvm.org/viewvc/llvm-project?rev=280504&view=rev
Log:
[InsttCombine] fold insertelement of constant into shuffle with constant operand (PR29126)

The motivating case occurs with SSE/AVX scalar intrinsics, so this is a first step towards
shrinking that to a single shufflevector.

Note that the transform is intentionally limited to shuffles that are equivalent to vector
selects to avoid creating arbitrary shuffle masks that may not lower well.

This should solve PR29126:
https://llvm.org/bugs/show_bug.cgi?id=29126

Differential Revision: https://reviews.llvm.org/D23886

Modified:
    llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
    llvm/trunk/test/Transforms/InstCombine/insert-const-shuf.ll

Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp?rev=280504&r1=280503&r2=280504&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp Fri Sep  2 12:05:43 2016
@@ -566,6 +566,79 @@ Instruction *InstCombiner::visitInsertVa
   return nullptr;
 }
 
+static bool isShuffleEquivalentToSelect(ShuffleVectorInst &Shuf) {
+  int MaskSize = Shuf.getMask()->getType()->getVectorNumElements();
+  int VecSize = Shuf.getOperand(0)->getType()->getVectorNumElements();
+
+  // A vector select does not change the size of the operands.
+  if (MaskSize != VecSize)
+    return false;
+
+  // Each mask element must be undefined or choose a vector element from one of
+  // the source operands without crossing vector lanes.
+  for (int i = 0; i != MaskSize; ++i) {
+    int Elt = Shuf.getMaskValue(i);
+    if (Elt != -1 && Elt != i && Elt != i + VecSize)
+      return false;
+  }
+
+  return true;
+}
+
+/// insertelt (shufflevector X, CVec, Mask), C, CIndex -->
+/// shufflevector X, CVec', Mask'
+static Instruction *foldConstantInsEltIntoShuffle(InsertElementInst &InsElt) {
+  // Bail out if the shuffle has more than one use. In that case, we'd be
+  // replacing the insertelt with a shuffle, and that's not a clear win.
+  auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
+  if (!Shuf || !Shuf->hasOneUse())
+    return nullptr;
+
+  // The shuffle must have a constant vector operand. The insertelt must have a
+  // constant scalar being inserted at a constant position in the vector.
+  Constant *ShufConstVec, *InsEltScalar;
+  uint64_t InsEltIndex;
+  if (!match(Shuf->getOperand(1), m_Constant(ShufConstVec)) ||
+      !match(InsElt.getOperand(1), m_Constant(InsEltScalar)) ||
+      !match(InsElt.getOperand(2), m_ConstantInt(InsEltIndex)))
+    return nullptr;
+
+  // Adding an element to an arbitrary shuffle could be expensive, but a shuffle
+  // that selects elements from vectors without crossing lanes is assumed cheap.
+  // If we're just adding a constant into that shuffle, it will still be cheap.
+  if (!isShuffleEquivalentToSelect(*Shuf))
+    return nullptr;
+
+  // From the above 'select' check, we know that the mask has the same number of
+  // elements as the vector input operands. We also know that each constant
+  // input element is used in its lane and can not be used more than once by the
+  // shuffle. Therefore, replace the constant in the shuffle's constant vector
+  // with the insertelt constant. Replace the constant in the shuffle's mask
+  // vector with the insertelt index plus the length of the vector (because the
+  // constant vector operand of a shuffle is always the 2nd operand).
+  Constant *Mask = Shuf->getMask();
+  unsigned NumElts = Mask->getType()->getVectorNumElements();
+  SmallVector<Constant*, 16> NewShufElts(NumElts);
+  SmallVector<Constant*, 16> NewMaskElts(NumElts);
+  for (unsigned i = 0; i != NumElts; ++i) {
+    if (i == InsEltIndex) {
+      NewShufElts[i] = InsEltScalar;
+      Type *Int32Ty = Type::getInt32Ty(Shuf->getContext());
+      NewMaskElts[i] = ConstantInt::get(Int32Ty, InsEltIndex + NumElts);
+    } else {
+      // Copy over the existing values.
+      NewShufElts[i] = ShufConstVec->getAggregateElement(i);
+      NewMaskElts[i] = Mask->getAggregateElement(i);
+    }
+  }
+
+  // Create new operands for a shuffle that includes the constant of the
+  // original insertelt. The old shuffle will be dead now.
+  Constant *NewShufVec = ConstantVector::get(NewShufElts);
+  Constant *NewMask = ConstantVector::get(NewMaskElts);
+  return new ShuffleVectorInst(Shuf->getOperand(0), NewShufVec, NewMask);
+}
+
 Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
   Value *VecOp    = IE.getOperand(0);
   Value *ScalarOp = IE.getOperand(1);
@@ -625,6 +698,9 @@ Instruction *InstCombiner::visitInsertEl
     return &IE;
   }
 
+  if (Instruction *Shuf = foldConstantInsEltIntoShuffle(IE))
+    return Shuf;
+
   return nullptr;
 }
 

Modified: llvm/trunk/test/Transforms/InstCombine/insert-const-shuf.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/insert-const-shuf.ll?rev=280504&r1=280503&r2=280504&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/insert-const-shuf.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/insert-const-shuf.ll Fri Sep  2 12:05:43 2016
@@ -1,12 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -S -instcombine %s | FileCheck %s
 
-; TODO: Eliminate the insertelement.
+; Eliminate the insertelement.
 
 define <4 x float> @PR29126(<4 x float> %x) {
 ; CHECK-LABEL: @PR29126(
-; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> %x, <4 x float> <float undef, float 1.000000e+00, float 2.000000e+00, float undef>, <4 x i32> <i32 0, i32 5, i32 6, i32 undef>
-; CHECK-NEXT:    [[INS:%.*]] = insertelement <4 x float> [[SHUF]], float 4.200000e+01, i32 3
+; CHECK-NEXT:    [[INS:%.*]] = shufflevector <4 x float> %x, <4 x float> <float undef, float 1.000000e+00, float 2.000000e+00, float 4.200000e+01>, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:    ret <4 x float> [[INS]]
 ;
   %shuf = shufflevector <4 x float> %x, <4 x float> <float undef, float 1.0, float 2.0, float undef>, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
@@ -14,13 +13,11 @@ define <4 x float> @PR29126(<4 x float>
   ret <4 x float> %ins
 }
 
-; TODO: A chain of inserts should collapse.
+; A chain of inserts should collapse.
 
 define <4 x float> @twoInserts(<4 x float> %x) {
 ; CHECK-LABEL: @twoInserts(
-; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> %x, <4 x float> <float undef, float 0.000000e+00, float undef, float undef>, <4 x i32> <i32 0, i32 5, i32 undef, i32 undef>
-; CHECK-NEXT:    [[INS1:%.*]] = insertelement <4 x float> [[SHUF]], float 4.200000e+01, i32 2
-; CHECK-NEXT:    [[INS2:%.*]] = insertelement <4 x float> [[INS1]], float 1.100000e+01, i32 3
+; CHECK-NEXT:    [[INS2:%.*]] = shufflevector <4 x float> %x, <4 x float> <float undef, float 0.000000e+00, float 4.200000e+01, float 1.100000e+01>, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:    ret <4 x float> [[INS2]]
 ;
   %shuf = shufflevector <4 x float> %x, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 3>




More information about the llvm-commits mailing list