[llvm] r300714 - [InstSimplify] fold identity shuffles (recursing if needed)

Sanjay Patel via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 19 09:48:23 PDT 2017


Author: spatel
Date: Wed Apr 19 11:48:22 2017
New Revision: 300714

URL: http://llvm.org/viewvc/llvm-project?rev=300714&view=rev
Log:
[InstSimplify] fold identity shuffles (recursing if needed)

This patch simplifies the examples from D31509 and D31927 (PR30630) and catches 
the basic identity shuffle tests that Zvi recently added.

I'm not sure if we have something like this in DAGCombiner, but we should?

It's worth noting that "MaxRecurse / RecursionLimit" is only 3 on entry at the moment. 
We might want to bump that up if there are longer shuffle chains like this in the wild.

For now, we're ignoring shuffles that have undef mask elements because it's not
clear how those should be handled.

Differential Revision: https://reviews.llvm.org/D31960

Modified:
    llvm/trunk/lib/Analysis/InstructionSimplify.cpp
    llvm/trunk/test/Transforms/InstSimplify/shufflevector.ll

Modified: llvm/trunk/lib/Analysis/InstructionSimplify.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/InstructionSimplify.cpp?rev=300714&r1=300713&r2=300714&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/InstructionSimplify.cpp (original)
+++ llvm/trunk/lib/Analysis/InstructionSimplify.cpp Wed Apr 19 11:48:22 2017
@@ -4080,6 +4080,60 @@ Value *llvm::SimplifyCastInst(unsigned C
                             RecursionLimit);
 }
 
+/// For the given destination element of a shuffle, peek through shuffles to
+/// match a root vector source operand that contains that element in the same
+/// vector lane (ie, the same mask index), so we can eliminate the shuffle(s).
+static Value *foldIdentityShuffles(int DestElt, Value *Op0, Value *Op1,
+                                   Constant *Mask, Value *RootVec, int RootElt,
+                                   unsigned MaxRecurse) {
+  if (!MaxRecurse--)
+    return nullptr;
+
+  // Bail out if any mask value is undefined. That kind of shuffle may be
+  // simplified further based on demanded bits or other folds.
+  int MaskVal = ShuffleVectorInst::getMaskValue(Mask, RootElt);
+  if (MaskVal == -1)
+    return nullptr;
+
+  // The mask value chooses which source operand we need to look at next.
+  Value *SourceOp;
+  int InVecNumElts = Op0->getType()->getVectorNumElements();
+  if (MaskVal < InVecNumElts) {
+    RootElt = MaskVal;
+    SourceOp = Op0;
+  } else {
+    RootElt = MaskVal - InVecNumElts;
+    SourceOp = Op1;
+  }
+
+  // If the source operand is a shuffle itself, look through it to find the
+  // matching root vector.
+  if (auto *SourceShuf = dyn_cast<ShuffleVectorInst>(SourceOp)) {
+    return foldIdentityShuffles(
+        DestElt, SourceShuf->getOperand(0), SourceShuf->getOperand(1),
+        SourceShuf->getMask(), RootVec, RootElt, MaxRecurse);
+  }
+
+  // TODO: Look through bitcasts? What if the bitcast changes the vector element
+  // size?
+
+  // The source operand is not a shuffle. Initialize the root vector value for
+  // this shuffle if that has not been done yet.
+  if (!RootVec)
+    RootVec = SourceOp;
+
+  // Give up as soon as a source operand does not match the existing root value.
+  if (RootVec != SourceOp)
+    return nullptr;
+
+  // The element must be coming from the same lane in the source vector
+  // (although it may have crossed lanes in intermediate shuffles).
+  if (RootElt != DestElt)
+    return nullptr;
+
+  return RootVec;
+}
+
 static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask,
                                         Type *RetTy, const Query &Q,
                                         unsigned MaxRecurse) {
@@ -4124,7 +4178,28 @@ static Value *SimplifyShuffleVectorInst(
         OpShuf->getMask()->getSplatValue())
       return Op1;
 
-  return nullptr;
+  // Don't fold a shuffle with undef mask elements. This may get folded in a
+  // better way using demanded bits or other analysis.
+  // TODO: Should we allow this?
+  for (unsigned i = 0; i != MaskNumElts; ++i)
+    if (ShuffleVectorInst::getMaskValue(Mask, i) == -1)
+      return nullptr;
+
+  // Check if every element of this shuffle can be mapped back to the
+  // corresponding element of a single root vector. If so, we don't need this
+  // shuffle. This handles simple identity shuffles as well as chains of
+  // shuffles that may widen/narrow and/or move elements across lanes and back.
+  Value *RootVec = nullptr;
+  for (unsigned i = 0; i != MaskNumElts; ++i) {
+    // Note that recursion is limited for each vector element, so if any element
+    // exceeds the limit, this will fail to simplify.
+    RootVec = foldIdentityShuffles(i, Op0, Op1, Mask, RootVec, i, MaxRecurse);
+
+    // We can't replace a widening/narrowing shuffle with one of its operands.
+    if (!RootVec || RootVec->getType() != RetTy)
+      return nullptr;
+  }
+  return RootVec;
 }
 
 /// Given operands for a ShuffleVectorInst, fold the result or return null.

Modified: llvm/trunk/test/Transforms/InstSimplify/shufflevector.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstSimplify/shufflevector.ll?rev=300714&r1=300713&r2=300714&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstSimplify/shufflevector.ll (original)
+++ llvm/trunk/test/Transforms/InstSimplify/shufflevector.ll Wed Apr 19 11:48:22 2017
@@ -120,8 +120,7 @@ define <4 x i32> @undef_mask(<4 x i32> %
 
 define <4 x i32> @identity_mask_0(<4 x i32> %x) {
 ; CHECK-LABEL: @identity_mask_0(
-; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    ret <4 x i32> [[SHUF]]
+; CHECK-NEXT:    ret <4 x i32> [[X:%.*]]
 ;
   %shuf = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   ret <4 x i32> %shuf
@@ -129,8 +128,7 @@ define <4 x i32> @identity_mask_0(<4 x i
 
 define <4 x i32> @identity_mask_1(<4 x i32> %x) {
 ; CHECK-LABEL: @identity_mask_1(
-; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> undef, <4 x i32> [[X:%.*]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    ret <4 x i32> [[SHUF]]
+; CHECK-NEXT:    ret <4 x i32> [[X:%.*]]
 ;
   %shuf = shufflevector <4 x i32> undef, <4 x i32> %x, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   ret <4 x i32> %shuf
@@ -138,13 +136,32 @@ define <4 x i32> @identity_mask_1(<4 x i
 
 define <4 x i32> @pseudo_identity_mask(<4 x i32> %x) {
 ; CHECK-LABEL: @pseudo_identity_mask(
-; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
-; CHECK-NEXT:    ret <4 x i32> [[SHUF]]
+; CHECK-NEXT:    ret <4 x i32> [[X:%.*]]
 ;
   %shuf = shufflevector <4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
   ret <4 x i32> %shuf
 }
 
+define <4 x i32> @not_identity_mask(<4 x i32> %x) {
+; CHECK-LABEL: @not_identity_mask(
+; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+; CHECK-NEXT:    ret <4 x i32> [[SHUF]]
+;
+  %shuf = shufflevector <4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+  ret <4 x i32> %shuf
+}
+
+; TODO: Should we simplify if the mask has an undef element?
+
+define <4 x i32> @possible_identity_mask(<4 x i32> %x) {
+; CHECK-LABEL: @possible_identity_mask(
+; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+; CHECK-NEXT:    ret <4 x i32> [[SHUF]]
+;
+  %shuf = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+  ret <4 x i32> %shuf
+}
+
 define <4 x i32> @const_operand(<4 x i32> %x) {
 ; CHECK-LABEL: @const_operand(
 ; CHECK-NEXT:    ret <4 x i32> <i32 42, i32 45, i32 44, i32 43>
@@ -155,10 +172,7 @@ define <4 x i32> @const_operand(<4 x i32
 
 define <4 x i32> @merge(<4 x i32> %x) {
 ; CHECK-LABEL: @merge(
-; CHECK-NEXT:    [[LOWER:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <2 x i32> <i32 1, i32 0>
-; CHECK-NEXT:    [[UPPER:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[MERGED:%.*]] = shufflevector <2 x i32> [[UPPER]], <2 x i32> [[LOWER]], <4 x i32> <i32 3, i32 2, i32 0, i32 1>
-; CHECK-NEXT:    ret <4 x i32> [[MERGED]]
+; CHECK-NEXT:    ret <4 x i32> [[X:%.*]]
 ;
   %lower = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 1, i32 0>
   %upper = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
@@ -166,16 +180,24 @@ define <4 x i32> @merge(<4 x i32> %x) {
   ret <4 x i32> %merged
 }
 
+; This crosses lanes from the source op.
+
+define <4 x i32> @not_merge(<4 x i32> %x) {
+; CHECK-LABEL: @not_merge(
+; CHECK-NEXT:    [[L:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    [[U:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT:    [[MERGED:%.*]] = shufflevector <2 x i32> [[U]], <2 x i32> [[L]], <4 x i32> <i32 3, i32 2, i32 0, i32 1>
+; CHECK-NEXT:    ret <4 x i32> [[MERGED]]
+;
+  %l = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+  %u = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+  %merged = shufflevector <2 x i32> %u, <2 x i32> %l, <4 x i32> <i32 3, i32 2, i32 0, i32 1>
+  ret <4 x i32> %merged
+}
+
 define <8 x double> @extract_and_concat(<8 x double> %x) {
 ; CHECK-LABEL: @extract_and_concat(
-; CHECK-NEXT:    [[S1:%.*]] = shufflevector <8 x double> [[X:%.*]], <8 x double> undef, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[S2:%.*]] = shufflevector <8 x double> [[X]], <8 x double> undef, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[S3:%.*]] = shufflevector <8 x double> [[X]], <8 x double> undef, <2 x i32> <i32 4, i32 5>
-; CHECK-NEXT:    [[S4:%.*]] = shufflevector <8 x double> [[X]], <8 x double> undef, <2 x i32> <i32 6, i32 7>
-; CHECK-NEXT:    [[S5:%.*]] = shufflevector <2 x double> [[S1]], <2 x double> [[S2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[S6:%.*]] = shufflevector <2 x double> [[S3]], <2 x double> [[S4]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[S7:%.*]] = shufflevector <4 x double> [[S5]], <4 x double> [[S6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    ret <8 x double> [[S7]]
+; CHECK-NEXT:    ret <8 x double> [[X:%.*]]
 ;
   %s1 = shufflevector <8 x double> %x, <8 x double> undef, <2 x i32> <i32 0, i32 1>
   %s2 = shufflevector <8 x double> %x, <8 x double> undef, <2 x i32> <i32 2, i32 3>
@@ -191,14 +213,7 @@ define <8 x double> @extract_and_concat(
 
 define <8 x i64> @PR30630(<8 x i64> %x) {
 ; CHECK-LABEL: @PR30630(
-; CHECK-NEXT:    [[S1:%.*]] = shufflevector <8 x i64> [[X:%.*]], <8 x i64> undef, <2 x i32> <i32 0, i32 4>
-; CHECK-NEXT:    [[S2:%.*]] = shufflevector <8 x i64> [[X]], <8 x i64> undef, <2 x i32> <i32 1, i32 5>
-; CHECK-NEXT:    [[S3:%.*]] = shufflevector <8 x i64> [[X]], <8 x i64> undef, <2 x i32> <i32 2, i32 6>
-; CHECK-NEXT:    [[S4:%.*]] = shufflevector <8 x i64> [[X]], <8 x i64> undef, <2 x i32> <i32 3, i32 7>
-; CHECK-NEXT:    [[S5:%.*]] = shufflevector <2 x i64> [[S1]], <2 x i64> [[S2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[S6:%.*]] = shufflevector <2 x i64> [[S3]], <2 x i64> [[S4]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[S7:%.*]] = shufflevector <4 x i64> [[S5]], <4 x i64> [[S6]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
-; CHECK-NEXT:    ret <8 x i64> [[S7]]
+; CHECK-NEXT:    ret <8 x i64> [[X:%.*]]
 ;
   %s1 = shufflevector <8 x i64> %x, <8 x i64> undef, <2 x i32> <i32 0, i32 4>
   %s2 = shufflevector <8 x i64> %x, <8 x i64> undef, <2 x i32> <i32 1, i32 5>




More information about the llvm-commits mailing list