[llvm] e239198 - [InstCombine] fold select shuffles with shared operand together

Sanjay Patel via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 28 08:58:55 PDT 2022


Author: Sanjay Patel
Date: 2022-09-28T11:56:27-04:00
New Revision: e239198cdbbf2689ffe2c21ea8feab5b7a4a8a02

URL: https://github.com/llvm/llvm-project/commit/e239198cdbbf2689ffe2c21ea8feab5b7a4a8a02
DIFF: https://github.com/llvm/llvm-project/commit/e239198cdbbf2689ffe2c21ea8feab5b7a4a8a02.diff

LOG: [InstCombine] fold select shuffles with shared operand together

We don't combine generic shuffles together in IR, but select
shuffles are a special-case because a select shuffle of a
select shuffle is just another select shuffle; codegen is
expected to efficiently lower those (select shuffles are also
the canonical form of a vector select with constant condition).

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
    llvm/test/Transforms/InstCombine/shuffle_select.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 9a144de36af32..40acc2423a246 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -1961,6 +1961,53 @@ static BinopElts getAlternateBinop(BinaryOperator *BO, const DataLayout &DL) {
   return {};
 }
 
+/// A select shuffle of a select shuffle with a shared operand can be reduced
+/// to a single select shuffle. This is an obvious improvement in IR, and the
+/// backend is expected to lower select shuffles efficiently.
+static Instruction *foldSelectShuffleOfSelectShuffle(ShuffleVectorInst &Shuf) {
+  assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
+
+  Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
+  SmallVector<int, 16> Mask;
+  Shuf.getShuffleMask(Mask);
+  unsigned NumElts = Mask.size();
+
+  // Canonicalize a select shuffle with common operand as Op1.
+  auto *ShufOp = dyn_cast<ShuffleVectorInst>(Op0);
+  if (ShufOp && ShufOp->isSelect() &&
+      (ShufOp->getOperand(0) == Op1 || ShufOp->getOperand(1) == Op1)) {
+    std::swap(Op0, Op1);
+    ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
+  }
+
+  ShufOp = dyn_cast<ShuffleVectorInst>(Op1);
+  if (!ShufOp || !ShufOp->isSelect() ||
+      (ShufOp->getOperand(0) != Op0 && ShufOp->getOperand(1) != Op0))
+    return nullptr;
+
+  Value *X = ShufOp->getOperand(0), *Y = ShufOp->getOperand(1);
+  SmallVector<int, 16> Mask1;
+  ShufOp->getShuffleMask(Mask1);
+  assert(Mask1.size() == NumElts && "Vector size changed with select shuffle");
+
+  // Canonicalize common operand (Op0) as X (first operand of first shuffle).
+  if (Y == Op0) {
+    std::swap(X, Y);
+    ShuffleVectorInst::commuteShuffleMask(Mask1, NumElts);
+  }
+
+  // If the mask chooses from X (operand 0), it stays the same.
+  // If the mask chooses from the earlier shuffle, the other mask value is
+  // transferred to the combined select shuffle:
+  // shuf X, (shuf X, Y, M1), M --> shuf X, Y, M'
+  SmallVector<int, 16> NewMask(NumElts);
+  for (unsigned i = 0; i != NumElts; ++i)
+    NewMask[i] = Mask[i] < (signed)NumElts ? Mask[i] : Mask1[i];
+
+  assert(ShuffleVectorInst::isSelectMask(NewMask) && "Unexpected shuffle mask");
+  return new ShuffleVectorInst(X, Y, NewMask);
+}
+
 static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf) {
   assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
 
@@ -2065,6 +2112,9 @@ Instruction *InstCombinerImpl::foldSelectShuffle(ShuffleVectorInst &Shuf) {
     return &Shuf;
   }
 
+  if (Instruction *I = foldSelectShuffleOfSelectShuffle(Shuf))
+    return I;
+
   if (Instruction *I = foldSelectShuffleWith1Binop(Shuf))
     return I;
 

diff  --git a/llvm/test/Transforms/InstCombine/shuffle_select.ll b/llvm/test/Transforms/InstCombine/shuffle_select.ll
index 9133a1fbabd6c..357b8e1c9a1ae 100644
--- a/llvm/test/Transforms/InstCombine/shuffle_select.ll
+++ b/llvm/test/Transforms/InstCombine/shuffle_select.ll
@@ -1528,10 +1528,12 @@ define <4 x i32> @PR41419(<4 x i32> %v) {
   ret <4 x i32> %s
 }
 
+; The shuffle masks in the next 4 tests are identical to make it easier
+; to see that we are choosing the correct elements in the new shuffle.
+
 define <5 x i4> @sel_common_op_commute0(<5 x i4> %x, <5 x i4> %y) {
 ; CHECK-LABEL: @sel_common_op_commute0(
-; CHECK-NEXT:    [[S1:%.*]] = shufflevector <5 x i4> [[X:%.*]], <5 x i4> [[Y:%.*]], <5 x i32> <i32 undef, i32 6, i32 2, i32 undef, i32 undef>
-; CHECK-NEXT:    [[S2:%.*]] = shufflevector <5 x i4> [[X]], <5 x i4> [[S1]], <5 x i32> <i32 0, i32 6, i32 7, i32 3, i32 4>
+; CHECK-NEXT:    [[S2:%.*]] = shufflevector <5 x i4> [[X:%.*]], <5 x i4> [[Y:%.*]], <5 x i32> <i32 0, i32 6, i32 2, i32 3, i32 4>
 ; CHECK-NEXT:    ret <5 x i4> [[S2]]
 ;
   %s1 = shufflevector <5 x i4> %x, <5 x i4> %y, <5 x i32> <i32 0, i32 6, i32 2, i32 3, i32 9>
@@ -1541,8 +1543,7 @@ define <5 x i4> @sel_common_op_commute0(<5 x i4> %x, <5 x i4> %y) {
 
 define <5 x i4> @sel_common_op_commute1(<5 x i4> %x, <5 x i4> %y) {
 ; CHECK-LABEL: @sel_common_op_commute1(
-; CHECK-NEXT:    [[S1:%.*]] = shufflevector <5 x i4> [[Y:%.*]], <5 x i4> [[X:%.*]], <5 x i32> <i32 undef, i32 6, i32 2, i32 undef, i32 undef>
-; CHECK-NEXT:    [[S2:%.*]] = shufflevector <5 x i4> [[X]], <5 x i4> [[S1]], <5 x i32> <i32 0, i32 6, i32 7, i32 3, i32 4>
+; CHECK-NEXT:    [[S2:%.*]] = shufflevector <5 x i4> [[X:%.*]], <5 x i4> [[Y:%.*]], <5 x i32> <i32 0, i32 1, i32 7, i32 3, i32 4>
 ; CHECK-NEXT:    ret <5 x i4> [[S2]]
 ;
   %s1 = shufflevector <5 x i4> %y, <5 x i4> %x, <5 x i32> <i32 0, i32 6, i32 2, i32 3, i32 9>
@@ -1552,8 +1553,7 @@ define <5 x i4> @sel_common_op_commute1(<5 x i4> %x, <5 x i4> %y) {
 
 define <5 x i4> @sel_common_op_commute2(<5 x i4> %x, <5 x i4> %y) {
 ; CHECK-LABEL: @sel_common_op_commute2(
-; CHECK-NEXT:    [[S1:%.*]] = shufflevector <5 x i4> [[X:%.*]], <5 x i4> [[Y:%.*]], <5 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 9>
-; CHECK-NEXT:    [[S2:%.*]] = shufflevector <5 x i4> [[S1]], <5 x i4> [[X]], <5 x i32> <i32 0, i32 6, i32 7, i32 3, i32 4>
+; CHECK-NEXT:    [[S2:%.*]] = shufflevector <5 x i4> [[X:%.*]], <5 x i4> [[Y:%.*]], <5 x i32> <i32 0, i32 1, i32 2, i32 3, i32 9>
 ; CHECK-NEXT:    ret <5 x i4> [[S2]]
 ;
   %s1 = shufflevector <5 x i4> %x, <5 x i4> %y, <5 x i32> <i32 0, i32 6, i32 2, i32 3, i32 9>
@@ -1563,8 +1563,7 @@ define <5 x i4> @sel_common_op_commute2(<5 x i4> %x, <5 x i4> %y) {
 
 define <5 x i4> @sel_common_op_commute3(<5 x i4> %x, <5 x i4> %y) {
 ; CHECK-LABEL: @sel_common_op_commute3(
-; CHECK-NEXT:    [[S1:%.*]] = shufflevector <5 x i4> [[Y:%.*]], <5 x i4> [[X:%.*]], <5 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 9>
-; CHECK-NEXT:    [[S2:%.*]] = shufflevector <5 x i4> [[S1]], <5 x i4> [[X]], <5 x i32> <i32 0, i32 6, i32 7, i32 3, i32 4>
+; CHECK-NEXT:    [[S2:%.*]] = shufflevector <5 x i4> [[Y:%.*]], <5 x i4> [[X:%.*]], <5 x i32> <i32 0, i32 6, i32 7, i32 3, i32 9>
 ; CHECK-NEXT:    ret <5 x i4> [[S2]]
 ;
   %s1 = shufflevector <5 x i4> %y, <5 x i4> %x, <5 x i32> <i32 0, i32 6, i32 2, i32 3, i32 9>
@@ -1574,8 +1573,7 @@ define <5 x i4> @sel_common_op_commute3(<5 x i4> %x, <5 x i4> %y) {
 
 define <5 x i4> @sel_common_op_commute3_poison_mask_elts(<5 x i4> %x, <5 x i4> %y) {
 ; CHECK-LABEL: @sel_common_op_commute3_poison_mask_elts(
-; CHECK-NEXT:    [[S1:%.*]] = shufflevector <5 x i4> [[Y:%.*]], <5 x i4> [[X:%.*]], <5 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 9>
-; CHECK-NEXT:    [[S2:%.*]] = shufflevector <5 x i4> [[S1]], <5 x i4> [[X]], <5 x i32> <i32 0, i32 6, i32 undef, i32 undef, i32 4>
+; CHECK-NEXT:    [[S2:%.*]] = shufflevector <5 x i4> [[Y:%.*]], <5 x i4> [[X:%.*]], <5 x i32> <i32 0, i32 6, i32 undef, i32 undef, i32 9>
 ; CHECK-NEXT:    ret <5 x i4> [[S2]]
 ;
   %s1 = shufflevector <5 x i4> %y, <5 x i4> %x, <5 x i32> <i32 0, i32 6, i32 2, i32 poison, i32 9>
@@ -1583,6 +1581,8 @@ define <5 x i4> @sel_common_op_commute3_poison_mask_elts(<5 x i4> %x, <5 x i4> %
   ret <5 x i4> %s2
 }
 
+; negative test - need shared operand
+
 define <5 x i4> @sel_not_common_op_commute3(<5 x i4> %x, <5 x i4> %y, <5 x i4> %z) {
 ; CHECK-LABEL: @sel_not_common_op_commute3(
 ; CHECK-NEXT:    [[S1:%.*]] = shufflevector <5 x i4> [[Y:%.*]], <5 x i4> [[Z:%.*]], <5 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 9>
@@ -1594,6 +1594,8 @@ define <5 x i4> @sel_not_common_op_commute3(<5 x i4> %x, <5 x i4> %y, <5 x i4> %
   ret <5 x i4> %s2
 }
 
+; negative test - need "select" shuffle, no lane changes
+
 define <5 x i4> @not_sel_common_op(<5 x i4> %x, <5 x i4> %y) {
 ; CHECK-LABEL: @not_sel_common_op(
 ; CHECK-NEXT:    [[S1:%.*]] = shufflevector <5 x i4> [[Y:%.*]], <5 x i4> [[X:%.*]], <5 x i32> <i32 undef, i32 6, i32 undef, i32 3, i32 9>
@@ -1605,11 +1607,13 @@ define <5 x i4> @not_sel_common_op(<5 x i4> %x, <5 x i4> %y) {
   ret <5 x i4> %s2
 }
 
+; extra use is ok
+
 define <4 x i32> @sel_common_op_extra_use(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @sel_common_op_extra_use(
 ; CHECK-NEXT:    [[S1:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> [[X:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
 ; CHECK-NEXT:    call void @use_v4i32(<4 x i32> [[S1]])
-; CHECK-NEXT:    [[S2:%.*]] = shufflevector <4 x i32> [[S1]], <4 x i32> [[X]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+; CHECK-NEXT:    [[S2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[X]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:    ret <4 x i32> [[S2]]
 ;
   %s1 = shufflevector <4 x i32> %y, <4 x i32> %x, <4 x i32> <i32 0, i32 5, i32 2, i32 7>


        


More information about the llvm-commits mailing list