[llvm] e239198 - [InstCombine] fold select shuffles with shared operand together
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 28 08:58:55 PDT 2022
Author: Sanjay Patel
Date: 2022-09-28T11:56:27-04:00
New Revision: e239198cdbbf2689ffe2c21ea8feab5b7a4a8a02
URL: https://github.com/llvm/llvm-project/commit/e239198cdbbf2689ffe2c21ea8feab5b7a4a8a02
DIFF: https://github.com/llvm/llvm-project/commit/e239198cdbbf2689ffe2c21ea8feab5b7a4a8a02.diff
LOG: [InstCombine] fold select shuffles with shared operand together
We don't combine generic shuffles together in IR, but select
shuffles are a special case because a select shuffle of a
select shuffle is just another select shuffle; codegen is
expected to efficiently lower those (select shuffles are also
the canonical form of a vector select with constant condition).
Added:
Modified:
llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
llvm/test/Transforms/InstCombine/shuffle_select.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 9a144de36af32..40acc2423a246 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -1961,6 +1961,53 @@ static BinopElts getAlternateBinop(BinaryOperator *BO, const DataLayout &DL) {
return {};
}
+/// Fold a select shuffle whose operand is itself a select shuffle that shares
+/// the other operand into a single select shuffle:
+///   shuf X, (shuf X, Y, M1), M --> shuf X, Y, M'
+/// The inner shuffle may be either operand of \p Shuf, and the shared value
+/// may be either operand of the inner shuffle; both are canonicalized below.
+/// This is an obvious improvement in IR, and the backend is expected to lower
+/// select shuffles efficiently.
+///
+/// \param Shuf a select-equivalent shuffle (asserted).
+/// \returns a new shuffle of the two underlying source values, or nullptr if
+/// neither operand is a select shuffle that uses the other operand.
+static Instruction *foldSelectShuffleOfSelectShuffle(ShuffleVectorInst &Shuf) {
+ assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
+
+ Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
+ SmallVector<int, 16> Mask;
+ Shuf.getShuffleMask(Mask);
+ unsigned NumElts = Mask.size();
+
+ // Canonicalize a select shuffle with common operand as Op1.
+ // Swapping the operands requires commuting the mask so that every lane
+ // still selects the same value.
+ auto *ShufOp = dyn_cast<ShuffleVectorInst>(Op0);
+ if (ShufOp && ShufOp->isSelect() &&
+ (ShufOp->getOperand(0) == Op1 || ShufOp->getOperand(1) == Op1)) {
+ std::swap(Op0, Op1);
+ ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
+ }
+
+ // After canonicalization, Op1 must be a select shuffle that uses Op0.
+ ShufOp = dyn_cast<ShuffleVectorInst>(Op1);
+ if (!ShufOp || !ShufOp->isSelect() ||
+ (ShufOp->getOperand(0) != Op0 && ShufOp->getOperand(1) != Op0))
+ return nullptr;
+
+ Value *X = ShufOp->getOperand(0), *Y = ShufOp->getOperand(1);
+ SmallVector<int, 16> Mask1;
+ ShufOp->getShuffleMask(Mask1);
+ assert(Mask1.size() == NumElts && "Vector size changed with select shuffle");
+
+ // Canonicalize common operand (Op0) as X (first operand of first shuffle).
+ if (Y == Op0) {
+ std::swap(X, Y);
+ ShuffleVectorInst::commuteShuffleMask(Mask1, NumElts);
+ }
+
+ // If the mask chooses from X (operand 0), it stays the same.
+ // If the mask chooses from the earlier shuffle, the other mask value is
+ // transferred to the combined select shuffle:
+ // shuf X, (shuf X, Y, M1), M --> shuf X, Y, M'
+ // An undef (negative) element of Mask compares less than NumElts and is
+ // therefore carried over unchanged.
+ SmallVector<int, 16> NewMask(NumElts);
+ for (unsigned i = 0; i != NumElts; ++i)
+ NewMask[i] = Mask[i] < (signed)NumElts ? Mask[i] : Mask1[i];
+
+ // A select mask with undef elements might look like an identity mask: if
+ // every lane taken from the earlier shuffle is undef or comes from X, the
+ // combined mask references only X. Both input masks keep elements in their
+ // lanes, so the result is still in-lane; accept either form.
+ assert((ShuffleVectorInst::isSelectMask(NewMask) ||
+ ShuffleVectorInst::isIdentityMask(NewMask)) &&
+ "Unexpected shuffle mask");
+ return new ShuffleVectorInst(X, Y, NewMask);
+}
+
static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf) {
assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
@@ -2065,6 +2112,9 @@ Instruction *InstCombinerImpl::foldSelectShuffle(ShuffleVectorInst &Shuf) {
return &Shuf;
}
+ if (Instruction *I = foldSelectShuffleOfSelectShuffle(Shuf))
+ return I;
+
if (Instruction *I = foldSelectShuffleWith1Binop(Shuf))
return I;
diff --git a/llvm/test/Transforms/InstCombine/shuffle_select.ll b/llvm/test/Transforms/InstCombine/shuffle_select.ll
index 9133a1fbabd6c..357b8e1c9a1ae 100644
--- a/llvm/test/Transforms/InstCombine/shuffle_select.ll
+++ b/llvm/test/Transforms/InstCombine/shuffle_select.ll
@@ -1528,10 +1528,12 @@ define <4 x i32> @PR41419(<4 x i32> %v) {
ret <4 x i32> %s
}
+; The shuffle masks in the next 4 tests are identical to make it easier
+; to see that we are choosing the correct elements in the new shuffle.
+
define <5 x i4> @sel_common_op_commute0(<5 x i4> %x, <5 x i4> %y) {
; CHECK-LABEL: @sel_common_op_commute0(
-; CHECK-NEXT: [[S1:%.*]] = shufflevector <5 x i4> [[X:%.*]], <5 x i4> [[Y:%.*]], <5 x i32> <i32 undef, i32 6, i32 2, i32 undef, i32 undef>
-; CHECK-NEXT: [[S2:%.*]] = shufflevector <5 x i4> [[X]], <5 x i4> [[S1]], <5 x i32> <i32 0, i32 6, i32 7, i32 3, i32 4>
+; CHECK-NEXT: [[S2:%.*]] = shufflevector <5 x i4> [[X:%.*]], <5 x i4> [[Y:%.*]], <5 x i32> <i32 0, i32 6, i32 2, i32 3, i32 4>
; CHECK-NEXT: ret <5 x i4> [[S2]]
;
%s1 = shufflevector <5 x i4> %x, <5 x i4> %y, <5 x i32> <i32 0, i32 6, i32 2, i32 3, i32 9>
@@ -1541,8 +1543,7 @@ define <5 x i4> @sel_common_op_commute0(<5 x i4> %x, <5 x i4> %y) {
define <5 x i4> @sel_common_op_commute1(<5 x i4> %x, <5 x i4> %y) {
; CHECK-LABEL: @sel_common_op_commute1(
-; CHECK-NEXT: [[S1:%.*]] = shufflevector <5 x i4> [[Y:%.*]], <5 x i4> [[X:%.*]], <5 x i32> <i32 undef, i32 6, i32 2, i32 undef, i32 undef>
-; CHECK-NEXT: [[S2:%.*]] = shufflevector <5 x i4> [[X]], <5 x i4> [[S1]], <5 x i32> <i32 0, i32 6, i32 7, i32 3, i32 4>
+; CHECK-NEXT: [[S2:%.*]] = shufflevector <5 x i4> [[X:%.*]], <5 x i4> [[Y:%.*]], <5 x i32> <i32 0, i32 1, i32 7, i32 3, i32 4>
; CHECK-NEXT: ret <5 x i4> [[S2]]
;
%s1 = shufflevector <5 x i4> %y, <5 x i4> %x, <5 x i32> <i32 0, i32 6, i32 2, i32 3, i32 9>
@@ -1552,8 +1553,7 @@ define <5 x i4> @sel_common_op_commute1(<5 x i4> %x, <5 x i4> %y) {
define <5 x i4> @sel_common_op_commute2(<5 x i4> %x, <5 x i4> %y) {
; CHECK-LABEL: @sel_common_op_commute2(
-; CHECK-NEXT: [[S1:%.*]] = shufflevector <5 x i4> [[X:%.*]], <5 x i4> [[Y:%.*]], <5 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 9>
-; CHECK-NEXT: [[S2:%.*]] = shufflevector <5 x i4> [[S1]], <5 x i4> [[X]], <5 x i32> <i32 0, i32 6, i32 7, i32 3, i32 4>
+; CHECK-NEXT: [[S2:%.*]] = shufflevector <5 x i4> [[X:%.*]], <5 x i4> [[Y:%.*]], <5 x i32> <i32 0, i32 1, i32 2, i32 3, i32 9>
; CHECK-NEXT: ret <5 x i4> [[S2]]
;
%s1 = shufflevector <5 x i4> %x, <5 x i4> %y, <5 x i32> <i32 0, i32 6, i32 2, i32 3, i32 9>
@@ -1563,8 +1563,7 @@ define <5 x i4> @sel_common_op_commute2(<5 x i4> %x, <5 x i4> %y) {
define <5 x i4> @sel_common_op_commute3(<5 x i4> %x, <5 x i4> %y) {
; CHECK-LABEL: @sel_common_op_commute3(
-; CHECK-NEXT: [[S1:%.*]] = shufflevector <5 x i4> [[Y:%.*]], <5 x i4> [[X:%.*]], <5 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 9>
-; CHECK-NEXT: [[S2:%.*]] = shufflevector <5 x i4> [[S1]], <5 x i4> [[X]], <5 x i32> <i32 0, i32 6, i32 7, i32 3, i32 4>
+; CHECK-NEXT: [[S2:%.*]] = shufflevector <5 x i4> [[Y:%.*]], <5 x i4> [[X:%.*]], <5 x i32> <i32 0, i32 6, i32 7, i32 3, i32 9>
; CHECK-NEXT: ret <5 x i4> [[S2]]
;
%s1 = shufflevector <5 x i4> %y, <5 x i4> %x, <5 x i32> <i32 0, i32 6, i32 2, i32 3, i32 9>
@@ -1574,8 +1573,7 @@ define <5 x i4> @sel_common_op_commute3(<5 x i4> %x, <5 x i4> %y) {
define <5 x i4> @sel_common_op_commute3_poison_mask_elts(<5 x i4> %x, <5 x i4> %y) {
; CHECK-LABEL: @sel_common_op_commute3_poison_mask_elts(
-; CHECK-NEXT: [[S1:%.*]] = shufflevector <5 x i4> [[Y:%.*]], <5 x i4> [[X:%.*]], <5 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 9>
-; CHECK-NEXT: [[S2:%.*]] = shufflevector <5 x i4> [[S1]], <5 x i4> [[X]], <5 x i32> <i32 0, i32 6, i32 undef, i32 undef, i32 4>
+; CHECK-NEXT: [[S2:%.*]] = shufflevector <5 x i4> [[Y:%.*]], <5 x i4> [[X:%.*]], <5 x i32> <i32 0, i32 6, i32 undef, i32 undef, i32 9>
; CHECK-NEXT: ret <5 x i4> [[S2]]
;
%s1 = shufflevector <5 x i4> %y, <5 x i4> %x, <5 x i32> <i32 0, i32 6, i32 2, i32 poison, i32 9>
@@ -1583,6 +1581,8 @@ define <5 x i4> @sel_common_op_commute3_poison_mask_elts(<5 x i4> %x, <5 x i4> %
ret <5 x i4> %s2
}
+; negative test - need shared operand
+
define <5 x i4> @sel_not_common_op_commute3(<5 x i4> %x, <5 x i4> %y, <5 x i4> %z) {
; CHECK-LABEL: @sel_not_common_op_commute3(
; CHECK-NEXT: [[S1:%.*]] = shufflevector <5 x i4> [[Y:%.*]], <5 x i4> [[Z:%.*]], <5 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 9>
@@ -1594,6 +1594,8 @@ define <5 x i4> @sel_not_common_op_commute3(<5 x i4> %x, <5 x i4> %y, <5 x i4> %
ret <5 x i4> %s2
}
+; negative test - need "select" shuffle, no lane changes
+
define <5 x i4> @not_sel_common_op(<5 x i4> %x, <5 x i4> %y) {
; CHECK-LABEL: @not_sel_common_op(
; CHECK-NEXT: [[S1:%.*]] = shufflevector <5 x i4> [[Y:%.*]], <5 x i4> [[X:%.*]], <5 x i32> <i32 undef, i32 6, i32 undef, i32 3, i32 9>
@@ -1605,11 +1607,13 @@ define <5 x i4> @not_sel_common_op(<5 x i4> %x, <5 x i4> %y) {
ret <5 x i4> %s2
}
+; extra use is ok
+
define <4 x i32> @sel_common_op_extra_use(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: @sel_common_op_extra_use(
; CHECK-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> [[X:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT: call void @use_v4i32(<4 x i32> [[S1]])
-; CHECK-NEXT: [[S2:%.*]] = shufflevector <4 x i32> [[S1]], <4 x i32> [[X]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+; CHECK-NEXT: [[S2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[X]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
; CHECK-NEXT: ret <4 x i32> [[S2]]
;
%s1 = shufflevector <4 x i32> %y, <4 x i32> %x, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
More information about the llvm-commits
mailing list