[llvm] 432c199 - [InstCombine] move shuffle after min/max with same-shuffled operands
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Tue May 3 13:23:27 PDT 2022
Author: Sanjay Patel
Date: 2022-05-03T16:23:11-04:00
New Revision: 432c199e84730e4bb0aec964aa1d13f0226455ac
URL: https://github.com/llvm/llvm-project/commit/432c199e84730e4bb0aec964aa1d13f0226455ac
DIFF: https://github.com/llvm/llvm-project/commit/432c199e84730e4bb0aec964aa1d13f0226455ac.diff
LOG: [InstCombine] move shuffle after min/max with same-shuffled operands
This is an intrinsic version of the existing fold for binops.
As a first step, I only allowed min/max, but the code is set
up to make adding more intrinsics easy (with more or fewer than
2 arguments).
This change (and possible follow-ups) is discussed in issue #46238.
Added:
Modified:
llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 957b954c2afb8..f35800e9f83af 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1080,6 +1080,43 @@ static Instruction *factorizeMinMaxTree(IntrinsicInst *II) {
return CallInst::Create(MinMax, { MinMaxOp, ThirdOp });
}
+/// If all arguments of the intrinsic are unary shuffles with the same mask,
+/// try to shuffle after the intrinsic.
+static Instruction *
+foldShuffledIntrinsicOperands(IntrinsicInst *II,
+ InstCombiner::BuilderTy &Builder) {
+ // TODO: This should be extended to handle other intrinsics like fshl, ctpop,
+ // etc. Use llvm::isTriviallyVectorizable() and related to determine
+ // which intrinsics are safe to shuffle?
+ if (!match(II, m_MaxOrMin(m_Value(), m_Value())))
+ return nullptr;
+
+ Value *X;
+ ArrayRef<int> Mask;
+ if (!match(II->getArgOperand(0),
+ m_Shuffle(m_Value(X), m_Undef(), m_Mask(Mask))))
+ return nullptr;
+
+ // At least 1 operand must have 1 use because we are creating 2 instructions.
+ if (none_of(II->args(), [](Value *V) { return V->hasOneUse(); }))
+ return nullptr;
+
+ // See if all arguments are shuffled with the same mask.
+ SmallVector<Value *, 4> NewArgs(II->arg_size());
+ NewArgs[0] = X;
+ for (unsigned i = 1, e = II->arg_size(); i != e; ++i) {
+ if (!match(II->getArgOperand(i),
+ m_Shuffle(m_Value(X), m_Undef(), m_SpecificMask(Mask))))
+ return nullptr;
+ NewArgs[i] = X;
+ }
+
+ // intrinsic (shuf X, M), (shuf Y, M), ... --> shuf (intrinsic X, Y, ...), M
+ Value *NewIntrinsic =
+ Builder.CreateIntrinsic(II->getIntrinsicID(), X->getType(), NewArgs);
+ return new ShuffleVectorInst(NewIntrinsic, Mask);
+}
+
/// CallInst simplification. This mostly only handles folding of intrinsic
/// instructions. For normal calls, it allows visitCallBase to do the heavy
/// lifting.
@@ -2622,6 +2659,10 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
break;
}
}
+
+ if (Instruction *Shuf = foldShuffledIntrinsicOperands(II, Builder))
+ return Shuf;
+
// Some intrinsics (like experimental_gc_statepoint) can be used in invoke
// context, so it is handled in visitCallBase and we should trigger it.
return visitCallBase(*II);
diff --git a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
index c41dae0e64e52..7fb4f80c5cadb 100644
--- a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
@@ -2364,9 +2364,8 @@ define i8 @umax_umax_reassoc_constantexpr_sink(i8 %x, i8 %y) {
define <3 x i8> @smax_unary_shuffle_ops(<3 x i8> %x, <3 x i8> %y) {
; CHECK-LABEL: @smax_unary_shuffle_ops(
-; CHECK-NEXT: [[SX:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> poison, <3 x i32> <i32 1, i32 0, i32 2>
-; CHECK-NEXT: [[SY:%.*]] = shufflevector <3 x i8> [[Y:%.*]], <3 x i8> poison, <3 x i32> <i32 1, i32 0, i32 2>
-; CHECK-NEXT: [[R:%.*]] = call <3 x i8> @llvm.smax.v3i8(<3 x i8> [[SX]], <3 x i8> [[SY]])
+; CHECK-NEXT: [[TMP1:%.*]] = call <3 x i8> @llvm.smax.v3i8(<3 x i8> [[X:%.*]], <3 x i8> [[Y:%.*]])
+; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[TMP1]], <3 x i8> poison, <3 x i32> <i32 1, i32 0, i32 2>
; CHECK-NEXT: ret <3 x i8> [[R]]
;
%sx = shufflevector <3 x i8> %x, <3 x i8> poison, <3 x i32> <i32 1, i32 0, i32 2>
@@ -2379,8 +2378,8 @@ define <3 x i8> @smin_unary_shuffle_ops_use_poison_mask_elt(<3 x i8> %x, <3 x i8
; CHECK-LABEL: @smin_unary_shuffle_ops_use_poison_mask_elt(
; CHECK-NEXT: [[SX:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> poison, <3 x i32> <i32 undef, i32 0, i32 2>
; CHECK-NEXT: call void @use_vec(<3 x i8> [[SX]])
-; CHECK-NEXT: [[SY:%.*]] = shufflevector <3 x i8> [[Y:%.*]], <3 x i8> poison, <3 x i32> <i32 undef, i32 0, i32 2>
-; CHECK-NEXT: [[R:%.*]] = call <3 x i8> @llvm.smin.v3i8(<3 x i8> [[SX]], <3 x i8> [[SY]])
+; CHECK-NEXT: [[TMP1:%.*]] = call <3 x i8> @llvm.smin.v3i8(<3 x i8> [[X]], <3 x i8> [[Y:%.*]])
+; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[TMP1]], <3 x i8> poison, <3 x i32> <i32 undef, i32 0, i32 2>
; CHECK-NEXT: ret <3 x i8> [[R]]
;
%sx = shufflevector <3 x i8> %x, <3 x i8> poison, <3 x i32> <i32 poison, i32 0, i32 2>
@@ -2392,10 +2391,10 @@ define <3 x i8> @smin_unary_shuffle_ops_use_poison_mask_elt(<3 x i8> %x, <3 x i8
define <3 x i8> @umax_unary_shuffle_ops_use_widening(<2 x i8> %x, <2 x i8> %y) {
; CHECK-LABEL: @umax_unary_shuffle_ops_use_widening(
-; CHECK-NEXT: [[SX:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> poison, <3 x i32> <i32 1, i32 0, i32 0>
; CHECK-NEXT: [[SY:%.*]] = shufflevector <2 x i8> [[Y:%.*]], <2 x i8> poison, <3 x i32> <i32 1, i32 0, i32 0>
; CHECK-NEXT: call void @use_vec(<3 x i8> [[SY]])
-; CHECK-NEXT: [[R:%.*]] = call <3 x i8> @llvm.umax.v3i8(<3 x i8> [[SX]], <3 x i8> [[SY]])
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i8> @llvm.umax.v2i8(<2 x i8> [[X:%.*]], <2 x i8> [[Y]])
+; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> poison, <3 x i32> <i32 1, i32 0, i32 0>
; CHECK-NEXT: ret <3 x i8> [[R]]
;
%sx = shufflevector <2 x i8> %x, <2 x i8> poison, <3 x i32> <i32 1, i32 0, i32 0>
@@ -2407,9 +2406,8 @@ define <3 x i8> @umax_unary_shuffle_ops_use_widening(<2 x i8> %x, <2 x i8> %y) {
define <3 x i8> @umin_unary_shuffle_ops_narrowing(<4 x i8> %x, <4 x i8> %y) {
; CHECK-LABEL: @umin_unary_shuffle_ops_narrowing(
-; CHECK-NEXT: [[SX:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> poison, <3 x i32> <i32 1, i32 0, i32 3>
-; CHECK-NEXT: [[SY:%.*]] = shufflevector <4 x i8> [[Y:%.*]], <4 x i8> poison, <3 x i32> <i32 1, i32 0, i32 3>
-; CHECK-NEXT: [[R:%.*]] = call <3 x i8> @llvm.umin.v3i8(<3 x i8> [[SX]], <3 x i8> [[SY]])
+; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i8> @llvm.umin.v4i8(<4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]])
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <3 x i32> <i32 1, i32 0, i32 3>
; CHECK-NEXT: ret <3 x i8> [[R]]
;
%sx = shufflevector <4 x i8> %x, <4 x i8> poison, <3 x i32> <i32 1, i32 0, i32 3>
@@ -2418,6 +2416,8 @@ define <3 x i8> @umin_unary_shuffle_ops_narrowing(<4 x i8> %x, <4 x i8> %y) {
ret <3 x i8> %r
}
+; negative test - must have 2 shuffles
+
define <3 x i8> @smax_unary_shuffle_ops_unshuffled_op(<3 x i8> %x, <3 x i8> %y) {
; CHECK-LABEL: @smax_unary_shuffle_ops_unshuffled_op(
; CHECK-NEXT: [[SX:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> poison, <3 x i32> <i32 0, i32 0, i32 2>
@@ -2429,6 +2429,8 @@ define <3 x i8> @smax_unary_shuffle_ops_unshuffled_op(<3 x i8> %x, <3 x i8> %y)
ret <3 x i8> %r
}
+; negative test - must have identical masks
+
define <3 x i8> @smax_unary_shuffle_ops_wrong_mask(<3 x i8> %x, <3 x i8> %y) {
; CHECK-LABEL: @smax_unary_shuffle_ops_wrong_mask(
; CHECK-NEXT: [[SX:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> poison, <3 x i32> <i32 0, i32 0, i32 2>
@@ -2442,6 +2444,8 @@ define <3 x i8> @smax_unary_shuffle_ops_wrong_mask(<3 x i8> %x, <3 x i8> %y) {
ret <3 x i8> %r
}
+; negative test - must be unary shuffles
+
define <3 x i8> @smax_unary_shuffle_ops_wrong_shuf(<3 x i8> %x, <3 x i8> %y, <3 x i8> %z) {
; CHECK-LABEL: @smax_unary_shuffle_ops_wrong_shuf(
; CHECK-NEXT: [[SX:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> [[Z:%.*]], <3 x i32> <i32 1, i32 0, i32 3>
@@ -2455,6 +2459,8 @@ define <3 x i8> @smax_unary_shuffle_ops_wrong_shuf(<3 x i8> %x, <3 x i8> %y, <3
ret <3 x i8> %r
}
+; negative test - too many uses
+
define <3 x i8> @smin_unary_shuffle_ops_uses(<3 x i8> %x, <3 x i8> %y) {
; CHECK-LABEL: @smin_unary_shuffle_ops_uses(
; CHECK-NEXT: [[SX:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> poison, <3 x i32> <i32 1, i32 0, i32 2>
More information about the llvm-commits mailing list