[llvm] 3f906f0 - [InstSimplify] look through vector select (shuffle) in min/max fold
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 30 05:27:09 PDT 2022
Author: Sanjay Patel
Date: 2022-09-30T08:27:00-04:00
New Revision: 3f906f057c879e3867800d968113b5cb770d63a4
URL: https://github.com/llvm/llvm-project/commit/3f906f057c879e3867800d968113b5cb770d63a4
DIFF: https://github.com/llvm/llvm-project/commit/3f906f057c879e3867800d968113b5cb770d63a4.diff
LOG: [InstSimplify] look through vector select (shuffle) in min/max fold
This is an extension of the existing min/max+select fold (which already
has a very large number of variations) to allow a vector shuffle because
that's what we have in the motivating example from issue #42100.
Alive2 proofs for a couple of the variants (I don't know how to generalize
these in Alive2):
https://alive2.llvm.org/ce/z/jUFAqT
And verify the PR42100 test:
https://alive2.llvm.org/ce/z/3EcASf
It's possible there is some generalization of the fold or a
VectorCombine/SLP answer for the motivating test, but I haven't found a
better/smaller solution yet.
We can also add even more variants here as follow-up patches. For example,
the pattern could have a shuffle followed by min/max (rather than min/max
followed by a shuffle); we also don't perform the canonicalization shown
here, or its reverse:
https://alive2.llvm.org/ce/z/StHD9f
Differential Revision: https://reviews.llvm.org/D134879
Added:
Modified:
llvm/lib/Analysis/InstructionSimplify.cpp
llvm/test/Transforms/InstSimplify/select-maxmin.ll
llvm/test/Transforms/PhaseOrdering/vector-select.ll
Removed:
################################################################################
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index e57d9800b04e5..c7a16562b901f 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -4248,29 +4248,52 @@ static Value *simplifyCmpSelOfMaxMin(Value *CmpLHS, Value *CmpRHS,
Pred = ICmpInst::getInversePredicate(Pred);
}
- // (X pred Y) ? X : max/min(X, Y)
+ // A vector select may be shuffling together elements that are equivalent
+ // based on the max/min/select relationship.
Value *X = CmpLHS, *Y = CmpRHS;
+ bool PeekedThroughSelectShuffle = false;
+ auto *Shuf = dyn_cast<ShuffleVectorInst>(FVal);
+ if (Shuf && Shuf->isSelect()) {
+ if (Shuf->getOperand(0) == Y)
+ FVal = Shuf->getOperand(1);
+ else if (Shuf->getOperand(1) == Y)
+ FVal = Shuf->getOperand(0);
+ else
+ return nullptr;
+ PeekedThroughSelectShuffle = true;
+ }
+
+ // (X pred Y) ? X : max/min(X, Y)
auto *MMI = dyn_cast<MinMaxIntrinsic>(FVal);
if (!MMI || TVal != X ||
!match(FVal, m_c_MaxOrMin(m_Specific(X), m_Specific(Y))))
return nullptr;
- // (X == Y) ? X : max/min(X, Y) --> max/min(X, Y)
- if (Pred == CmpInst::ICMP_EQ)
- return MMI;
-
- // (X != Y) ? X : max/min(X, Y) --> X
- if (Pred == CmpInst::ICMP_NE)
- return X;
-
// (X > Y) ? X : max(X, Y) --> max(X, Y)
// (X >= Y) ? X : max(X, Y) --> max(X, Y)
// (X < Y) ? X : min(X, Y) --> min(X, Y)
// (X <= Y) ? X : min(X, Y) --> min(X, Y)
+ //
+ // The equivalence allows a vector select (shuffle) of max/min and Y. Ex:
+ // (X > Y) ? X : (Z ? max(X, Y) : Y)
+ // If Z is true, this reduces as above, and if Z is false:
+ // (X > Y) ? X : Y --> max(X, Y)
ICmpInst::Predicate MMPred = MMI->getPredicate();
if (MMPred == CmpInst::getStrictPredicate(Pred))
return MMI;
+ // Other transforms are not valid with a shuffle.
+ if (PeekedThroughSelectShuffle)
+ return nullptr;
+
+ // (X == Y) ? X : max/min(X, Y) --> max/min(X, Y)
+ if (Pred == CmpInst::ICMP_EQ)
+ return MMI;
+
+ // (X != Y) ? X : max/min(X, Y) --> X
+ if (Pred == CmpInst::ICMP_NE)
+ return X;
+
// (X < Y) ? X : max(X, Y) --> X
// (X <= Y) ? X : max(X, Y) --> X
// (X > Y) ? X : min(X, Y) --> X
diff --git a/llvm/test/Transforms/InstSimplify/select-maxmin.ll b/llvm/test/Transforms/InstSimplify/select-maxmin.ll
index da91f9f7b5f7f..1fc9a8efa5095 100644
--- a/llvm/test/Transforms/InstSimplify/select-maxmin.ll
+++ b/llvm/test/Transforms/InstSimplify/select-maxmin.ll
@@ -1942,15 +1942,12 @@ define i8 @eq_yx_umax_tval_wrong_op(i8 %x, i8 %y, i8 %z) {
ret i8 %r
}
-; TODO: select with smin pred
+; select with smin pred
define <4 x i8> @slt_xy_smin_select_y_shuf_fval(<4 x i8> %x, <4 x i8> %y) {
; CHECK-LABEL: @slt_xy_smin_select_y_shuf_fval(
-; CHECK-NEXT: [[I:%.*]] = icmp slt <4 x i8> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT: [[M:%.*]] = call <4 x i8> @llvm.smin.v4i8(<4 x i8> [[X]], <4 x i8> [[Y]])
-; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i8> [[Y]], <4 x i8> [[M]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
-; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[I]], <4 x i8> [[X]], <4 x i8> [[S]]
-; CHECK-NEXT: ret <4 x i8> [[R]]
+; CHECK-NEXT: [[M:%.*]] = call <4 x i8> @llvm.smin.v4i8(<4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]])
+; CHECK-NEXT: ret <4 x i8> [[M]]
;
%i = icmp slt <4 x i8> %x, %y
%m = call <4 x i8> @llvm.smin.v4i8(<4 x i8> %x, <4 x i8> %y)
@@ -1959,6 +1956,8 @@ define <4 x i8> @slt_xy_smin_select_y_shuf_fval(<4 x i8> %x, <4 x i8> %y) {
ret <4 x i8> %r
}
+; negative test - wrong pred
+
define <4 x i8> @sgt_xy_smin_select_y_shuf_fval(<4 x i8> %x, <4 x i8> %y) {
; CHECK-LABEL: @sgt_xy_smin_select_y_shuf_fval(
; CHECK-NEXT: [[I:%.*]] = icmp sgt <4 x i8> [[X:%.*]], [[Y:%.*]]
@@ -1974,6 +1973,8 @@ define <4 x i8> @sgt_xy_smin_select_y_shuf_fval(<4 x i8> %x, <4 x i8> %y) {
ret <4 x i8> %r
}
+; negative test - wrong shuffle op
+
define <4 x i8> @slt_xy_smin_select_x_shuf_fval(<4 x i8> %x, <4 x i8> %y) {
; CHECK-LABEL: @slt_xy_smin_select_x_shuf_fval(
; CHECK-NEXT: [[I:%.*]] = icmp slt <4 x i8> [[X:%.*]], [[Y:%.*]]
@@ -1989,15 +1990,12 @@ define <4 x i8> @slt_xy_smin_select_x_shuf_fval(<4 x i8> %x, <4 x i8> %y) {
ret <4 x i8> %r
}
-; TODO: select with non-strict smax pred
+; select with non-strict smax pred
define <4 x i8> @sge_xy_smax_select_y_shuf_fval(<4 x i8> %x, <4 x i8> %y) {
; CHECK-LABEL: @sge_xy_smax_select_y_shuf_fval(
-; CHECK-NEXT: [[I:%.*]] = icmp sge <4 x i8> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT: [[M:%.*]] = call <4 x i8> @llvm.smax.v4i8(<4 x i8> [[Y]], <4 x i8> [[X]])
-; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i8> [[Y]], <4 x i8> [[M]], <4 x i32> <i32 4, i32 1, i32 6, i32 3>
-; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[I]], <4 x i8> [[X]], <4 x i8> [[S]]
-; CHECK-NEXT: ret <4 x i8> [[R]]
+; CHECK-NEXT: [[M:%.*]] = call <4 x i8> @llvm.smax.v4i8(<4 x i8> [[Y:%.*]], <4 x i8> [[X:%.*]])
+; CHECK-NEXT: ret <4 x i8> [[M]]
;
%i = icmp sge <4 x i8> %x, %y
%m = call <4 x i8> @llvm.smax.v4i8(<4 x i8> %y, <4 x i8> %x)
@@ -2006,6 +2004,8 @@ define <4 x i8> @sge_xy_smax_select_y_shuf_fval(<4 x i8> %x, <4 x i8> %y) {
ret <4 x i8> %r
}
+; negative test - wrong (swapped) pred
+
define <4 x i8> @sle_yx_smax_select_y_shuf_fval(<4 x i8> %x, <4 x i8> %y) {
; CHECK-LABEL: @sle_yx_smax_select_y_shuf_fval(
; CHECK-NEXT: [[I:%.*]] = icmp sge <4 x i8> [[Y:%.*]], [[X:%.*]]
@@ -2021,6 +2021,8 @@ define <4 x i8> @sle_yx_smax_select_y_shuf_fval(<4 x i8> %x, <4 x i8> %y) {
ret <4 x i8> %r
}
+; negative test - wrong shuffle op
+
define <4 x i8> @sge_xy_smax_select_x_shuf_fval(<4 x i8> %x, <4 x i8> %y) {
; CHECK-LABEL: @sge_xy_smax_select_x_shuf_fval(
; CHECK-NEXT: [[I:%.*]] = icmp sge <4 x i8> [[X:%.*]], [[Y:%.*]]
@@ -2036,15 +2038,12 @@ define <4 x i8> @sge_xy_smax_select_x_shuf_fval(<4 x i8> %x, <4 x i8> %y) {
ret <4 x i8> %r
}
-; TODO: select with non-strict inverted umin pred
+; select with non-strict inverted umin pred
define <4 x i8> @uge_xy_umin_select_y_shuf_tval(<4 x i8> %x, <4 x i8> %y) {
; CHECK-LABEL: @uge_xy_umin_select_y_shuf_tval(
-; CHECK-NEXT: [[I:%.*]] = icmp uge <4 x i8> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT: [[M:%.*]] = call <4 x i8> @llvm.umin.v4i8(<4 x i8> [[X]], <4 x i8> [[Y]])
-; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i8> [[M]], <4 x i8> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
-; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[I]], <4 x i8> [[S]], <4 x i8> [[X]]
-; CHECK-NEXT: ret <4 x i8> [[R]]
+; CHECK-NEXT: [[M:%.*]] = call <4 x i8> @llvm.umin.v4i8(<4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]])
+; CHECK-NEXT: ret <4 x i8> [[M]]
;
%i = icmp uge <4 x i8> %x, %y
%m = call <4 x i8> @llvm.umin.v4i8(<4 x i8> %x, <4 x i8> %y)
@@ -2053,6 +2052,8 @@ define <4 x i8> @uge_xy_umin_select_y_shuf_tval(<4 x i8> %x, <4 x i8> %y) {
ret <4 x i8> %r
}
+; negative test - wrong pred
+
define <4 x i8> @uge_xy_umin_select_y_shuf_fval(<4 x i8> %x, <4 x i8> %y) {
; CHECK-LABEL: @uge_xy_umin_select_y_shuf_fval(
; CHECK-NEXT: [[I:%.*]] = icmp uge <4 x i8> [[X:%.*]], [[Y:%.*]]
@@ -2068,6 +2069,8 @@ define <4 x i8> @uge_xy_umin_select_y_shuf_fval(<4 x i8> %x, <4 x i8> %y) {
ret <4 x i8> %r
}
+; negative test - wrong shuffle op
+
define <4 x i8> @uge_xy_umin_select_x_shuf_tval(<4 x i8> %x, <4 x i8> %y) {
; CHECK-LABEL: @uge_xy_umin_select_x_shuf_tval(
; CHECK-NEXT: [[I:%.*]] = icmp uge <4 x i8> [[X:%.*]], [[Y:%.*]]
@@ -2083,15 +2086,12 @@ define <4 x i8> @uge_xy_umin_select_x_shuf_tval(<4 x i8> %x, <4 x i8> %y) {
ret <4 x i8> %r
}
-; TODO: select with swapped umax pred
+; select with swapped umax pred
define <4 x i8> @ult_yx_umax_select_y_shuf_fval(<4 x i8> %x, <4 x i8> %y) {
; CHECK-LABEL: @ult_yx_umax_select_y_shuf_fval(
-; CHECK-NEXT: [[I:%.*]] = icmp ult <4 x i8> [[Y:%.*]], [[X:%.*]]
-; CHECK-NEXT: [[M:%.*]] = call <4 x i8> @llvm.umax.v4i8(<4 x i8> [[Y]], <4 x i8> [[X]])
-; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i8> [[Y]], <4 x i8> [[M]], <4 x i32> <i32 4, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[I]], <4 x i8> [[X]], <4 x i8> [[S]]
-; CHECK-NEXT: ret <4 x i8> [[R]]
+; CHECK-NEXT: [[M:%.*]] = call <4 x i8> @llvm.umax.v4i8(<4 x i8> [[Y:%.*]], <4 x i8> [[X:%.*]])
+; CHECK-NEXT: ret <4 x i8> [[M]]
;
%i = icmp ult <4 x i8> %y, %x
%m = call <4 x i8> @llvm.umax.v4i8(<4 x i8> %y, <4 x i8> %x)
@@ -2100,6 +2100,8 @@ define <4 x i8> @ult_yx_umax_select_y_shuf_fval(<4 x i8> %x, <4 x i8> %y) {
ret <4 x i8> %r
}
+; negative test - wrong (inverted+swapped) pred
+
define <4 x i8> @ult_yx_umax_select_y_shuf_tval(<4 x i8> %x, <4 x i8> %y) {
; CHECK-LABEL: @ult_yx_umax_select_y_shuf_tval(
; CHECK-NEXT: [[I:%.*]] = icmp ult <4 x i8> [[Y:%.*]], [[X:%.*]]
@@ -2115,6 +2117,8 @@ define <4 x i8> @ult_yx_umax_select_y_shuf_tval(<4 x i8> %x, <4 x i8> %y) {
ret <4 x i8> %r
}
+; negative test - wrong shuffle mask
+
define <4 x i8> @ult_yx_umax_select_y_shuf_mask_fval(<4 x i8> %x, <4 x i8> %y) {
; CHECK-LABEL: @ult_yx_umax_select_y_shuf_mask_fval(
; CHECK-NEXT: [[I:%.*]] = icmp ult <4 x i8> [[Y:%.*]], [[X:%.*]]
diff --git a/llvm/test/Transforms/PhaseOrdering/vector-select.ll b/llvm/test/Transforms/PhaseOrdering/vector-select.ll
index 3817be852b581..7f4f7189e8154 100644
--- a/llvm/test/Transforms/PhaseOrdering/vector-select.ll
+++ b/llvm/test/Transforms/PhaseOrdering/vector-select.ll
@@ -93,16 +93,8 @@ define <4 x i8> @allSignBits_vec(<4 x i8> %cond, <4 x i8> %tval, <4 x i8> %fval)
define <4 x i32> @PR42100(<4 x i32> noundef %x, <4 x i32> noundef %min) {
; CHECK-LABEL: @PR42100(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = icmp slt <4 x i32> [[X:%.*]], [[MIN:%.*]]
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[X]], <4 x i32> [[MIN]])
-; CHECK-NEXT: [[MIN_ADDR_1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[MIN]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[SEL3:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[X]], <4 x i32> [[MIN_ADDR_1]]
-; CHECK-NEXT: [[MIN_ADDR_1_1:%.*]] = shufflevector <4 x i32> [[MIN_ADDR_1]], <4 x i32> [[SEL3]], <4 x i32> <i32 0, i32 5, i32 2, i32 3>
-; CHECK-NEXT: [[SEL4:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[X]], <4 x i32> [[MIN_ADDR_1_1]]
-; CHECK-NEXT: [[MIN_ADDR_1_2:%.*]] = shufflevector <4 x i32> [[MIN_ADDR_1_1]], <4 x i32> [[SEL4]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
-; CHECK-NEXT: [[SEL5:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[X]], <4 x i32> [[MIN_ADDR_1_2]]
-; CHECK-NEXT: [[MIN_ADDR_1_3:%.*]] = shufflevector <4 x i32> [[MIN_ADDR_1_2]], <4 x i32> [[SEL5]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
-; CHECK-NEXT: ret <4 x i32> [[MIN_ADDR_1_3]]
+; CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[X:%.*]], <4 x i32> [[MIN:%.*]])
+; CHECK-NEXT: ret <4 x i32> [[TMP0]]
;
entry:
br label %for.cond
More information about the llvm-commits
mailing list