[llvm] 3f906f0 - [InstSimplify] look through vector select (shuffle) in min/max fold

Fri Sep 30 05:27:09 PDT 2022

Author: Sanjay Patel
Date: 2022-09-30T08:27:00-04:00
New Revision: 3f906f057c879e3867800d968113b5cb770d63a4

URL: https://github.com/llvm/llvm-project/commit/3f906f057c879e3867800d968113b5cb770d63a4
DIFF: https://github.com/llvm/llvm-project/commit/3f906f057c879e3867800d968113b5cb770d63a4.diff

LOG: [InstSimplify] look through vector select (shuffle) in min/max fold

This is an extension of the existing min/max+select fold (which already
has a very large number of variations) to allow a vector shuffle because
that's what we have in the motivating example from issue #42100.

A couple of Alive2 checks of variants (I don't know how to generalize
these in Alive):
https://alive2.llvm.org/ce/z/jUFAqT

And verify the PR42100 test:
https://alive2.llvm.org/ce/z/3EcASf

It's possible there is some generalization of the fold or a
VectorCombine/SLP answer for the motivating test, but I haven't found a
better/smaller solution yet.

We can also add even more variants here as follow-up patches. For example,
we can have shuffle followed by min/max; we also don't have this
canonicalization or the reverse:
https://alive2.llvm.org/ce/z/StHD9f

Differential Revision: https://reviews.llvm.org/D134879

Added: 
    

Modified: 
    llvm/lib/Analysis/InstructionSimplify.cpp
    llvm/test/Transforms/InstSimplify/select-maxmin.ll
    llvm/test/Transforms/PhaseOrdering/vector-select.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index e57d9800b04e5..c7a16562b901f 100644

--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -4248,29 +4248,52 @@ static Value *simplifyCmpSelOfMaxMin(Value *CmpLHS, Value *CmpRHS,
     Pred = ICmpInst::getInversePredicate(Pred);
   }
 
-  // (X pred Y) ? X : max/min(X, Y)
+  // A vector select may be shuffling together elements that are equivalent
+  // based on the max/min/select relationship.
   Value *X = CmpLHS, *Y = CmpRHS;
+  bool PeekedThroughSelectShuffle = false;
+  auto *Shuf = dyn_cast<ShuffleVectorInst>(FVal);
+  if (Shuf && Shuf->isSelect()) {
+    if (Shuf->getOperand(0) == Y)
+      FVal = Shuf->getOperand(1);
+    else if (Shuf->getOperand(1) == Y)
+      FVal = Shuf->getOperand(0);
+    else
+      return nullptr;
+    PeekedThroughSelectShuffle = true;
+  }
+
+  // (X pred Y) ? X : max/min(X, Y)
   auto *MMI = dyn_cast<MinMaxIntrinsic>(FVal);
   if (!MMI || TVal != X ||
       !match(FVal, m_c_MaxOrMin(m_Specific(X), m_Specific(Y))))
     return nullptr;
 
-  // (X == Y) ? X : max/min(X, Y) --> max/min(X, Y)
-  if (Pred == CmpInst::ICMP_EQ)
-    return MMI;
-
-  // (X != Y) ? X : max/min(X, Y) --> X
-  if (Pred == CmpInst::ICMP_NE)
-    return X;
-
   // (X >  Y) ? X : max(X, Y) --> max(X, Y)
   // (X >= Y) ? X : max(X, Y) --> max(X, Y)
   // (X <  Y) ? X : min(X, Y) --> min(X, Y)
   // (X <= Y) ? X : min(X, Y) --> min(X, Y)
+  //
+  // The equivalence allows a vector select (shuffle) of max/min and Y. Ex:
+  // (X > Y) ? X : (Z ? max(X, Y) : Y)
+  // If Z is true, this reduces as above, and if Z is false:
+  // (X > Y) ? X : Y --> max(X, Y)
   ICmpInst::Predicate MMPred = MMI->getPredicate();
   if (MMPred == CmpInst::getStrictPredicate(Pred))
     return MMI;
 
+  // Other transforms are not valid with a shuffle.
+  if (PeekedThroughSelectShuffle)
+    return nullptr;
+
+  // (X == Y) ? X : max/min(X, Y) --> max/min(X, Y)
+  if (Pred == CmpInst::ICMP_EQ)
+    return MMI;
+
+  // (X != Y) ? X : max/min(X, Y) --> X
+  if (Pred == CmpInst::ICMP_NE)
+    return X;
+
   // (X <  Y) ? X : max(X, Y) --> X
   // (X <= Y) ? X : max(X, Y) --> X
   // (X >  Y) ? X : min(X, Y) --> X

diff  --git a/llvm/test/Transforms/InstSimplify/select-maxmin.ll b/llvm/test/Transforms/InstSimplify/select-maxmin.ll
index da91f9f7b5f7f..1fc9a8efa5095 100644
--- a/llvm/test/Transforms/InstSimplify/select-maxmin.ll
+++ b/llvm/test/Transforms/InstSimplify/select-maxmin.ll
@@ -1942,15 +1942,12 @@ define i8 @eq_yx_umax_tval_wrong_op(i8 %x, i8 %y, i8 %z) {
   ret i8 %r
 }
 
-; TODO: select with smin pred
+; select with smin pred
 
 define <4 x i8> @slt_xy_smin_select_y_shuf_fval(<4 x i8> %x, <4 x i8> %y) {
 ; CHECK-LABEL: @slt_xy_smin_select_y_shuf_fval(
-; CHECK-NEXT:    [[I:%.*]] = icmp slt <4 x i8> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[M:%.*]] = call <4 x i8> @llvm.smin.v4i8(<4 x i8> [[X]], <4 x i8> [[Y]])
-; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i8> [[Y]], <4 x i8> [[M]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
-; CHECK-NEXT:    [[R:%.*]] = select <4 x i1> [[I]], <4 x i8> [[X]], <4 x i8> [[S]]
-; CHECK-NEXT:    ret <4 x i8> [[R]]
+; CHECK-NEXT:    [[M:%.*]] = call <4 x i8> @llvm.smin.v4i8(<4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]])
+; CHECK-NEXT:    ret <4 x i8> [[M]]
 ;
   %i = icmp slt <4 x i8> %x, %y
   %m = call <4 x i8> @llvm.smin.v4i8(<4 x i8> %x, <4 x i8> %y)
@@ -1959,6 +1956,8 @@ define <4 x i8> @slt_xy_smin_select_y_shuf_fval(<4 x i8> %x, <4 x i8> %y) {
   ret <4 x i8> %r
 }
 
+; negative test - wrong pred
+
 define <4 x i8> @sgt_xy_smin_select_y_shuf_fval(<4 x i8> %x, <4 x i8> %y) {
 ; CHECK-LABEL: @sgt_xy_smin_select_y_shuf_fval(
 ; CHECK-NEXT:    [[I:%.*]] = icmp sgt <4 x i8> [[X:%.*]], [[Y:%.*]]
@@ -1974,6 +1973,8 @@ define <4 x i8> @sgt_xy_smin_select_y_shuf_fval(<4 x i8> %x, <4 x i8> %y) {
   ret <4 x i8> %r
 }
 
+; negative test - wrong shuffle op
+
 define <4 x i8> @slt_xy_smin_select_x_shuf_fval(<4 x i8> %x, <4 x i8> %y) {
 ; CHECK-LABEL: @slt_xy_smin_select_x_shuf_fval(
 ; CHECK-NEXT:    [[I:%.*]] = icmp slt <4 x i8> [[X:%.*]], [[Y:%.*]]
@@ -1989,15 +1990,12 @@ define <4 x i8> @slt_xy_smin_select_x_shuf_fval(<4 x i8> %x, <4 x i8> %y) {
   ret <4 x i8> %r
 }
 
-; TODO: select with non-strict smax pred
+; select with non-strict smax pred
 
 define <4 x i8> @sge_xy_smax_select_y_shuf_fval(<4 x i8> %x, <4 x i8> %y) {
 ; CHECK-LABEL: @sge_xy_smax_select_y_shuf_fval(
-; CHECK-NEXT:    [[I:%.*]] = icmp sge <4 x i8> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[M:%.*]] = call <4 x i8> @llvm.smax.v4i8(<4 x i8> [[Y]], <4 x i8> [[X]])
-; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i8> [[Y]], <4 x i8> [[M]], <4 x i32> <i32 4, i32 1, i32 6, i32 3>
-; CHECK-NEXT:    [[R:%.*]] = select <4 x i1> [[I]], <4 x i8> [[X]], <4 x i8> [[S]]
-; CHECK-NEXT:    ret <4 x i8> [[R]]
+; CHECK-NEXT:    [[M:%.*]] = call <4 x i8> @llvm.smax.v4i8(<4 x i8> [[Y:%.*]], <4 x i8> [[X:%.*]])
+; CHECK-NEXT:    ret <4 x i8> [[M]]
 ;
   %i = icmp sge <4 x i8> %x, %y
   %m = call <4 x i8> @llvm.smax.v4i8(<4 x i8> %y, <4 x i8> %x)
@@ -2006,6 +2004,8 @@ define <4 x i8> @sge_xy_smax_select_y_shuf_fval(<4 x i8> %x, <4 x i8> %y) {
   ret <4 x i8> %r
 }
 
+; negative test - wrong (swapped) pred
+
 define <4 x i8> @sle_yx_smax_select_y_shuf_fval(<4 x i8> %x, <4 x i8> %y) {
 ; CHECK-LABEL: @sle_yx_smax_select_y_shuf_fval(
 ; CHECK-NEXT:    [[I:%.*]] = icmp sge <4 x i8> [[Y:%.*]], [[X:%.*]]
@@ -2021,6 +2021,8 @@ define <4 x i8> @sle_yx_smax_select_y_shuf_fval(<4 x i8> %x, <4 x i8> %y) {
   ret <4 x i8> %r
 }
 
+; negative test - wrong shuffle op
+
 define <4 x i8> @sge_xy_smax_select_x_shuf_fval(<4 x i8> %x, <4 x i8> %y) {
 ; CHECK-LABEL: @sge_xy_smax_select_x_shuf_fval(
 ; CHECK-NEXT:    [[I:%.*]] = icmp sge <4 x i8> [[X:%.*]], [[Y:%.*]]
@@ -2036,15 +2038,12 @@ define <4 x i8> @sge_xy_smax_select_x_shuf_fval(<4 x i8> %x, <4 x i8> %y) {
   ret <4 x i8> %r
 }
 
-; TODO: select with non-strict inverted umin pred
+; select with non-strict inverted umin pred
 
 define <4 x i8> @uge_xy_umin_select_y_shuf_tval(<4 x i8> %x, <4 x i8> %y) {
 ; CHECK-LABEL: @uge_xy_umin_select_y_shuf_tval(
-; CHECK-NEXT:    [[I:%.*]] = icmp uge <4 x i8> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[M:%.*]] = call <4 x i8> @llvm.umin.v4i8(<4 x i8> [[X]], <4 x i8> [[Y]])
-; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i8> [[M]], <4 x i8> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
-; CHECK-NEXT:    [[R:%.*]] = select <4 x i1> [[I]], <4 x i8> [[S]], <4 x i8> [[X]]
-; CHECK-NEXT:    ret <4 x i8> [[R]]
+; CHECK-NEXT:    [[M:%.*]] = call <4 x i8> @llvm.umin.v4i8(<4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]])
+; CHECK-NEXT:    ret <4 x i8> [[M]]
 ;
   %i = icmp uge <4 x i8> %x, %y
   %m = call <4 x i8> @llvm.umin.v4i8(<4 x i8> %x, <4 x i8> %y)
@@ -2053,6 +2052,8 @@ define <4 x i8> @uge_xy_umin_select_y_shuf_tval(<4 x i8> %x, <4 x i8> %y) {
   ret <4 x i8> %r
 }
 
+; negative test - wrong pred
+
 define <4 x i8> @uge_xy_umin_select_y_shuf_fval(<4 x i8> %x, <4 x i8> %y) {
 ; CHECK-LABEL: @uge_xy_umin_select_y_shuf_fval(
 ; CHECK-NEXT:    [[I:%.*]] = icmp uge <4 x i8> [[X:%.*]], [[Y:%.*]]
@@ -2068,6 +2069,8 @@ define <4 x i8> @uge_xy_umin_select_y_shuf_fval(<4 x i8> %x, <4 x i8> %y) {
   ret <4 x i8> %r
 }
 
+; negative test - wrong shuffle op
+
 define <4 x i8> @uge_xy_umin_select_x_shuf_tval(<4 x i8> %x, <4 x i8> %y) {
 ; CHECK-LABEL: @uge_xy_umin_select_x_shuf_tval(
 ; CHECK-NEXT:    [[I:%.*]] = icmp uge <4 x i8> [[X:%.*]], [[Y:%.*]]
@@ -2083,15 +2086,12 @@ define <4 x i8> @uge_xy_umin_select_x_shuf_tval(<4 x i8> %x, <4 x i8> %y) {
   ret <4 x i8> %r
 }
 
-; TODO: select with swapped umax pred
+; select with swapped umax pred
 
 define <4 x i8> @ult_yx_umax_select_y_shuf_fval(<4 x i8> %x, <4 x i8> %y) {
 ; CHECK-LABEL: @ult_yx_umax_select_y_shuf_fval(
-; CHECK-NEXT:    [[I:%.*]] = icmp ult <4 x i8> [[Y:%.*]], [[X:%.*]]
-; CHECK-NEXT:    [[M:%.*]] = call <4 x i8> @llvm.umax.v4i8(<4 x i8> [[Y]], <4 x i8> [[X]])
-; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i8> [[Y]], <4 x i8> [[M]], <4 x i32> <i32 4, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[R:%.*]] = select <4 x i1> [[I]], <4 x i8> [[X]], <4 x i8> [[S]]
-; CHECK-NEXT:    ret <4 x i8> [[R]]
+; CHECK-NEXT:    [[M:%.*]] = call <4 x i8> @llvm.umax.v4i8(<4 x i8> [[Y:%.*]], <4 x i8> [[X:%.*]])
+; CHECK-NEXT:    ret <4 x i8> [[M]]
 ;
   %i = icmp ult <4 x i8> %y, %x
   %m = call <4 x i8> @llvm.umax.v4i8(<4 x i8> %y, <4 x i8> %x)
@@ -2100,6 +2100,8 @@ define <4 x i8> @ult_yx_umax_select_y_shuf_fval(<4 x i8> %x, <4 x i8> %y) {
   ret <4 x i8> %r
 }
 
+; negative test - wrong (inverted+swapped) pred
+
 define <4 x i8> @ult_yx_umax_select_y_shuf_tval(<4 x i8> %x, <4 x i8> %y) {
 ; CHECK-LABEL: @ult_yx_umax_select_y_shuf_tval(
 ; CHECK-NEXT:    [[I:%.*]] = icmp ult <4 x i8> [[Y:%.*]], [[X:%.*]]
@@ -2115,6 +2117,8 @@ define <4 x i8> @ult_yx_umax_select_y_shuf_tval(<4 x i8> %x, <4 x i8> %y) {
   ret <4 x i8> %r
 }
 
+; negative test - wrong shuffle mask
+
 define <4 x i8> @ult_yx_umax_select_y_shuf_mask_fval(<4 x i8> %x, <4 x i8> %y) {
 ; CHECK-LABEL: @ult_yx_umax_select_y_shuf_mask_fval(
 ; CHECK-NEXT:    [[I:%.*]] = icmp ult <4 x i8> [[Y:%.*]], [[X:%.*]]

diff  --git a/llvm/test/Transforms/PhaseOrdering/vector-select.ll b/llvm/test/Transforms/PhaseOrdering/vector-select.ll
index 3817be852b581..7f4f7189e8154 100644
--- a/llvm/test/Transforms/PhaseOrdering/vector-select.ll
+++ b/llvm/test/Transforms/PhaseOrdering/vector-select.ll
@@ -93,16 +93,8 @@ define <4 x i8> @allSignBits_vec(<4 x i8> %cond, <4 x i8> %tval, <4 x i8> %fval)
 define <4 x i32> @PR42100(<4 x i32> noundef %x, <4 x i32> noundef %min) {
 ; CHECK-LABEL: @PR42100(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp slt <4 x i32> [[X:%.*]], [[MIN:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[X]], <4 x i32> [[MIN]])
-; CHECK-NEXT:    [[MIN_ADDR_1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[MIN]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    [[SEL3:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[X]], <4 x i32> [[MIN_ADDR_1]]
-; CHECK-NEXT:    [[MIN_ADDR_1_1:%.*]] = shufflevector <4 x i32> [[MIN_ADDR_1]], <4 x i32> [[SEL3]], <4 x i32> <i32 0, i32 5, i32 2, i32 3>
-; CHECK-NEXT:    [[SEL4:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[X]], <4 x i32> [[MIN_ADDR_1_1]]
-; CHECK-NEXT:    [[MIN_ADDR_1_2:%.*]] = shufflevector <4 x i32> [[MIN_ADDR_1_1]], <4 x i32> [[SEL4]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
-; CHECK-NEXT:    [[SEL5:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[X]], <4 x i32> [[MIN_ADDR_1_2]]
-; CHECK-NEXT:    [[MIN_ADDR_1_3:%.*]] = shufflevector <4 x i32> [[MIN_ADDR_1_2]], <4 x i32> [[SEL5]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
-; CHECK-NEXT:    ret <4 x i32> [[MIN_ADDR_1_3]]
+; CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[X:%.*]], <4 x i32> [[MIN:%.*]])
+; CHECK-NEXT:    ret <4 x i32> [[TMP0]]
 ;
 entry:
   br label %for.cond