[llvm] 8bfba17 - [InstSimplify][PhaseOrdering] add tests for vector select of min/max; NFC

Thu Sep 29 09:07:04 PDT 2022

Author: Sanjay Patel
Date: 2022-09-29T12:06:55-04:00
New Revision: 8bfba17b409000deae6436f92080d0c96bf5ea3b

URL: https://github.com/llvm/llvm-project/commit/8bfba17b409000deae6436f92080d0c96bf5ea3b
DIFF: https://github.com/llvm/llvm-project/commit/8bfba17b409000deae6436f92080d0c96bf5ea3b.diff

LOG: [InstSimplify][PhaseOrdering] add tests for vector select of min/max; NFC

The phase ordering test is the almost-unoptimized IR for the example
in issue #42100; it was only passed through -mem2reg to remove the
obviously excessive loads/stores and other noise.

D134879

Added: 
    

Modified: 
    llvm/test/Transforms/InstSimplify/select-maxmin.ll
    llvm/test/Transforms/PhaseOrdering/vector-select.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/Transforms/InstSimplify/select-maxmin.ll b/llvm/test/Transforms/InstSimplify/select-maxmin.ll
index 957aee70c36ea..da91f9f7b5f7f 100644

--- a/llvm/test/Transforms/InstSimplify/select-maxmin.ll
+++ b/llvm/test/Transforms/InstSimplify/select-maxmin.ll
@@ -5,6 +5,10 @@ declare i8 @llvm.smin.i8(i8, i8)
 declare i8 @llvm.smax.i8(i8, i8)
 declare i8 @llvm.umin.i8(i8, i8)
 declare i8 @llvm.umax.i8(i8, i8)
+declare <4 x i8> @llvm.smin.v4i8(<4 x i8>, <4 x i8>)
+declare <4 x i8> @llvm.smax.v4i8(<4 x i8>, <4 x i8>)
+declare <4 x i8> @llvm.umin.v4i8(<4 x i8>, <4 x i8>)
+declare <4 x i8> @llvm.umax.v4i8(<4 x i8>, <4 x i8>)
 
 ; smin
 
@@ -1937,3 +1941,191 @@ define i8 @eq_yx_umax_tval_wrong_op(i8 %x, i8 %y, i8 %z) {
   %r = select i1 %i, i8 %m, i8 %x
   ret i8 %r
 }
+
+; TODO: select with smin pred
+
+define <4 x i8> @slt_xy_smin_select_y_shuf_fval(<4 x i8> %x, <4 x i8> %y) { ; x < y ? x : blend(y, smin(x, y))
+; CHECK-LABEL: @slt_xy_smin_select_y_shuf_fval(
+; CHECK-NEXT:    [[I:%.*]] = icmp slt <4 x i8> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[M:%.*]] = call <4 x i8> @llvm.smin.v4i8(<4 x i8> [[X]], <4 x i8> [[Y]])
+; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i8> [[Y]], <4 x i8> [[M]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+; CHECK-NEXT:    [[R:%.*]] = select <4 x i1> [[I]], <4 x i8> [[X]], <4 x i8> [[S]]
+; CHECK-NEXT:    ret <4 x i8> [[R]]
+;
+  %i = icmp slt <4 x i8> %x, %y
+  %m = call <4 x i8> @llvm.smin.v4i8(<4 x i8> %x, <4 x i8> %y)
+  %s = shufflevector <4 x i8> %y, <4 x i8> %m, <4 x i32> <i32 0, i32 1, i32 6, i32 7> ; lanes 0-1 from %y, lanes 2-3 from %m
+  %r = select <4 x i1> %i, <4 x i8> %x, <4 x i8> %s ; when x >= y, smin(x, y) == y, so every lane of %r equals %m
+  ret <4 x i8> %r
+}
+
+define <4 x i8> @sgt_xy_smin_select_y_shuf_fval(<4 x i8> %x, <4 x i8> %y) { ; same as the slt_xy test except for the predicate
+; CHECK-LABEL: @sgt_xy_smin_select_y_shuf_fval(
+; CHECK-NEXT:    [[I:%.*]] = icmp sgt <4 x i8> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[M:%.*]] = call <4 x i8> @llvm.smin.v4i8(<4 x i8> [[X]], <4 x i8> [[Y]])
+; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i8> [[Y]], <4 x i8> [[M]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+; CHECK-NEXT:    [[R:%.*]] = select <4 x i1> [[I]], <4 x i8> [[X]], <4 x i8> [[S]]
+; CHECK-NEXT:    ret <4 x i8> [[R]]
+;
+  %i = icmp sgt <4 x i8> %x, %y ; sgt instead of slt
+  %m = call <4 x i8> @llvm.smin.v4i8(<4 x i8> %x, <4 x i8> %y)
+  %s = shufflevector <4 x i8> %y, <4 x i8> %m, <4 x i32> <i32 0, i32 1, i32 6, i32 7> ; lanes 0-1 from %y, lanes 2-3 from %m
+  %r = select <4 x i1> %i, <4 x i8> %x, <4 x i8> %s ; lanes 2-3 are always x here (x > y ? x : smin), so %r is not %m
+  ret <4 x i8> %r
+}
+
+define <4 x i8> @slt_xy_smin_select_x_shuf_fval(<4 x i8> %x, <4 x i8> %y) { ; shuffle blends %x (not %y) with the smin
+; CHECK-LABEL: @slt_xy_smin_select_x_shuf_fval(
+; CHECK-NEXT:    [[I:%.*]] = icmp slt <4 x i8> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[M:%.*]] = call <4 x i8> @llvm.smin.v4i8(<4 x i8> [[X]], <4 x i8> [[Y]])
+; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i8> [[X]], <4 x i8> [[M]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+; CHECK-NEXT:    [[R:%.*]] = select <4 x i1> [[I]], <4 x i8> [[X]], <4 x i8> [[S]]
+; CHECK-NEXT:    ret <4 x i8> [[R]]
+;
+  %i = icmp slt <4 x i8> %x, %y
+  %m = call <4 x i8> @llvm.smin.v4i8(<4 x i8> %x, <4 x i8> %y)
+  %s = shufflevector <4 x i8> %x, <4 x i8> %m, <4 x i32> <i32 0, i32 1, i32 6, i32 7> ; lanes 0-1 from %x, lanes 2-3 from %m
+  %r = select <4 x i1> %i, <4 x i8> %x, <4 x i8> %s ; lanes 0-1 are x on both arms, so %r is not %m
+  ret <4 x i8> %r
+}
+
+; TODO: select with non-strict smax pred
+
+define <4 x i8> @sge_xy_smax_select_y_shuf_fval(<4 x i8> %x, <4 x i8> %y) { ; x >= y ? x : blend(y, smax(y, x))
+; CHECK-LABEL: @sge_xy_smax_select_y_shuf_fval(
+; CHECK-NEXT:    [[I:%.*]] = icmp sge <4 x i8> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[M:%.*]] = call <4 x i8> @llvm.smax.v4i8(<4 x i8> [[Y]], <4 x i8> [[X]])
+; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i8> [[Y]], <4 x i8> [[M]], <4 x i32> <i32 4, i32 1, i32 6, i32 3>
+; CHECK-NEXT:    [[R:%.*]] = select <4 x i1> [[I]], <4 x i8> [[X]], <4 x i8> [[S]]
+; CHECK-NEXT:    ret <4 x i8> [[R]]
+;
+  %i = icmp sge <4 x i8> %x, %y
+  %m = call <4 x i8> @llvm.smax.v4i8(<4 x i8> %y, <4 x i8> %x) ; operands commuted relative to the icmp
+  %s = shufflevector <4 x i8> %y, <4 x i8> %m, <4 x i32> <i32 4, i32 1, i32 6, i32 3> ; lanes 0,2 from %m, lanes 1,3 from %y
+  %r = select <4 x i1> %i, <4 x i8> %x, <4 x i8> %s ; when x < y, smax(y, x) == y, so every lane of %r equals %m
+  ret <4 x i8> %r
+}
+
+define <4 x i8> @sle_yx_smax_select_y_shuf_fval(<4 x i8> %x, <4 x i8> %y) { ; y <= x ? x : blend(y, smax(y, x))
+; CHECK-LABEL: @sle_yx_smax_select_y_shuf_fval(
+; CHECK-NEXT:    [[I:%.*]] = icmp sle <4 x i8> [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[M:%.*]] = call <4 x i8> @llvm.smax.v4i8(<4 x i8> [[Y]], <4 x i8> [[X]])
+; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i8> [[Y]], <4 x i8> [[M]], <4 x i32> <i32 4, i32 1, i32 6, i32 3>
+; CHECK-NEXT:    [[R:%.*]] = select <4 x i1> [[I]], <4 x i8> [[X]], <4 x i8> [[S]]
+; CHECK-NEXT:    ret <4 x i8> [[R]]
+;
+  %i = icmp sle <4 x i8> %y, %x ; commuted spelling of "sge %x, %y", as the test name says
+  %m = call <4 x i8> @llvm.smax.v4i8(<4 x i8> %y, <4 x i8> %x)
+  %s = shufflevector <4 x i8> %y, <4 x i8> %m, <4 x i32> <i32 4, i32 1, i32 6, i32 3> ; lanes 0,2 from %m, lanes 1,3 from %y
+  %r = select <4 x i1> %i, <4 x i8> %x, <4 x i8> %s ; when y > x, smax(y, x) == y, so every lane of %r equals %m
+  ret <4 x i8> %r
+}
+
+define <4 x i8> @sge_xy_smax_select_x_shuf_fval(<4 x i8> %x, <4 x i8> %y) { ; shuffle blends %x (not %y) with the smax
+; CHECK-LABEL: @sge_xy_smax_select_x_shuf_fval(
+; CHECK-NEXT:    [[I:%.*]] = icmp sge <4 x i8> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[M:%.*]] = call <4 x i8> @llvm.smax.v4i8(<4 x i8> [[Y]], <4 x i8> [[X]])
+; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i8> [[X]], <4 x i8> [[M]], <4 x i32> <i32 4, i32 1, i32 6, i32 3>
+; CHECK-NEXT:    [[R:%.*]] = select <4 x i1> [[I]], <4 x i8> [[X]], <4 x i8> [[S]]
+; CHECK-NEXT:    ret <4 x i8> [[R]]
+;
+  %i = icmp sge <4 x i8> %x, %y
+  %m = call <4 x i8> @llvm.smax.v4i8(<4 x i8> %y, <4 x i8> %x)
+  %s = shufflevector <4 x i8> %x, <4 x i8> %m, <4 x i32> <i32 4, i32 1, i32 6, i32 3> ; lanes 0,2 from %m, lanes 1,3 from %x
+  %r = select <4 x i1> %i, <4 x i8> %x, <4 x i8> %s ; lanes 1,3 are x on both arms, so %r is not %m
+  ret <4 x i8> %r
+}
+
+; TODO: select with non-strict inverted umin pred
+
+define <4 x i8> @uge_xy_umin_select_y_shuf_tval(<4 x i8> %x, <4 x i8> %y) { ; x >= y ? blend(umin(x, y), y) : x
+; CHECK-LABEL: @uge_xy_umin_select_y_shuf_tval(
+; CHECK-NEXT:    [[I:%.*]] = icmp uge <4 x i8> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[M:%.*]] = call <4 x i8> @llvm.umin.v4i8(<4 x i8> [[X]], <4 x i8> [[Y]])
+; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i8> [[M]], <4 x i8> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT:    [[R:%.*]] = select <4 x i1> [[I]], <4 x i8> [[S]], <4 x i8> [[X]]
+; CHECK-NEXT:    ret <4 x i8> [[R]]
+;
+  %i = icmp uge <4 x i8> %x, %y
+  %m = call <4 x i8> @llvm.umin.v4i8(<4 x i8> %x, <4 x i8> %y)
+  %s = shufflevector <4 x i8> %m, <4 x i8> %y, <4 x i32> <i32 0, i32 1, i32 2, i32 7> ; lanes 0-2 from %m, lane 3 from %y
+  %r = select <4 x i1> %i, <4 x i8> %s, <4 x i8> %x ; shuffle is the true value; when x >= y, umin(x, y) == y, so %r equals %m
+  ret <4 x i8> %r
+}
+
+define <4 x i8> @uge_xy_umin_select_y_shuf_fval(<4 x i8> %x, <4 x i8> %y) { ; same as the _tval test with the select arms swapped
+; CHECK-LABEL: @uge_xy_umin_select_y_shuf_fval(
+; CHECK-NEXT:    [[I:%.*]] = icmp uge <4 x i8> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[M:%.*]] = call <4 x i8> @llvm.umin.v4i8(<4 x i8> [[X]], <4 x i8> [[Y]])
+; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i8> [[M]], <4 x i8> [[Y]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT:    [[R:%.*]] = select <4 x i1> [[I]], <4 x i8> [[X]], <4 x i8> [[S]]
+; CHECK-NEXT:    ret <4 x i8> [[R]]
+;
+  %i = icmp uge <4 x i8> %x, %y
+  %m = call <4 x i8> @llvm.umin.v4i8(<4 x i8> %x, <4 x i8> %y)
+  %s = shufflevector <4 x i8> %m, <4 x i8> %y, <4 x i32> <i32 0, i32 1, i32 2, i32 7> ; lanes 0-2 from %m, lane 3 from %y
+  %r = select <4 x i1> %i, <4 x i8> %x, <4 x i8> %s ; shuffle is the false value here: x >= y picks x, the larger element
+  ret <4 x i8> %r
+}
+
+define <4 x i8> @uge_xy_umin_select_x_shuf_tval(<4 x i8> %x, <4 x i8> %y) { ; shuffle blends the umin with %x (not %y)
+; CHECK-LABEL: @uge_xy_umin_select_x_shuf_tval(
+; CHECK-NEXT:    [[I:%.*]] = icmp uge <4 x i8> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[M:%.*]] = call <4 x i8> @llvm.umin.v4i8(<4 x i8> [[X]], <4 x i8> [[Y]])
+; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i8> [[M]], <4 x i8> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT:    [[R:%.*]] = select <4 x i1> [[I]], <4 x i8> [[S]], <4 x i8> [[X]]
+; CHECK-NEXT:    ret <4 x i8> [[R]]
+;
+  %i = icmp uge <4 x i8> %x, %y
+  %m = call <4 x i8> @llvm.umin.v4i8(<4 x i8> %x, <4 x i8> %y)
+  %s = shufflevector <4 x i8> %m, <4 x i8> %x, <4 x i32> <i32 0, i32 1, i32 2, i32 7> ; lanes 0-2 from %m, lane 3 from %x
+  %r = select <4 x i1> %i, <4 x i8> %s, <4 x i8> %x ; lane 3 is x on both arms, so %r is not %m
+  ret <4 x i8> %r
+}
+
+; TODO: select with swapped umax pred
+
+define <4 x i8> @ult_yx_umax_select_y_shuf_fval(<4 x i8> %x, <4 x i8> %y) { ; y < x ? x : blend(y, umax(y, x))
+; CHECK-LABEL: @ult_yx_umax_select_y_shuf_fval(
+; CHECK-NEXT:    [[I:%.*]] = icmp ult <4 x i8> [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[M:%.*]] = call <4 x i8> @llvm.umax.v4i8(<4 x i8> [[Y]], <4 x i8> [[X]])
+; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i8> [[Y]], <4 x i8> [[M]], <4 x i32> <i32 4, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[R:%.*]] = select <4 x i1> [[I]], <4 x i8> [[X]], <4 x i8> [[S]]
+; CHECK-NEXT:    ret <4 x i8> [[R]]
+;
+  %i = icmp ult <4 x i8> %y, %x
+  %m = call <4 x i8> @llvm.umax.v4i8(<4 x i8> %y, <4 x i8> %x)
+  %s = shufflevector <4 x i8> %y, <4 x i8> %m, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; lane 0 from %m, lanes 1-3 from %y
+  %r = select <4 x i1> %i, <4 x i8> %x, <4 x i8> %s ; when y >= x, umax(y, x) == y, so every lane of %r equals %m
+  ret <4 x i8> %r
+}
+
+define <4 x i8> @ult_yx_umax_select_y_shuf_tval(<4 x i8> %x, <4 x i8> %y) { ; same as the _fval test with the select arms swapped
+; CHECK-LABEL: @ult_yx_umax_select_y_shuf_tval(
+; CHECK-NEXT:    [[I:%.*]] = icmp ult <4 x i8> [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[M:%.*]] = call <4 x i8> @llvm.umax.v4i8(<4 x i8> [[Y]], <4 x i8> [[X]])
+; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i8> [[Y]], <4 x i8> [[M]], <4 x i32> <i32 4, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[R:%.*]] = select <4 x i1> [[I]], <4 x i8> [[S]], <4 x i8> [[X]]
+; CHECK-NEXT:    ret <4 x i8> [[R]]
+;
+  %i = icmp ult <4 x i8> %y, %x
+  %m = call <4 x i8> @llvm.umax.v4i8(<4 x i8> %y, <4 x i8> %x)
+  %s = shufflevector <4 x i8> %y, <4 x i8> %m, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; lane 0 from %m, lanes 1-3 from %y
+  %r = select <4 x i1> %i, <4 x i8> %s, <4 x i8> %x ; shuffle is the true value here: y >= x picks x, the smaller element
+  ret <4 x i8> %r
+}
+
+define <4 x i8> @ult_yx_umax_select_y_shuf_mask_fval(<4 x i8> %x, <4 x i8> %y) { ; same as the _fval test with a lane-crossing mask
+; CHECK-LABEL: @ult_yx_umax_select_y_shuf_mask_fval(
+; CHECK-NEXT:    [[I:%.*]] = icmp ult <4 x i8> [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[M:%.*]] = call <4 x i8> @llvm.umax.v4i8(<4 x i8> [[Y]], <4 x i8> [[X]])
+; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i8> [[Y]], <4 x i8> [[M]], <4 x i32> <i32 5, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[R:%.*]] = select <4 x i1> [[I]], <4 x i8> [[X]], <4 x i8> [[S]]
+; CHECK-NEXT:    ret <4 x i8> [[R]]
+;
+  %i = icmp ult <4 x i8> %y, %x
+  %m = call <4 x i8> @llvm.umax.v4i8(<4 x i8> %y, <4 x i8> %x)
+  %s = shufflevector <4 x i8> %y, <4 x i8> %m, <4 x i32> <i32 5, i32 1, i32 2, i32 3> ; lane 0 takes element 1 of %m, so this is not a lane-wise blend
+  %r = select <4 x i1> %i, <4 x i8> %x, <4 x i8> %s
+  ret <4 x i8> %r
+}

diff  --git a/llvm/test/Transforms/PhaseOrdering/vector-select.ll b/llvm/test/Transforms/PhaseOrdering/vector-select.ll
index 84876b24c5d9b..3817be852b581 100644
--- a/llvm/test/Transforms/PhaseOrdering/vector-select.ll
+++ b/llvm/test/Transforms/PhaseOrdering/vector-select.ll
@@ -90,5 +90,54 @@ define <4 x i8> @allSignBits_vec(<4 x i8> %cond, <4 x i8> %tval, <4 x i8> %fval)
   ret <4 x i8> %sel
 }
 
+define <4 x i32> @PR42100(<4 x i32> noundef %x, <4 x i32> noundef %min) { ; per-element loop: min[i] = x[i] when x[i] < min[i]
+; CHECK-LABEL: @PR42100(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp slt <4 x i32> [[X:%.*]], [[MIN:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[X]], <4 x i32> [[MIN]])
+; CHECK-NEXT:    [[MIN_ADDR_1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[MIN]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[SEL3:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[X]], <4 x i32> [[MIN_ADDR_1]]
+; CHECK-NEXT:    [[MIN_ADDR_1_1:%.*]] = shufflevector <4 x i32> [[MIN_ADDR_1]], <4 x i32> [[SEL3]], <4 x i32> <i32 0, i32 5, i32 2, i32 3>
+; CHECK-NEXT:    [[SEL4:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[X]], <4 x i32> [[MIN_ADDR_1_1]]
+; CHECK-NEXT:    [[MIN_ADDR_1_2:%.*]] = shufflevector <4 x i32> [[MIN_ADDR_1_1]], <4 x i32> [[SEL4]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+; CHECK-NEXT:    [[SEL5:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[X]], <4 x i32> [[MIN_ADDR_1_2]]
+; CHECK-NEXT:    [[MIN_ADDR_1_3:%.*]] = shufflevector <4 x i32> [[MIN_ADDR_1_2]], <4 x i32> [[SEL5]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+; CHECK-NEXT:    ret <4 x i32> [[MIN_ADDR_1_3]]
+;
+entry:
+  br label %for.cond
+
+for.cond:
+  %min.addr.0 = phi <4 x i32> [ %min, %entry ], [ %min.addr.1, %for.inc ] ; running result vector, starts as %min
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] ; element index, starts at 0
+  %cmp = icmp ne i32 %i.0, 4 ; keep looping until the index reaches 4
+  br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+  br label %for.end
+
+for.body:
+  %vecext = extractelement <4 x i32> %x, i32 %i.0
+  %vecext1 = extractelement <4 x i32> %min.addr.0, i32 %i.0
+  %cmp2 = icmp slt i32 %vecext, %vecext1 ; signed: is element %i.0 of %x below the running result's element?
+  br i1 %cmp2, label %if.then, label %if.end
+
+if.then:
+  %vecext3 = extractelement <4 x i32> %x, i32 %i.0
+  %vecins = insertelement <4 x i32> %min.addr.0, i32 %vecext3, i32 %i.0 ; overwrite element %i.0 with the element from %x
+  br label %if.end
+
+if.end:
+  %min.addr.1 = phi <4 x i32> [ %vecins, %if.then ], [ %min.addr.0, %for.body ] ; updated vector if the compare was true, else unchanged
+  br label %for.inc
+
+for.inc:
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end:
+  ret <4 x i32> %min.addr.0
+}
+
 declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
 declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1