[llvm] [ValueTracking] Add support for `vector_reduce_{s, u}{min, max}` in `isKnownNonZero`/`computeKnownBits`. (PR #88169)

Tue Apr 9 21:34:33 PDT 2024

https://github.com/goldsteinn updated https://github.com/llvm/llvm-project/pull/88169

>From 2ee147795a6f09a40be6a521e74507ad62bc308b Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Tue, 9 Apr 2024 12:45:05 -0500
Subject: [PATCH 1/3] [InstCombine] Add tests for non-zero/knownbits of
 `vector_reduce_{s,u}{min,max}`; NFC

---
 .../vector-reduce-min-max-known.ll            | 295 ++++++++++++++++++
 1 file changed, 295 insertions(+)
 create mode 100644 llvm/test/Transforms/InstCombine/vector-reduce-min-max-known.ll

diff --git a/llvm/test/Transforms/InstCombine/vector-reduce-min-max-known.ll b/llvm/test/Transforms/InstCombine/vector-reduce-min-max-known.ll
new file mode 100644
index 00000000000000..a02ebcca8090a2
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/vector-reduce-min-max-known.ll
@@ -0,0 +1,295 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+define i1 @vec_reduce_umax_non_zero(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_umax_non_zero(
+; CHECK-NEXT:    [[X:%.*]] = add nuw <4 x i8> [[XX:%.*]], <i8 0, i8 1, i8 0, i8 0>
+; CHECK-NEXT:    [[V:%.*]] = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[V]], 0
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %x = add nuw <4 x i8> %xx, <i8 0, i8 1, i8 0, i8 0>
+  %v = call i8 @llvm.vector.reduce.umax(<4 x i8> %x)
+  %r = icmp eq i8 %v, 0
+  ret i1 %r
+}
+
+define i1 @vec_reduce_umax_non_zero_fail(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_umax_non_zero_fail(
+; CHECK-NEXT:    [[X:%.*]] = add nsw <4 x i8> [[XX:%.*]], <i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT:    [[V:%.*]] = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[V]], 0
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %x = add nsw <4 x i8> %xx, <i8 1, i8 1, i8 1, i8 1>
+  %v = call i8 @llvm.vector.reduce.umax(<4 x i8> %x)
+  %r = icmp eq i8 %v, 0
+  ret i1 %r
+}
+
+define i1 @vec_reduce_umin_non_zero(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_umin_non_zero(
+; CHECK-NEXT:    [[X:%.*]] = add nuw <4 x i8> [[XX:%.*]], <i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT:    [[V:%.*]] = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[V]], 0
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %x = add nuw <4 x i8> %xx, <i8 1, i8 1, i8 1, i8 1>
+  %v = call i8 @llvm.vector.reduce.umin(<4 x i8> %x)
+  %r = icmp eq i8 %v, 0
+  ret i1 %r
+}
+
+define i1 @vec_reduce_umin_non_zero_fail(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_umin_non_zero_fail(
+; CHECK-NEXT:    [[X:%.*]] = add nuw <4 x i8> [[XX:%.*]], <i8 0, i8 1, i8 1, i8 1>
+; CHECK-NEXT:    [[V:%.*]] = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[V]], 0
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %x = add nuw <4 x i8> %xx, <i8 0, i8 1, i8 1, i8 1>
+  %v = call i8 @llvm.vector.reduce.umin(<4 x i8> %x)
+  %r = icmp eq i8 %v, 0
+  ret i1 %r
+}
+
+define i1 @vec_reduce_smax_non_zero0(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_smax_non_zero0(
+; CHECK-NEXT:    [[X:%.*]] = add nuw <4 x i8> [[XX:%.*]], <i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT:    [[V:%.*]] = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[V]], 0
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %x = add nuw <4 x i8> %xx, <i8 1, i8 1, i8 1, i8 1>
+  %v = call i8 @llvm.vector.reduce.smax(<4 x i8> %x)
+  %r = icmp eq i8 %v, 0
+  ret i1 %r
+}
+
+define i1 @vec_reduce_smax_non_zero1(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_smax_non_zero1(
+; CHECK-NEXT:    [[X0:%.*]] = and <4 x i8> [[XX:%.*]], <i8 127, i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT:    [[X:%.*]] = or <4 x i8> [[X0]], <i8 1, i8 0, i8 0, i8 0>
+; CHECK-NEXT:    [[V:%.*]] = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[V]], 0
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %x0 = and <4 x i8> %xx, <i8 127, i8 255, i8 255, i8 255>
+  %x = or <4 x i8> %x0, <i8 1, i8 0, i8 0, i8 0>
+  %v = call i8 @llvm.vector.reduce.smax(<4 x i8> %x)
+  %r = icmp eq i8 %v, 0
+  ret i1 %r
+}
+
+define i1 @vec_reduce_smax_non_zero_fail(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_smax_non_zero_fail(
+; CHECK-NEXT:    [[X0:%.*]] = and <4 x i8> [[XX:%.*]], <i8 127, i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT:    [[X:%.*]] = add nuw <4 x i8> [[X0]], <i8 1, i8 0, i8 0, i8 0>
+; CHECK-NEXT:    [[V:%.*]] = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[V]], 0
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %x0 = and <4 x i8> %xx, <i8 127, i8 255, i8 255, i8 255>
+  %x = add nuw <4 x i8> %x0, <i8 1, i8 0, i8 0, i8 0>
+  %v = call i8 @llvm.vector.reduce.smax(<4 x i8> %x)
+  %r = icmp eq i8 %v, 0
+  ret i1 %r
+}
+
+define i1 @vec_reduce_smin_non_zero0(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_smin_non_zero0(
+; CHECK-NEXT:    [[X:%.*]] = add nuw <4 x i8> [[XX:%.*]], <i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT:    [[V:%.*]] = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[V]], 0
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %x = add nuw <4 x i8> %xx, <i8 1, i8 1, i8 1, i8 1>
+  %v = call i8 @llvm.vector.reduce.smin(<4 x i8> %x)
+  %r = icmp eq i8 %v, 0
+  ret i1 %r
+}
+
+define i1 @vec_reduce_smin_non_zero1(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_smin_non_zero1(
+; CHECK-NEXT:    [[X:%.*]] = or <4 x i8> [[XX:%.*]], <i8 0, i8 0, i8 0, i8 -128>
+; CHECK-NEXT:    [[V:%.*]] = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[V]], 0
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %x = or <4 x i8> %xx, <i8 0, i8 0, i8 0, i8 128>
+  %v = call i8 @llvm.vector.reduce.smin(<4 x i8> %x)
+  %r = icmp eq i8 %v, 0
+  ret i1 %r
+}
+
+define i1 @vec_reduce_smin_non_zero_fail(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_smin_non_zero_fail(
+; CHECK-NEXT:    [[X0:%.*]] = or <4 x i8> [[XX:%.*]], <i8 0, i8 0, i8 0, i8 -128>
+; CHECK-NEXT:    [[X:%.*]] = add <4 x i8> [[X0]], <i8 0, i8 0, i8 0, i8 1>
+; CHECK-NEXT:    [[V:%.*]] = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[V]], 0
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %x0 = or <4 x i8> %xx, <i8 0, i8 0, i8 0, i8 128>
+  %x = add <4 x i8> %x0, <i8 0, i8 0, i8 0, i8 1>
+  %v = call i8 @llvm.vector.reduce.smin(<4 x i8> %x)
+  %r = icmp eq i8 %v, 0
+  ret i1 %r
+}
+
+define i8 @vec_reduce_umax_known0(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_umax_known0(
+; CHECK-NEXT:    [[X:%.*]] = or <4 x i8> [[XX:%.*]], <i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT:    [[V:%.*]] = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT:    [[R:%.*]] = and i8 [[V]], 1
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %x = or <4 x i8> %xx, <i8 1, i8 1, i8 1, i8 1>
+  %v = call i8 @llvm.vector.reduce.umax(<4 x i8> %x)
+  %r = and i8 %v, 1
+  ret i8 %r
+}
+
+define i8 @vec_reduce_umax_known1(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_umax_known1(
+; CHECK-NEXT:    [[X:%.*]] = or <4 x i8> [[XX:%.*]], <i8 1, i8 1, i8 1, i8 -128>
+; CHECK-NEXT:    [[V:%.*]] = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT:    [[R:%.*]] = and i8 [[V]], -128
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %x = or <4 x i8> %xx, <i8 1, i8 1, i8 1, i8 128>
+  %v = call i8 @llvm.vector.reduce.umax(<4 x i8> %x)
+  %r = and i8 %v, 128
+  ret i8 %r
+}
+
+define i8 @vec_reduce_umax_known_fail0(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_umax_known_fail0(
+; CHECK-NEXT:    [[X:%.*]] = or <4 x i8> [[XX:%.*]], <i8 1, i8 1, i8 1, i8 -128>
+; CHECK-NEXT:    [[V:%.*]] = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT:    [[R:%.*]] = and i8 [[V]], 1
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %x = or <4 x i8> %xx, <i8 1, i8 1, i8 1, i8 128>
+  %v = call i8 @llvm.vector.reduce.umax(<4 x i8> %x)
+  %r = and i8 %v, 1
+  ret i8 %r
+}
+
+define i8 @vec_reduce_umax_known_fail1(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_umax_known_fail1(
+; CHECK-NEXT:    [[X:%.*]] = or <4 x i8> [[XX:%.*]], <i8 1, i8 2, i8 4, i8 8>
+; CHECK-NEXT:    [[V:%.*]] = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT:    [[R:%.*]] = and i8 [[V]], 1
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %x = or <4 x i8> %xx, <i8 1, i8 2, i8 4, i8 8>
+  %v = call i8 @llvm.vector.reduce.umax(<4 x i8> %x)
+  %r = and i8 %v, 1
+  ret i8 %r
+}
+
+define i8 @vec_reduce_umin_known0(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_umin_known0(
+; CHECK-NEXT:    [[X:%.*]] = or <4 x i8> [[XX:%.*]], <i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT:    [[V:%.*]] = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT:    [[R:%.*]] = and i8 [[V]], 1
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %x = or <4 x i8> %xx, <i8 1, i8 1, i8 1, i8 1>
+  %v = call i8 @llvm.vector.reduce.umin(<4 x i8> %x)
+  %r = and i8 %v, 1
+  ret i8 %r
+}
+
+define i8 @vec_reduce_umin_known1(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_umin_known1(
+; CHECK-NEXT:    [[X:%.*]] = and <4 x i8> [[XX:%.*]], <i8 127, i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT:    [[V:%.*]] = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT:    [[R:%.*]] = and i8 [[V]], -128
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %x = and <4 x i8> %xx, <i8 127, i8 255, i8 255, i8 255>
+  %v = call i8 @llvm.vector.reduce.umin(<4 x i8> %x)
+  %r = and i8 %v, 128
+  ret i8 %r
+}
+
+define i8 @vec_reduce_umin_known_fail0(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_umin_known_fail0(
+; CHECK-NEXT:    [[X:%.*]] = or <4 x i8> [[XX:%.*]], <i8 1, i8 0, i8 0, i8 0>
+; CHECK-NEXT:    [[V:%.*]] = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT:    [[R:%.*]] = and i8 [[V]], 1
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %x0 = and <4 x i8> %xx, <i8 127, i8 255, i8 255, i8 255>
+  %x = or <4 x i8> %xx, <i8 1, i8 0, i8 0, i8 0>
+  %v = call i8 @llvm.vector.reduce.umin(<4 x i8> %x)
+  %r = and i8 %v, 1
+  ret i8 %r
+}
+
+define i8 @vec_reduce_umin_known_fail1(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_umin_known_fail1(
+; CHECK-NEXT:    [[X:%.*]] = or <4 x i8> [[XX:%.*]], <i8 1, i8 2, i8 4, i8 8>
+; CHECK-NEXT:    [[V:%.*]] = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT:    [[R:%.*]] = and i8 [[V]], 1
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %x = or <4 x i8> %xx, <i8 1, i8 2, i8 4, i8 8>
+  %v = call i8 @llvm.vector.reduce.umin(<4 x i8> %x)
+  %r = and i8 %v, 1
+  ret i8 %r
+}
+
+define i8 @vec_reduce_smax_known(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_smax_known(
+; CHECK-NEXT:    [[X:%.*]] = or <4 x i8> [[XX:%.*]], <i8 4, i8 4, i8 4, i8 5>
+; CHECK-NEXT:    [[V:%.*]] = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT:    [[R:%.*]] = and i8 [[V]], 4
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %x = or <4 x i8> %xx, <i8 4, i8 4, i8 4, i8 5>
+  %v = call i8 @llvm.vector.reduce.smax(<4 x i8> %x)
+  %r = and i8 %v, 4
+  ret i8 %r
+}
+
+define i8 @vec_reduce_smax_known_fail(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_smax_known_fail(
+; CHECK-NEXT:    [[X:%.*]] = or <4 x i8> [[XX:%.*]], <i8 4, i8 4, i8 8, i8 5>
+; CHECK-NEXT:    [[V:%.*]] = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT:    [[R:%.*]] = and i8 [[V]], 4
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %x = or <4 x i8> %xx, <i8 4, i8 4, i8 8, i8 5> ; lane 2 is or'ed with 8, so bit 2 is not common to all lanes
+  %v = call i8 @llvm.vector.reduce.smax(<4 x i8> %x) ; negative test for the signed-max reduction
+  %r = and i8 %v, 4 ; must not fold: bit 2 of the result is unknown
+  ret i8 %r
+}
+
+define i8 @vec_reduce_smin_known(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_smin_known(
+; CHECK-NEXT:    [[X:%.*]] = or <4 x i8> [[XX:%.*]], <i8 8, i8 24, i8 56, i8 9>
+; CHECK-NEXT:    [[V:%.*]] = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT:    [[R:%.*]] = and i8 [[V]], 8
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %x = or <4 x i8> %xx, <i8 8, i8 24, i8 56, i8 9>
+  %v = call i8 @llvm.vector.reduce.smin(<4 x i8> %x)
+  %r = and i8 %v, 8
+  ret i8 %r
+}
+
+define i8 @vec_reduce_smin_known_fail(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_smin_known_fail(
+; CHECK-NEXT:    [[X:%.*]] = or <4 x i8> [[XX:%.*]], <i8 8, i8 23, i8 56, i8 9>
+; CHECK-NEXT:    [[V:%.*]] = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT:    [[R:%.*]] = and i8 [[V]], 8
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %x = or <4 x i8> %xx, <i8 8, i8 23, i8 56, i8 9>
+  %v = call i8 @llvm.vector.reduce.smin(<4 x i8> %x)
+  %r = and i8 %v, 8
+  ret i8 %r
+}

>From 6a359c122e75d308b8a8e5cffa6d201fab029b1c Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Tue, 9 Apr 2024 11:58:38 -0500
Subject: [PATCH 2/3] [ValueTracking] Add support for
 `vector_reduce_{s,u}{min,max}` in `isKnownNonZero`

Previously missing, proofs for all implementations:
https://alive2.llvm.org/ce/z/G8wpmG
---
 llvm/lib/Analysis/ValueTracking.cpp               |  6 ++++++
 .../InstCombine/vector-reduce-min-max-known.ll    | 15 +++------------
 2 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index ca48cfe7738154..869a94d81f4dfd 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -2824,6 +2824,12 @@ static bool isKnownNonZeroFromOperator(const Operator *I,
         return isNonZeroAdd(DemandedElts, Depth, Q, BitWidth,
                             II->getArgOperand(0), II->getArgOperand(1),
                             /*NSW=*/true, /* NUW=*/false);
+        // umax/umin/smax/smin of all non-zero elements is always non-zero.
+      case Intrinsic::vector_reduce_umax:
+      case Intrinsic::vector_reduce_umin:
+      case Intrinsic::vector_reduce_smax:
+      case Intrinsic::vector_reduce_smin:
+        return isKnownNonZero(II->getArgOperand(0), Depth, Q);
       case Intrinsic::umax:
       case Intrinsic::uadd_sat:
         return isKnownNonZero(II->getArgOperand(1), DemandedElts, Depth, Q) ||
diff --git a/llvm/test/Transforms/InstCombine/vector-reduce-min-max-known.ll b/llvm/test/Transforms/InstCombine/vector-reduce-min-max-known.ll
index a02ebcca8090a2..29c08b17ef885a 100644
--- a/llvm/test/Transforms/InstCombine/vector-reduce-min-max-known.ll
+++ b/llvm/test/Transforms/InstCombine/vector-reduce-min-max-known.ll
@@ -29,10 +29,7 @@ define i1 @vec_reduce_umax_non_zero_fail(<4 x i8> %xx) {
 
 define i1 @vec_reduce_umin_non_zero(<4 x i8> %xx) {
 ; CHECK-LABEL: @vec_reduce_umin_non_zero(
-; CHECK-NEXT:    [[X:%.*]] = add nuw <4 x i8> [[XX:%.*]], <i8 1, i8 1, i8 1, i8 1>
-; CHECK-NEXT:    [[V:%.*]] = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> [[X]])
-; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[V]], 0
-; CHECK-NEXT:    ret i1 [[R]]
+; CHECK-NEXT:    ret i1 false
 ;
   %x = add nuw <4 x i8> %xx, <i8 1, i8 1, i8 1, i8 1>
   %v = call i8 @llvm.vector.reduce.umin(<4 x i8> %x)
@@ -55,10 +52,7 @@ define i1 @vec_reduce_umin_non_zero_fail(<4 x i8> %xx) {
 
 define i1 @vec_reduce_smax_non_zero0(<4 x i8> %xx) {
 ; CHECK-LABEL: @vec_reduce_smax_non_zero0(
-; CHECK-NEXT:    [[X:%.*]] = add nuw <4 x i8> [[XX:%.*]], <i8 1, i8 1, i8 1, i8 1>
-; CHECK-NEXT:    [[V:%.*]] = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> [[X]])
-; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[V]], 0
-; CHECK-NEXT:    ret i1 [[R]]
+; CHECK-NEXT:    ret i1 false
 ;
   %x = add nuw <4 x i8> %xx, <i8 1, i8 1, i8 1, i8 1>
   %v = call i8 @llvm.vector.reduce.smax(<4 x i8> %x)
@@ -98,10 +92,7 @@ define i1 @vec_reduce_smax_non_zero_fail(<4 x i8> %xx) {
 
 define i1 @vec_reduce_smin_non_zero0(<4 x i8> %xx) {
 ; CHECK-LABEL: @vec_reduce_smin_non_zero0(
-; CHECK-NEXT:    [[X:%.*]] = add nuw <4 x i8> [[XX:%.*]], <i8 1, i8 1, i8 1, i8 1>
-; CHECK-NEXT:    [[V:%.*]] = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> [[X]])
-; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[V]], 0
-; CHECK-NEXT:    ret i1 [[R]]
+; CHECK-NEXT:    ret i1 false
 ;
   %x = add nuw <4 x i8> %xx, <i8 1, i8 1, i8 1, i8 1>
   %v = call i8 @llvm.vector.reduce.smin(<4 x i8> %x)

>From ed78299a33a914e3da0b62a5d035bd93eea09601 Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Tue, 9 Apr 2024 11:58:48 -0500
Subject: [PATCH 3/3] [ValueTracking] Add support for
 `vector_reduce_{s,u}{min,max}` in `computeKnownBits`

Previously missing. Since the result is always one of the input elements,
we use the known bits common to all elements of the vector operand.
---
 llvm/lib/Analysis/ValueTracking.cpp           |  8 ++++++++
 .../vector-reduce-min-max-known.ll            | 20 ++++---------------
 2 files changed, 12 insertions(+), 16 deletions(-)

diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 869a94d81f4dfd..4120876889dec9 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -1621,6 +1621,14 @@ static void computeKnownBitsFromOperator(const Operator *I,
         computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
         Known = KnownBits::ssub_sat(Known, Known2);
         break;
+        // A min/max reduction returns one of the input elements, so any bit
+        // known in every element of the input vector is known in the output.
+      case Intrinsic::vector_reduce_umax:
+      case Intrinsic::vector_reduce_umin:
+      case Intrinsic::vector_reduce_smax:
+      case Intrinsic::vector_reduce_smin:
+        computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
+        break;
       case Intrinsic::umin:
         computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
         computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
diff --git a/llvm/test/Transforms/InstCombine/vector-reduce-min-max-known.ll b/llvm/test/Transforms/InstCombine/vector-reduce-min-max-known.ll
index 29c08b17ef885a..65d00083532621 100644
--- a/llvm/test/Transforms/InstCombine/vector-reduce-min-max-known.ll
+++ b/llvm/test/Transforms/InstCombine/vector-reduce-min-max-known.ll
@@ -130,10 +130,7 @@ define i1 @vec_reduce_smin_non_zero_fail(<4 x i8> %xx) {
 
 define i8 @vec_reduce_umax_known0(<4 x i8> %xx) {
 ; CHECK-LABEL: @vec_reduce_umax_known0(
-; CHECK-NEXT:    [[X:%.*]] = or <4 x i8> [[XX:%.*]], <i8 1, i8 1, i8 1, i8 1>
-; CHECK-NEXT:    [[V:%.*]] = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> [[X]])
-; CHECK-NEXT:    [[R:%.*]] = and i8 [[V]], 1
-; CHECK-NEXT:    ret i8 [[R]]
+; CHECK-NEXT:    ret i8 1
 ;
   %x = or <4 x i8> %xx, <i8 1, i8 1, i8 1, i8 1>
   %v = call i8 @llvm.vector.reduce.umax(<4 x i8> %x)
@@ -182,10 +179,7 @@ define i8 @vec_reduce_umax_known_fail1(<4 x i8> %xx) {
 
 define i8 @vec_reduce_umin_known0(<4 x i8> %xx) {
 ; CHECK-LABEL: @vec_reduce_umin_known0(
-; CHECK-NEXT:    [[X:%.*]] = or <4 x i8> [[XX:%.*]], <i8 1, i8 1, i8 1, i8 1>
-; CHECK-NEXT:    [[V:%.*]] = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> [[X]])
-; CHECK-NEXT:    [[R:%.*]] = and i8 [[V]], 1
-; CHECK-NEXT:    ret i8 [[R]]
+; CHECK-NEXT:    ret i8 1
 ;
   %x = or <4 x i8> %xx, <i8 1, i8 1, i8 1, i8 1>
   %v = call i8 @llvm.vector.reduce.umin(<4 x i8> %x)
@@ -235,10 +229,7 @@ define i8 @vec_reduce_umin_known_fail1(<4 x i8> %xx) {
 
 define i8 @vec_reduce_smax_known(<4 x i8> %xx) {
 ; CHECK-LABEL: @vec_reduce_smax_known(
-; CHECK-NEXT:    [[X:%.*]] = or <4 x i8> [[XX:%.*]], <i8 4, i8 4, i8 4, i8 5>
-; CHECK-NEXT:    [[V:%.*]] = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> [[X]])
-; CHECK-NEXT:    [[R:%.*]] = and i8 [[V]], 4
-; CHECK-NEXT:    ret i8 [[R]]
+; CHECK-NEXT:    ret i8 4
 ;
   %x = or <4 x i8> %xx, <i8 4, i8 4, i8 4, i8 5>
   %v = call i8 @llvm.vector.reduce.smax(<4 x i8> %x)
@@ -261,10 +252,7 @@ define i8 @vec_reduce_smax_known_fail(<4 x i8> %xx) {
 
 define i8 @vec_reduce_smin_known(<4 x i8> %xx) {
 ; CHECK-LABEL: @vec_reduce_smin_known(
-; CHECK-NEXT:    [[X:%.*]] = or <4 x i8> [[XX:%.*]], <i8 8, i8 24, i8 56, i8 9>
-; CHECK-NEXT:    [[V:%.*]] = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> [[X]])
-; CHECK-NEXT:    [[R:%.*]] = and i8 [[V]], 8
-; CHECK-NEXT:    ret i8 [[R]]
+; CHECK-NEXT:    ret i8 8
 ;
   %x = or <4 x i8> %xx, <i8 8, i8 24, i8 56, i8 9>
   %v = call i8 @llvm.vector.reduce.smin(<4 x i8> %x)