[llvm] perf/goldsteinn/support reduce minmax (PR #88169)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 9 10:56:49 PDT 2024
https://github.com/goldsteinn created https://github.com/llvm/llvm-project/pull/88169
- **[ValueTracking] Expand `isKnown{Negative,Positive}` APIs; NFC**
- **[InstCombine] Add tests for non-zero/knownbits of `vector_reduce_{s,u}{min,max}`; NFC**
- **[ValueTracking] Add support for `vector_reduce_{s,u}{min,max}` in `isKnownNonZero`**
- **[ValueTracking] Add support for `vector_reduce_{s,u}{min,max}` in `computeKnownBits`**
>From 52d9abec3577e23d3edbb74c61bd188a801a9de8 Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Tue, 9 Apr 2024 11:54:22 -0500
Subject: [PATCH 1/4] [ValueTracking] Expand `isKnown{Negative,Positive}` APIs;
NFC
1) Add support for `DemandedElts`.
2) Add private API that also returns the already computed KnownBits.
---
llvm/include/llvm/Analysis/ValueTracking.h | 10 ++++++
llvm/lib/Analysis/ValueTracking.cpp | 41 +++++++++++++++++++---
2 files changed, 46 insertions(+), 5 deletions(-)
diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h
index 3970efba18cc8c..7287a8fb122bbb 100644
--- a/llvm/include/llvm/Analysis/ValueTracking.h
+++ b/llvm/include/llvm/Analysis/ValueTracking.h
@@ -145,11 +145,21 @@ bool isKnownNonNegative(const Value *V, const SimplifyQuery &SQ,
bool isKnownPositive(const Value *V, const SimplifyQuery &SQ,
unsigned Depth = 0);
+/// Returns true if the given value is known to be positive (i.e. non-negative
+/// and non-zero) for DemandedElts.
+bool isKnownPositive(const Value *V, const APInt &DemandedElts,
+ const SimplifyQuery &SQ, unsigned Depth = 0);
+
/// Returns true if the given value is known be negative (i.e. non-positive
/// and non-zero).
bool isKnownNegative(const Value *V, const SimplifyQuery &DL,
unsigned Depth = 0);
+/// Returns true if the given value is known to be negative (i.e. non-positive
+/// and non-zero) for DemandedElts.
+bool isKnownNegative(const Value *V, const APInt &DemandedElts,
+ const SimplifyQuery &SQ, unsigned Depth = 0);
+
/// Return true if the given values are known to be non-equal when defined.
/// Supports scalar integer types only.
bool isKnownNonEqual(const Value *V1, const Value *V2, const DataLayout &DL,
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index ca48cfe7738154..3bc5d9ee193f79 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -289,21 +289,52 @@ bool llvm::isKnownNonNegative(const Value *V, const SimplifyQuery &SQ,
return computeKnownBits(V, Depth, SQ).isNonNegative();
}
-bool llvm::isKnownPositive(const Value *V, const SimplifyQuery &SQ,
- unsigned Depth) {
+static bool isKnownPositive(const Value *V, const APInt &DemandedElts,
+ KnownBits &Known, const SimplifyQuery &SQ,
+ unsigned Depth) {
if (auto *CI = dyn_cast<ConstantInt>(V))
return CI->getValue().isStrictlyPositive();
// If `isKnownNonNegative` ever becomes more sophisticated, make sure to keep
// this updated.
- KnownBits Known = computeKnownBits(V, Depth, SQ);
+ Known = computeKnownBits(V, DemandedElts, Depth, SQ);
return Known.isNonNegative() &&
- (Known.isNonZero() || ::isKnownNonZero(V, Depth, SQ));
+ (Known.isNonZero() || ::isKnownNonZero(V, DemandedElts, Depth, SQ));
+}
+
+bool llvm::isKnownPositive(const Value *V, const APInt &DemandedElts,
+ const SimplifyQuery &SQ, unsigned Depth) {
+ KnownBits Known(getBitWidth(V->getType(), SQ.DL));
+ return ::isKnownPositive(V, DemandedElts, Known, SQ, Depth);
+}
+
+bool llvm::isKnownPositive(const Value *V, const SimplifyQuery &SQ,
+ unsigned Depth) {
+ auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
+ APInt DemandedElts =
+ FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
+ return isKnownPositive(V, DemandedElts, SQ, Depth);
+}
+
+static bool isKnownNegative(const Value *V, const APInt &DemandedElts,
+ KnownBits &Known, const SimplifyQuery &SQ,
+ unsigned Depth) {
+ Known = computeKnownBits(V, DemandedElts, Depth, SQ);
+ return Known.isNegative();
+}
+
+bool llvm::isKnownNegative(const Value *V, const APInt &DemandedElts,
+ const SimplifyQuery &SQ, unsigned Depth) {
+ KnownBits Known(getBitWidth(V->getType(), SQ.DL));
+ return ::isKnownNegative(V, DemandedElts, Known, SQ, Depth);
}
bool llvm::isKnownNegative(const Value *V, const SimplifyQuery &SQ,
unsigned Depth) {
- return computeKnownBits(V, Depth, SQ).isNegative();
+ auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
+ APInt DemandedElts =
+ FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
+ return isKnownNegative(V, DemandedElts, SQ, Depth);
}
static bool isKnownNonEqual(const Value *V1, const Value *V2, unsigned Depth,
>From 9e4fdc2164692e79e0e8e9dc7fd7712cf70e20d0 Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Tue, 9 Apr 2024 12:45:05 -0500
Subject: [PATCH 2/4] [InstCombine] Add tests for non-zero/knownbits of
`vector_reduce_{s,u}{min,max}`; NFC
---
.../vector-reduce-min-max-known.ll | 295 ++++++++++++++++++
1 file changed, 295 insertions(+)
create mode 100644 llvm/test/Transforms/InstCombine/vector-reduce-min-max-known.ll
diff --git a/llvm/test/Transforms/InstCombine/vector-reduce-min-max-known.ll b/llvm/test/Transforms/InstCombine/vector-reduce-min-max-known.ll
new file mode 100644
index 00000000000000..a02ebcca8090a2
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/vector-reduce-min-max-known.ll
@@ -0,0 +1,295 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+define i1 @vec_reduce_umax_non_zero(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_umax_non_zero(
+; CHECK-NEXT: [[X:%.*]] = add nuw <4 x i8> [[XX:%.*]], <i8 0, i8 1, i8 0, i8 0>
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[V]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %x = add nuw <4 x i8> %xx, <i8 0, i8 1, i8 0, i8 0>
+ %v = call i8 @llvm.vector.reduce.umax(<4 x i8> %x)
+ %r = icmp eq i8 %v, 0
+ ret i1 %r
+}
+
+define i1 @vec_reduce_umax_non_zero_fail(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_umax_non_zero_fail(
+; CHECK-NEXT: [[X:%.*]] = add nsw <4 x i8> [[XX:%.*]], <i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[V]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %x = add nsw <4 x i8> %xx, <i8 1, i8 1, i8 1, i8 1>
+ %v = call i8 @llvm.vector.reduce.umax(<4 x i8> %x)
+ %r = icmp eq i8 %v, 0
+ ret i1 %r
+}
+
+define i1 @vec_reduce_umin_non_zero(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_umin_non_zero(
+; CHECK-NEXT: [[X:%.*]] = add nuw <4 x i8> [[XX:%.*]], <i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[V]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %x = add nuw <4 x i8> %xx, <i8 1, i8 1, i8 1, i8 1>
+ %v = call i8 @llvm.vector.reduce.umin(<4 x i8> %x)
+ %r = icmp eq i8 %v, 0
+ ret i1 %r
+}
+
+define i1 @vec_reduce_umin_non_zero_fail(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_umin_non_zero_fail(
+; CHECK-NEXT: [[X:%.*]] = add nuw <4 x i8> [[XX:%.*]], <i8 0, i8 1, i8 1, i8 1>
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[V]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %x = add nuw <4 x i8> %xx, <i8 0, i8 1, i8 1, i8 1>
+ %v = call i8 @llvm.vector.reduce.umin(<4 x i8> %x)
+ %r = icmp eq i8 %v, 0
+ ret i1 %r
+}
+
+define i1 @vec_reduce_smax_non_zero0(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_smax_non_zero0(
+; CHECK-NEXT: [[X:%.*]] = add nuw <4 x i8> [[XX:%.*]], <i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[V]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %x = add nuw <4 x i8> %xx, <i8 1, i8 1, i8 1, i8 1>
+ %v = call i8 @llvm.vector.reduce.smax(<4 x i8> %x)
+ %r = icmp eq i8 %v, 0
+ ret i1 %r
+}
+
+define i1 @vec_reduce_smax_non_zero1(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_smax_non_zero1(
+; CHECK-NEXT: [[X0:%.*]] = and <4 x i8> [[XX:%.*]], <i8 127, i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT: [[X:%.*]] = or <4 x i8> [[X0]], <i8 1, i8 0, i8 0, i8 0>
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[V]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %x0 = and <4 x i8> %xx, <i8 127, i8 255, i8 255, i8 255>
+ %x = or <4 x i8> %x0, <i8 1, i8 0, i8 0, i8 0>
+ %v = call i8 @llvm.vector.reduce.smax(<4 x i8> %x)
+ %r = icmp eq i8 %v, 0
+ ret i1 %r
+}
+
+define i1 @vec_reduce_smax_non_zero_fail(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_smax_non_zero_fail(
+; CHECK-NEXT: [[X0:%.*]] = and <4 x i8> [[XX:%.*]], <i8 127, i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT: [[X:%.*]] = add nuw <4 x i8> [[X0]], <i8 1, i8 0, i8 0, i8 0>
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[V]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %x0 = and <4 x i8> %xx, <i8 127, i8 255, i8 255, i8 255>
+ %x = add nuw <4 x i8> %x0, <i8 1, i8 0, i8 0, i8 0>
+ %v = call i8 @llvm.vector.reduce.smax(<4 x i8> %x)
+ %r = icmp eq i8 %v, 0
+ ret i1 %r
+}
+
+define i1 @vec_reduce_smin_non_zero0(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_smin_non_zero0(
+; CHECK-NEXT: [[X:%.*]] = add nuw <4 x i8> [[XX:%.*]], <i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[V]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %x = add nuw <4 x i8> %xx, <i8 1, i8 1, i8 1, i8 1>
+ %v = call i8 @llvm.vector.reduce.smin(<4 x i8> %x)
+ %r = icmp eq i8 %v, 0
+ ret i1 %r
+}
+
+define i1 @vec_reduce_smin_non_zero1(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_smin_non_zero1(
+; CHECK-NEXT: [[X:%.*]] = or <4 x i8> [[XX:%.*]], <i8 0, i8 0, i8 0, i8 -128>
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[V]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %x = or <4 x i8> %xx, <i8 0, i8 0, i8 0, i8 128>
+ %v = call i8 @llvm.vector.reduce.smin(<4 x i8> %x)
+ %r = icmp eq i8 %v, 0
+ ret i1 %r
+}
+
+define i1 @vec_reduce_smin_non_zero_fail(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_smin_non_zero_fail(
+; CHECK-NEXT: [[X0:%.*]] = or <4 x i8> [[XX:%.*]], <i8 0, i8 0, i8 0, i8 -128>
+; CHECK-NEXT: [[X:%.*]] = add <4 x i8> [[X0]], <i8 0, i8 0, i8 0, i8 1>
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[V]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %x0 = or <4 x i8> %xx, <i8 0, i8 0, i8 0, i8 128>
+ %x = add <4 x i8> %x0, <i8 0, i8 0, i8 0, i8 1>
+ %v = call i8 @llvm.vector.reduce.smin(<4 x i8> %x)
+ %r = icmp eq i8 %v, 0
+ ret i1 %r
+}
+
+define i8 @vec_reduce_umax_known0(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_umax_known0(
+; CHECK-NEXT: [[X:%.*]] = or <4 x i8> [[XX:%.*]], <i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT: [[R:%.*]] = and i8 [[V]], 1
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %x = or <4 x i8> %xx, <i8 1, i8 1, i8 1, i8 1>
+ %v = call i8 @llvm.vector.reduce.umax(<4 x i8> %x)
+ %r = and i8 %v, 1
+ ret i8 %r
+}
+
+define i8 @vec_reduce_umax_known1(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_umax_known1(
+; CHECK-NEXT: [[X:%.*]] = or <4 x i8> [[XX:%.*]], <i8 1, i8 1, i8 1, i8 -128>
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT: [[R:%.*]] = and i8 [[V]], -128
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %x = or <4 x i8> %xx, <i8 1, i8 1, i8 1, i8 128>
+ %v = call i8 @llvm.vector.reduce.umax(<4 x i8> %x)
+ %r = and i8 %v, 128
+ ret i8 %r
+}
+
+define i8 @vec_reduce_umax_known_fail0(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_umax_known_fail0(
+; CHECK-NEXT: [[X:%.*]] = or <4 x i8> [[XX:%.*]], <i8 1, i8 1, i8 1, i8 -128>
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT: [[R:%.*]] = and i8 [[V]], 1
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %x = or <4 x i8> %xx, <i8 1, i8 1, i8 1, i8 128>
+ %v = call i8 @llvm.vector.reduce.umax(<4 x i8> %x)
+ %r = and i8 %v, 1
+ ret i8 %r
+}
+
+define i8 @vec_reduce_umax_known_fail1(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_umax_known_fail1(
+; CHECK-NEXT: [[X:%.*]] = or <4 x i8> [[XX:%.*]], <i8 1, i8 2, i8 4, i8 8>
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT: [[R:%.*]] = and i8 [[V]], 1
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %x = or <4 x i8> %xx, <i8 1, i8 2, i8 4, i8 8>
+ %v = call i8 @llvm.vector.reduce.umax(<4 x i8> %x)
+ %r = and i8 %v, 1
+ ret i8 %r
+}
+
+define i8 @vec_reduce_umin_known0(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_umin_known0(
+; CHECK-NEXT: [[X:%.*]] = or <4 x i8> [[XX:%.*]], <i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT: [[R:%.*]] = and i8 [[V]], 1
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %x = or <4 x i8> %xx, <i8 1, i8 1, i8 1, i8 1>
+ %v = call i8 @llvm.vector.reduce.umin(<4 x i8> %x)
+ %r = and i8 %v, 1
+ ret i8 %r
+}
+
+define i8 @vec_reduce_umin_known1(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_umin_known1(
+; CHECK-NEXT: [[X:%.*]] = and <4 x i8> [[XX:%.*]], <i8 127, i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT: [[R:%.*]] = and i8 [[V]], -128
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %x = and <4 x i8> %xx, <i8 127, i8 255, i8 255, i8 255>
+ %v = call i8 @llvm.vector.reduce.umin(<4 x i8> %x)
+ %r = and i8 %v, 128
+ ret i8 %r
+}
+
+define i8 @vec_reduce_umin_known_fail0(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_umin_known_fail0(
+; CHECK-NEXT: [[X:%.*]] = or <4 x i8> [[XX:%.*]], <i8 1, i8 0, i8 0, i8 0>
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT: [[R:%.*]] = and i8 [[V]], 1
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %x0 = and <4 x i8> %xx, <i8 127, i8 255, i8 255, i8 255>
+ %x = or <4 x i8> %xx, <i8 1, i8 0, i8 0, i8 0>
+ %v = call i8 @llvm.vector.reduce.umin(<4 x i8> %x)
+ %r = and i8 %v, 1
+ ret i8 %r
+}
+
+define i8 @vec_reduce_umin_known_fail1(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_umin_known_fail1(
+; CHECK-NEXT: [[X:%.*]] = or <4 x i8> [[XX:%.*]], <i8 1, i8 2, i8 4, i8 8>
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT: [[R:%.*]] = and i8 [[V]], 1
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %x = or <4 x i8> %xx, <i8 1, i8 2, i8 4, i8 8>
+ %v = call i8 @llvm.vector.reduce.umin(<4 x i8> %x)
+ %r = and i8 %v, 1
+ ret i8 %r
+}
+
+define i8 @vec_reduce_smax_known(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_smax_known(
+; CHECK-NEXT: [[X:%.*]] = or <4 x i8> [[XX:%.*]], <i8 4, i8 4, i8 4, i8 5>
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT: [[R:%.*]] = and i8 [[V]], 4
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %x = or <4 x i8> %xx, <i8 4, i8 4, i8 4, i8 5>
+ %v = call i8 @llvm.vector.reduce.smax(<4 x i8> %x)
+ %r = and i8 %v, 4
+ ret i8 %r
+}
+
+define i8 @vec_reduce_smax_known_fail(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_smax_known_fail(
+; CHECK-NEXT: [[X:%.*]] = or <4 x i8> [[XX:%.*]], <i8 4, i8 4, i8 8, i8 5>
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT: [[R:%.*]] = and i8 [[V]], 4
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %x = or <4 x i8> %xx, <i8 4, i8 4, i8 8, i8 5>
+ %v = call i8 @llvm.vector.reduce.umax(<4 x i8> %x)
+ %r = and i8 %v, 4
+ ret i8 %r
+}
+
+define i8 @vec_reduce_smin_known(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_smin_known(
+; CHECK-NEXT: [[X:%.*]] = or <4 x i8> [[XX:%.*]], <i8 8, i8 24, i8 56, i8 9>
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT: [[R:%.*]] = and i8 [[V]], 8
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %x = or <4 x i8> %xx, <i8 8, i8 24, i8 56, i8 9>
+ %v = call i8 @llvm.vector.reduce.smin(<4 x i8> %x)
+ %r = and i8 %v, 8
+ ret i8 %r
+}
+
+define i8 @vec_reduce_smin_known_fail(<4 x i8> %xx) {
+; CHECK-LABEL: @vec_reduce_smin_known_fail(
+; CHECK-NEXT: [[X:%.*]] = or <4 x i8> [[XX:%.*]], <i8 8, i8 23, i8 56, i8 9>
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT: [[R:%.*]] = and i8 [[V]], 8
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %x = or <4 x i8> %xx, <i8 8, i8 23, i8 56, i8 9>
+ %v = call i8 @llvm.vector.reduce.smin(<4 x i8> %x)
+ %r = and i8 %v, 8
+ ret i8 %r
+}
>From cf82747a4380aba020943328da689b7d95431e32 Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Tue, 9 Apr 2024 11:58:38 -0500
Subject: [PATCH 3/4] [ValueTracking] Add support for
`vector_reduce_{s,u}{min,max}` in `isKnownNonZero`
Previously missing, proofs for all implementations:
https://alive2.llvm.org/ce/z/G8wpmG
---
llvm/lib/Analysis/ValueTracking.cpp | 43 +++++++++++++++++++
.../vector-reduce-min-max-known.ll | 31 +++----------
2 files changed, 49 insertions(+), 25 deletions(-)
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 3bc5d9ee193f79..716d1a06070cbb 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -2855,6 +2855,49 @@ static bool isKnownNonZeroFromOperator(const Operator *I,
return isNonZeroAdd(DemandedElts, Depth, Q, BitWidth,
II->getArgOperand(0), II->getArgOperand(1),
/*NSW=*/true, /* NUW=*/false);
+ case Intrinsic::vector_reduce_umax:
+ if (auto *VecTy =
+ dyn_cast<FixedVectorType>(II->getArgOperand(0)->getType())) {
+ unsigned NumEle = VecTy->getNumElements();
+ // If any element is non-zero the reduce is non-zero.
+ for (unsigned Idx = 0; Idx < NumEle; ++Idx) {
+ if (isKnownNonZero(II->getArgOperand(0),
+ APInt::getOneBitSet(NumEle, Idx), Depth, Q))
+ return true;
+ }
+ return false;
+ }
+ [[fallthrough]];
+ case Intrinsic::vector_reduce_umin:
+ return isKnownNonZero(II->getArgOperand(0), Depth, Q);
+ case Intrinsic::vector_reduce_smax:
+ case Intrinsic::vector_reduce_smin:
+ if (auto *VecTy =
+ dyn_cast<FixedVectorType>(II->getArgOperand(0)->getType())) {
+ bool AllNonZero = true;
+ auto EleImpliesNonZero = [&](const APInt &DemandedEle) {
+ KnownBits TmpKnown(
+ getBitWidth(II->getArgOperand(0)->getType(), Q.DL));
+ bool Ret = II->getIntrinsicID() == Intrinsic::vector_reduce_smin
+ ? ::isKnownNegative(II->getArgOperand(0),
+ DemandedEle, TmpKnown, Q, Depth)
+ : ::isKnownPositive(II->getArgOperand(0),
+ DemandedEle, TmpKnown, Q, Depth);
+ AllNonZero &= TmpKnown.isNonZero();
+ return Ret;
+ };
+ unsigned NumEle = VecTy->getNumElements();
+ // If any element is negative/strictly-positive (for smin/smax
+ // respectively) the reduce is non-zero.
+ for (unsigned Idx = 0; Idx < NumEle; ++Idx) {
+ if (EleImpliesNonZero(APInt::getOneBitSet(NumEle, Idx)))
+ return true;
+ }
+ if (AllNonZero)
+ return true;
+ }
+ // Otherwise, if all elements are non-zero, result is non-zero
+ return isKnownNonZero(II->getArgOperand(0), Depth, Q);
case Intrinsic::umax:
case Intrinsic::uadd_sat:
return isKnownNonZero(II->getArgOperand(1), DemandedElts, Depth, Q) ||
diff --git a/llvm/test/Transforms/InstCombine/vector-reduce-min-max-known.ll b/llvm/test/Transforms/InstCombine/vector-reduce-min-max-known.ll
index a02ebcca8090a2..9f236b8e51aec7 100644
--- a/llvm/test/Transforms/InstCombine/vector-reduce-min-max-known.ll
+++ b/llvm/test/Transforms/InstCombine/vector-reduce-min-max-known.ll
@@ -3,10 +3,7 @@
define i1 @vec_reduce_umax_non_zero(<4 x i8> %xx) {
; CHECK-LABEL: @vec_reduce_umax_non_zero(
-; CHECK-NEXT: [[X:%.*]] = add nuw <4 x i8> [[XX:%.*]], <i8 0, i8 1, i8 0, i8 0>
-; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> [[X]])
-; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[V]], 0
-; CHECK-NEXT: ret i1 [[R]]
+; CHECK-NEXT: ret i1 false
;
%x = add nuw <4 x i8> %xx, <i8 0, i8 1, i8 0, i8 0>
%v = call i8 @llvm.vector.reduce.umax(<4 x i8> %x)
@@ -29,10 +26,7 @@ define i1 @vec_reduce_umax_non_zero_fail(<4 x i8> %xx) {
define i1 @vec_reduce_umin_non_zero(<4 x i8> %xx) {
; CHECK-LABEL: @vec_reduce_umin_non_zero(
-; CHECK-NEXT: [[X:%.*]] = add nuw <4 x i8> [[XX:%.*]], <i8 1, i8 1, i8 1, i8 1>
-; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> [[X]])
-; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[V]], 0
-; CHECK-NEXT: ret i1 [[R]]
+; CHECK-NEXT: ret i1 false
;
%x = add nuw <4 x i8> %xx, <i8 1, i8 1, i8 1, i8 1>
%v = call i8 @llvm.vector.reduce.umin(<4 x i8> %x)
@@ -55,10 +49,7 @@ define i1 @vec_reduce_umin_non_zero_fail(<4 x i8> %xx) {
define i1 @vec_reduce_smax_non_zero0(<4 x i8> %xx) {
; CHECK-LABEL: @vec_reduce_smax_non_zero0(
-; CHECK-NEXT: [[X:%.*]] = add nuw <4 x i8> [[XX:%.*]], <i8 1, i8 1, i8 1, i8 1>
-; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> [[X]])
-; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[V]], 0
-; CHECK-NEXT: ret i1 [[R]]
+; CHECK-NEXT: ret i1 false
;
%x = add nuw <4 x i8> %xx, <i8 1, i8 1, i8 1, i8 1>
%v = call i8 @llvm.vector.reduce.smax(<4 x i8> %x)
@@ -68,11 +59,7 @@ define i1 @vec_reduce_smax_non_zero0(<4 x i8> %xx) {
define i1 @vec_reduce_smax_non_zero1(<4 x i8> %xx) {
; CHECK-LABEL: @vec_reduce_smax_non_zero1(
-; CHECK-NEXT: [[X0:%.*]] = and <4 x i8> [[XX:%.*]], <i8 127, i8 -1, i8 -1, i8 -1>
-; CHECK-NEXT: [[X:%.*]] = or <4 x i8> [[X0]], <i8 1, i8 0, i8 0, i8 0>
-; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> [[X]])
-; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[V]], 0
-; CHECK-NEXT: ret i1 [[R]]
+; CHECK-NEXT: ret i1 false
;
%x0 = and <4 x i8> %xx, <i8 127, i8 255, i8 255, i8 255>
%x = or <4 x i8> %x0, <i8 1, i8 0, i8 0, i8 0>
@@ -98,10 +85,7 @@ define i1 @vec_reduce_smax_non_zero_fail(<4 x i8> %xx) {
define i1 @vec_reduce_smin_non_zero0(<4 x i8> %xx) {
; CHECK-LABEL: @vec_reduce_smin_non_zero0(
-; CHECK-NEXT: [[X:%.*]] = add nuw <4 x i8> [[XX:%.*]], <i8 1, i8 1, i8 1, i8 1>
-; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> [[X]])
-; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[V]], 0
-; CHECK-NEXT: ret i1 [[R]]
+; CHECK-NEXT: ret i1 false
;
%x = add nuw <4 x i8> %xx, <i8 1, i8 1, i8 1, i8 1>
%v = call i8 @llvm.vector.reduce.smin(<4 x i8> %x)
@@ -111,10 +95,7 @@ define i1 @vec_reduce_smin_non_zero0(<4 x i8> %xx) {
define i1 @vec_reduce_smin_non_zero1(<4 x i8> %xx) {
; CHECK-LABEL: @vec_reduce_smin_non_zero1(
-; CHECK-NEXT: [[X:%.*]] = or <4 x i8> [[XX:%.*]], <i8 0, i8 0, i8 0, i8 -128>
-; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> [[X]])
-; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[V]], 0
-; CHECK-NEXT: ret i1 [[R]]
+; CHECK-NEXT: ret i1 false
;
%x = or <4 x i8> %xx, <i8 0, i8 0, i8 0, i8 128>
%v = call i8 @llvm.vector.reduce.smin(<4 x i8> %x)
>From 16a97a3135002a7054b4bf73dd6f1fe4161b1aa9 Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Tue, 9 Apr 2024 11:58:48 -0500
Subject: [PATCH 4/4] [ValueTracking] Add support for
`vector_reduce_{s,u}{min,max}` in `computeKnownBits`
Previously missing. We compute by just applying the reduce function on
the knownbits of each element.
---
llvm/lib/Analysis/ValueTracking.cpp | 34 +++++++++++++++++++
.../vector-reduce-min-max-known.ll | 30 ++++------------
2 files changed, 40 insertions(+), 24 deletions(-)
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 716d1a06070cbb..922fd65374e1c7 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -1652,6 +1652,40 @@ static void computeKnownBitsFromOperator(const Operator *I,
computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
Known = KnownBits::ssub_sat(Known, Known2);
break;
+ case Intrinsic::vector_reduce_umax:
+ case Intrinsic::vector_reduce_umin:
+ case Intrinsic::vector_reduce_smax:
+ case Intrinsic::vector_reduce_smin:
+ if (auto *VecTy =
+ dyn_cast<FixedVectorType>(II->getArgOperand(0)->getType())) {
+ unsigned NumEle = VecTy->getNumElements();
+ computeKnownBits(II->getArgOperand(0), APInt::getOneBitSet(NumEle, 0),
+ Known, Depth + 1, Q);
+ for (unsigned Idx = 1; Idx < NumEle; ++Idx) {
+ computeKnownBits(II->getArgOperand(0),
+ APInt::getOneBitSet(NumEle, Idx), Known2,
+ Depth + 1, Q);
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::vector_reduce_umax:
+ Known = KnownBits::umax(Known, Known2);
+ break;
+ case Intrinsic::vector_reduce_umin:
+ Known = KnownBits::umin(Known, Known2);
+ break;
+ case Intrinsic::vector_reduce_smax:
+ Known = KnownBits::smax(Known, Known2);
+ break;
+ case Intrinsic::vector_reduce_smin:
+ Known = KnownBits::smin(Known, Known2);
+ break;
+ default:
+ llvm_unreachable("Invalid Intrinsic in vec reduce min/max case");
+ }
+ }
+ } else {
+ computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
+ }
+ break;
case Intrinsic::umin:
computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
diff --git a/llvm/test/Transforms/InstCombine/vector-reduce-min-max-known.ll b/llvm/test/Transforms/InstCombine/vector-reduce-min-max-known.ll
index 9f236b8e51aec7..bee5124404a943 100644
--- a/llvm/test/Transforms/InstCombine/vector-reduce-min-max-known.ll
+++ b/llvm/test/Transforms/InstCombine/vector-reduce-min-max-known.ll
@@ -120,10 +120,7 @@ define i1 @vec_reduce_smin_non_zero_fail(<4 x i8> %xx) {
define i8 @vec_reduce_umax_known0(<4 x i8> %xx) {
; CHECK-LABEL: @vec_reduce_umax_known0(
-; CHECK-NEXT: [[X:%.*]] = or <4 x i8> [[XX:%.*]], <i8 1, i8 1, i8 1, i8 1>
-; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> [[X]])
-; CHECK-NEXT: [[R:%.*]] = and i8 [[V]], 1
-; CHECK-NEXT: ret i8 [[R]]
+; CHECK-NEXT: ret i8 1
;
%x = or <4 x i8> %xx, <i8 1, i8 1, i8 1, i8 1>
%v = call i8 @llvm.vector.reduce.umax(<4 x i8> %x)
@@ -133,10 +130,7 @@ define i8 @vec_reduce_umax_known0(<4 x i8> %xx) {
define i8 @vec_reduce_umax_known1(<4 x i8> %xx) {
; CHECK-LABEL: @vec_reduce_umax_known1(
-; CHECK-NEXT: [[X:%.*]] = or <4 x i8> [[XX:%.*]], <i8 1, i8 1, i8 1, i8 -128>
-; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> [[X]])
-; CHECK-NEXT: [[R:%.*]] = and i8 [[V]], -128
-; CHECK-NEXT: ret i8 [[R]]
+; CHECK-NEXT: ret i8 -128
;
%x = or <4 x i8> %xx, <i8 1, i8 1, i8 1, i8 128>
%v = call i8 @llvm.vector.reduce.umax(<4 x i8> %x)
@@ -172,10 +166,7 @@ define i8 @vec_reduce_umax_known_fail1(<4 x i8> %xx) {
define i8 @vec_reduce_umin_known0(<4 x i8> %xx) {
; CHECK-LABEL: @vec_reduce_umin_known0(
-; CHECK-NEXT: [[X:%.*]] = or <4 x i8> [[XX:%.*]], <i8 1, i8 1, i8 1, i8 1>
-; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> [[X]])
-; CHECK-NEXT: [[R:%.*]] = and i8 [[V]], 1
-; CHECK-NEXT: ret i8 [[R]]
+; CHECK-NEXT: ret i8 1
;
%x = or <4 x i8> %xx, <i8 1, i8 1, i8 1, i8 1>
%v = call i8 @llvm.vector.reduce.umin(<4 x i8> %x)
@@ -185,10 +176,7 @@ define i8 @vec_reduce_umin_known0(<4 x i8> %xx) {
define i8 @vec_reduce_umin_known1(<4 x i8> %xx) {
; CHECK-LABEL: @vec_reduce_umin_known1(
-; CHECK-NEXT: [[X:%.*]] = and <4 x i8> [[XX:%.*]], <i8 127, i8 -1, i8 -1, i8 -1>
-; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> [[X]])
-; CHECK-NEXT: [[R:%.*]] = and i8 [[V]], -128
-; CHECK-NEXT: ret i8 [[R]]
+; CHECK-NEXT: ret i8 0
;
%x = and <4 x i8> %xx, <i8 127, i8 255, i8 255, i8 255>
%v = call i8 @llvm.vector.reduce.umin(<4 x i8> %x)
@@ -225,10 +213,7 @@ define i8 @vec_reduce_umin_known_fail1(<4 x i8> %xx) {
define i8 @vec_reduce_smax_known(<4 x i8> %xx) {
; CHECK-LABEL: @vec_reduce_smax_known(
-; CHECK-NEXT: [[X:%.*]] = or <4 x i8> [[XX:%.*]], <i8 4, i8 4, i8 4, i8 5>
-; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> [[X]])
-; CHECK-NEXT: [[R:%.*]] = and i8 [[V]], 4
-; CHECK-NEXT: ret i8 [[R]]
+; CHECK-NEXT: ret i8 4
;
%x = or <4 x i8> %xx, <i8 4, i8 4, i8 4, i8 5>
%v = call i8 @llvm.vector.reduce.smax(<4 x i8> %x)
@@ -251,10 +236,7 @@ define i8 @vec_reduce_smax_known_fail(<4 x i8> %xx) {
define i8 @vec_reduce_smin_known(<4 x i8> %xx) {
; CHECK-LABEL: @vec_reduce_smin_known(
-; CHECK-NEXT: [[X:%.*]] = or <4 x i8> [[XX:%.*]], <i8 8, i8 24, i8 56, i8 9>
-; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> [[X]])
-; CHECK-NEXT: [[R:%.*]] = and i8 [[V]], 8
-; CHECK-NEXT: ret i8 [[R]]
+; CHECK-NEXT: ret i8 8
;
%x = or <4 x i8> %xx, <i8 8, i8 24, i8 56, i8 9>
%v = call i8 @llvm.vector.reduce.smin(<4 x i8> %x)
More information about the llvm-commits
mailing list