[llvm] goldsteinn/known vec reduce add mul (PR #88410)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 11 09:29:46 PDT 2024
https://github.com/goldsteinn created https://github.com/llvm/llvm-project/pull/88410
- **[ValueTracking] Add tests for `computeKnownBits` of `llvm.vector.reduce.{add,mul}`; NFC**
- **[ValueTracking] Implement `computeKnownBits` for `llvm.vector.reduce.{add,mul}`**
- **[ValueTracking] Add tests for `isKnownNonZero` of `llvm.vector.reduce.{add,mul}`; NFC**
- **[ValueTracking] Implement `isKnownNonZero` for `llvm.vector.reduce.{add,mul}`**
>From 7a242961415580cc41a95614f79aa88fa7d14b7a Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Wed, 10 Apr 2024 15:30:43 -0500
Subject: [PATCH 1/4] [ValueTracking] Add tests for `computeKnownBits` of
`llvm.vector.reduce.{add,mul}`; NFC
---
.../test/Transforms/InstCombine/known-bits.ll | 184 ++++++++++++++++++
1 file changed, 184 insertions(+)
diff --git a/llvm/test/Transforms/InstCombine/known-bits.ll b/llvm/test/Transforms/InstCombine/known-bits.ll
index d210b19bb7faf2..fde3e22143333b 100644
--- a/llvm/test/Transforms/InstCombine/known-bits.ll
+++ b/llvm/test/Transforms/InstCombine/known-bits.ll
@@ -999,5 +999,189 @@ define i1 @extract_value_smul_fail(i8 %xx, i8 %yy) {
ret i1 %r
}
+define i8 @known_reduce_add(<2 x i8> %xx) {
+; CHECK-LABEL: @known_reduce_add(
+; CHECK-NEXT: [[X:%.*]] = and <2 x i8> [[XX:%.*]], <i8 3, i8 3>
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> [[X]])
+; CHECK-NEXT: [[R:%.*]] = and i8 [[V]], 8
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %x = and <2 x i8> %xx, <i8 3, i8 3>
+ %v = call i8 @llvm.vector.reduce.add(<2 x i8> %x)
+ %r = and i8 %v, 8
+ ret i8 %r
+}
+
+define i8 @known_reduce_add_fail(<2 x i8> %xx) {
+; CHECK-LABEL: @known_reduce_add_fail(
+; CHECK-NEXT: [[X:%.*]] = and <2 x i8> [[XX:%.*]], <i8 3, i8 3>
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> [[X]])
+; CHECK-NEXT: [[R:%.*]] = and i8 [[V]], 4
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %x = and <2 x i8> %xx, <i8 3, i8 3>
+ %v = call i8 @llvm.vector.reduce.add(<2 x i8> %x)
+ %r = and i8 %v, 4
+ ret i8 %r
+}
+
+define i8 @known_reduce_add_fail2(<4 x i8> %xx) {
+; CHECK-LABEL: @known_reduce_add_fail2(
+; CHECK-NEXT: [[X:%.*]] = and <4 x i8> [[XX:%.*]], <i8 3, i8 3, i8 3, i8 3>
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT: [[R:%.*]] = and i8 [[V]], 8
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %x = and <4 x i8> %xx, <i8 3, i8 3, i8 3, i8 3>
+ %v = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> %x)
+ %r = and i8 %v, 8
+ ret i8 %r
+}
+
+define i8 @known_reduce_add2(<4 x i8> %xx) {
+; CHECK-LABEL: @known_reduce_add2(
+; CHECK-NEXT: [[X:%.*]] = and <4 x i8> [[XX:%.*]], <i8 3, i8 3, i8 3, i8 3>
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT: [[R:%.*]] = and i8 [[V]], 32
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %x = and <4 x i8> %xx, <i8 3, i8 3, i8 3, i8 3>
+ %v = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> %x)
+ %r = and i8 %v, 32
+ ret i8 %r
+}
+
+define i8 @known_reduce_add3(<2 x i8> %xx) {
+; CHECK-LABEL: @known_reduce_add3(
+; CHECK-NEXT: [[X:%.*]] = or <2 x i8> [[XX:%.*]], <i8 1, i8 1>
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> [[X]])
+; CHECK-NEXT: [[R:%.*]] = and i8 [[V]], 1
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %x = or <2 x i8> %xx, <i8 1, i8 1>
+ %v = call i8 @llvm.vector.reduce.add(<2 x i8> %x)
+ %r = and i8 %v, 1
+ ret i8 %r
+}
+
+define i8 @known_reduce_add33(<3 x i8> %xx) {
+; CHECK-LABEL: @known_reduce_add33(
+; CHECK-NEXT: [[X:%.*]] = or <3 x i8> [[XX:%.*]], <i8 1, i8 1, i8 1>
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.add.v3i8(<3 x i8> [[X]])
+; CHECK-NEXT: [[R:%.*]] = and i8 [[V]], 1
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %x = or <3 x i8> %xx, <i8 1, i8 1, i8 1>
+ %v = call i8 @llvm.vector.reduce.add.v3i8(<3 x i8> %x)
+ %r = and i8 %v, 1
+ ret i8 %r
+}
+
+define i8 @known_reduce_add34(<4 x i8> %xx) {
+; CHECK-LABEL: @known_reduce_add34(
+; CHECK-NEXT: [[X:%.*]] = or <4 x i8> [[XX:%.*]], <i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT: [[R:%.*]] = and i8 [[V]], 1
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %x = or <4 x i8> %xx, <i8 1, i8 1, i8 1, i8 1>
+ %v = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> %x)
+ %r = and i8 %v, 1
+ ret i8 %r
+}
+
+define i8 @known_reduce_add4(<2 x i8> %xx) {
+; CHECK-LABEL: @known_reduce_add4(
+; CHECK-NEXT: [[X0:%.*]] = and <2 x i8> [[XX:%.*]], <i8 2, i8 2>
+; CHECK-NEXT: [[X:%.*]] = or disjoint <2 x i8> [[X0]], <i8 1, i8 1>
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> [[X]])
+; CHECK-NEXT: [[R:%.*]] = and i8 [[V]], 2
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %x0 = and <2 x i8> %xx, <i8 3, i8 3>
+ %x = or <2 x i8> %x0, <i8 1, i8 1>
+ %v = call i8 @llvm.vector.reduce.add(<2 x i8> %x)
+ %r = and i8 %v, 2
+ ret i8 %r
+}
+
+define i8 @known_reduce_add4_fail(<2 x i8> %xx) {
+; CHECK-LABEL: @known_reduce_add4_fail(
+; CHECK-NEXT: [[X:%.*]] = or <2 x i8> [[XX:%.*]], <i8 1, i8 1>
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> [[X]])
+; CHECK-NEXT: [[R:%.*]] = and i8 [[V]], 2
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %x = or <2 x i8> %xx, <i8 1, i8 1>
+ %v = call i8 @llvm.vector.reduce.add(<2 x i8> %x)
+ %r = and i8 %v, 2
+ ret i8 %r
+}
+
+define i8 @known_reduce_mul(<2 x i8> %xx) {
+; CHECK-LABEL: @known_reduce_mul(
+; CHECK-NEXT: [[X:%.*]] = and <2 x i8> [[XX:%.*]], <i8 3, i8 3>
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> [[X]])
+; CHECK-NEXT: [[R:%.*]] = and i8 [[V]], 16
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %x = and <2 x i8> %xx, <i8 3, i8 3>
+ %v = call i8 @llvm.vector.reduce.mul(<2 x i8> %x)
+ %r = and i8 %v, 16
+ ret i8 %r
+}
+
+define i8 @known_reduce_mul_fail(<2 x i8> %xx) {
+; CHECK-LABEL: @known_reduce_mul_fail(
+; CHECK-NEXT: [[X:%.*]] = and <2 x i8> [[XX:%.*]], <i8 3, i8 3>
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> [[X]])
+; CHECK-NEXT: [[R:%.*]] = and i8 [[V]], 8
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %x = and <2 x i8> %xx, <i8 3, i8 3>
+ %v = call i8 @llvm.vector.reduce.mul(<2 x i8> %x)
+ %r = and i8 %v, 8
+ ret i8 %r
+}
+
+define i8 @known_reduce_mul_fail2(<3 x i8> %xx) {
+; CHECK-LABEL: @known_reduce_mul_fail2(
+; CHECK-NEXT: [[X:%.*]] = and <3 x i8> [[XX:%.*]], <i8 3, i8 3, i8 3>
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.mul.v3i8(<3 x i8> [[X]])
+; CHECK-NEXT: [[R:%.*]] = and i8 [[V]], 32
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %x = and <3 x i8> %xx, <i8 3, i8 3, i8 3>
+ %v = call i8 @llvm.vector.reduce.mul.v3i8(<3 x i8> %x)
+ %r = and i8 %v, 32
+ ret i8 %r
+}
+
+define i8 @known_reduce_mul2(<3 x i8> %xx) {
+; CHECK-LABEL: @known_reduce_mul2(
+; CHECK-NEXT: [[X:%.*]] = and <3 x i8> [[XX:%.*]], <i8 3, i8 3, i8 3>
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.mul.v3i8(<3 x i8> [[X]])
+; CHECK-NEXT: [[R:%.*]] = and i8 [[V]], 64
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %x = and <3 x i8> %xx, <i8 3, i8 3, i8 3>
+ %v = call i8 @llvm.vector.reduce.mul.v3i8(<3 x i8> %x)
+ %r = and i8 %v, 64
+ ret i8 %r
+}
+
+define i8 @known_reduce_mul3(<2 x i8> %xx) {
+; CHECK-LABEL: @known_reduce_mul3(
+; CHECK-NEXT: [[X:%.*]] = or <2 x i8> [[XX:%.*]], <i8 1, i8 1>
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> [[X]])
+; CHECK-NEXT: [[R:%.*]] = and i8 [[V]], 1
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %x = or <2 x i8> %xx, <i8 1, i8 1>
+ %v = call i8 @llvm.vector.reduce.mul(<2 x i8> %x)
+ %r = and i8 %v, 1
+ ret i8 %r
+}
+
declare void @use(i1)
declare void @sink(i8)
>From 03936f5984493c4a845a3f8122ffdbe14330cad3 Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Wed, 10 Apr 2024 14:24:18 -0500
Subject: [PATCH 2/4] [ValueTracking] Implement `computeKnownBits` for
`llvm.vector.reduce.{add,mul}`
---
llvm/lib/Analysis/ValueTracking.cpp | 20 ++++++++++
.../test/Transforms/InstCombine/known-bits.ll | 40 ++++---------------
2 files changed, 28 insertions(+), 32 deletions(-)
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 3a10de72a27562..8bda957cb0a6a5 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -1629,6 +1629,26 @@ static void computeKnownBitsFromOperator(const Operator *I,
case Intrinsic::vector_reduce_smin:
computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
break;
+ case Intrinsic::vector_reduce_mul:
+ case Intrinsic::vector_reduce_add:
+ // We compute the common bits for all elements then apply the reduce op
+ // NumEle - 1 times. This is mostly useful for known high zeros.
+ if (auto *VecTy =
+ dyn_cast<FixedVectorType>(I->getOperand(0)->getType())) {
+ computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
+ KnownBits SingleKnown = Known;
+ for (unsigned i = 1, e = VecTy->getNumElements(); i < e; ++i) {
+ if (Known.isUnknown())
+ break;
+ if (II->getIntrinsicID() == Intrinsic::vector_reduce_add)
+ Known = KnownBits::computeForAddSub(
+ /*Add=*/true, /*NSW=*/false, /*NUW=*/false, SingleKnown,
+ Known);
+ else
+ Known = KnownBits::mul(SingleKnown, Known);
+ }
+ }
+ break;
case Intrinsic::umin:
computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
diff --git a/llvm/test/Transforms/InstCombine/known-bits.ll b/llvm/test/Transforms/InstCombine/known-bits.ll
index fde3e22143333b..4eca9ea87c6c4e 100644
--- a/llvm/test/Transforms/InstCombine/known-bits.ll
+++ b/llvm/test/Transforms/InstCombine/known-bits.ll
@@ -1001,10 +1001,7 @@ define i1 @extract_value_smul_fail(i8 %xx, i8 %yy) {
define i8 @known_reduce_add(<2 x i8> %xx) {
; CHECK-LABEL: @known_reduce_add(
-; CHECK-NEXT: [[X:%.*]] = and <2 x i8> [[XX:%.*]], <i8 3, i8 3>
-; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> [[X]])
-; CHECK-NEXT: [[R:%.*]] = and i8 [[V]], 8
-; CHECK-NEXT: ret i8 [[R]]
+; CHECK-NEXT: ret i8 0
;
%x = and <2 x i8> %xx, <i8 3, i8 3>
%v = call i8 @llvm.vector.reduce.add(<2 x i8> %x)
@@ -1040,10 +1037,7 @@ define i8 @known_reduce_add_fail2(<4 x i8> %xx) {
define i8 @known_reduce_add2(<4 x i8> %xx) {
; CHECK-LABEL: @known_reduce_add2(
-; CHECK-NEXT: [[X:%.*]] = and <4 x i8> [[XX:%.*]], <i8 3, i8 3, i8 3, i8 3>
-; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> [[X]])
-; CHECK-NEXT: [[R:%.*]] = and i8 [[V]], 32
-; CHECK-NEXT: ret i8 [[R]]
+; CHECK-NEXT: ret i8 0
;
%x = and <4 x i8> %xx, <i8 3, i8 3, i8 3, i8 3>
%v = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> %x)
@@ -1053,10 +1047,7 @@ define i8 @known_reduce_add2(<4 x i8> %xx) {
define i8 @known_reduce_add3(<2 x i8> %xx) {
; CHECK-LABEL: @known_reduce_add3(
-; CHECK-NEXT: [[X:%.*]] = or <2 x i8> [[XX:%.*]], <i8 1, i8 1>
-; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> [[X]])
-; CHECK-NEXT: [[R:%.*]] = and i8 [[V]], 1
-; CHECK-NEXT: ret i8 [[R]]
+; CHECK-NEXT: ret i8 0
;
%x = or <2 x i8> %xx, <i8 1, i8 1>
%v = call i8 @llvm.vector.reduce.add(<2 x i8> %x)
@@ -1066,10 +1057,7 @@ define i8 @known_reduce_add3(<2 x i8> %xx) {
define i8 @known_reduce_add33(<3 x i8> %xx) {
; CHECK-LABEL: @known_reduce_add33(
-; CHECK-NEXT: [[X:%.*]] = or <3 x i8> [[XX:%.*]], <i8 1, i8 1, i8 1>
-; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.add.v3i8(<3 x i8> [[X]])
-; CHECK-NEXT: [[R:%.*]] = and i8 [[V]], 1
-; CHECK-NEXT: ret i8 [[R]]
+; CHECK-NEXT: ret i8 1
;
%x = or <3 x i8> %xx, <i8 1, i8 1, i8 1>
%v = call i8 @llvm.vector.reduce.add.v3i8(<3 x i8> %x)
@@ -1079,10 +1067,7 @@ define i8 @known_reduce_add33(<3 x i8> %xx) {
define i8 @known_reduce_add34(<4 x i8> %xx) {
; CHECK-LABEL: @known_reduce_add34(
-; CHECK-NEXT: [[X:%.*]] = or <4 x i8> [[XX:%.*]], <i8 1, i8 1, i8 1, i8 1>
-; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> [[X]])
-; CHECK-NEXT: [[R:%.*]] = and i8 [[V]], 1
-; CHECK-NEXT: ret i8 [[R]]
+; CHECK-NEXT: ret i8 0
;
%x = or <4 x i8> %xx, <i8 1, i8 1, i8 1, i8 1>
%v = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> %x)
@@ -1120,10 +1105,7 @@ define i8 @known_reduce_add4_fail(<2 x i8> %xx) {
define i8 @known_reduce_mul(<2 x i8> %xx) {
; CHECK-LABEL: @known_reduce_mul(
-; CHECK-NEXT: [[X:%.*]] = and <2 x i8> [[XX:%.*]], <i8 3, i8 3>
-; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> [[X]])
-; CHECK-NEXT: [[R:%.*]] = and i8 [[V]], 16
-; CHECK-NEXT: ret i8 [[R]]
+; CHECK-NEXT: ret i8 0
;
%x = and <2 x i8> %xx, <i8 3, i8 3>
%v = call i8 @llvm.vector.reduce.mul(<2 x i8> %x)
@@ -1159,10 +1141,7 @@ define i8 @known_reduce_mul_fail2(<3 x i8> %xx) {
define i8 @known_reduce_mul2(<3 x i8> %xx) {
; CHECK-LABEL: @known_reduce_mul2(
-; CHECK-NEXT: [[X:%.*]] = and <3 x i8> [[XX:%.*]], <i8 3, i8 3, i8 3>
-; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.mul.v3i8(<3 x i8> [[X]])
-; CHECK-NEXT: [[R:%.*]] = and i8 [[V]], 64
-; CHECK-NEXT: ret i8 [[R]]
+; CHECK-NEXT: ret i8 0
;
%x = and <3 x i8> %xx, <i8 3, i8 3, i8 3>
%v = call i8 @llvm.vector.reduce.mul.v3i8(<3 x i8> %x)
@@ -1172,10 +1151,7 @@ define i8 @known_reduce_mul2(<3 x i8> %xx) {
define i8 @known_reduce_mul3(<2 x i8> %xx) {
; CHECK-LABEL: @known_reduce_mul3(
-; CHECK-NEXT: [[X:%.*]] = or <2 x i8> [[XX:%.*]], <i8 1, i8 1>
-; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> [[X]])
-; CHECK-NEXT: [[R:%.*]] = and i8 [[V]], 1
-; CHECK-NEXT: ret i8 [[R]]
+; CHECK-NEXT: ret i8 1
;
%x = or <2 x i8> %xx, <i8 1, i8 1>
%v = call i8 @llvm.vector.reduce.mul(<2 x i8> %x)
>From 57789b825f2de6d4d8ed9d3d8012f15f4ff41778 Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Wed, 10 Apr 2024 15:29:02 -0500
Subject: [PATCH 3/4] [ValueTracking] Add tests for `isKnownNonZero` of
`llvm.vector.reduce.{add,mul}`; NFC
---
.../Transforms/InstSimplify/known-non-zero.ll | 117 ++++++++++++++++++
1 file changed, 117 insertions(+)
diff --git a/llvm/test/Transforms/InstSimplify/known-non-zero.ll b/llvm/test/Transforms/InstSimplify/known-non-zero.ll
index d9b8f5eed32390..fd2febe6ea26c2 100644
--- a/llvm/test/Transforms/InstSimplify/known-non-zero.ll
+++ b/llvm/test/Transforms/InstSimplify/known-non-zero.ll
@@ -377,3 +377,120 @@ define <2 x i1> @insert_nonzero_any_idx_fail(<2 x i8> %xx, i8 %yy, i32 %idx) {
%r = icmp eq <2 x i8> %ins, zeroinitializer
ret <2 x i1> %r
}
+
+define i1 @nonzero_reduce_add(<2 x i8> %xx) {
+; CHECK-LABEL: @nonzero_reduce_add(
+; CHECK-NEXT: [[X0:%.*]] = and <2 x i8> [[XX:%.*]], <i8 3, i8 3>
+; CHECK-NEXT: [[X:%.*]] = add <2 x i8> [[X0]], <i8 1, i8 1>
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> [[X]])
+; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[V]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %x0 = and <2 x i8> %xx, <i8 3, i8 3>
+ %x = add <2 x i8> %x0, <i8 1, i8 1>
+ %v = call i8 @llvm.vector.reduce.add(<2 x i8> %x)
+ %r = icmp eq i8 %v, 0
+ ret i1 %r
+}
+
+define i1 @nonzero_reduce_add_fail(<2 x i8> %xx) {
+; CHECK-LABEL: @nonzero_reduce_add_fail(
+; CHECK-NEXT: [[X0:%.*]] = and <2 x i8> [[XX:%.*]], <i8 3, i8 3>
+; CHECK-NEXT: [[X:%.*]] = add <2 x i8> [[X0]], <i8 1, i8 0>
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> [[X]])
+; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[V]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %x0 = and <2 x i8> %xx, <i8 3, i8 3>
+ %x = add <2 x i8> %x0, <i8 1, i8 0>
+ %v = call i8 @llvm.vector.reduce.add(<2 x i8> %x)
+ %r = icmp eq i8 %v, 0
+ ret i1 %r
+}
+
+define i1 @nonzero_reduce_add_fail2(<2 x i8> %xx) {
+; CHECK-LABEL: @nonzero_reduce_add_fail2(
+; CHECK-NEXT: [[X:%.*]] = add nuw <2 x i8> [[XX:%.*]], <i8 1, i8 1>
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> [[X]])
+; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[V]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %x = add nuw <2 x i8> %xx, <i8 1, i8 1>
+ %v = call i8 @llvm.vector.reduce.add(<2 x i8> %x)
+ %r = icmp eq i8 %v, 0
+ ret i1 %r
+}
+
+define i1 @nonzero_reduce_add_fail3(<18 x i4> %xx) {
+; CHECK-LABEL: @nonzero_reduce_add_fail3(
+; CHECK-NEXT: [[X0:%.*]] = and <18 x i4> [[XX:%.*]], <i4 3, i4 3, i4 3, i4 3, i4 3, i4 3, i4 3, i4 3, i4 3, i4 3, i4 3, i4 3, i4 3, i4 3, i4 3, i4 3, i4 3, i4 3>
+; CHECK-NEXT: [[X:%.*]] = add <18 x i4> [[X0]], <i4 1, i4 1, i4 1, i4 1, i4 1, i4 1, i4 1, i4 1, i4 1, i4 1, i4 1, i4 1, i4 1, i4 1, i4 1, i4 1, i4 1, i4 1>
+; CHECK-NEXT: [[V:%.*]] = call i4 @llvm.vector.reduce.add.v18i4(<18 x i4> [[X]])
+; CHECK-NEXT: [[R:%.*]] = icmp eq i4 [[V]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %x0 = and <18 x i4> %xx, <i4 3, i4 3, i4 3, i4 3, i4 3, i4 3, i4 3, i4 3, i4 3, i4 3, i4 3, i4 3, i4 3, i4 3, i4 3, i4 3, i4 3, i4 3>
+ %x = add <18 x i4> %x0, <i4 1, i4 1, i4 1, i4 1, i4 1, i4 1, i4 1, i4 1, i4 1, i4 1, i4 1, i4 1, i4 1, i4 1, i4 1, i4 1, i4 1, i4 1>
+ %v = call i4 @llvm.vector.reduce.add.v18i4(<18 x i4> %x)
+ %r = icmp eq i4 %v, 0
+ ret i1 %r
+}
+
+define i1 @nonzero_reduce_mul(<2 x i8> %xx) {
+; CHECK-LABEL: @nonzero_reduce_mul(
+; CHECK-NEXT: [[X0:%.*]] = and <2 x i8> [[XX:%.*]], <i8 3, i8 3>
+; CHECK-NEXT: [[X:%.*]] = add <2 x i8> [[X0]], <i8 1, i8 1>
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> [[X]])
+; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[V]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %x0 = and <2 x i8> %xx, <i8 3, i8 3>
+ %x = add <2 x i8> %x0, <i8 1, i8 1>
+ %v = call i8 @llvm.vector.reduce.mul(<2 x i8> %x)
+ %r = icmp eq i8 %v, 0
+ ret i1 %r
+}
+
+define i1 @nonzero_reduce_mul2(<3 x i16> %xx) {
+; CHECK-LABEL: @nonzero_reduce_mul2(
+; CHECK-NEXT: [[X0:%.*]] = and <3 x i16> [[XX:%.*]], <i16 3, i16 3, i16 3>
+; CHECK-NEXT: [[X:%.*]] = add <3 x i16> [[X0]], <i16 1, i16 1, i16 1>
+; CHECK-NEXT: [[V:%.*]] = call i16 @llvm.vector.reduce.mul.v3i16(<3 x i16> [[X]])
+; CHECK-NEXT: [[R:%.*]] = icmp eq i16 [[V]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %x0 = and <3 x i16> %xx, <i16 3, i16 3, i16 3>
+ %x = add <3 x i16> %x0, <i16 1, i16 1, i16 1>
+ %v = call i16 @llvm.vector.reduce.mul.v3i16(<3 x i16> %x)
+ %r = icmp eq i16 %v, 0
+ ret i1 %r
+}
+
+define i1 @nonzero_reduce_mul_fail(<2 x i8> %xx) {
+; CHECK-LABEL: @nonzero_reduce_mul_fail(
+; CHECK-NEXT: [[X0:%.*]] = and <2 x i8> [[XX:%.*]], <i8 15, i8 15>
+; CHECK-NEXT: [[X:%.*]] = add <2 x i8> [[X0]], <i8 1, i8 1>
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> [[X]])
+; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[V]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %x0 = and <2 x i8> %xx, <i8 15, i8 15>
+ %x = add <2 x i8> %x0, <i8 1, i8 1>
+ %v = call i8 @llvm.vector.reduce.mul(<2 x i8> %x)
+ %r = icmp eq i8 %v, 0
+ ret i1 %r
+}
+
+define i1 @nonzero_reduce_mul_fail2(<2 x i8> %xx) {
+; CHECK-LABEL: @nonzero_reduce_mul_fail2(
+; CHECK-NEXT: [[X:%.*]] = add nuw <2 x i8> [[XX:%.*]], <i8 1, i8 1>
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> [[X]])
+; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[V]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %x = add nuw <2 x i8> %xx, <i8 1, i8 1>
+ %v = call i8 @llvm.vector.reduce.mul(<2 x i8> %x)
+ %r = icmp eq i8 %v, 0
+ ret i1 %r
+}
+
>From f03faacf8d2130ab58f7ec5163f93c99e10ccd62 Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Wed, 10 Apr 2024 15:36:33 -0500
Subject: [PATCH 4/4] [ValueTracking] Implement `isKnownNonZero` for
`llvm.vector.reduce.{add,mul}`
Proof for bespoke non-zero logic: https://alive2.llvm.org/ce/z/P6HRvw
---
llvm/lib/Analysis/ValueTracking.cpp | 35 +++++++++++++++++++
.../Transforms/InstSimplify/known-non-zero.ll | 18 ++--------
2 files changed, 38 insertions(+), 15 deletions(-)
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 8bda957cb0a6a5..32ffe5adc41f51 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -2924,6 +2924,41 @@ static bool isKnownNonZeroFromOperator(const Operator *I,
case Intrinsic::vector_reduce_smax:
case Intrinsic::vector_reduce_smin:
return isKnownNonZero(II->getArgOperand(0), Depth, Q);
+ // If we know the reduction doesn't overflow and all elements are
+ // non-zero, the reduction is non-zero.
+ case Intrinsic::vector_reduce_mul:
+ case Intrinsic::vector_reduce_add:
+ if (computeKnownBits(I, Depth + 1, Q).isNonZero())
+ return true;
+
+ if (auto *VecTy =
+ dyn_cast<FixedVectorType>(I->getOperand(0)->getType())) {
+ bool Overflow = false; // 1-element mul skips the loop below.
+ if (II->getIntrinsicID() == Intrinsic::vector_reduce_add) {
+ APInt NumEle(BitWidth, VecTy->getNumElements());
+ // If the element count doesn't fit in BitWidth, the result is
+ // either known-zero or we won't get anything useful.
+ if (NumEle.getZExtValue() != VecTy->getNumElements())
+ break;
+ APInt MaxVal =
+ computeKnownBits(II->getArgOperand(0), Depth, Q).getMaxValue();
+ MaxVal = MaxVal.umul_ov(NumEle, Overflow);
+ } else {
+ APInt MaxVal =
+ computeKnownBits(II->getArgOperand(0), Depth, Q).getMaxValue();
+ APInt SingleVal = MaxVal;
+ for (unsigned i = 1, e = VecTy->getNumElements(); i < e; ++i) {
+ MaxVal = MaxVal.umul_ov(SingleVal, Overflow);
+ if (Overflow)
+ break;
+ }
+ }
+
+ if (Overflow)
+ break;
+ return isKnownNonZero(II->getArgOperand(0), Depth, Q);
+ }
+ break;
case Intrinsic::umax:
case Intrinsic::uadd_sat:
return isKnownNonZero(II->getArgOperand(1), DemandedElts, Depth, Q) ||
diff --git a/llvm/test/Transforms/InstSimplify/known-non-zero.ll b/llvm/test/Transforms/InstSimplify/known-non-zero.ll
index fd2febe6ea26c2..f620ecd8d853dc 100644
--- a/llvm/test/Transforms/InstSimplify/known-non-zero.ll
+++ b/llvm/test/Transforms/InstSimplify/known-non-zero.ll
@@ -380,11 +380,7 @@ define <2 x i1> @insert_nonzero_any_idx_fail(<2 x i8> %xx, i8 %yy, i32 %idx) {
define i1 @nonzero_reduce_add(<2 x i8> %xx) {
; CHECK-LABEL: @nonzero_reduce_add(
-; CHECK-NEXT: [[X0:%.*]] = and <2 x i8> [[XX:%.*]], <i8 3, i8 3>
-; CHECK-NEXT: [[X:%.*]] = add <2 x i8> [[X0]], <i8 1, i8 1>
-; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> [[X]])
-; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[V]], 0
-; CHECK-NEXT: ret i1 [[R]]
+; CHECK-NEXT: ret i1 false
;
%x0 = and <2 x i8> %xx, <i8 3, i8 3>
%x = add <2 x i8> %x0, <i8 1, i8 1>
@@ -438,11 +434,7 @@ define i1 @nonzero_reduce_add_fail3(<18 x i4> %xx) {
define i1 @nonzero_reduce_mul(<2 x i8> %xx) {
; CHECK-LABEL: @nonzero_reduce_mul(
-; CHECK-NEXT: [[X0:%.*]] = and <2 x i8> [[XX:%.*]], <i8 3, i8 3>
-; CHECK-NEXT: [[X:%.*]] = add <2 x i8> [[X0]], <i8 1, i8 1>
-; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> [[X]])
-; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[V]], 0
-; CHECK-NEXT: ret i1 [[R]]
+; CHECK-NEXT: ret i1 false
;
%x0 = and <2 x i8> %xx, <i8 3, i8 3>
%x = add <2 x i8> %x0, <i8 1, i8 1>
@@ -453,11 +445,7 @@ define i1 @nonzero_reduce_mul(<2 x i8> %xx) {
define i1 @nonzero_reduce_mul2(<3 x i16> %xx) {
; CHECK-LABEL: @nonzero_reduce_mul2(
-; CHECK-NEXT: [[X0:%.*]] = and <3 x i16> [[XX:%.*]], <i16 3, i16 3, i16 3>
-; CHECK-NEXT: [[X:%.*]] = add <3 x i16> [[X0]], <i16 1, i16 1, i16 1>
-; CHECK-NEXT: [[V:%.*]] = call i16 @llvm.vector.reduce.mul.v3i16(<3 x i16> [[X]])
-; CHECK-NEXT: [[R:%.*]] = icmp eq i16 [[V]], 0
-; CHECK-NEXT: ret i1 [[R]]
+; CHECK-NEXT: ret i1 false
;
%x0 = and <3 x i16> %xx, <i16 3, i16 3, i16 3>
%x = add <3 x i16> %x0, <i16 1, i16 1, i16 1>
More information about the llvm-commits
mailing list