[llvm] [ValueTracking] Implement `isKnownNonZero`/`computeKnownBits` for `llvm.vector.reduce.{add,mul}` (PR #88410)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 11 09:30:19 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: None (goldsteinn)
<details>
<summary>Changes</summary>
- **[ValueTracking] Add tests for `computeKnownBits` of `llvm.vector.reduce.{add,mul}`; NFC**
- **[ValueTracking] Implement `computeKnownBits` for `llvm.vector.reduce.{add,mul}`**
- **[ValueTracking] Add tests for `isKnownNonZero` of `llvm.vector.reduce.{add,mul}`; NFC**
- **[ValueTracking] Implement `isKnownNonZero` for `llvm.vector.reduce.{add,mul}`**
---
Full diff: https://github.com/llvm/llvm-project/pull/88410.diff
3 Files Affected:
- (modified) llvm/lib/Analysis/ValueTracking.cpp (+55)
- (modified) llvm/test/Transforms/InstCombine/known-bits.ll (+160)
- (modified) llvm/test/Transforms/InstSimplify/known-non-zero.ll (+105)
``````````diff
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 3a10de72a27562..32ffe5adc41f51 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -1629,6 +1629,26 @@ static void computeKnownBitsFromOperator(const Operator *I,
case Intrinsic::vector_reduce_smin:
computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
break;
+ case Intrinsic::vector_reduce_mul:
+ case Intrinsic::vector_reduce_add:
+    // We compute the known bits common to all elements, then apply the
+    // reduce op NumEle - 1 times. This is mostly useful for known high zeros.
+ if (auto *VecTy =
+ dyn_cast<FixedVectorType>(I->getOperand(0)->getType())) {
+ computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
+ KnownBits SingleKnown = Known;
+ for (unsigned i = 1, e = VecTy->getNumElements(); i < e; ++i) {
+ if (Known.isUnknown())
+ break;
+ if (II->getIntrinsicID() == Intrinsic::vector_reduce_add)
+ Known = KnownBits::computeForAddSub(
+ /*Add=*/true, /*NSW=*/false, /*NUW=*/false, SingleKnown,
+ Known);
+ else
+ Known = KnownBits::mul(SingleKnown, Known);
+ }
+ }
+ break;
case Intrinsic::umin:
computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
@@ -2904,6 +2924,41 @@ static bool isKnownNonZeroFromOperator(const Operator *I,
case Intrinsic::vector_reduce_smax:
case Intrinsic::vector_reduce_smin:
return isKnownNonZero(II->getArgOperand(0), Depth, Q);
+ // If we know the reduction doesn't overflow and all elements are
+ // non-zero, the reduction is non-zero.
+ case Intrinsic::vector_reduce_mul:
+ case Intrinsic::vector_reduce_add:
+ if (computeKnownBits(I, Depth + 1, Q).isNonZero())
+ return true;
+
+ if (auto *VecTy =
+ dyn_cast<FixedVectorType>(I->getOperand(0)->getType())) {
+ bool Overflow;
+ if (II->getIntrinsicID() == Intrinsic::vector_reduce_add) {
+ APInt NumEle(BitWidth, VecTy->getNumElements());
+          // If the element count can't be represented in BitWidth, the
+          // result is either known-zero or we won't get anything useful.
+ if (NumEle.getZExtValue() != VecTy->getNumElements())
+ break;
+ APInt MaxVal =
+ computeKnownBits(II->getArgOperand(0), Depth, Q).getMaxValue();
+ MaxVal = MaxVal.umul_ov(NumEle, Overflow);
+ } else {
+ APInt MaxVal =
+ computeKnownBits(II->getArgOperand(0), Depth, Q).getMaxValue();
+ APInt SingleVal = MaxVal;
+ for (unsigned i = 1, e = VecTy->getNumElements(); i < e; ++i) {
+ MaxVal = MaxVal.umul_ov(SingleVal, Overflow);
+ if (Overflow)
+ break;
+ }
+ }
+
+ if (Overflow)
+ break;
+ return isKnownNonZero(II->getArgOperand(0), Depth, Q);
+ }
+ break;
case Intrinsic::umax:
case Intrinsic::uadd_sat:
return isKnownNonZero(II->getArgOperand(1), DemandedElts, Depth, Q) ||
diff --git a/llvm/test/Transforms/InstCombine/known-bits.ll b/llvm/test/Transforms/InstCombine/known-bits.ll
index d210b19bb7faf2..4eca9ea87c6c4e 100644
--- a/llvm/test/Transforms/InstCombine/known-bits.ll
+++ b/llvm/test/Transforms/InstCombine/known-bits.ll
@@ -999,5 +999,165 @@ define i1 @extract_value_smul_fail(i8 %xx, i8 %yy) {
ret i1 %r
}
+define i8 @known_reduce_add(<2 x i8> %xx) {
+; CHECK-LABEL: @known_reduce_add(
+; CHECK-NEXT: ret i8 0
+;
+ %x = and <2 x i8> %xx, <i8 3, i8 3>
+ %v = call i8 @llvm.vector.reduce.add(<2 x i8> %x)
+ %r = and i8 %v, 8
+ ret i8 %r
+}
+
+define i8 @known_reduce_add_fail(<2 x i8> %xx) {
+; CHECK-LABEL: @known_reduce_add_fail(
+; CHECK-NEXT: [[X:%.*]] = and <2 x i8> [[XX:%.*]], <i8 3, i8 3>
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> [[X]])
+; CHECK-NEXT: [[R:%.*]] = and i8 [[V]], 4
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %x = and <2 x i8> %xx, <i8 3, i8 3>
+ %v = call i8 @llvm.vector.reduce.add(<2 x i8> %x)
+ %r = and i8 %v, 4
+ ret i8 %r
+}
+
+define i8 @known_reduce_add_fail2(<4 x i8> %xx) {
+; CHECK-LABEL: @known_reduce_add_fail2(
+; CHECK-NEXT: [[X:%.*]] = and <4 x i8> [[XX:%.*]], <i8 3, i8 3, i8 3, i8 3>
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> [[X]])
+; CHECK-NEXT: [[R:%.*]] = and i8 [[V]], 8
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %x = and <4 x i8> %xx, <i8 3, i8 3, i8 3, i8 3>
+ %v = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> %x)
+ %r = and i8 %v, 8
+ ret i8 %r
+}
+
+define i8 @known_reduce_add2(<4 x i8> %xx) {
+; CHECK-LABEL: @known_reduce_add2(
+; CHECK-NEXT: ret i8 0
+;
+ %x = and <4 x i8> %xx, <i8 3, i8 3, i8 3, i8 3>
+ %v = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> %x)
+ %r = and i8 %v, 32
+ ret i8 %r
+}
+
+define i8 @known_reduce_add3(<2 x i8> %xx) {
+; CHECK-LABEL: @known_reduce_add3(
+; CHECK-NEXT: ret i8 0
+;
+ %x = or <2 x i8> %xx, <i8 1, i8 1>
+ %v = call i8 @llvm.vector.reduce.add(<2 x i8> %x)
+ %r = and i8 %v, 1
+ ret i8 %r
+}
+
+define i8 @known_reduce_add33(<3 x i8> %xx) {
+; CHECK-LABEL: @known_reduce_add33(
+; CHECK-NEXT: ret i8 1
+;
+ %x = or <3 x i8> %xx, <i8 1, i8 1, i8 1>
+ %v = call i8 @llvm.vector.reduce.add.v3i8(<3 x i8> %x)
+ %r = and i8 %v, 1
+ ret i8 %r
+}
+
+define i8 @known_reduce_add34(<4 x i8> %xx) {
+; CHECK-LABEL: @known_reduce_add34(
+; CHECK-NEXT: ret i8 0
+;
+ %x = or <4 x i8> %xx, <i8 1, i8 1, i8 1, i8 1>
+ %v = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> %x)
+ %r = and i8 %v, 1
+ ret i8 %r
+}
+
+define i8 @known_reduce_add4(<2 x i8> %xx) {
+; CHECK-LABEL: @known_reduce_add4(
+; CHECK-NEXT: [[X0:%.*]] = and <2 x i8> [[XX:%.*]], <i8 2, i8 2>
+; CHECK-NEXT: [[X:%.*]] = or disjoint <2 x i8> [[X0]], <i8 1, i8 1>
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> [[X]])
+; CHECK-NEXT: [[R:%.*]] = and i8 [[V]], 2
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %x0 = and <2 x i8> %xx, <i8 3, i8 3>
+ %x = or <2 x i8> %x0, <i8 1, i8 1>
+ %v = call i8 @llvm.vector.reduce.add(<2 x i8> %x)
+ %r = and i8 %v, 2
+ ret i8 %r
+}
+
+define i8 @known_reduce_add4_fail(<2 x i8> %xx) {
+; CHECK-LABEL: @known_reduce_add4_fail(
+; CHECK-NEXT: [[X:%.*]] = or <2 x i8> [[XX:%.*]], <i8 1, i8 1>
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> [[X]])
+; CHECK-NEXT: [[R:%.*]] = and i8 [[V]], 2
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %x = or <2 x i8> %xx, <i8 1, i8 1>
+ %v = call i8 @llvm.vector.reduce.add(<2 x i8> %x)
+ %r = and i8 %v, 2
+ ret i8 %r
+}
+
+define i8 @known_reduce_mul(<2 x i8> %xx) {
+; CHECK-LABEL: @known_reduce_mul(
+; CHECK-NEXT: ret i8 0
+;
+ %x = and <2 x i8> %xx, <i8 3, i8 3>
+ %v = call i8 @llvm.vector.reduce.mul(<2 x i8> %x)
+ %r = and i8 %v, 16
+ ret i8 %r
+}
+
+define i8 @known_reduce_mul_fail(<2 x i8> %xx) {
+; CHECK-LABEL: @known_reduce_mul_fail(
+; CHECK-NEXT: [[X:%.*]] = and <2 x i8> [[XX:%.*]], <i8 3, i8 3>
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> [[X]])
+; CHECK-NEXT: [[R:%.*]] = and i8 [[V]], 8
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %x = and <2 x i8> %xx, <i8 3, i8 3>
+ %v = call i8 @llvm.vector.reduce.mul(<2 x i8> %x)
+ %r = and i8 %v, 8
+ ret i8 %r
+}
+
+define i8 @known_reduce_mul_fail2(<3 x i8> %xx) {
+; CHECK-LABEL: @known_reduce_mul_fail2(
+; CHECK-NEXT: [[X:%.*]] = and <3 x i8> [[XX:%.*]], <i8 3, i8 3, i8 3>
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.mul.v3i8(<3 x i8> [[X]])
+; CHECK-NEXT: [[R:%.*]] = and i8 [[V]], 32
+; CHECK-NEXT: ret i8 [[R]]
+;
+ %x = and <3 x i8> %xx, <i8 3, i8 3, i8 3>
+ %v = call i8 @llvm.vector.reduce.mul.v3i8(<3 x i8> %x)
+ %r = and i8 %v, 32
+ ret i8 %r
+}
+
+define i8 @known_reduce_mul2(<3 x i8> %xx) {
+; CHECK-LABEL: @known_reduce_mul2(
+; CHECK-NEXT: ret i8 0
+;
+ %x = and <3 x i8> %xx, <i8 3, i8 3, i8 3>
+ %v = call i8 @llvm.vector.reduce.mul.v3i8(<3 x i8> %x)
+ %r = and i8 %v, 64
+ ret i8 %r
+}
+
+define i8 @known_reduce_mul3(<2 x i8> %xx) {
+; CHECK-LABEL: @known_reduce_mul3(
+; CHECK-NEXT: ret i8 1
+;
+ %x = or <2 x i8> %xx, <i8 1, i8 1>
+ %v = call i8 @llvm.vector.reduce.mul(<2 x i8> %x)
+ %r = and i8 %v, 1
+ ret i8 %r
+}
+
declare void @use(i1)
declare void @sink(i8)
diff --git a/llvm/test/Transforms/InstSimplify/known-non-zero.ll b/llvm/test/Transforms/InstSimplify/known-non-zero.ll
index d9b8f5eed32390..f620ecd8d853dc 100644
--- a/llvm/test/Transforms/InstSimplify/known-non-zero.ll
+++ b/llvm/test/Transforms/InstSimplify/known-non-zero.ll
@@ -377,3 +377,108 @@ define <2 x i1> @insert_nonzero_any_idx_fail(<2 x i8> %xx, i8 %yy, i32 %idx) {
%r = icmp eq <2 x i8> %ins, zeroinitializer
ret <2 x i1> %r
}
+
+define i1 @nonzero_reduce_add(<2 x i8> %xx) {
+; CHECK-LABEL: @nonzero_reduce_add(
+; CHECK-NEXT: ret i1 false
+;
+ %x0 = and <2 x i8> %xx, <i8 3, i8 3>
+ %x = add <2 x i8> %x0, <i8 1, i8 1>
+ %v = call i8 @llvm.vector.reduce.add(<2 x i8> %x)
+ %r = icmp eq i8 %v, 0
+ ret i1 %r
+}
+
+define i1 @nonzero_reduce_add_fail(<2 x i8> %xx) {
+; CHECK-LABEL: @nonzero_reduce_add_fail(
+; CHECK-NEXT: [[X0:%.*]] = and <2 x i8> [[XX:%.*]], <i8 3, i8 3>
+; CHECK-NEXT: [[X:%.*]] = add <2 x i8> [[X0]], <i8 1, i8 0>
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> [[X]])
+; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[V]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %x0 = and <2 x i8> %xx, <i8 3, i8 3>
+ %x = add <2 x i8> %x0, <i8 1, i8 0>
+ %v = call i8 @llvm.vector.reduce.add(<2 x i8> %x)
+ %r = icmp eq i8 %v, 0
+ ret i1 %r
+}
+
+define i1 @nonzero_reduce_add_fail2(<2 x i8> %xx) {
+; CHECK-LABEL: @nonzero_reduce_add_fail2(
+; CHECK-NEXT: [[X:%.*]] = add nuw <2 x i8> [[XX:%.*]], <i8 1, i8 1>
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> [[X]])
+; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[V]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %x = add nuw <2 x i8> %xx, <i8 1, i8 1>
+ %v = call i8 @llvm.vector.reduce.add(<2 x i8> %x)
+ %r = icmp eq i8 %v, 0
+ ret i1 %r
+}
+
+define i1 @nonzero_reduce_add_fail3(<18 x i4> %xx) {
+; CHECK-LABEL: @nonzero_reduce_add_fail3(
+; CHECK-NEXT: [[X0:%.*]] = and <18 x i4> [[XX:%.*]], <i4 3, i4 3, i4 3, i4 3, i4 3, i4 3, i4 3, i4 3, i4 3, i4 3, i4 3, i4 3, i4 3, i4 3, i4 3, i4 3, i4 3, i4 3>
+; CHECK-NEXT: [[X:%.*]] = add <18 x i4> [[X0]], <i4 1, i4 1, i4 1, i4 1, i4 1, i4 1, i4 1, i4 1, i4 1, i4 1, i4 1, i4 1, i4 1, i4 1, i4 1, i4 1, i4 1, i4 1>
+; CHECK-NEXT: [[V:%.*]] = call i4 @llvm.vector.reduce.add.v18i4(<18 x i4> [[X]])
+; CHECK-NEXT: [[R:%.*]] = icmp eq i4 [[V]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %x0 = and <18 x i4> %xx, <i4 3, i4 3, i4 3, i4 3, i4 3, i4 3, i4 3, i4 3, i4 3, i4 3, i4 3, i4 3, i4 3, i4 3, i4 3, i4 3, i4 3, i4 3>
+ %x = add <18 x i4> %x0, <i4 1, i4 1, i4 1, i4 1, i4 1, i4 1, i4 1, i4 1, i4 1, i4 1, i4 1, i4 1, i4 1, i4 1, i4 1, i4 1, i4 1, i4 1>
+ %v = call i4 @llvm.vector.reduce.add.v18i4(<18 x i4> %x)
+ %r = icmp eq i4 %v, 0
+ ret i1 %r
+}
+
+define i1 @nonzero_reduce_mul(<2 x i8> %xx) {
+; CHECK-LABEL: @nonzero_reduce_mul(
+; CHECK-NEXT: ret i1 false
+;
+ %x0 = and <2 x i8> %xx, <i8 3, i8 3>
+ %x = add <2 x i8> %x0, <i8 1, i8 1>
+ %v = call i8 @llvm.vector.reduce.mul(<2 x i8> %x)
+ %r = icmp eq i8 %v, 0
+ ret i1 %r
+}
+
+define i1 @nonzero_reduce_mul2(<3 x i16> %xx) {
+; CHECK-LABEL: @nonzero_reduce_mul2(
+; CHECK-NEXT: ret i1 false
+;
+ %x0 = and <3 x i16> %xx, <i16 3, i16 3, i16 3>
+ %x = add <3 x i16> %x0, <i16 1, i16 1, i16 1>
+ %v = call i16 @llvm.vector.reduce.mul.v3i16(<3 x i16> %x)
+ %r = icmp eq i16 %v, 0
+ ret i1 %r
+}
+
+define i1 @nonzero_reduce_mul_fail(<2 x i8> %xx) {
+; CHECK-LABEL: @nonzero_reduce_mul_fail(
+; CHECK-NEXT: [[X0:%.*]] = and <2 x i8> [[XX:%.*]], <i8 15, i8 15>
+; CHECK-NEXT: [[X:%.*]] = add <2 x i8> [[X0]], <i8 1, i8 1>
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> [[X]])
+; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[V]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %x0 = and <2 x i8> %xx, <i8 15, i8 15>
+ %x = add <2 x i8> %x0, <i8 1, i8 1>
+ %v = call i8 @llvm.vector.reduce.mul(<2 x i8> %x)
+ %r = icmp eq i8 %v, 0
+ ret i1 %r
+}
+
+define i1 @nonzero_reduce_mul_fail2(<2 x i8> %xx) {
+; CHECK-LABEL: @nonzero_reduce_mul_fail2(
+; CHECK-NEXT: [[X:%.*]] = add nuw <2 x i8> [[XX:%.*]], <i8 1, i8 1>
+; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> [[X]])
+; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[V]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %x = add nuw <2 x i8> %xx, <i8 1, i8 1>
+ %v = call i8 @llvm.vector.reduce.mul(<2 x i8> %x)
+ %r = icmp eq i8 %v, 0
+ ret i1 %r
+}
+
``````````
</details>
https://github.com/llvm/llvm-project/pull/88410
More information about the llvm-commits mailing list