[llvm] [InstCombine] Fold umul.overflow(x, c1) | (x*c1 > c2) to x > c2/c1 (PR #147327)
Marius Kamp via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 9 11:12:53 PDT 2025
https://github.com/mskamp updated https://github.com/llvm/llvm-project/pull/147327
>From c40f4064092a9e753a1674278709dade8253ca43 Mon Sep 17 00:00:00 2001
From: Marius Kamp <msk at posteo.org>
Date: Sun, 6 Jul 2025 09:18:15 +0200
Subject: [PATCH 1/2] [InstCombine] Add Tests for umul.overflow(x, c1) | x*c1 >
c2; NFC
---
.../InstCombine/icmp_or_umul_overflow.ll | 268 ++++++++++++++++++
1 file changed, 268 insertions(+)
create mode 100644 llvm/test/Transforms/InstCombine/icmp_or_umul_overflow.ll
diff --git a/llvm/test/Transforms/InstCombine/icmp_or_umul_overflow.ll b/llvm/test/Transforms/InstCombine/icmp_or_umul_overflow.ll
new file mode 100644
index 0000000000000..fe61240763849
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/icmp_or_umul_overflow.ll
@@ -0,0 +1,268 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+
+declare void @use.i1(i1 %x)
+declare void @use.i64(i64 %x)
+declare void @use.i64i1({i64, i1} %x)
+
+define i1 @umul_greater_than_or_overflow_const(i64 %in) {
+; CHECK-LABEL: define i1 @umul_greater_than_or_overflow_const(
+; CHECK-SAME: i64 [[IN:%.*]]) {
+; CHECK-NEXT: [[TMP2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[IN]], i64 168)
+; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], -16
+; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]]
+; CHECK-NEXT: ret i1 [[TMP6]]
+;
+ %mwo = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %in, i64 168)
+ %mul = extractvalue { i64, i1 } %mwo, 0
+ %ovf = extractvalue { i64, i1 } %mwo, 1
+ %cmp = icmp ugt i64 %mul, -16
+ %ret = or i1 %ovf, %cmp
+ ret i1 %ret
+}
+
+define i1 @umul_greater_than_or_overflow_const_i8(i8 %in) {
+; CHECK-LABEL: define i1 @umul_greater_than_or_overflow_const_i8(
+; CHECK-SAME: i8 [[IN:%.*]]) {
+; CHECK-NEXT: [[TMP2:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[IN]], i8 24)
+; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i8, i1 } [[TMP2]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i8, i1 } [[TMP2]], 1
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i8 [[TMP3]], -16
+; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]]
+; CHECK-NEXT: ret i1 [[TMP6]]
+;
+ %mwo = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 %in, i8 24)
+ %mul = extractvalue { i8, i1 } %mwo, 0
+ %ovf = extractvalue { i8, i1 } %mwo, 1
+ %cmp = icmp ugt i8 %mul, -16
+ %ret = or i1 %ovf, %cmp
+ ret i1 %ret
+}
+
+define i1 @umul_greater_than_or_overflow_const_commuted(i64 %in) {
+; CHECK-LABEL: define i1 @umul_greater_than_or_overflow_const_commuted(
+; CHECK-SAME: i64 [[IN:%.*]]) {
+; CHECK-NEXT: [[TMP2:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[IN]], i64 48)
+; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], 9223372036854775800
+; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP5]], [[TMP4]]
+; CHECK-NEXT: ret i1 [[TMP6]]
+;
+ %mwo = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %in, i64 48)
+ %mul = extractvalue { i64, i1 } %mwo, 0
+ %ovf = extractvalue { i64, i1 } %mwo, 1
+ %cmp = icmp ugt i64 %mul, 9223372036854775800
+ %ret = or i1 %cmp, %ovf
+ ret i1 %ret
+}
+
+define i1 @umul_greater_than_or_overflow_const_disjoint(i64 %in) {
+; CHECK-LABEL: define i1 @umul_greater_than_or_overflow_const_disjoint(
+; CHECK-SAME: i64 [[IN:%.*]]) {
+; CHECK-NEXT: [[TMP2:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[IN]], i64 40)
+; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], 9223372036854775800
+; CHECK-NEXT: [[TMP6:%.*]] = or disjoint i1 [[TMP4]], [[TMP5]]
+; CHECK-NEXT: ret i1 [[TMP6]]
+;
+ %mwo = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %in, i64 40)
+ %mul = extractvalue { i64, i1 } %mwo, 0
+ %ovf = extractvalue { i64, i1 } %mwo, 1
+ %cmp = icmp ugt i64 %mul, 9223372036854775800
+ %ret = or disjoint i1 %ovf, %cmp
+ ret i1 %ret
+}
+
+define i1 @umul_greater_than_or_overflow_const_multiuse_mul(i64 %in) {
+; CHECK-LABEL: define i1 @umul_greater_than_or_overflow_const_multiuse_mul(
+; CHECK-SAME: i64 [[IN:%.*]]) {
+; CHECK-NEXT: [[MWO:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[IN]], i64 48)
+; CHECK-NEXT: [[MUL:%.*]] = extractvalue { i64, i1 } [[MWO]], 0
+; CHECK-NEXT: [[OVF:%.*]] = extractvalue { i64, i1 } [[MWO]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[MUL]], 9223372036854775800
+; CHECK-NEXT: [[RET:%.*]] = or i1 [[OVF]], [[CMP]]
+; CHECK-NEXT: tail call void @use.i64(i64 [[MUL]])
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %mwo = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %in, i64 48)
+ %mul = extractvalue { i64, i1 } %mwo, 0
+ %ovf = extractvalue { i64, i1 } %mwo, 1
+ %cmp = icmp ugt i64 %mul, 9223372036854775800
+ %ret = or i1 %ovf, %cmp
+ tail call void @use.i64(i64 %mul)
+ ret i1 %ret
+}
+
+define i1 @umul_greater_than_or_overflow_const_multiuse_overflow(i64 %in) {
+; CHECK-LABEL: define i1 @umul_greater_than_or_overflow_const_multiuse_overflow(
+; CHECK-SAME: i64 [[IN:%.*]]) {
+; CHECK-NEXT: [[MWO:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[IN]], i64 48)
+; CHECK-NEXT: [[MUL:%.*]] = extractvalue { i64, i1 } [[MWO]], 0
+; CHECK-NEXT: [[OVF:%.*]] = extractvalue { i64, i1 } [[MWO]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[MUL]], 9223372036854775800
+; CHECK-NEXT: [[RET:%.*]] = or i1 [[OVF]], [[CMP]]
+; CHECK-NEXT: tail call void @use.i1(i1 [[OVF]])
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %mwo = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %in, i64 48)
+ %mul = extractvalue { i64, i1 } %mwo, 0
+ %ovf = extractvalue { i64, i1 } %mwo, 1
+ %cmp = icmp ugt i64 %mul, 9223372036854775800
+ %ret = or i1 %ovf, %cmp
+ tail call void @use.i1(i1 %ovf)
+ ret i1 %ret
+}
+
+define <2 x i1> @umul_greater_than_or_overflow_const_vector_splat(<2 x i64> %in) {
+; CHECK-LABEL: define <2 x i1> @umul_greater_than_or_overflow_const_vector_splat(
+; CHECK-SAME: <2 x i64> [[IN:%.*]]) {
+; CHECK-NEXT: [[TMP2:%.*]] = tail call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> [[IN]], <2 x i64> splat (i64 1424))
+; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <2 x i64>, <2 x i1> } [[TMP2]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <2 x i64>, <2 x i1> } [[TMP2]], 1
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt <2 x i64> [[TMP3]], splat (i64 9223372036854775800)
+; CHECK-NEXT: [[TMP6:%.*]] = or <2 x i1> [[TMP4]], [[TMP5]]
+; CHECK-NEXT: ret <2 x i1> [[TMP6]]
+;
+ %mwo = tail call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> %in, <2 x i64> <i64 1424, i64 1424>)
+ %mul = extractvalue { <2 x i64>, <2 x i1> } %mwo, 0
+ %ovf = extractvalue { <2 x i64>, <2 x i1> } %mwo, 1
+ %cmp = icmp ugt <2 x i64> %mul, <i64 9223372036854775800, i64 9223372036854775800>
+ %ret = or <2 x i1> %ovf, %cmp
+ ret <2 x i1> %ret
+}
+
+; Negative test
+define <4 x i1> @umul_greater_than_or_overflow_const_vector_non_splat_negative(<4 x i64> %in) {
+; CHECK-LABEL: define <4 x i1> @umul_greater_than_or_overflow_const_vector_non_splat_negative(
+; CHECK-SAME: <4 x i64> [[IN:%.*]]) {
+; CHECK-NEXT: [[MWO:%.*]] = tail call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> [[IN]], <4 x i64> <i64 24, i64 1424, i64 0, i64 -1>)
+; CHECK-NEXT: [[MUL:%.*]] = extractvalue { <4 x i64>, <4 x i1> } [[MWO]], 0
+; CHECK-NEXT: [[OVF:%.*]] = extractvalue { <4 x i64>, <4 x i1> } [[MWO]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <4 x i64> [[MUL]], <i64 9223372036854775000, i64 9223372036854775800, i64 -16, i64 -16>
+; CHECK-NEXT: [[RET:%.*]] = or <4 x i1> [[OVF]], [[CMP]]
+; CHECK-NEXT: ret <4 x i1> [[RET]]
+;
+  %mwo = tail call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> %in, <4 x i64> <i64 24, i64 1424, i64 0, i64 -1>)
+ %mul = extractvalue { <4 x i64>, <4 x i1> } %mwo, 0
+ %ovf = extractvalue { <4 x i64>, <4 x i1> } %mwo, 1
+ %cmp = icmp ugt <4 x i64> %mul, <i64 9223372036854775000, i64 9223372036854775800, i64 -16, i64 -16>
+ %ret = or <4 x i1> %ovf, %cmp
+ ret <4 x i1> %ret
+}
+
+; Negative test
+define <2 x i1> @umul_greater_than_or_overflow_const_vector_poison_non_splat_negative(<2 x i64> %in) {
+; CHECK-LABEL: define <2 x i1> @umul_greater_than_or_overflow_const_vector_poison_non_splat_negative(
+; CHECK-SAME: <2 x i64> [[IN:%.*]]) {
+; CHECK-NEXT: [[MWO:%.*]] = tail call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> [[IN]], <2 x i64> <i64 poison, i64 1424>)
+; CHECK-NEXT: [[MUL:%.*]] = extractvalue { <2 x i64>, <2 x i1> } [[MWO]], 0
+; CHECK-NEXT: [[OVF:%.*]] = extractvalue { <2 x i64>, <2 x i1> } [[MWO]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i64> [[MUL]], <i64 9223372036854775800, i64 poison>
+; CHECK-NEXT: [[RET:%.*]] = or <2 x i1> [[OVF]], [[CMP]]
+; CHECK-NEXT: ret <2 x i1> [[RET]]
+;
+ %mwo = tail call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> %in, <2 x i64> <i64 poison, i64 1424>)
+ %mul = extractvalue { <2 x i64>, <2 x i1> } %mwo, 0
+ %ovf = extractvalue { <2 x i64>, <2 x i1> } %mwo, 1
+ %cmp = icmp ugt <2 x i64> %mul, <i64 9223372036854775800, i64 poison>
+ %ret = or <2 x i1> %ovf, %cmp
+ ret <2 x i1> %ret
+}
+
+; Negative test
+define i1 @umul_greater_than_and_overflow_const_negative(i64 %in) {
+; CHECK-LABEL: define i1 @umul_greater_than_and_overflow_const_negative(
+; CHECK-SAME: i64 [[IN:%.*]]) {
+; CHECK-NEXT: [[TMP2:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[IN]], i64 48)
+; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP3]], 9223372036854775800
+; CHECK-NEXT: [[TMP6:%.*]] = and i1 [[TMP4]], [[TMP5]]
+; CHECK-NEXT: ret i1 [[TMP6]]
+;
+ %mwo = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %in, i64 48)
+ %mul = extractvalue { i64, i1 } %mwo, 0
+ %ovf = extractvalue { i64, i1 } %mwo, 1
+ %cmp = icmp ult i64 %mul, 9223372036854775800
+ %ret = and i1 %ovf, %cmp
+ ret i1 %ret
+}
+
+; Negative test
+define i1 @umul_less_than_or_overflow_const_negative(i64 %in) {
+; CHECK-LABEL: define i1 @umul_less_than_or_overflow_const_negative(
+; CHECK-SAME: i64 [[IN:%.*]]) {
+; CHECK-NEXT: [[TMP2:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[IN]], i64 48)
+; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP3]], 9223372036854775800
+; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]]
+; CHECK-NEXT: ret i1 [[TMP6]]
+;
+ %mwo = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %in, i64 48)
+ %mul = extractvalue { i64, i1 } %mwo, 0
+ %ovf = extractvalue { i64, i1 } %mwo, 1
+ %cmp = icmp ult i64 %mul, 9223372036854775800
+ %ret = or i1 %ovf, %cmp
+ ret i1 %ret
+}
+
+; Negative test
+define i1 @umul_greater_than_or_overflow_const_multiuse_icmp_negative(i64 %in) {
+; CHECK-LABEL: define i1 @umul_greater_than_or_overflow_const_multiuse_icmp_negative(
+; CHECK-SAME: i64 [[IN:%.*]]) {
+; CHECK-NEXT: [[TMP2:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[IN]], i64 48)
+; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], 9223372036854775800
+; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]]
+; CHECK-NEXT: tail call void @use.i1(i1 [[TMP5]])
+; CHECK-NEXT: ret i1 [[TMP6]]
+;
+ %mwo = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %in, i64 48)
+ %mul = extractvalue { i64, i1 } %mwo, 0
+ %ovf = extractvalue { i64, i1 } %mwo, 1
+ %cmp = icmp ugt i64 %mul, 9223372036854775800
+ %ret = or i1 %ovf, %cmp
+ tail call void @use.i1(i1 %cmp)
+ ret i1 %ret
+}
+
+; Negative test
+define i1 @umul_greater_than_or_overflow_const_multiuse_umul_call_negative(i64 %in) {
+; CHECK-LABEL: define i1 @umul_greater_than_or_overflow_const_multiuse_umul_call_negative(
+; CHECK-SAME: i64 [[IN:%.*]]) {
+; CHECK-NEXT: [[TMP2:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[IN]], i64 48)
+; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], 9223372036854775800
+; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]]
+; CHECK-NEXT: tail call void @use.i64i1({ i64, i1 } [[TMP2]])
+; CHECK-NEXT: ret i1 [[TMP6]]
+;
+ %mwo = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %in, i64 48)
+ %mul = extractvalue { i64, i1 } %mwo, 0
+ %ovf = extractvalue { i64, i1 } %mwo, 1
+ %cmp = icmp ugt i64 %mul, 9223372036854775800
+ %ret = or i1 %ovf, %cmp
+ tail call void @use.i64i1({ i64, i1 } %mwo)
+ ret i1 %ret
+}
+
+; Negative test. The umul.with.overflow call should already have been folded
+; away before this transform runs.
+define i1 @umul_greater_than_or_overflow_const_0_negative(i64 %in) {
+; CHECK-LABEL: define i1 @umul_greater_than_or_overflow_const_0_negative(
+; CHECK-SAME: i64 [[IN:%.*]]) {
+; CHECK-NEXT: ret i1 false
+;
+ %mwo = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %in, i64 0)
+ %mul = extractvalue { i64, i1 } %mwo, 0
+ %ovf = extractvalue { i64, i1 } %mwo, 1
+ %cmp = icmp ugt i64 %mul, 0
+ %ret = or i1 %ovf, %cmp
+ ret i1 %ret
+}
>From 131b14f54b2dc19bb2706f7dd54d8b7e1e70e07d Mon Sep 17 00:00:00 2001
From: Marius Kamp <msk at posteo.org>
Date: Sun, 6 Jul 2025 11:32:11 +0200
Subject: [PATCH 2/2] [InstCombine] Fold umul.overflow(x, c1) | (x*c1 > c2) to
x > c2/c1
The motivation for this pattern is to check whether the product of a
variable and a constant would be mathematically (i.e., as integer
numbers rather than bit vectors) greater than a given constant bound.
The pattern appears when compiling several Rust projects (it seems to
originate from the `smallvec` crate, but I have not verified this
further).

Unless `c1` is `0`, we can transform this pattern into `x > c2/c1`,
with all operations working on unsigned integers. Because the division
`c2/c1` is undefined if an element of `c1` is `0`, which cannot be
ruled out for non-splat vectors, the transform is implemented only for
scalars and splat vectors.
Alive proof: https://alive2.llvm.org/ce/z/LawTkm
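
As an editorial illustration of the arithmetic behind the Alive proof (a
standalone sketch, not part of the patch; the names `Prod`, `Overflow`,
`Cmp`, and `Folded` are hypothetical), the equivalence can be verified
exhaustively at 8 bits in plain C++:

    // Exhaustively check, for i8 and all c1 != 0, c2, x:
    //   umul.overflow(x, c1) | ((x*c1 mod 2^8) u> c2)  ==  x u> c2/c1
    #include <cassert>

    int main() {
      for (unsigned C1 = 1; C1 < 256; ++C1)
        for (unsigned C2 = 0; C2 < 256; ++C2)
          for (unsigned X = 0; X < 256; ++X) {
            unsigned Prod = X * C1;         // exact mathematical product
            bool Overflow = Prod > 0xFF;    // the i8 overflow flag
            bool Cmp = (Prod & 0xFF) > C2;  // icmp ugt on truncated result
            bool Folded = X > C2 / C1;      // the replacement comparison
            assert((Overflow || Cmp) == Folded);
          }
      return 0;
    }

The key steps are that an overflowed product always exceeds `c2`, so the
disjunction is equivalent to the mathematical product `x*c1` exceeding
`c2`, and that `x*c1 > c2` over the integers holds exactly when
`x > floor(c2/c1)`.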
Closes #142674
---
.../InstCombine/InstCombineAndOrXor.cpp | 29 +++++++++++
.../InstCombine/icmp_or_umul_overflow.ll | 52 +++++--------------
2 files changed, 42 insertions(+), 39 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 706cb828acc63..18f711ecc43e0 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -3717,6 +3717,30 @@ Value *InstCombinerImpl::reassociateDisjointOr(Value *LHS, Value *RHS) {
return nullptr;
}
+/// Fold Res, Overflow = (umul.with.overflow x c1); (or Overflow (ugt Res c2))
+/// --> (ugt x (c2/c1)). This fold checks whether the product of two
+/// unsigned numbers (one of which is a constant) is mathematically greater
+/// than a second constant.
+static Value *foldOrUnsignedUMulOverflowICmp(BinaryOperator &I,
+ InstCombiner::BuilderTy &Builder,
+ const DataLayout &DL) {
+ const WithOverflowInst *WO;
+ const Value *WOV;
+ const APInt *C1, *C2;
+ if (match(&I,
+ m_c_Or(m_ExtractValue<1>(
+ m_CombineAnd(m_WithOverflowInst(WO), m_Value(WOV))),
+ m_OneUse(m_SpecificCmp(ICmpInst::ICMP_UGT,
+ m_ExtractValue<0>(m_Deferred(WOV)),
+ m_APInt(C2))))) &&
+ WO->getIntrinsicID() == Intrinsic::umul_with_overflow &&
+ match(WO->getRHS(), m_APInt(C1)) && !C1->isZero() && WO->hasNUses(2)) {
+ Constant *NewC = ConstantInt::get(WO->getLHS()->getType(), C2->udiv(*C1));
+ return Builder.CreateICmp(ICmpInst::ICMP_UGT, WO->getLHS(), NewC);
+ }
+ return nullptr;
+}
+
// FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
// here. We should standardize that construct where it is needed or choose some
// other way to ensure that commutated variants of patterns are not missed.
@@ -4150,6 +4174,11 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
}
}
+ // Try to fold the pattern "Overflow | icmp pred Res, C2" into a single
+ // comparison instruction for umul.with.overflow.
+ if (Value *R = foldOrUnsignedUMulOverflowICmp(I, Builder, DL))
+ return replaceInstUsesWith(I, R);
+
// (~x) | y --> ~(x & (~y)) iff that gets rid of inversions
if (sinkNotIntoOtherHandOfLogicalOp(I))
return &I;
diff --git a/llvm/test/Transforms/InstCombine/icmp_or_umul_overflow.ll b/llvm/test/Transforms/InstCombine/icmp_or_umul_overflow.ll
index fe61240763849..d7fd3ff7f84ce 100644
--- a/llvm/test/Transforms/InstCombine/icmp_or_umul_overflow.ll
+++ b/llvm/test/Transforms/InstCombine/icmp_or_umul_overflow.ll
@@ -8,11 +8,7 @@ declare void @use.i64i1({i64, i1} %x)
define i1 @umul_greater_than_or_overflow_const(i64 %in) {
; CHECK-LABEL: define i1 @umul_greater_than_or_overflow_const(
; CHECK-SAME: i64 [[IN:%.*]]) {
-; CHECK-NEXT: [[TMP2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[IN]], i64 168)
-; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
-; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], -16
-; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]]
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i64 [[IN]], 109802048057794950
; CHECK-NEXT: ret i1 [[TMP6]]
;
%mwo = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %in, i64 168)
@@ -26,11 +22,7 @@ define i1 @umul_greater_than_or_overflow_const(i64 %in) {
define i1 @umul_greater_than_or_overflow_const_i8(i8 %in) {
; CHECK-LABEL: define i1 @umul_greater_than_or_overflow_const_i8(
; CHECK-SAME: i8 [[IN:%.*]]) {
-; CHECK-NEXT: [[TMP2:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[IN]], i8 24)
-; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i8, i1 } [[TMP2]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i8, i1 } [[TMP2]], 1
-; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i8 [[TMP3]], -16
-; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]]
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i8 [[IN]], 10
; CHECK-NEXT: ret i1 [[TMP6]]
;
%mwo = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 %in, i8 24)
@@ -44,11 +36,7 @@ define i1 @umul_greater_than_or_overflow_const_i8(i8 %in) {
define i1 @umul_greater_than_or_overflow_const_commuted(i64 %in) {
; CHECK-LABEL: define i1 @umul_greater_than_or_overflow_const_commuted(
; CHECK-SAME: i64 [[IN:%.*]]) {
-; CHECK-NEXT: [[TMP2:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[IN]], i64 48)
-; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
-; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], 9223372036854775800
-; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP5]], [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i64 [[IN]], 192153584101141162
; CHECK-NEXT: ret i1 [[TMP6]]
;
%mwo = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %in, i64 48)
@@ -62,11 +50,7 @@ define i1 @umul_greater_than_or_overflow_const_commuted(i64 %in) {
define i1 @umul_greater_than_or_overflow_const_disjoint(i64 %in) {
; CHECK-LABEL: define i1 @umul_greater_than_or_overflow_const_disjoint(
; CHECK-SAME: i64 [[IN:%.*]]) {
-; CHECK-NEXT: [[TMP2:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[IN]], i64 40)
-; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
-; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], 9223372036854775800
-; CHECK-NEXT: [[TMP6:%.*]] = or disjoint i1 [[TMP4]], [[TMP5]]
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i64 [[IN]], 230584300921369395
; CHECK-NEXT: ret i1 [[TMP6]]
;
%mwo = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %in, i64 40)
@@ -80,11 +64,8 @@ define i1 @umul_greater_than_or_overflow_const_disjoint(i64 %in) {
define i1 @umul_greater_than_or_overflow_const_multiuse_mul(i64 %in) {
; CHECK-LABEL: define i1 @umul_greater_than_or_overflow_const_multiuse_mul(
; CHECK-SAME: i64 [[IN:%.*]]) {
-; CHECK-NEXT: [[MWO:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[IN]], i64 48)
-; CHECK-NEXT: [[MUL:%.*]] = extractvalue { i64, i1 } [[MWO]], 0
-; CHECK-NEXT: [[OVF:%.*]] = extractvalue { i64, i1 } [[MWO]], 1
-; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[MUL]], 9223372036854775800
-; CHECK-NEXT: [[RET:%.*]] = or i1 [[OVF]], [[CMP]]
+; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[IN]], 48
+; CHECK-NEXT: [[RET:%.*]] = icmp ugt i64 [[IN]], 192153584101141162
; CHECK-NEXT: tail call void @use.i64(i64 [[MUL]])
; CHECK-NEXT: ret i1 [[RET]]
;
@@ -100,11 +81,8 @@ define i1 @umul_greater_than_or_overflow_const_multiuse_mul(i64 %in) {
define i1 @umul_greater_than_or_overflow_const_multiuse_overflow(i64 %in) {
; CHECK-LABEL: define i1 @umul_greater_than_or_overflow_const_multiuse_overflow(
; CHECK-SAME: i64 [[IN:%.*]]) {
-; CHECK-NEXT: [[MWO:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[IN]], i64 48)
-; CHECK-NEXT: [[MUL:%.*]] = extractvalue { i64, i1 } [[MWO]], 0
-; CHECK-NEXT: [[OVF:%.*]] = extractvalue { i64, i1 } [[MWO]], 1
-; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[MUL]], 9223372036854775800
-; CHECK-NEXT: [[RET:%.*]] = or i1 [[OVF]], [[CMP]]
+; CHECK-NEXT: [[OVF:%.*]] = icmp ugt i64 [[IN]], 384307168202282325
+; CHECK-NEXT: [[RET:%.*]] = icmp ugt i64 [[IN]], 192153584101141162
; CHECK-NEXT: tail call void @use.i1(i1 [[OVF]])
; CHECK-NEXT: ret i1 [[RET]]
;
@@ -120,11 +98,7 @@ define i1 @umul_greater_than_or_overflow_const_multiuse_overflow(i64 %in) {
define <2 x i1> @umul_greater_than_or_overflow_const_vector_splat(<2 x i64> %in) {
; CHECK-LABEL: define <2 x i1> @umul_greater_than_or_overflow_const_vector_splat(
; CHECK-SAME: <2 x i64> [[IN:%.*]]) {
-; CHECK-NEXT: [[TMP2:%.*]] = tail call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> [[IN]], <2 x i64> splat (i64 1424))
-; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <2 x i64>, <2 x i1> } [[TMP2]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <2 x i64>, <2 x i1> } [[TMP2]], 1
-; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt <2 x i64> [[TMP3]], splat (i64 9223372036854775800)
-; CHECK-NEXT: [[TMP6:%.*]] = or <2 x i1> [[TMP4]], [[TMP5]]
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt <2 x i64> [[IN]], splat (i64 6477087104532848)
; CHECK-NEXT: ret <2 x i1> [[TMP6]]
;
%mwo = tail call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> %in, <2 x i64> <i64 1424, i64 1424>)
@@ -237,10 +211,10 @@ define i1 @umul_greater_than_or_overflow_const_multiuse_umul_call_negative(i64 %
; CHECK-LABEL: define i1 @umul_greater_than_or_overflow_const_multiuse_umul_call_negative(
; CHECK-SAME: i64 [[IN:%.*]]) {
; CHECK-NEXT: [[TMP2:%.*]] = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[IN]], i64 48)
-; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
-; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP3]], 9223372036854775800
-; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]]
+; CHECK-NEXT: [[MUL:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0
+; CHECK-NEXT: [[OVF:%.*]] = extractvalue { i64, i1 } [[TMP2]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[MUL]], 9223372036854775800
+; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[OVF]], [[CMP]]
; CHECK-NEXT: tail call void @use.i64i1({ i64, i1 } [[TMP2]])
; CHECK-NEXT: ret i1 [[TMP6]]
;