[llvm] InstCombine: improve optimizations for ceiling division with no overflow (PR #142869)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Jun 7 13:58:11 PDT 2025
https://github.com/gaynor-anthropic updated https://github.com/llvm/llvm-project/pull/142869
From ee8f7a53db2bc56e85d7e63a31228cf7e7453835 Mon Sep 17 00:00:00 2001
From: Alex Gaynor <gaynor at anthropic.com>
Date: Wed, 4 Jun 2025 18:34:08 -0400
Subject: [PATCH 1/5] InstCombine: improve optimizations for ceiling division
with no overflow
fixes #142497
---
.../InstCombine/InstCombineAddSub.cpp | 44 +++++
llvm/test/Transforms/InstCombine/add.ll | 157 ++++++++++++++++++
2 files changed, 201 insertions(+)
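(Note for reviewers, not part of the patch: a minimal standalone illustration of the equivalence this combine exploits, written in plain C++ for a divisor of 8. Writing x = 8*q + r with 0 <= r < 8, the split form computes q + (r != 0), which is ceil(x/8); the add form computes (x + 7) >> 3, which yields the same value whenever x + 7 does not wrap.)

    #include <cassert>
    #include <cstdint>

    // Quotient plus "round up if there is a remainder": (x >> 3) + ((x & 7) != 0).
    static uint32_t ceil_div_split(uint32_t x) {
      return (x >> 3) + ((x & 7u) != 0u);
    }

    // The cheaper form the combine produces: (x + 7) >> 3.
    // Only valid when x + 7 does not overflow.
    static uint32_t ceil_div_add(uint32_t x) {
      return (x + 7u) >> 3;
    }

    int main() {
      for (uint32_t x = 0; x < 1000; ++x)
        assert(ceil_div_split(x) == ceil_div_add(x));
      return 0;
    }
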
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index a9ac5ff9b9c89..16ebd7bceff63 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1787,6 +1787,50 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
if (Instruction *Ashr = foldAddToAshr(I))
return Ashr;
+ // Ceiling division by power-of-2:
+ // (X >> log2(N)) + zext(X & (N-1) != 0) --> (X + (N-1)) >> log2(N)
+ // This is valid when adding (N-1) to X doesn't overflow.
+ {
+ Value *X = nullptr, *Cmp = nullptr;
+ const APInt *ShiftAmt = nullptr, *Mask = nullptr;
+ CmpPredicate Pred;
+
+ // Match: (X >> C) + zext((X & Mask) != 0)
+ // or: zext((X & Mask) != 0) + (X >> C)
+ Value *Op0 = I.getOperand(0);
+ Value *Op1 = I.getOperand(1);
+
+ // Try matching with shift on left, zext on right
+ bool Matched = false;
+ if (match(Op0, m_LShr(m_Value(X), m_APInt(ShiftAmt))) &&
+ match(Op1, m_ZExt(m_Value(Cmp)))) {
+ Matched = match(Cmp, m_ICmp(Pred, m_And(m_Specific(X), m_APInt(Mask)),
+ m_ZeroInt()));
+ } else if (match(Op1, m_LShr(m_Value(X), m_APInt(ShiftAmt))) &&
+ match(Op0, m_ZExt(m_Value(Cmp)))) {
+ Matched = match(Cmp, m_ICmp(Pred, m_And(m_Specific(X), m_APInt(Mask)),
+ m_ZeroInt()));
+ }
+
+ if (Matched &&
+ Pred == ICmpInst::ICMP_NE &&
+ ShiftAmt && ShiftAmt->uge(1) && ShiftAmt->ult(BitWidth) &&
+ Mask && *Mask == (APInt(BitWidth, 1) << *ShiftAmt) - 1) {
+
+ // Check if X + Mask doesn't overflow
+ Constant *MaskC = ConstantInt::get(X->getType(), *Mask);
+ bool WillNotOverflowUnsigned = willNotOverflowUnsignedAdd(X, MaskC, I);
+
+ if (WillNotOverflowUnsigned) {
+ // (X + Mask) >> ShiftAmt
+ bool WillNotOverflowSigned = willNotOverflowSignedAdd(X, MaskC, I);
+ Value *Add = Builder.CreateAdd(X, MaskC, "", WillNotOverflowUnsigned,
+ WillNotOverflowSigned);
+ return BinaryOperator::CreateLShr(Add, ConstantInt::get(X->getType(), *ShiftAmt));
+ }
+ }
+ }
+
// (~X) + (~Y) --> -2 - (X + Y)
{
// To ensure we can save instructions we need to ensure that we consume both
diff --git a/llvm/test/Transforms/InstCombine/add.ll b/llvm/test/Transforms/InstCombine/add.ll
index 495f99824652d..d364082eab317 100644
--- a/llvm/test/Transforms/InstCombine/add.ll
+++ b/llvm/test/Transforms/InstCombine/add.ll
@@ -4273,4 +4273,161 @@ define i32 @fold_zext_nneg_add_const_fail2(i8 %x) {
}
declare void @llvm.assume(i1)
+declare i32 @llvm.ctlz.i32(i32, i1)
+
+; Ceiling division by power-of-2: (x >> log2(N)) + ((x & (N-1)) != 0) -> (x + (N-1)) >> log2(N)
+; This is only valid when x + (N-1) doesn't overflow
+
+; Test with known range that prevents overflow
+define noundef range(i32 0, 100) i32 @ceil_div_by_8_known_range(i32 noundef range(i32 0, 100) %x) {
+; CHECK-LABEL: @ceil_div_by_8_known_range(
+; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i32 [[X:%.*]], 7
+; CHECK-NEXT: [[R:%.*]] = lshr i32 [[TMP1]], 3
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %shr = lshr i32 %x, 3
+ %and = and i32 %x, 7
+ %cmp = icmp ne i32 %and, 0
+ %ext = zext i1 %cmp to i32
+ %r = add i32 %shr, %ext
+ ret i32 %r
+}
+
+; Test with the exact IR from the original testcase
+define noundef range(i32 0, 6) i32 @ceil_div_from_clz(i32 noundef %v) {
+; CHECK-LABEL: @ceil_div_from_clz(
+; CHECK-NEXT: [[CTLZ:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[V:%.*]], i1 false)
+; CHECK-NEXT: [[TMP1:%.*]] = sub nuw nsw i32 39, [[CTLZ]]
+; CHECK-NEXT: [[R:%.*]] = lshr i32 [[TMP1]], 3
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %ctlz = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 %v, i1 false)
+ %sub = sub nuw nsw i32 32, %ctlz
+ %shr = lshr i32 %sub, 3
+ %and = and i32 %sub, 7
+ %cmp = icmp ne i32 %and, 0
+ %ext = zext i1 %cmp to i32
+ %r = add nuw nsw i32 %shr, %ext
+ ret i32 %r
+}
+
+; Vector version with known range
+define <2 x i32> @ceil_div_by_8_vec_range(<2 x i32> range(i32 0, 1000) %x) {
+; CHECK-LABEL: @ceil_div_by_8_vec_range(
+; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw <2 x i32> [[X:%.*]], splat (i32 7)
+; CHECK-NEXT: [[R:%.*]] = lshr <2 x i32> [[TMP1]], splat (i32 3)
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %shr = lshr <2 x i32> %x, <i32 3, i32 3>
+ %and = and <2 x i32> %x, <i32 7, i32 7>
+ %cmp = icmp ne <2 x i32> %and, <i32 0, i32 0>
+ %ext = zext <2 x i1> %cmp to <2 x i32>
+ %r = add <2 x i32> %shr, %ext
+ ret <2 x i32> %r
+}
+
+; Ceiling division by 16 with known range
+define i16 @ceil_div_by_16_i16(i16 range(i16 0, 1000) %x) {
+; CHECK-LABEL: @ceil_div_by_16_i16(
+; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i16 [[X:%.*]], 15
+; CHECK-NEXT: [[R:%.*]] = lshr i16 [[TMP1]], 4
+; CHECK-NEXT: ret i16 [[R]]
+;
+ %shr = lshr i16 %x, 4
+ %and = and i16 %x, 15
+ %cmp = icmp ne i16 %and, 0
+ %ext = zext i1 %cmp to i16
+ %r = add i16 %shr, %ext
+ ret i16 %r
+}
+
+; Negative test: no overflow guarantee - should NOT optimize
+define i32 @ceil_div_by_8_no_overflow_info(i32 %x) {
+; CHECK-LABEL: @ceil_div_by_8_no_overflow_info(
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[X:%.*]], 3
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X]], 7
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
+; CHECK-NEXT: [[EXT:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT: [[R:%.*]] = add nuw nsw i32 [[SHR]], [[EXT]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %shr = lshr i32 %x, 3
+ %and = and i32 %x, 7
+ %cmp = icmp ne i32 %and, 0
+ %ext = zext i1 %cmp to i32
+ %r = add i32 %shr, %ext
+ ret i32 %r
+}
+
+; Negative test: nuw on final add doesn't help
+define i32 @ceil_div_by_8_only_nuw_on_add(i32 %x) {
+; CHECK-LABEL: @ceil_div_by_8_only_nuw_on_add(
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[X:%.*]], 3
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X]], 7
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
+; CHECK-NEXT: [[EXT:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT: [[R:%.*]] = add nuw nsw i32 [[SHR]], [[EXT]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %shr = lshr i32 %x, 3
+ %and = and i32 %x, 7
+ %cmp = icmp ne i32 %and, 0
+ %ext = zext i1 %cmp to i32
+ %r = add nuw i32 %shr, %ext ; nuw here doesn't prove x+7 won't overflow
+ ret i32 %r
+}
+
+; Negative test: wrong mask
+define i32 @ceil_div_wrong_mask(i32 range(i32 0, 100) %x) {
+; CHECK-LABEL: @ceil_div_wrong_mask(
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[X:%.*]], 3
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X]], 6
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
+; CHECK-NEXT: [[EXT:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT: [[R:%.*]] = add nuw nsw i32 [[SHR]], [[EXT]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %shr = lshr i32 %x, 3
+ %and = and i32 %x, 6 ; Wrong mask: should be 7
+ %cmp = icmp ne i32 %and, 0
+ %ext = zext i1 %cmp to i32
+ %r = add i32 %shr, %ext
+ ret i32 %r
+}
+
+; Negative test: wrong shift amount
+define i32 @ceil_div_wrong_shift(i32 range(i32 0, 100) %x) {
+; CHECK-LABEL: @ceil_div_wrong_shift(
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[X:%.*]], 4
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X]], 7
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
+; CHECK-NEXT: [[EXT:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT: [[R:%.*]] = add nuw nsw i32 [[SHR]], [[EXT]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %shr = lshr i32 %x, 4 ; Shift by 4, but mask is 7 (should be 15)
+ %and = and i32 %x, 7
+ %cmp = icmp ne i32 %and, 0
+ %ext = zext i1 %cmp to i32
+ %r = add i32 %shr, %ext
+ ret i32 %r
+}
+
+; Negative test: wrong comparison
+define i32 @ceil_div_wrong_cmp(i32 range(i32 0, 100) %x) {
+; CHECK-LABEL: @ceil_div_wrong_cmp(
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[X:%.*]], 3
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X]], 7
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
+; CHECK-NEXT: [[EXT:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT: [[R:%.*]] = add nuw nsw i32 [[SHR]], [[EXT]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %shr = lshr i32 %x, 3
+ %and = and i32 %x, 7
+ %cmp = icmp eq i32 %and, 0 ; Wrong: should be ne
+ %ext = zext i1 %cmp to i32
+ %r = add i32 %shr, %ext
+ ret i32 %r
+}
declare void @fake_func(i32)
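(Aside, not part of the patch: the overflow requirement exercised by the negative tests above is not hypothetical. A short check, assuming ordinary 32-bit unsigned wraparound, shows where the two forms diverge:)

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint32_t x = UINT32_MAX;                      // x + 7 wraps around
      uint32_t split = (x >> 3) + ((x & 7u) != 0u); // 0x1FFFFFFF + 1 = 0x20000000
      uint32_t added = (x + 7u) >> 3;               // wraps to 6, then >> 3 = 0
      std::printf("%u vs %u\n", (unsigned)split, (unsigned)added);
      return 0;                                     // prints "536870912 vs 0"
    }
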
From 2b55fe227d63472a27f6572fc26695e714ae72f4 Mon Sep 17 00:00:00 2001
From: Alex Gaynor <gaynor at anthropic.com>
Date: Thu, 5 Jun 2025 20:49:07 -0400
Subject: [PATCH 2/5] review feedback: make use of m_c_Add and m_LowBitMask
helpers
---
.../InstCombine/InstCombineAddSub.cpp | 25 ++++++-------------
1 file changed, 7 insertions(+), 18 deletions(-)
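(Note on the new popcount check, my reading rather than anything stated in the patch: m_LowBitMask only matches constants of the form 2^k - 1, so once it has matched, comparing Mask->popcount() against the shift amount is equivalent to the earlier explicit *Mask == (1 << ShiftAmt) - 1 test. A tiny standalone check of that arithmetic, using plain integers and C++20 std::popcount rather than APInt:)

    #include <bit>
    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t k = 1; k < 32; ++k) {
        uint32_t mask = (uint32_t(1) << k) - 1; // a "low-bit mask": 2^k - 1
        // popcount of a low-bit mask recovers k, so popcount(mask) == shift
        // amount is the same condition as mask == (1 << shift) - 1.
        assert(std::popcount(mask) == int(k));
      }
      return 0;
    }
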
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 16ebd7bceff63..a2f89708009f7 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1791,31 +1791,20 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
// (X >> log2(N)) + zext(X & (N-1) != 0) --> (X + (N-1)) >> log2(N)
// This is valid when adding (N-1) to X doesn't overflow.
{
- Value *X = nullptr, *Cmp = nullptr;
+ Value *X = nullptr, *Cmp = nullptr, *Shift = nullptr;
const APInt *ShiftAmt = nullptr, *Mask = nullptr;
CmpPredicate Pred;
// Match: (X >> C) + zext((X & Mask) != 0)
// or: zext((X & Mask) != 0) + (X >> C)
- Value *Op0 = I.getOperand(0);
- Value *Op1 = I.getOperand(1);
-
- // Try matching with shift on left, zext on right
- bool Matched = false;
- if (match(Op0, m_LShr(m_Value(X), m_APInt(ShiftAmt))) &&
- match(Op1, m_ZExt(m_Value(Cmp)))) {
- Matched = match(Cmp, m_ICmp(Pred, m_And(m_Specific(X), m_APInt(Mask)),
- m_ZeroInt()));
- } else if (match(Op1, m_LShr(m_Value(X), m_APInt(ShiftAmt))) &&
- match(Op0, m_ZExt(m_Value(Cmp)))) {
- Matched = match(Cmp, m_ICmp(Pred, m_And(m_Specific(X), m_APInt(Mask)),
- m_ZeroInt()));
- }
-
- if (Matched &&
+ if (match(&I, m_c_Add(m_Value(Shift), m_ZExt(m_Value(Cmp)))) &&
+ match(Shift, m_LShr(m_Value(X), m_APInt(ShiftAmt))) &&
+ Shift->hasOneUse() &&
+ match(Cmp, m_ICmp(Pred, m_And(m_Specific(X), m_LowBitMask(Mask)),
+ m_ZeroInt())) &&
Pred == ICmpInst::ICMP_NE &&
ShiftAmt && ShiftAmt->uge(1) && ShiftAmt->ult(BitWidth) &&
- Mask && *Mask == (APInt(BitWidth, 1) << *ShiftAmt) - 1) {
+ Mask && Mask->popcount() == *ShiftAmt) {
// Check if X + Mask doesn't overflow
Constant *MaskC = ConstantInt::get(X->getType(), *Mask);
From 1df936da385fa04b024401937fb45b30c6c32755 Mon Sep 17 00:00:00 2001
From: gaynor-anthropic <gaynor at anthropic.com>
Date: Sat, 7 Jun 2025 13:47:17 -0700
Subject: [PATCH 3/5] code review: apply suggestions
Co-authored-by: Yingwei Zheng <dtcxzyw at qq.com>
---
.../Transforms/InstCombine/InstCombineAddSub.cpp | 14 +++-----------
1 file changed, 3 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index a2f89708009f7..767acc3d3019e 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1797,14 +1797,8 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
// Match: (X >> C) + zext((X & Mask) != 0)
// or: zext((X & Mask) != 0) + (X >> C)
- if (match(&I, m_c_Add(m_Value(Shift), m_ZExt(m_Value(Cmp)))) &&
- match(Shift, m_LShr(m_Value(X), m_APInt(ShiftAmt))) &&
- Shift->hasOneUse() &&
- match(Cmp, m_ICmp(Pred, m_And(m_Specific(X), m_LowBitMask(Mask)),
- m_ZeroInt())) &&
- Pred == ICmpInst::ICMP_NE &&
- ShiftAmt && ShiftAmt->uge(1) && ShiftAmt->ult(BitWidth) &&
- Mask && Mask->popcount() == *ShiftAmt) {
+ if (match(&I, m_c_Add(m_OneUse(m_LShr(m_Value(X), m_APInt(ShiftAmt))), m_ZExt(m_SpecificICmp(ICmpInst::ICMP_NE, m_And(m_Deferred(X), m_LowBitMask(Mask)),
+ m_ZeroInt())))) && Mask->popcount() == *ShiftAmt) {
// Check if X + Mask doesn't overflow
Constant *MaskC = ConstantInt::get(X->getType(), *Mask);
@@ -1812,9 +1806,7 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
if (WillNotOverflowUnsigned) {
// (X + Mask) >> ShiftAmt
- bool WillNotOverflowSigned = willNotOverflowSignedAdd(X, MaskC, I);
- Value *Add = Builder.CreateAdd(X, MaskC, "", WillNotOverflowUnsigned,
- WillNotOverflowSigned);
+ Value *Add = Builder.CreateNUWAdd(X, MaskC);
return BinaryOperator::CreateLShr(Add, ConstantInt::get(X->getType(), *ShiftAmt));
}
}
From 339675ff867b313d5da9b179893e62a8e08b816e Mon Sep 17 00:00:00 2001
From: Alex Gaynor <gaynor at anthropic.com>
Date: Sat, 7 Jun 2025 16:49:39 -0400
Subject: [PATCH 4/5] clang-format
---
.../Transforms/InstCombine/InstCombineAddSub.cpp | 13 +++++++++----
1 file changed, 9 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 767acc3d3019e..12994e280f7fb 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1791,14 +1791,18 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
// (X >> log2(N)) + zext(X & (N-1) != 0) --> (X + (N-1)) >> log2(N)
// This is valid when adding (N-1) to X doesn't overflow.
{
- Value *X = nullptr, *Cmp = nullptr, *Shift = nullptr;
+ Value *X = nullptr;
const APInt *ShiftAmt = nullptr, *Mask = nullptr;
CmpPredicate Pred;
// Match: (X >> C) + zext((X & Mask) != 0)
// or: zext((X & Mask) != 0) + (X >> C)
- if (match(&I, m_c_Add(m_OneUse(m_LShr(m_Value(X), m_APInt(ShiftAmt))), m_ZExt(m_SpecificICmp(ICmpInst::ICMP_NE, m_And(m_Deferred(X), m_LowBitMask(Mask)),
- m_ZeroInt())))) && Mask->popcount() == *ShiftAmt) {
+ if (match(&I, m_c_Add(m_OneUse(m_LShr(m_Value(X), m_APInt(ShiftAmt))),
+ m_ZExt(m_SpecificICmp(
+ ICmpInst::ICMP_NE,
+ m_And(m_Deferred(X), m_LowBitMask(Mask)),
+ m_ZeroInt())))) &&
+ Mask->popcount() == *ShiftAmt) {
// Check if X + Mask doesn't overflow
Constant *MaskC = ConstantInt::get(X->getType(), *Mask);
@@ -1807,7 +1811,8 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
if (WillNotOverflowUnsigned) {
// (X + Mask) >> ShiftAmt
Value *Add = Builder.CreateNUWAdd(X, MaskC);
- return BinaryOperator::CreateLShr(Add, ConstantInt::get(X->getType(), *ShiftAmt));
+ return BinaryOperator::CreateLShr(
+ Add, ConstantInt::get(X->getType(), *ShiftAmt));
}
}
}
From 74fc5e1aed804e783fd20d86e80cd22ade7dde13 Mon Sep 17 00:00:00 2001
From: Alex Gaynor <gaynor at anthropic.com>
Date: Sat, 7 Jun 2025 16:58:00 -0400
Subject: [PATCH 5/5] add additional test cases
---
llvm/test/Transforms/InstCombine/add.ll | 84 +++++++++++++++++++++++++
1 file changed, 84 insertions(+)
diff --git a/llvm/test/Transforms/InstCombine/add.ll b/llvm/test/Transforms/InstCombine/add.ll
index d364082eab317..74c022b07a9a3 100644
--- a/llvm/test/Transforms/InstCombine/add.ll
+++ b/llvm/test/Transforms/InstCombine/add.ll
@@ -4430,4 +4430,88 @@ define i32 @ceil_div_wrong_cmp(i32 range(i32 0, 100) %x) {
%r = add i32 %shr, %ext
ret i32 %r
}
+
+; Multi-use test: all intermediate values have uses
+define i32 @ceil_div_multi_use(i32 range(i32 0, 100) %x) {
+; CHECK-LABEL: @ceil_div_multi_use(
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[X:%.*]], 3
+; CHECK-NEXT: call void @use_i32(i32 [[SHR]])
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X]], 7
+; CHECK-NEXT: call void @use_i32(i32 [[AND]])
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
+; CHECK-NEXT: [[EXT:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT: call void @use_i32(i32 [[EXT]])
+; CHECK-NEXT: [[R:%.*]] = add nuw nsw i32 [[SHR]], [[EXT]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %shr = lshr i32 %x, 3
+ call void @use_i32(i32 %shr)
+ %and = and i32 %x, 7
+ call void @use_i32(i32 %and)
+ %cmp = icmp ne i32 %and, 0
+ %ext = zext i1 %cmp to i32
+ call void @use_i32(i32 %ext)
+ %r = add i32 %shr, %ext
+ ret i32 %r
+}
+
+; Commuted test: add operands are swapped
+define i32 @ceil_div_commuted(i32 range(i32 0, 100) %x) {
+; CHECK-LABEL: @ceil_div_commuted(
+; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i32 [[X:%.*]], 7
+; CHECK-NEXT: [[R:%.*]] = lshr i32 [[TMP1]], 3
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %shr = lshr i32 %x, 3
+ %and = and i32 %x, 7
+ %cmp = icmp ne i32 %and, 0
+ %ext = zext i1 %cmp to i32
+ %r = add i32 %ext, %shr ; Operands swapped
+ ret i32 %r
+}
+
+; Commuted with multi-use
+define i32 @ceil_div_commuted_multi_use(i32 range(i32 0, 100) %x) {
+; CHECK-LABEL: @ceil_div_commuted_multi_use(
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[X:%.*]], 3
+; CHECK-NEXT: call void @use_i32(i32 [[SHR]])
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X]], 7
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
+; CHECK-NEXT: [[EXT:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT: call void @use_i32(i32 [[EXT]])
+; CHECK-NEXT: [[R:%.*]] = add nuw nsw i32 [[SHR]], [[EXT]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %shr = lshr i32 %x, 3
+ call void @use_i32(i32 %shr)
+ %and = and i32 %x, 7
+ %cmp = icmp ne i32 %and, 0
+ %ext = zext i1 %cmp to i32
+ call void @use_i32(i32 %ext)
+ %r = add i32 %ext, %shr ; Operands swapped
+ ret i32 %r
+}
+
+; Multi-use with vector type
+define <2 x i32> @ceil_div_vec_multi_use(<2 x i32> range(i32 0, 1000) %x) {
+; CHECK-LABEL: @ceil_div_vec_multi_use(
+; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i32> [[X:%.*]], splat (i32 3)
+; CHECK-NEXT: call void @use_vec(<2 x i32> [[SHR]])
+; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X]], splat (i32 7)
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[AND]], zeroinitializer
+; CHECK-NEXT: [[EXT:%.*]] = zext <2 x i1> [[CMP]] to <2 x i32>
+; CHECK-NEXT: [[R:%.*]] = add nuw nsw <2 x i32> [[SHR]], [[EXT]]
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %shr = lshr <2 x i32> %x, <i32 3, i32 3>
+ call void @use_vec(<2 x i32> %shr)
+ %and = and <2 x i32> %x, <i32 7, i32 7>
+ %cmp = icmp ne <2 x i32> %and, <i32 0, i32 0>
+ %ext = zext <2 x i1> %cmp to <2 x i32>
+ %r = add <2 x i32> %shr, %ext
+ ret <2 x i32> %r
+}
+
+declare void @use_i32(i32)
+declare void @use_vec(<2 x i32>)
declare void @fake_func(i32)