[llvm] [InstCombine] Fold the `log2_ceil` idiom (PR #76661)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Dec 31 07:34:15 PST 2023
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Yingwei Zheng (dtcxzyw)
<details>
<summary>Changes</summary>
This patch folds the `log2_ceil` idiom:
```
(BW - 1 - ctlz(A)) + (is_power2(A) ? 0 : 1) ->
zext(ctpop(A) >u/!= 1) + (ctlz(A, true) ^ (BW - 1)) (canonical form) ->
BW - ctlz(A - 1, false)
```
Alive2: https://alive2.llvm.org/ce/z/6mSbdi
NOTE: I don't know where to add one-use constraints because this pattern is too complex. But I don't see any regression in my benchmark.
---
Full diff: https://github.com/llvm/llvm-project/pull/76661.diff
2 Files Affected:
- (modified) llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp (+22)
- (added) llvm/test/Transforms/InstCombine/fold-log2-ceil-idiom.ll (+221)
``````````diff
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 556fde37efeb2d..a529d2234f9689 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1730,6 +1730,28 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
I, Builder.CreateIntrinsic(Intrinsic::ctpop, {I.getType()},
{Builder.CreateOr(A, B)}));
+ // Fold the log2_ceil idiom:
+ // zext(ctpop(A, true) >u/!= 1) + (ctlz(A) ^ (BW - 1)) --> BW - ctlz(A - 1)
+ // TODO: Add one-use checks?
+ const APInt *XorC;
+ if (match(
+ &I,
+ m_c_Add(m_ZExt(m_ICmp(Pred, m_Intrinsic<Intrinsic::ctpop>(m_Value(A)),
+ m_One())),
+ m_ZExtOrSelf(m_Xor(m_TruncOrSelf(m_Intrinsic<Intrinsic::ctlz>(
+ m_Deferred(A), m_One())),
+ m_APInt(XorC))))) &&
+ (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_NE) &&
+ *XorC == A->getType()->getScalarSizeInBits() - 1) {
+ Value *Sub = Builder.CreateAdd(A, Constant::getAllOnesValue(A->getType()));
+ Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {A->getType()},
+ {Sub, Builder.getFalse()});
+ Value *Ret = Builder.CreateSub(
+ ConstantInt::get(A->getType(), A->getType()->getScalarSizeInBits()),
+ Ctlz, "", /*HasNUW*/ true, /*HasNSW*/ true);
+ return replaceInstUsesWith(I, Builder.CreateZExtOrTrunc(Ret, I.getType()));
+ }
+
if (Instruction *Res = foldSquareSumInt(I))
return Res;
diff --git a/llvm/test/Transforms/InstCombine/fold-log2-ceil-idiom.ll b/llvm/test/Transforms/InstCombine/fold-log2-ceil-idiom.ll
new file mode 100644
index 00000000000000..7e5977d67195a4
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fold-log2-ceil-idiom.ll
@@ -0,0 +1,221 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+define i32 @log2_ceil_idiom(i32 %x) {
+; CHECK-LABEL: define i32 @log2_ceil_idiom(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X]], -1
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[TMP1]], i1 false), !range [[RNG0:![0-9]+]]
+; CHECK-NEXT: [[RET:%.*]] = sub nuw nsw i32 32, [[TMP2]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+ %xor = xor i32 %ctlz, 31
+ %ctpop = tail call i32 @llvm.ctpop.i32(i32 %x)
+ %cmp = icmp ugt i32 %ctpop, 1
+ %zext = zext i1 %cmp to i32
+ %ret = add i32 %xor, %zext
+ ret i32 %ret
+}
+
+define i5 @log2_ceil_idiom_trunc(i32 %x) {
+; CHECK-LABEL: define i5 @log2_ceil_idiom_trunc(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X]], -1
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[TMP1]], i1 false), !range [[RNG0]]
+; CHECK-NEXT: [[TMP3:%.*]] = sub nsw i32 0, [[TMP2]]
+; CHECK-NEXT: [[RET:%.*]] = trunc i32 [[TMP3]] to i5
+; CHECK-NEXT: ret i5 [[RET]]
+;
+ %ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+ %trunc = trunc i32 %ctlz to i5
+ %xor = xor i5 %trunc, 31
+ %ctpop = tail call i32 @llvm.ctpop.i32(i32 %x)
+ %cmp = icmp ugt i32 %ctpop, 1
+ %zext = zext i1 %cmp to i5
+ %ret = add i5 %xor, %zext
+ ret i5 %ret
+}
+
+define i64 @log2_ceil_idiom_zext(i32 %x) {
+; CHECK-LABEL: define i64 @log2_ceil_idiom_zext(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X]], -1
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[TMP1]], i1 false), !range [[RNG0]]
+; CHECK-NEXT: [[TMP3:%.*]] = sub nuw nsw i32 32, [[TMP2]]
+; CHECK-NEXT: [[RET:%.*]] = zext i32 [[TMP3]] to i64
+; CHECK-NEXT: ret i64 [[RET]]
+;
+ %ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+ %xor = xor i32 %ctlz, 31
+ %ext = zext nneg i32 %xor to i64
+ %ctpop = tail call i32 @llvm.ctpop.i32(i32 %x)
+ %cmp = icmp ugt i32 %ctpop, 1
+ %zext = zext i1 %cmp to i64
+ %ret = add i64 %ext, %zext
+ ret i64 %ret
+}
+
+define i32 @log2_ceil_idiom_power2_test2(i32 %x) {
+; CHECK-LABEL: define i32 @log2_ceil_idiom_power2_test2(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X]], -1
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[TMP1]], i1 false), !range [[RNG0]]
+; CHECK-NEXT: [[RET:%.*]] = sub nuw nsw i32 32, [[TMP2]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+ %xor = xor i32 %ctlz, 31
+ %ctpop = tail call i32 @llvm.ctpop.i32(i32 %x)
+ %cmp = icmp ne i32 %ctpop, 1
+ %zext = zext i1 %cmp to i32
+ %ret = add i32 %xor, %zext
+ ret i32 %ret
+}
+
+define i32 @log2_ceil_idiom_commuted(i32 %x) {
+; CHECK-LABEL: define i32 @log2_ceil_idiom_commuted(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X]], -1
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[TMP1]], i1 false), !range [[RNG0]]
+; CHECK-NEXT: [[RET:%.*]] = sub nuw nsw i32 32, [[TMP2]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+ %xor = xor i32 %ctlz, 31
+ %ctpop = tail call i32 @llvm.ctpop.i32(i32 %x)
+ %cmp = icmp ugt i32 %ctpop, 1
+ %zext = zext i1 %cmp to i32
+ %ret = add i32 %zext, %xor
+ ret i32 %ret
+}
+
+; Negative tests
+
+define i32 @log2_ceil_idiom_x_may_be_zero(i32 %x) {
+; CHECK-LABEL: define i32 @log2_ceil_idiom_x_may_be_zero(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 false), !range [[RNG0]]
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[CTLZ]], 31
+; CHECK-NEXT: [[CTPOP:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG0]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[CTPOP]], 1
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT: [[RET:%.*]] = add nuw nsw i32 [[XOR]], [[ZEXT]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false)
+ %xor = xor i32 %ctlz, 31
+ %ctpop = tail call i32 @llvm.ctpop.i32(i32 %x)
+ %cmp = icmp ugt i32 %ctpop, 1
+ %zext = zext i1 %cmp to i32
+ %ret = add i32 %xor, %zext
+ ret i32 %ret
+}
+
+define i4 @log2_ceil_idiom_trunc_too_short(i32 %x) {
+; CHECK-LABEL: define i4 @log2_ceil_idiom_trunc_too_short(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true), !range [[RNG0]]
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[CTLZ]] to i4
+; CHECK-NEXT: [[XOR:%.*]] = xor i4 [[TRUNC]], -1
+; CHECK-NEXT: [[CTPOP:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG0]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[CTPOP]], 1
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i4
+; CHECK-NEXT: [[RET:%.*]] = add i4 [[XOR]], [[ZEXT]]
+; CHECK-NEXT: ret i4 [[RET]]
+;
+ %ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+ %trunc = trunc i32 %ctlz to i4
+ %xor = xor i4 %trunc, 31
+ %ctpop = tail call i32 @llvm.ctpop.i32(i32 %x)
+ %cmp = icmp ugt i32 %ctpop, 1
+ %zext = zext i1 %cmp to i4
+ %ret = add i4 %xor, %zext
+ ret i4 %ret
+}
+
+define i32 @log2_ceil_idiom_mismatched_operands(i32 %x, i32 %y) {
+; CHECK-LABEL: define i32 @log2_ceil_idiom_mismatched_operands(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true), !range [[RNG0]]
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[CTLZ]], 31
+; CHECK-NEXT: [[CTPOP:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[Y]]), !range [[RNG0]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[CTPOP]], 1
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT: [[RET:%.*]] = add nuw nsw i32 [[XOR]], [[ZEXT]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+ %xor = xor i32 %ctlz, 31
+ %ctpop = tail call i32 @llvm.ctpop.i32(i32 %y)
+ %cmp = icmp ugt i32 %ctpop, 1
+ %zext = zext i1 %cmp to i32
+ %ret = add i32 %xor, %zext
+ ret i32 %ret
+}
+
+define i32 @log2_ceil_idiom_wrong_constant(i32 %x) {
+; CHECK-LABEL: define i32 @log2_ceil_idiom_wrong_constant(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true), !range [[RNG0]]
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[CTLZ]], 30
+; CHECK-NEXT: [[CTPOP:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG0]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[CTPOP]], 1
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT: [[RET:%.*]] = add nuw nsw i32 [[XOR]], [[ZEXT]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+ %xor = xor i32 %ctlz, 30
+ %ctpop = tail call i32 @llvm.ctpop.i32(i32 %x)
+ %cmp = icmp ugt i32 %ctpop, 1
+ %zext = zext i1 %cmp to i32
+ %ret = add i32 %xor, %zext
+ ret i32 %ret
+}
+
+define i32 @log2_ceil_idiom_not_a_power2_test1(i32 %x) {
+; CHECK-LABEL: define i32 @log2_ceil_idiom_not_a_power2_test1(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true), !range [[RNG0]]
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[CTLZ]], 31
+; CHECK-NEXT: [[CTPOP:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG0]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[CTPOP]], 1
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT: [[RET:%.*]] = add nuw nsw i32 [[XOR]], [[ZEXT]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+ %xor = xor i32 %ctlz, 31
+ %ctpop = tail call i32 @llvm.ctpop.i32(i32 %x)
+ %cmp = icmp eq i32 %ctpop, 1
+ %zext = zext i1 %cmp to i32
+ %ret = add i32 %xor, %zext
+ ret i32 %ret
+}
+
+define i32 @log2_ceil_idiom_not_a_power2_test2(i32 %x) {
+; CHECK-LABEL: define i32 @log2_ceil_idiom_not_a_power2_test2(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true), !range [[RNG0]]
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[CTLZ]], 31
+; CHECK-NEXT: [[CTPOP:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG0]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[CTPOP]], 2
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT: [[RET:%.*]] = add nuw nsw i32 [[XOR]], [[ZEXT]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+ %xor = xor i32 %ctlz, 31
+ %ctpop = tail call i32 @llvm.ctpop.i32(i32 %x)
+ %cmp = icmp ugt i32 %ctpop, 2
+ %zext = zext i1 %cmp to i32
+ %ret = add i32 %xor, %zext
+ ret i32 %ret
+}
+
+declare i32 @llvm.ctlz.i32(i32, i1)
+declare i32 @llvm.ctpop.i32(i32)
+;.
+; CHECK: [[RNG0]] = !{i32 0, i32 33}
+;.
``````````
</details>
https://github.com/llvm/llvm-project/pull/76661
More information about the llvm-commits
mailing list