[llvm] [InstCombine] Fold `cttz(lshr(-1, x) + 1)` to `width - x` (PR #91244)
via llvm-commits
llvm-commits at lists.llvm.org
Mon May 6 10:24:09 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Monad (YanWQ-monad)
<details>
<summary>Changes</summary>
Fold
``` llvm
define i64 @src(i64 %50) {
%52 = lshr i64 -1, %50
%53 = add i64 %52, 1
%54 = call i64 @llvm.cttz.i64(i64 %53, i1 false)
ret i64 %54
}
```
to
``` llvm
define i64 @tgt(i64 %50) {
%52 = sub i64 64, %50
ret i64 %52
}
```
as https://github.com/llvm/llvm-project/pull/91171#pullrequestreview-2040663002 pointed out.
Alive2 proof: https://alive2.llvm.org/ce/z/2aHfYa
Note: the `ctlz` version of this pattern does not seem to occur in dtcxzyw's benchmark, so it has been put aside for now.
---
Full diff: https://github.com/llvm/llvm-project/pull/91244.diff
2 Files Affected:
- (modified) llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp (+7)
- (modified) llvm/test/Transforms/InstCombine/cttz.ll (+61)
``````````diff
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 1913ef92c16c0e..d7433ad3599f91 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -562,6 +562,13 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {
IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, C, Op1);
return BinaryOperator::CreateSub(ConstCttz, X);
}
+
+ // cttz(add(lshr(UINT_MAX, %val), 1)) --> sub(width, %val)
+ if (match(Op0, m_Add(m_LShr(m_AllOnes(), m_Value(X)), m_One()))) {
+ Value *Width =
+ ConstantInt::get(II.getType(), II.getType()->getScalarSizeInBits());
+ return BinaryOperator::CreateSub(Width, X);
+ }
} else {
// ctlz(lshr(%const, %val), 1) --> add(ctlz(%const, 1), %val)
if (match(Op0, m_LShr(m_ImmConstant(C), m_Value(X))) &&
diff --git a/llvm/test/Transforms/InstCombine/cttz.ll b/llvm/test/Transforms/InstCombine/cttz.ll
index 3595cff5f1aed0..66b7a03fe5d7be 100644
--- a/llvm/test/Transforms/InstCombine/cttz.ll
+++ b/llvm/test/Transforms/InstCombine/cttz.ll
@@ -215,3 +215,64 @@ define i32 @cttz_of_lowest_set_bit_wrong_intrinsic(i32 %x) {
%tz = call i32 @llvm.ctlz.i32(i32 %and, i1 false)
ret i32 %tz
}
+
+define i32 @cttz_of_power_of_two(i32 %x) {
+; CHECK-LABEL: @cttz_of_power_of_two(
+; CHECK-NEXT: [[R:%.*]] = sub i32 32, [[X:%.*]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %lshr = lshr i32 -1, %x
+ %add = add i32 %lshr, 1
+ %r = call i32 @llvm.cttz.i32(i32 %add, i1 false)
+ ret i32 %r
+}
+
+define i32 @cttz_of_power_of_two_zero_poison(i32 %x) {
+; CHECK-LABEL: @cttz_of_power_of_two_zero_poison(
+; CHECK-NEXT: [[R:%.*]] = sub i32 32, [[X:%.*]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %lshr = lshr i32 -1, %x
+ %add = add i32 %lshr, 1
+ %r = call i32 @llvm.cttz.i32(i32 %add, i1 true)
+ ret i32 %r
+}
+
+define i32 @cttz_of_power_of_two_wrong_intrinsic(i32 %x) {
+; CHECK-LABEL: @cttz_of_power_of_two_wrong_intrinsic(
+; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 -1, [[X:%.*]]
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[LSHR]], 1
+; CHECK-NEXT: [[R:%.*]] = call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[ADD]], i1 false)
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %lshr = lshr i32 -1, %x
+ %add = add i32 %lshr, 1
+ %r = call i32 @llvm.ctlz.i32(i32 %add, i1 false)
+ ret i32 %r
+}
+
+define i32 @cttz_of_power_of_two_wrong_constant_1(i32 %x) {
+; CHECK-LABEL: @cttz_of_power_of_two_wrong_constant_1(
+; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 -2, [[X:%.*]]
+; CHECK-NEXT: [[ADD:%.*]] = add nuw i32 [[LSHR]], 1
+; CHECK-NEXT: [[R:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[ADD]], i1 true)
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %lshr = lshr i32 -2, %x
+ %add = add i32 %lshr, 1
+ %r = call i32 @llvm.cttz.i32(i32 %add, i1 false)
+ ret i32 %r
+}
+
+define i32 @cttz_of_power_of_two_wrong_constant_2(i32 %x) {
+; CHECK-LABEL: @cttz_of_power_of_two_wrong_constant_2(
+; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 -1, [[X:%.*]]
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[LSHR]], -1
+; CHECK-NEXT: [[R:%.*]] = call range(i32 1, 33) i32 @llvm.cttz.i32(i32 [[ADD]], i1 false)
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %lshr = lshr i32 -1, %x
+ %add = add i32 %lshr, -1
+ %r = call i32 @llvm.cttz.i32(i32 %add, i1 false)
+ ret i32 %r
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/91244
More information about the llvm-commits mailing list