[llvm] dc2b2ae - [InstCombine] Fold cttz of lowest set bit
Nikita Popov via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 14 05:31:46 PDT 2023
Author: Nikita Popov
Date: 2023-07-14T14:31:35+02:00
New Revision: dc2b2ae7dc333f9c3769785fa147c7872adb9bba
URL: https://github.com/llvm/llvm-project/commit/dc2b2ae7dc333f9c3769785fa147c7872adb9bba
DIFF: https://github.com/llvm/llvm-project/commit/dc2b2ae7dc333f9c3769785fa147c7872adb9bba.diff
LOG: [InstCombine] Fold cttz of lowest set bit
cttz(-a & a) is the same as cttz(a). The expression -a & a is an idiom
that isolates the lowest set bit of a, so it does not change the number
of trailing zeros. (For a == 0, -a & a is also 0, so the operand passed
to cttz is unchanged.)
Proof: https://alive2.llvm.org/ce/z/Yp26x7
Added:
Modified:
llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
llvm/test/Transforms/InstCombine/cttz.ll
llvm/test/Transforms/InstCombine/select-ctlz-to-cttz.ll
llvm/test/Transforms/InstCombine/xor.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 37d6a59e364e70..d3ec6a7aa667be 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -537,6 +537,10 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {
if (match(Op0, m_Neg(m_Value(X))))
return IC.replaceOperand(II, 0, X);
+ // cttz(-x & x) -> cttz(x)
+ if (match(Op0, m_c_And(m_Neg(m_Value(X)), m_Deferred(X))))
+ return IC.replaceOperand(II, 0, X);
+
// cttz(sext(x)) -> cttz(zext(x))
if (match(Op0, m_OneUse(m_SExt(m_Value(X))))) {
auto *Zext = IC.Builder.CreateZExt(X, II.getType());
diff --git a/llvm/test/Transforms/InstCombine/cttz.ll b/llvm/test/Transforms/InstCombine/cttz.ll
index 74ce48589346c9..4ef286afe26e97 100644
--- a/llvm/test/Transforms/InstCombine/cttz.ll
+++ b/llvm/test/Transforms/InstCombine/cttz.ll
@@ -122,9 +122,7 @@ define <2 x i64> @cttz_sext_zero_def_vec(<2 x i32> %x) {
define i32 @cttz_of_lowest_set_bit(i32 %x) {
; CHECK-LABEL: @cttz_of_lowest_set_bit(
-; CHECK-NEXT: [[SUB:%.*]] = sub i32 0, [[X:%.*]]
-; CHECK-NEXT: [[AND:%.*]] = and i32 [[SUB]], [[X]]
-; CHECK-NEXT: [[TZ:%.*]] = call i32 @llvm.cttz.i32(i32 [[AND]], i1 false), !range [[RNG1]]
+; CHECK-NEXT: [[TZ:%.*]] = call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 false), !range [[RNG1]]
; CHECK-NEXT: ret i32 [[TZ]]
;
%sub = sub i32 0, %x
@@ -136,9 +134,7 @@ define i32 @cttz_of_lowest_set_bit(i32 %x) {
define i32 @cttz_of_lowest_set_bit_commuted(i32 %xx) {
; CHECK-LABEL: @cttz_of_lowest_set_bit_commuted(
; CHECK-NEXT: [[X:%.*]] = udiv i32 42, [[XX:%.*]]
-; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[X]]
-; CHECK-NEXT: [[AND:%.*]] = and i32 [[X]], [[SUB]]
-; CHECK-NEXT: [[TZ:%.*]] = call i32 @llvm.cttz.i32(i32 [[AND]], i1 false), !range [[RNG1]]
+; CHECK-NEXT: [[TZ:%.*]] = call i32 @llvm.cttz.i32(i32 [[X]], i1 false), !range [[RNG1]]
; CHECK-NEXT: ret i32 [[TZ]]
;
%x = udiv i32 42, %xx ; thwart complexity-based canonicalization
@@ -150,9 +146,7 @@ define i32 @cttz_of_lowest_set_bit_commuted(i32 %xx) {
define i32 @cttz_of_lowest_set_bit_poison_flag(i32 %x) {
; CHECK-LABEL: @cttz_of_lowest_set_bit_poison_flag(
-; CHECK-NEXT: [[SUB:%.*]] = sub i32 0, [[X:%.*]]
-; CHECK-NEXT: [[AND:%.*]] = and i32 [[SUB]], [[X]]
-; CHECK-NEXT: [[TZ:%.*]] = call i32 @llvm.cttz.i32(i32 [[AND]], i1 true), !range [[RNG1]]
+; CHECK-NEXT: [[TZ:%.*]] = call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 true), !range [[RNG1]]
; CHECK-NEXT: ret i32 [[TZ]]
;
%sub = sub i32 0, %x
@@ -163,9 +157,7 @@ define i32 @cttz_of_lowest_set_bit_poison_flag(i32 %x) {
define <2 x i64> @cttz_of_lowest_set_bit_vec(<2 x i64> %x) {
; CHECK-LABEL: @cttz_of_lowest_set_bit_vec(
-; CHECK-NEXT: [[SUB:%.*]] = sub <2 x i64> zeroinitializer, [[X:%.*]]
-; CHECK-NEXT: [[AND:%.*]] = and <2 x i64> [[SUB]], [[X]]
-; CHECK-NEXT: [[TZ:%.*]] = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> [[AND]], i1 false), !range [[RNG2]]
+; CHECK-NEXT: [[TZ:%.*]] = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> [[X:%.*]], i1 false), !range [[RNG2]]
; CHECK-NEXT: ret <2 x i64> [[TZ]]
;
%sub = sub <2 x i64> zeroinitializer, %x
@@ -176,9 +168,7 @@ define <2 x i64> @cttz_of_lowest_set_bit_vec(<2 x i64> %x) {
define <2 x i64> @cttz_of_lowest_set_bit_vec_undef(<2 x i64> %x) {
; CHECK-LABEL: @cttz_of_lowest_set_bit_vec_undef(
-; CHECK-NEXT: [[SUB:%.*]] = sub <2 x i64> zeroinitializer, [[X:%.*]]
-; CHECK-NEXT: [[AND:%.*]] = and <2 x i64> [[SUB]], [[X]]
-; CHECK-NEXT: [[TZ:%.*]] = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> [[AND]], i1 false), !range [[RNG2]]
+; CHECK-NEXT: [[TZ:%.*]] = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> [[X:%.*]], i1 false), !range [[RNG2]]
; CHECK-NEXT: ret <2 x i64> [[TZ]]
;
%sub = sub <2 x i64> zeroinitializer, %x
diff --git a/llvm/test/Transforms/InstCombine/select-ctlz-to-cttz.ll b/llvm/test/Transforms/InstCombine/select-ctlz-to-cttz.ll
index f0b36c8b4ef909..45864eeed77220 100644
--- a/llvm/test/Transforms/InstCombine/select-ctlz-to-cttz.ll
+++ b/llvm/test/Transforms/InstCombine/select-ctlz-to-cttz.ll
@@ -72,9 +72,7 @@ define <2 x i32> @select_clz_to_ctz_vec(<2 x i32> %a) {
define i32 @select_clz_to_ctz_extra_use(i32 %a) {
; CHECK-LABEL: @select_clz_to_ctz_extra_use(
-; CHECK-NEXT: [[SUB:%.*]] = sub i32 0, [[A:%.*]]
-; CHECK-NEXT: [[AND:%.*]] = and i32 [[SUB]], [[A]]
-; CHECK-NEXT: [[SUB1:%.*]] = call i32 @llvm.cttz.i32(i32 [[AND]], i1 true), !range [[RNG0]]
+; CHECK-NEXT: [[SUB1:%.*]] = call i32 @llvm.cttz.i32(i32 [[A:%.*]], i1 true), !range [[RNG0]]
; CHECK-NEXT: call void @use(i32 [[SUB1]])
; CHECK-NEXT: [[COND:%.*]] = call i32 @llvm.cttz.i32(i32 [[A]], i1 true), !range [[RNG0]]
; CHECK-NEXT: ret i32 [[COND]]
diff --git a/llvm/test/Transforms/InstCombine/xor.ll b/llvm/test/Transforms/InstCombine/xor.ll
index 479714fb478a8e..d4b49afe9efe6f 100644
--- a/llvm/test/Transforms/InstCombine/xor.ll
+++ b/llvm/test/Transforms/InstCombine/xor.ll
@@ -1336,9 +1336,7 @@ define i32 @xor_orn_2use(i32 %a, i32 %b, ptr %s1, ptr %s2) {
define i32 @ctlz_pow2(i32 %x) {
; CHECK-LABEL: @ctlz_pow2(
-; CHECK-NEXT: [[N:%.*]] = sub i32 0, [[X:%.*]]
-; CHECK-NEXT: [[A:%.*]] = and i32 [[N]], [[X]]
-; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.cttz.i32(i32 [[A]], i1 true), !range [[RNG0:![0-9]+]]
+; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 true), !range [[RNG0:![0-9]+]]
; CHECK-NEXT: ret i32 [[R]]
;
%n = sub i32 0, %x
More information about the llvm-commits mailing list