[llvm] dc2b2ae - [InstCombine] Fold cttz of lowest set bit

Fri Jul 14 05:31:46 PDT 2023

Author: Nikita Popov
Date: 2023-07-14T14:31:35+02:00
New Revision: dc2b2ae7dc333f9c3769785fa147c7872adb9bba

URL: https://github.com/llvm/llvm-project/commit/dc2b2ae7dc333f9c3769785fa147c7872adb9bba
DIFF: https://github.com/llvm/llvm-project/commit/dc2b2ae7dc333f9c3769785fa147c7872adb9bba.diff

LOG: [InstCombine] Fold cttz of lowest set bit

cttz(-a & a) is the same as cttz(a). -a & a is an idiom to extract
the lowest set bit, which naturally does not affect the number of
trailing zeroes.

Proof: https://alive2.llvm.org/ce/z/Yp26x7

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
    llvm/test/Transforms/InstCombine/cttz.ll
    llvm/test/Transforms/InstCombine/select-ctlz-to-cttz.ll
    llvm/test/Transforms/InstCombine/xor.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 37d6a59e364e70..d3ec6a7aa667be 100644

--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -537,6 +537,10 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {
     if (match(Op0, m_Neg(m_Value(X))))
       return IC.replaceOperand(II, 0, X);
 
+    // cttz(-x & x) -> cttz(x)
+    if (match(Op0, m_c_And(m_Neg(m_Value(X)), m_Deferred(X))))
+      return IC.replaceOperand(II, 0, X);
+
     // cttz(sext(x)) -> cttz(zext(x))
     if (match(Op0, m_OneUse(m_SExt(m_Value(X))))) {
       auto *Zext = IC.Builder.CreateZExt(X, II.getType());

diff  --git a/llvm/test/Transforms/InstCombine/cttz.ll b/llvm/test/Transforms/InstCombine/cttz.ll
index 74ce48589346c9..4ef286afe26e97 100644
--- a/llvm/test/Transforms/InstCombine/cttz.ll
+++ b/llvm/test/Transforms/InstCombine/cttz.ll
@@ -122,9 +122,7 @@ define <2 x i64> @cttz_sext_zero_def_vec(<2 x i32> %x) {
 
 define i32 @cttz_of_lowest_set_bit(i32 %x) {
 ; CHECK-LABEL: @cttz_of_lowest_set_bit(
-; CHECK-NEXT:    [[SUB:%.*]] = sub i32 0, [[X:%.*]]
-; CHECK-NEXT:    [[AND:%.*]] = and i32 [[SUB]], [[X]]
-; CHECK-NEXT:    [[TZ:%.*]] = call i32 @llvm.cttz.i32(i32 [[AND]], i1 false), !range [[RNG1]]
+; CHECK-NEXT:    [[TZ:%.*]] = call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 false), !range [[RNG1]]
 ; CHECK-NEXT:    ret i32 [[TZ]]
 ;
   %sub = sub i32 0, %x
@@ -136,9 +134,7 @@ define i32 @cttz_of_lowest_set_bit(i32 %x) {
 define i32 @cttz_of_lowest_set_bit_commuted(i32 %xx) {
 ; CHECK-LABEL: @cttz_of_lowest_set_bit_commuted(
 ; CHECK-NEXT:    [[X:%.*]] = udiv i32 42, [[XX:%.*]]
-; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
-; CHECK-NEXT:    [[AND:%.*]] = and i32 [[X]], [[SUB]]
-; CHECK-NEXT:    [[TZ:%.*]] = call i32 @llvm.cttz.i32(i32 [[AND]], i1 false), !range [[RNG1]]
+; CHECK-NEXT:    [[TZ:%.*]] = call i32 @llvm.cttz.i32(i32 [[X]], i1 false), !range [[RNG1]]
 ; CHECK-NEXT:    ret i32 [[TZ]]
 ;
   %x = udiv i32 42, %xx ; thwart complexity-based canonicalization
@@ -150,9 +146,7 @@ define i32 @cttz_of_lowest_set_bit_commuted(i32 %xx) {
 
 define i32 @cttz_of_lowest_set_bit_poison_flag(i32 %x) {
 ; CHECK-LABEL: @cttz_of_lowest_set_bit_poison_flag(
-; CHECK-NEXT:    [[SUB:%.*]] = sub i32 0, [[X:%.*]]
-; CHECK-NEXT:    [[AND:%.*]] = and i32 [[SUB]], [[X]]
-; CHECK-NEXT:    [[TZ:%.*]] = call i32 @llvm.cttz.i32(i32 [[AND]], i1 true), !range [[RNG1]]
+; CHECK-NEXT:    [[TZ:%.*]] = call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 true), !range [[RNG1]]
 ; CHECK-NEXT:    ret i32 [[TZ]]
 ;
   %sub = sub i32 0, %x
@@ -163,9 +157,7 @@ define i32 @cttz_of_lowest_set_bit_poison_flag(i32 %x) {
 
 define <2 x i64> @cttz_of_lowest_set_bit_vec(<2 x i64> %x) {
 ; CHECK-LABEL: @cttz_of_lowest_set_bit_vec(
-; CHECK-NEXT:    [[SUB:%.*]] = sub <2 x i64> zeroinitializer, [[X:%.*]]
-; CHECK-NEXT:    [[AND:%.*]] = and <2 x i64> [[SUB]], [[X]]
-; CHECK-NEXT:    [[TZ:%.*]] = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> [[AND]], i1 false), !range [[RNG2]]
+; CHECK-NEXT:    [[TZ:%.*]] = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> [[X:%.*]], i1 false), !range [[RNG2]]
 ; CHECK-NEXT:    ret <2 x i64> [[TZ]]
 ;
   %sub = sub <2 x i64> zeroinitializer, %x
@@ -176,9 +168,7 @@ define <2 x i64> @cttz_of_lowest_set_bit_vec(<2 x i64> %x) {
 
 define <2 x i64> @cttz_of_lowest_set_bit_vec_undef(<2 x i64> %x) {
 ; CHECK-LABEL: @cttz_of_lowest_set_bit_vec_undef(
-; CHECK-NEXT:    [[SUB:%.*]] = sub <2 x i64> zeroinitializer, [[X:%.*]]
-; CHECK-NEXT:    [[AND:%.*]] = and <2 x i64> [[SUB]], [[X]]
-; CHECK-NEXT:    [[TZ:%.*]] = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> [[AND]], i1 false), !range [[RNG2]]
+; CHECK-NEXT:    [[TZ:%.*]] = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> [[X:%.*]], i1 false), !range [[RNG2]]
 ; CHECK-NEXT:    ret <2 x i64> [[TZ]]
 ;
   %sub = sub <2 x i64> zeroinitializer, %x

diff  --git a/llvm/test/Transforms/InstCombine/select-ctlz-to-cttz.ll b/llvm/test/Transforms/InstCombine/select-ctlz-to-cttz.ll
index f0b36c8b4ef909..45864eeed77220 100644
--- a/llvm/test/Transforms/InstCombine/select-ctlz-to-cttz.ll
+++ b/llvm/test/Transforms/InstCombine/select-ctlz-to-cttz.ll
@@ -72,9 +72,7 @@ define <2 x i32> @select_clz_to_ctz_vec(<2 x i32> %a) {
 
 define i32 @select_clz_to_ctz_extra_use(i32 %a) {
 ; CHECK-LABEL: @select_clz_to_ctz_extra_use(
-; CHECK-NEXT:    [[SUB:%.*]] = sub i32 0, [[A:%.*]]
-; CHECK-NEXT:    [[AND:%.*]] = and i32 [[SUB]], [[A]]
-; CHECK-NEXT:    [[SUB1:%.*]] = call i32 @llvm.cttz.i32(i32 [[AND]], i1 true), !range [[RNG0]]
+; CHECK-NEXT:    [[SUB1:%.*]] = call i32 @llvm.cttz.i32(i32 [[A:%.*]], i1 true), !range [[RNG0]]
 ; CHECK-NEXT:    call void @use(i32 [[SUB1]])
 ; CHECK-NEXT:    [[COND:%.*]] = call i32 @llvm.cttz.i32(i32 [[A]], i1 true), !range [[RNG0]]
 ; CHECK-NEXT:    ret i32 [[COND]]

diff  --git a/llvm/test/Transforms/InstCombine/xor.ll b/llvm/test/Transforms/InstCombine/xor.ll
index 479714fb478a8e..d4b49afe9efe6f 100644
--- a/llvm/test/Transforms/InstCombine/xor.ll
+++ b/llvm/test/Transforms/InstCombine/xor.ll
@@ -1336,9 +1336,7 @@ define i32 @xor_orn_2use(i32 %a, i32 %b, ptr %s1, ptr %s2) {
 
 define i32 @ctlz_pow2(i32 %x) {
 ; CHECK-LABEL: @ctlz_pow2(
-; CHECK-NEXT:    [[N:%.*]] = sub i32 0, [[X:%.*]]
-; CHECK-NEXT:    [[A:%.*]] = and i32 [[N]], [[X]]
-; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.cttz.i32(i32 [[A]], i1 true), !range [[RNG0:![0-9]+]]
+; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 true), !range [[RNG0:![0-9]+]]
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
   %n = sub i32 0, %x