[llvm] addbdac - [InstCombine] fold power-of-2 ctlz/cttz with inverted result

Thu Sep 1 05:59:43 PDT 2022

Author: Sanjay Patel
Date: 2022-09-01T08:57:55-04:00
New Revision: addbdac5d580eee25146e4a8fda62fa18e6b945d

URL: https://github.com/llvm/llvm-project/commit/addbdac5d580eee25146e4a8fda62fa18e6b945d
DIFF: https://github.com/llvm/llvm-project/commit/addbdac5d580eee25146e4a8fda62fa18e6b945d.diff

LOG: [InstCombine] fold power-of-2 ctlz/cttz with inverted result

When X is a power-of-two or zero and zero input is poison:
ctlz(i32 X) ^ 31 --> cttz(X)
cttz(i32 X) ^ 31 --> ctlz(X)

https://alive2.llvm.org/ce/z/Cs7sFE

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
    llvm/test/Transforms/InstCombine/select-ctlz-to-cttz.ll
    llvm/test/Transforms/InstCombine/xor.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index b011c5f93979..0fd3bdc1ef66 100644

--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -3844,6 +3844,21 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) {
           MaskedValueIsZero(X, *C, 0, &I))
         return BinaryOperator::CreateXor(X, ConstantInt::get(Ty, *C ^ *RHSC));
 
+      // When X is a power-of-two or zero and zero input is poison:
+      // ctlz(i32 X) ^ 31 --> cttz(X)
+      // cttz(i32 X) ^ 31 --> ctlz(X)
+      auto *II = dyn_cast<IntrinsicInst>(Op0);
+      if (II && II->hasOneUse() && *RHSC == Ty->getScalarSizeInBits() - 1) {
+        Intrinsic::ID IID = II->getIntrinsicID();
+        if ((IID == Intrinsic::ctlz || IID == Intrinsic::cttz) &&
+            match(II->getArgOperand(1), m_One()) &&
+            isKnownToBeAPowerOfTwo(II->getArgOperand(0), /*OrZero */ true)) {
+          IID = (IID == Intrinsic::ctlz) ? Intrinsic::cttz : Intrinsic::ctlz;
+          Function *F = Intrinsic::getDeclaration(II->getModule(), IID, Ty);
+          return CallInst::Create(F, {II->getArgOperand(0), Builder.getTrue()});
+        }
+      }
+
       // If RHSC is inverting the remaining bits of shifted X,
       // canonicalize to a 'not' before the shift to help SCEV and codegen:
       // (X << C) ^ RHSC --> ~X << C

diff  --git a/llvm/test/Transforms/InstCombine/select-ctlz-to-cttz.ll b/llvm/test/Transforms/InstCombine/select-ctlz-to-cttz.ll
index 7cabfcca06ac..0433bad497ed 100644
--- a/llvm/test/Transforms/InstCombine/select-ctlz-to-cttz.ll
+++ b/llvm/test/Transforms/InstCombine/select-ctlz-to-cttz.ll
@@ -60,8 +60,7 @@ define i32 @select_clz_to_ctz_extra_use(i32 %a) {
 ; CHECK-LABEL: @select_clz_to_ctz_extra_use(
 ; CHECK-NEXT:    [[SUB:%.*]] = sub i32 0, [[A:%.*]]
 ; CHECK-NEXT:    [[AND:%.*]] = and i32 [[SUB]], [[A]]
-; CHECK-NEXT:    [[LZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[AND]], i1 true), !range [[RNG0]]
-; CHECK-NEXT:    [[SUB1:%.*]] = xor i32 [[LZ]], 31
+; CHECK-NEXT:    [[SUB1:%.*]] = call i32 @llvm.cttz.i32(i32 [[AND]], i1 true), !range [[RNG0]]
 ; CHECK-NEXT:    call void @use(i32 [[SUB1]])
 ; CHECK-NEXT:    [[COND:%.*]] = call i32 @llvm.cttz.i32(i32 [[A]], i1 true), !range [[RNG0]]
 ; CHECK-NEXT:    ret i32 [[COND]]

diff  --git a/llvm/test/Transforms/InstCombine/xor.ll b/llvm/test/Transforms/InstCombine/xor.ll
index 92d536a8b617..32807bff6808 100644
--- a/llvm/test/Transforms/InstCombine/xor.ll
+++ b/llvm/test/Transforms/InstCombine/xor.ll
@@ -1338,8 +1338,7 @@ define i32 @ctlz_pow2(i32 %x) {
 ; CHECK-LABEL: @ctlz_pow2(
 ; CHECK-NEXT:    [[N:%.*]] = sub i32 0, [[X:%.*]]
 ; CHECK-NEXT:    [[A:%.*]] = and i32 [[N]], [[X]]
-; CHECK-NEXT:    [[Z:%.*]] = call i32 @llvm.ctlz.i32(i32 [[A]], i1 true), !range [[RNG0:![0-9]+]]
-; CHECK-NEXT:    [[R:%.*]] = xor i32 [[Z]], 31
+; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.cttz.i32(i32 [[A]], i1 true), !range [[RNG0:![0-9]+]]
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
   %n = sub i32 0, %x
@@ -1349,12 +1348,13 @@ define i32 @ctlz_pow2(i32 %x) {
   ret i32 %r
 }
 
+; TODO: %d is known not zero, so this should fold even with arg1 set to false.
+
 define <2 x i8> @cttz_pow2(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @cttz_pow2(
 ; CHECK-NEXT:    [[S:%.*]] = shl <2 x i8> <i8 1, i8 1>, [[X:%.*]]
 ; CHECK-NEXT:    [[D:%.*]] = udiv exact <2 x i8> [[S]], [[Y:%.*]]
-; CHECK-NEXT:    [[Z:%.*]] = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> [[D]], i1 true)
-; CHECK-NEXT:    [[R:%.*]] = xor <2 x i8> [[Z]], <i8 7, i8 7>
+; CHECK-NEXT:    [[R:%.*]] = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> [[D]], i1 true)
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
   %s = shl <2 x i8> <i8 1, i8 1>, %x
@@ -1364,6 +1364,8 @@ define <2 x i8> @cttz_pow2(<2 x i8> %x, <2 x i8> %y) {
   ret <2 x i8> %r
 }
 
+; negative test - 0 input returns 63
+
 define i32 @ctlz_pow2_or_zero(i32 %x) {
 ; CHECK-LABEL: @ctlz_pow2_or_zero(
 ; CHECK-NEXT:    [[N:%.*]] = sub i32 0, [[X:%.*]]
@@ -1379,6 +1381,8 @@ define i32 @ctlz_pow2_or_zero(i32 %x) {
   ret i32 %r
 }
 
+; negative test - must xor with (bitwidth - 1)
+
 define i32 @ctlz_pow2_wrong_const(i32 %x) {
 ; CHECK-LABEL: @ctlz_pow2_wrong_const(
 ; CHECK-NEXT:    [[N:%.*]] = sub i32 0, [[X:%.*]]