[llvm] 8e8467d - [InstCombine] canonicalize "extract lowest set bit" away from cttz intrinsic

Sun Feb 19 15:11:10 PST 2023

Author: Sanjay Patel
Date: 2023-02-19T17:29:40-05:00
New Revision: 8e8467d9d8033e22a98aec3bc20969bd3a59ab28

URL: https://github.com/llvm/llvm-project/commit/8e8467d9d8033e22a98aec3bc20969bd3a59ab28
DIFF: https://github.com/llvm/llvm-project/commit/8e8467d9d8033e22a98aec3bc20969bd3a59ab28.diff

LOG: [InstCombine] canonicalize "extract lowest set bit" away from cttz intrinsic

1 << (cttz X) --> -X & X
https://alive2.llvm.org/ce/z/qv3E9e

This creates an extra use of the input value, which is generally
not preferred, but there are advantages to this direction:
1. 'negate' and 'and' allow for better analysis than 'cttz'.
2. This is more likely to induce follow-on transforms (in the
   example from issue #60801, we'll get the decrement pattern).
3. The more basic ALU ops are more likely to result in better
   codegen across a variety of targets.

This won't solve the motivating bugs by itself (see issue #60799)
because we do not yet recognize the redundant icmp+select, and the
x86 backend may not have the pattern-matching to produce the optimal
BMI instructions.

Differential Revision: https://reviews.llvm.org/D144329

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
    llvm/test/Transforms/InstCombine/shift.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
index 3dd8414e950ae..f8d4e05e9eb8d 100644

--- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -1150,6 +1150,14 @@ Instruction *InstCombinerImpl::visitShl(BinaryOperator &I) {
       return BinaryOperator::CreateLShr(
           ConstantInt::get(Ty, APInt::getSignMask(BitWidth)), X);
 
+    // Canonicalize "extract lowest set bit" using cttz to and-with-negate:
+    // 1 << (cttz X) --> -X & X
+    if (match(Op1,
+              m_OneUse(m_Intrinsic<Intrinsic::cttz>(m_Value(X), m_Value())))) {
+      Value *NegX = Builder.CreateNeg(X, "neg");
+      return BinaryOperator::CreateAnd(NegX, X);
+    }
+
     // The only way to shift out the 1 is with an over-shift, so that would
     // be poison with or without "nuw". Undef is excluded because (undef << X)
     // is not undef (it is zero).

diff  --git a/llvm/test/Transforms/InstCombine/shift.ll b/llvm/test/Transforms/InstCombine/shift.ll
index 8d2e4d97bad28..652b2a095e8d5 100644
--- a/llvm/test/Transforms/InstCombine/shift.ll
+++ b/llvm/test/Transforms/InstCombine/shift.ll
@@ -2009,8 +2009,8 @@ define i32 @ashr_sdiv_extra_use(i32 %x) {
 
 define i32 @shl1_cttz(i32 %x) {
 ; CHECK-LABEL: @shl1_cttz(
-; CHECK-NEXT:    [[TZ:%.*]] = call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 true), !range [[RNG0:![0-9]+]]
-; CHECK-NEXT:    [[SHL:%.*]] = shl nuw i32 1, [[TZ]]
+; CHECK-NEXT:    [[NEG:%.*]] = sub i32 0, [[X:%.*]]
+; CHECK-NEXT:    [[SHL:%.*]] = and i32 [[NEG]], [[X]]
 ; CHECK-NEXT:    ret i32 [[SHL]]
 ;
   %tz = call i32 @llvm.cttz.i32(i32 %x, i1 true)
@@ -2020,8 +2020,8 @@ define i32 @shl1_cttz(i32 %x) {
 
 define <2 x i8> @shl1_cttz_vec(<2 x i8> %x) {
 ; CHECK-LABEL: @shl1_cttz_vec(
-; CHECK-NEXT:    [[TZ:%.*]] = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> [[X:%.*]], i1 false)
-; CHECK-NEXT:    [[SHL:%.*]] = shl nuw <2 x i8> <i8 1, i8 1>, [[TZ]]
+; CHECK-NEXT:    [[NEG:%.*]] = sub <2 x i8> zeroinitializer, [[X:%.*]]
+; CHECK-NEXT:    [[SHL:%.*]] = and <2 x i8> [[NEG]], [[X]]
 ; CHECK-NEXT:    ret <2 x i8> [[SHL]]
 ;
   %tz = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> %x, i1 false)
@@ -2031,8 +2031,8 @@ define <2 x i8> @shl1_cttz_vec(<2 x i8> %x) {
 
 define <2 x i8> @shl1_cttz_vec_poison(<2 x i8> %x) {
 ; CHECK-LABEL: @shl1_cttz_vec_poison(
-; CHECK-NEXT:    [[TZ:%.*]] = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> [[X:%.*]], i1 false)
-; CHECK-NEXT:    [[SHL:%.*]] = shl nuw <2 x i8> <i8 1, i8 poison>, [[TZ]]
+; CHECK-NEXT:    [[NEG:%.*]] = sub <2 x i8> zeroinitializer, [[X:%.*]]
+; CHECK-NEXT:    [[SHL:%.*]] = and <2 x i8> [[NEG]], [[X]]
 ; CHECK-NEXT:    ret <2 x i8> [[SHL]]
 ;
   %tz = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> %x, i1 false)
@@ -2040,9 +2040,11 @@ define <2 x i8> @shl1_cttz_vec_poison(<2 x i8> %x) {
   ret <2 x i8> %shl
 }
 
+; negative test - extra use
+
 define i32 @shl1_cttz_extra_use(i32 %x) {
 ; CHECK-LABEL: @shl1_cttz_extra_use(
-; CHECK-NEXT:    [[TZ:%.*]] = call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 false), !range [[RNG0]]
+; CHECK-NEXT:    [[TZ:%.*]] = call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 false), !range [[RNG0:![0-9]+]]
 ; CHECK-NEXT:    call void @use_i32(i32 [[TZ]])
 ; CHECK-NEXT:    [[SHL:%.*]] = shl nuw i32 1, [[TZ]]
 ; CHECK-NEXT:    ret i32 [[SHL]]
@@ -2053,6 +2055,8 @@ define i32 @shl1_cttz_extra_use(i32 %x) {
   ret i32 %shl
 }
 
+; negative test - must be shift-left of 1
+
 define i32 @shl2_cttz(i32 %x) {
 ; CHECK-LABEL: @shl2_cttz(
 ; CHECK-NEXT:    [[TZ:%.*]] = call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 true), !range [[RNG0]]