[PATCH] D144329: [InstCombine] canonicalize "extract lowest set bit" away from cttz intrinsic

Sanjay Patel via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Sat Feb 18 06:05:13 PST 2023


spatel created this revision.
spatel added reviewers: kazu, goldstein.w.n, nikic.
Herald added subscribers: StephenFan, pengfei, hiraditya, mcrosier.
Herald added a project: All.
spatel requested review of this revision.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

1 << (cttz X) --> -X & X
https://alive2.llvm.org/ce/z/qv3E9e

This creates an extra use of the input value, so that's generally not preferred, but there are advantages to this direction:

1. 'negate' and 'and' allow for better analysis than 'cttz'.
2. This is more likely to induce follow-on transforms (in the example from issue #60801 <https://github.com/llvm/llvm-project/issues/60801>, we'll get the decrement pattern).
3. The more basic ALU ops are more likely to result in better codegen across a variety of targets.

This won't solve the motivating bugs (see issue #60799 <https://github.com/llvm/llvm-project/issues/60799>) because we do not recognize the redundant icmp+sel, and the x86 backend may not have the pattern-matching to produce the optimal BMI instructions.


https://reviews.llvm.org/D144329

Files:
  llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
  llvm/test/Transforms/InstCombine/shift.ll


Index: llvm/test/Transforms/InstCombine/shift.ll
===================================================================
--- llvm/test/Transforms/InstCombine/shift.ll
+++ llvm/test/Transforms/InstCombine/shift.ll
@@ -2009,8 +2009,8 @@
 
 define i32 @shl1_cttz(i32 %x) {
 ; CHECK-LABEL: @shl1_cttz(
-; CHECK-NEXT:    [[TZ:%.*]] = call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 true), !range [[RNG0:![0-9]+]]
-; CHECK-NEXT:    [[SHL:%.*]] = shl nuw i32 1, [[TZ]]
+; CHECK-NEXT:    [[NEG:%.*]] = sub i32 0, [[X:%.*]]
+; CHECK-NEXT:    [[SHL:%.*]] = and i32 [[NEG]], [[X]]
 ; CHECK-NEXT:    ret i32 [[SHL]]
 ;
   %tz = call i32 @llvm.cttz.i32(i32 %x, i1 true)
@@ -2020,8 +2020,8 @@
 
 define <2 x i8> @shl1_cttz_vec(<2 x i8> %x) {
 ; CHECK-LABEL: @shl1_cttz_vec(
-; CHECK-NEXT:    [[TZ:%.*]] = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> [[X:%.*]], i1 false)
-; CHECK-NEXT:    [[SHL:%.*]] = shl nuw <2 x i8> <i8 1, i8 1>, [[TZ]]
+; CHECK-NEXT:    [[NEG:%.*]] = sub <2 x i8> zeroinitializer, [[X:%.*]]
+; CHECK-NEXT:    [[SHL:%.*]] = and <2 x i8> [[NEG]], [[X]]
 ; CHECK-NEXT:    ret <2 x i8> [[SHL]]
 ;
   %tz = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> %x, i1 false)
@@ -2031,8 +2031,8 @@
 
 define <2 x i8> @shl1_cttz_vec_poison(<2 x i8> %x) {
 ; CHECK-LABEL: @shl1_cttz_vec_poison(
-; CHECK-NEXT:    [[TZ:%.*]] = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> [[X:%.*]], i1 false)
-; CHECK-NEXT:    [[SHL:%.*]] = shl nuw <2 x i8> <i8 1, i8 poison>, [[TZ]]
+; CHECK-NEXT:    [[NEG:%.*]] = sub <2 x i8> zeroinitializer, [[X:%.*]]
+; CHECK-NEXT:    [[SHL:%.*]] = and <2 x i8> [[NEG]], [[X]]
 ; CHECK-NEXT:    ret <2 x i8> [[SHL]]
 ;
   %tz = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> %x, i1 false)
@@ -2040,9 +2040,11 @@
   ret <2 x i8> %shl
 }
 
+; negative test - extra use
+
 define i32 @shl1_cttz_extra_use(i32 %x) {
 ; CHECK-LABEL: @shl1_cttz_extra_use(
-; CHECK-NEXT:    [[TZ:%.*]] = call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 false), !range [[RNG0]]
+; CHECK-NEXT:    [[TZ:%.*]] = call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 false), !range [[RNG0:![0-9]+]]
 ; CHECK-NEXT:    call void @use_i32(i32 [[TZ]])
 ; CHECK-NEXT:    [[SHL:%.*]] = shl nuw i32 1, [[TZ]]
 ; CHECK-NEXT:    ret i32 [[SHL]]
@@ -2053,6 +2055,8 @@
   ret i32 %shl
 }
 
+; negative test - must be shift-left of 1
+
 define i32 @shl2_cttz(i32 %x) {
 ; CHECK-LABEL: @shl2_cttz(
 ; CHECK-NEXT:    [[TZ:%.*]] = call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 true), !range [[RNG0]]
Index: llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -1150,6 +1150,14 @@
       return BinaryOperator::CreateLShr(
           ConstantInt::get(Ty, APInt::getSignMask(BitWidth)), X);
 
+    // Canonicalize "extract lowest set bit" using cttz to and-with-negate:
+    // 1 << (cttz X) --> -X & X
+    if (match(Op1,
+              m_OneUse(m_Intrinsic<Intrinsic::cttz>(m_Value(X), m_Value())))) {
+      Value *NegX = Builder.CreateNeg(X, "neg");
+      return BinaryOperator::CreateAnd(NegX, X);
+    }
+
     // The only way to shift out the 1 is with an over-shift, so that would
     // be poison with or without "nuw". Undef is excluded because (undef << X)
     // is not undef (it is zero).


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D144329.498580.patch
Type: text/x-patch
Size: 3302 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230218/a6c00bd4/attachment.bin>


More information about the llvm-commits mailing list