[llvm] 827f8a7 - Add opt with ctlz and shifts of power of 2 constants (#74175)

via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 7 23:06:27 PST 2023


Author: Sizov Nikita
Date: 2023-12-08T15:06:23+08:00
New Revision: 827f8a7ef6ddcade0700311793510e3b3e0829f0

URL: https://github.com/llvm/llvm-project/commit/827f8a7ef6ddcade0700311793510e3b3e0829f0
DIFF: https://github.com/llvm/llvm-project/commit/827f8a7ef6ddcade0700311793510e3b3e0829f0.diff

LOG: Add opt with ctlz and shifts of power of 2 constants (#74175)

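This patch adds the following InstCombine folds, where C is an immediate
constant, X is the variable shift amount, and the trailing `1` is the
intrinsic's second operand (zero input yields poison) set to true:
```
cttz(shl(C, X), 1)        --> add(cttz(C, 1), X)
cttz(lshr exact(C, X), 1) --> sub(cttz(C, 1), X)
ctlz(lshr(C, X), 1)       --> add(ctlz(C, 1), X)
ctlz(shl nuw(C, X), 1)    --> sub(ctlz(C, 1), X)
```
The folds are not applied when the second operand is false.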
Alive2: https://alive2.llvm.org/ce/z/9KHlKc
Closes #41333

Added: 
    llvm/test/Transforms/InstCombine/ctlz-cttz-shifts.ll

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index a991f0906052a..255ce6973a16f 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -514,6 +514,8 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {
     return IC.replaceInstUsesWith(II, ConstantInt::getNullValue(II.getType()));
   }
 
+  Constant *C;
+
   if (IsTZ) {
     // cttz(-x) -> cttz(x)
     if (match(Op0, m_Neg(m_Value(X))))
@@ -549,6 +551,38 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {
 
     if (match(Op0, m_Intrinsic<Intrinsic::abs>(m_Value(X))))
       return IC.replaceOperand(II, 0, X);
+
+    // cttz(shl(%const, %val), 1) --> add(cttz(%const, 1), %val)
+    if (match(Op0, m_Shl(m_ImmConstant(C), m_Value(X))) &&
+        match(Op1, m_One())) {
+      Value *ConstCttz =
+          IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, C, Op1);
+      return BinaryOperator::CreateAdd(ConstCttz, X);
+    }
+
+    // cttz(lshr exact (%const, %val), 1) --> sub(cttz(%const, 1), %val)
+    if (match(Op0, m_Exact(m_LShr(m_ImmConstant(C), m_Value(X)))) &&
+        match(Op1, m_One())) {
+      Value *ConstCttz =
+          IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, C, Op1);
+      return BinaryOperator::CreateSub(ConstCttz, X);
+    }
+  } else {
+    // ctlz(lshr(%const, %val), 1) --> add(ctlz(%const, 1), %val)
+    if (match(Op0, m_LShr(m_ImmConstant(C), m_Value(X))) &&
+        match(Op1, m_One())) {
+      Value *ConstCtlz =
+          IC.Builder.CreateBinaryIntrinsic(Intrinsic::ctlz, C, Op1);
+      return BinaryOperator::CreateAdd(ConstCtlz, X);
+    }
+
+    // ctlz(shl nuw (%const, %val), 1) --> sub(ctlz(%const, 1), %val)
+    if (match(Op0, m_NUWShl(m_ImmConstant(C), m_Value(X))) &&
+        match(Op1, m_One())) {
+      Value *ConstCtlz =
+          IC.Builder.CreateBinaryIntrinsic(Intrinsic::ctlz, C, Op1);
+      return BinaryOperator::CreateSub(ConstCtlz, X);
+    }
   }
 
   KnownBits Known = IC.computeKnownBits(Op0, 0, &II);

diff  --git a/llvm/test/Transforms/InstCombine/ctlz-cttz-shifts.ll b/llvm/test/Transforms/InstCombine/ctlz-cttz-shifts.ll
new file mode 100644
index 0000000000000..86fef51872b19
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/ctlz-cttz-shifts.ll
@@ -0,0 +1,234 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -passes=instcombine -S < %s | FileCheck %s
+
+declare i32 @llvm.ctlz.i32(i32, i1)
+declare i32 @llvm.cttz.i32(i32, i1)
+declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1)
+declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>, i1)
+
+define i32 @lshr_ctlz_true(i32) {
+; CHECK-LABEL: define i32 @lshr_ctlz_true(
+; CHECK-SAME: i32 [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[CTLZ:%.*]] = add i32 [[TMP0]], 9
+; CHECK-NEXT:    ret i32 [[CTLZ]]
+;
+  %lshr = lshr i32 8387584, %0
+  %ctlz = call i32 @llvm.ctlz.i32(i32 %lshr, i1 true)
+  ret i32 %ctlz
+}
+
+define i32 @shl_nuw_ctlz_true(i32) {
+; CHECK-LABEL: define i32 @shl_nuw_ctlz_true(
+; CHECK-SAME: i32 [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[CTLZ:%.*]] = sub i32 9, [[TMP0]]
+; CHECK-NEXT:    ret i32 [[CTLZ]]
+;
+  %shl = shl nuw i32 8387584, %0
+  %ctlz = call i32 @llvm.ctlz.i32(i32 %shl, i1 true)
+  ret i32 %ctlz
+}
+
+define i32 @shl_nuw_nsw_ctlz_true(i32) {
+; CHECK-LABEL: define i32 @shl_nuw_nsw_ctlz_true(
+; CHECK-SAME: i32 [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[CTLZ:%.*]] = sub i32 9, [[TMP0]]
+; CHECK-NEXT:    ret i32 [[CTLZ]]
+;
+  %shl = shl nuw nsw i32 8387584, %0
+  %ctlz = call i32 @llvm.ctlz.i32(i32 %shl, i1 true)
+  ret i32 %ctlz
+}
+
+define i32 @lshr_exact_cttz_true(i32) {
+; CHECK-LABEL: define i32 @lshr_exact_cttz_true(
+; CHECK-SAME: i32 [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[CTTZ:%.*]] = sub i32 10, [[TMP0]]
+; CHECK-NEXT:    ret i32 [[CTTZ]]
+;
+  %lshr = lshr exact i32 8387584, %0
+  %cttz = call i32 @llvm.cttz.i32(i32 %lshr, i1 true)
+  ret i32 %cttz
+}
+
+define i32 @shl_cttz_true(i32) {
+; CHECK-LABEL: define i32 @shl_cttz_true(
+; CHECK-SAME: i32 [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[CTTZ:%.*]] = add i32 [[TMP0]], 10
+; CHECK-NEXT:    ret i32 [[CTTZ]]
+;
+  %shl = shl i32 8387584, %0
+  %cttz = call i32 @llvm.cttz.i32(i32 %shl, i1 true)
+  ret i32 %cttz
+}
+
+define <2 x i32> @vec2_lshr_ctlz_true(<2 x i32>) {
+; CHECK-LABEL: define <2 x i32> @vec2_lshr_ctlz_true(
+; CHECK-SAME: <2 x i32> [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[CTLZ:%.*]] = add <2 x i32> [[TMP0]], <i32 9, i32 9>
+; CHECK-NEXT:    ret <2 x i32> [[CTLZ]]
+;
+  %div = lshr <2 x i32> <i32 8387584, i32 4276440>, %0
+  %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %div, i1 true)
+  ret <2 x i32> %ctlz
+}
+
+define <2 x i32> @vec2_shl_nuw_ctlz_true(<2 x i32>) {
+; CHECK-LABEL: define <2 x i32> @vec2_shl_nuw_ctlz_true(
+; CHECK-SAME: <2 x i32> [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[CTLZ:%.*]] = sub <2 x i32> <i32 9, i32 9>, [[TMP0]]
+; CHECK-NEXT:    ret <2 x i32> [[CTLZ]]
+;
+  %shl = shl nuw <2 x i32> <i32 8387584, i32 4276440>, %0
+  %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %shl, i1 true)
+  ret <2 x i32> %ctlz
+}
+
+define <2 x i32> @vec2_shl_nuw_nsw_ctlz_true(<2 x i32>) {
+; CHECK-LABEL: define <2 x i32> @vec2_shl_nuw_nsw_ctlz_true(
+; CHECK-SAME: <2 x i32> [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[CTLZ:%.*]] = sub <2 x i32> <i32 9, i32 9>, [[TMP0]]
+; CHECK-NEXT:    ret <2 x i32> [[CTLZ]]
+;
+  %shl = shl nuw nsw <2 x i32> <i32 8387584, i32 4276440>, %0
+  %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %shl, i1 true)
+  ret <2 x i32> %ctlz
+}
+
+define <2 x i32> @vec2_lshr_exact_cttz_true(<2 x i32>) {
+; CHECK-LABEL: define <2 x i32> @vec2_lshr_exact_cttz_true(
+; CHECK-SAME: <2 x i32> [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[CTTZ:%.*]] = sub <2 x i32> <i32 10, i32 3>, [[TMP0]]
+; CHECK-NEXT:    ret <2 x i32> [[CTTZ]]
+;
+  %lshr = lshr exact <2 x i32> <i32 8387584, i32 4276440>, %0
+  %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %lshr, i1 true)
+  ret <2 x i32> %cttz
+}
+
+define <2 x i32> @vec2_shl_cttz_true(<2 x i32>) {
+; CHECK-LABEL: define <2 x i32> @vec2_shl_cttz_true(
+; CHECK-SAME: <2 x i32> [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[CTTZ:%.*]] = add <2 x i32> [[TMP0]], <i32 10, i32 3>
+; CHECK-NEXT:    ret <2 x i32> [[CTTZ]]
+;
+  %shl = shl <2 x i32> <i32 8387584, i32 4276440>, %0
+  %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %shl, i1 true)
+  ret <2 x i32> %cttz
+}
+
+; negative tests:
+
+define <2 x i32> @vec2_shl_nsw_ctlz_true_neg(<2 x i32>) {
+; CHECK-LABEL: define <2 x i32> @vec2_shl_nsw_ctlz_true_neg(
+; CHECK-SAME: <2 x i32> [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[SHL:%.*]] = shl nsw <2 x i32> <i32 8387584, i32 4276440>, [[TMP0]]
+; CHECK-NEXT:    [[CTLZ:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[SHL]], i1 true), !range [[RNG0:![0-9]+]]
+; CHECK-NEXT:    ret <2 x i32> [[CTLZ]]
+;
+  %shl = shl nsw <2 x i32> <i32 8387584, i32 4276440>, %0
+  %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %shl, i1 true)
+  ret <2 x i32> %ctlz
+}
+
+define <2 x i32> @vec2_lshr_ctlz_false_neg(<2 x i32>) {
+; CHECK-LABEL: define <2 x i32> @vec2_lshr_ctlz_false_neg(
+; CHECK-SAME: <2 x i32> [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[DIV:%.*]] = lshr <2 x i32> <i32 8387584, i32 4276440>, [[TMP0]]
+; CHECK-NEXT:    [[CTLZ:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[DIV]], i1 false), !range [[RNG1:![0-9]+]]
+; CHECK-NEXT:    ret <2 x i32> [[CTLZ]]
+;
+  %div = lshr <2 x i32> <i32 8387584, i32 4276440>, %0
+  %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %div, i1 false)
+  ret <2 x i32> %ctlz
+}
+
+define <2 x i32> @vec2_shl_ctlz_false_neg(<2 x i32>) {
+; CHECK-LABEL: define <2 x i32> @vec2_shl_ctlz_false_neg(
+; CHECK-SAME: <2 x i32> [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i32> <i32 8387584, i32 4276440>, [[TMP0]]
+; CHECK-NEXT:    [[CTLZ:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[SHL]], i1 false), !range [[RNG2:![0-9]+]]
+; CHECK-NEXT:    ret <2 x i32> [[CTLZ]]
+;
+  %shl = shl <2 x i32> <i32 8387584, i32 4276440>, %0
+  %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %shl, i1 false)
+  ret <2 x i32> %ctlz
+}
+
+define <2 x i32> @vec2_lshr_cttz_false_neg(<2 x i32>) {
+; CHECK-LABEL: define <2 x i32> @vec2_lshr_cttz_false_neg(
+; CHECK-SAME: <2 x i32> [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[LSHR:%.*]] = lshr <2 x i32> <i32 8387584, i32 4276440>, [[TMP0]]
+; CHECK-NEXT:    [[CTTZ:%.*]] = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[LSHR]], i1 false), !range [[RNG2]]
+; CHECK-NEXT:    ret <2 x i32> [[CTTZ]]
+;
+  %lshr = lshr <2 x i32> <i32 8387584, i32 4276440>, %0
+  %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %lshr, i1 false)
+  ret <2 x i32> %cttz
+}
+
+define <2 x i32> @vec2_shl_cttz_false_neg(<2 x i32>) {
+; CHECK-LABEL: define <2 x i32> @vec2_shl_cttz_false_neg(
+; CHECK-SAME: <2 x i32> [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i32> <i32 8387584, i32 4276440>, [[TMP0]]
+; CHECK-NEXT:    [[CTTZ:%.*]] = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[SHL]], i1 false), !range [[RNG3:![0-9]+]]
+; CHECK-NEXT:    ret <2 x i32> [[CTTZ]]
+;
+  %shl = shl <2 x i32> <i32 8387584, i32 4276440>, %0
+  %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %shl, i1 false)
+  ret <2 x i32> %cttz
+}
+
+define i32 @lshr_ctlz_faslse_neg(i32) {
+; CHECK-LABEL: define i32 @lshr_ctlz_faslse_neg(
+; CHECK-SAME: i32 [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[LSHR:%.*]] = lshr i32 8387584, [[TMP0]]
+; CHECK-NEXT:    [[CTLZ:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LSHR]], i1 false), !range [[RNG1]]
+; CHECK-NEXT:    ret i32 [[CTLZ]]
+;
+  %lshr = lshr i32 8387584, %0
+  %ctlz = call i32 @llvm.ctlz.i32(i32 %lshr, i1 false)
+  ret i32 %ctlz
+}
+
+define i32 @shl_ctlz_false_neg(i32) {
+; CHECK-LABEL: define i32 @shl_ctlz_false_neg(
+; CHECK-SAME: i32 [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[SHL:%.*]] = shl i32 8387584, [[TMP0]]
+; CHECK-NEXT:    [[CTLZ:%.*]] = call i32 @llvm.ctlz.i32(i32 [[SHL]], i1 false), !range [[RNG2]]
+; CHECK-NEXT:    ret i32 [[CTLZ]]
+;
+  %shl = shl i32 8387584, %0
+  %ctlz = call i32 @llvm.ctlz.i32(i32 %shl, i1 false)
+  ret i32 %ctlz
+}
+
+define i32 @lshr_cttz_false_neg(i32) {
+; CHECK-LABEL: define i32 @lshr_cttz_false_neg(
+; CHECK-SAME: i32 [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[LSHR:%.*]] = lshr i32 8387584, [[TMP0]]
+; CHECK-NEXT:    [[CTTZ:%.*]] = call i32 @llvm.cttz.i32(i32 [[LSHR]], i1 false), !range [[RNG2]]
+; CHECK-NEXT:    ret i32 [[CTTZ]]
+;
+  %lshr = lshr i32 8387584, %0
+  %cttz = call i32 @llvm.cttz.i32(i32 %lshr, i1 false)
+  ret i32 %cttz
+}
+
+define i32 @shl_cttz_false_neg(i32) {
+; CHECK-LABEL: define i32 @shl_cttz_false_neg(
+; CHECK-SAME: i32 [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[SHL:%.*]] = shl i32 8387584, [[TMP0]]
+; CHECK-NEXT:    [[CTTZ:%.*]] = call i32 @llvm.cttz.i32(i32 [[SHL]], i1 false), !range [[RNG4:![0-9]+]]
+; CHECK-NEXT:    ret i32 [[CTTZ]]
+;
+  %shl = shl i32 8387584, %0
+  %cttz = call i32 @llvm.cttz.i32(i32 %shl, i1 false)
+  ret i32 %cttz
+}
+;.
+; CHECK: [[RNG0]] = !{i32 1, i32 33}
+; CHECK: [[RNG1]] = !{i32 9, i32 33}
+; CHECK: [[RNG2]] = !{i32 0, i32 33}
+; CHECK: [[RNG3]] = !{i32 3, i32 33}
+; CHECK: [[RNG4]] = !{i32 10, i32 33}
+;.


        


More information about the llvm-commits mailing list