[llvm] Missing opt with ctlz and shifts of power of 2 constants (#41333) (PR #74175)

Sizov Nikita via llvm-commits llvm-commits at lists.llvm.org
Sat Dec 2 17:28:01 PST 2023


https://github.com/snikitav updated https://github.com/llvm/llvm-project/pull/74175

>From 73adb5bf4c8bee7aea6b387b5dbef984d5d58bf8 Mon Sep 17 00:00:00 2001
From: Sizov Nikita <s.nikita.v at gmail.com>
Date: Sat, 2 Dec 2023 04:53:32 +0300
Subject: [PATCH] Missing opt with ctlz and shifts of power of 2 constants
 (#41333)

---
 .../InstCombine/InstCombineCalls.cpp          |  33 +++
 .../InstCombine/ctlz-cttz-shifts.ll           | 243 ++++++++++++++++++
 2 files changed, 276 insertions(+)
 create mode 100644 llvm/test/Transforms/InstCombine/ctlz-cttz-shifts.ll

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index a991f0906052a..f6322b3f4f415 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -514,6 +514,8 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {
     return IC.replaceInstUsesWith(II, ConstantInt::getNullValue(II.getType()));
   }
 
+  Constant *C;
+
   if (IsTZ) {
     // cttz(-x) -> cttz(x)
     if (match(Op0, m_Neg(m_Value(X))))
@@ -549,6 +551,37 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {
 
     if (match(Op0, m_Intrinsic<Intrinsic::abs>(m_Value(X))))
       return IC.replaceOperand(II, 0, X);
+
+    // cttz(shl(%const, %val), 1) --> add(cttz(%const, 1), %val)
+    if (match(Op0, m_Shl(m_Constant(C), m_Value(X))) && match(Op1, m_One())) {
+      Value *ConstCttz =
+          IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, C, Op1);
+      return BinaryOperator::CreateAdd(ConstCttz, X);
+    }
+
+    // cttz(lshr exact (%const, %val), 0) --> sub(cttz(%const, 0), %val)
+    if (match(Op0, m_Exact(m_LShr(m_Constant(C), m_Value(X))))) {
+      Value *ConstCttz =
+          IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, C, Op1);
+      return BinaryOperator::CreateSub(ConstCttz, X);
+    }
+  } else {
+    // ctlz(lshr(%const, %val), 1) --> add(ctlz(%const, 1), %val)
+    if (match(Op0, m_LShr(m_Constant(C), m_Value(X))) && match(Op1, m_One())) {
+      Value *ConstCtlz =
+          IC.Builder.CreateBinaryIntrinsic(Intrinsic::ctlz, C, Op1);
+      return BinaryOperator::CreateAdd(ConstCtlz, X);
+    }
+
+    // ctlz(shl nuw (%const, %val), 0)     |
+    // ctlz(shl nsw (%const, %val), 0)     |--> sub(ctlz(%const, 0), %val)
+    // ctlz(shl nuw nsw (%const, %val), 0) |
+    if (match(Op0, m_NUWShl(m_Constant(C), m_Value(X))) ||
+        match(Op0, m_NSWShl(m_Constant(C), m_Value(X)))) {
+      Value *ConstCtlz =
+          IC.Builder.CreateBinaryIntrinsic(Intrinsic::ctlz, C, Op1);
+      return BinaryOperator::CreateSub(ConstCtlz, X);
+    }
   }
 
   KnownBits Known = IC.computeKnownBits(Op0, 0, &II);
diff --git a/llvm/test/Transforms/InstCombine/ctlz-cttz-shifts.ll b/llvm/test/Transforms/InstCombine/ctlz-cttz-shifts.ll
new file mode 100644
index 0000000000000..5abe5ab6c9310
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/ctlz-cttz-shifts.ll
@@ -0,0 +1,243 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -passes=instcombine -S < %s | FileCheck %s
+
+declare i32 @llvm.ctlz.i32(i32, i1)
+declare i32 @llvm.cttz.i32(i32, i1)
+declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1)
+declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>, i1)
+
+define i32 @lshr_ctlz_true(i32) {
+; CHECK-LABEL: define i32 @lshr_ctlz_true(
+; CHECK-SAME: i32 [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[CTLZ:%.*]] = add i32 [[TMP0]], 9
+; CHECK-NEXT:    ret i32 [[CTLZ]]
+;
+  %lshr = lshr i32 8387584, %0 ; 8387584 = 0x7FFC00: 9 leading zeros in i32
+  %ctlz = call i32 @llvm.ctlz.i32(i32 %lshr, i1 true) ; i1 true: a zero input is poison, so the fold to %0 + 9 is expected
+  ret i32 %ctlz
+}
+
+define i32 @shl_nuw_ctlz_false(i32) {
+; CHECK-LABEL: define i32 @shl_nuw_ctlz_false(
+; CHECK-SAME: i32 [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[CTLZ:%.*]] = sub i32 9, [[TMP0]]
+; CHECK-NEXT:    ret i32 [[CTLZ]]
+;
+  %shl = shl nuw i32 8387584, %0 ; nuw: no set bit is shifted out; 0x7FFC00 has 9 leading zeros
+  %ctlz = call i32 @llvm.ctlz.i32(i32 %shl, i1 false) ; expected to fold to 9 - %0
+  ret i32 %ctlz
+}
+
+define i32 @shl_nsw_ctlz_false(i32) {
+; CHECK-LABEL: define i32 @shl_nsw_ctlz_false(
+; CHECK-SAME: i32 [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[CTLZ:%.*]] = sub i32 9, [[TMP0]]
+; CHECK-NEXT:    ret i32 [[CTLZ]]
+;
+  %shl = shl nsw i32 8387584, %0 ; nsw on this positive constant (0x7FFC00): shifting a set bit out or into the sign bit is poison
+  %ctlz = call i32 @llvm.ctlz.i32(i32 %shl, i1 false) ; expected to fold to 9 - %0
+  ret i32 %ctlz
+}
+
+define i32 @shl_nuw_nsw_ctlz_false(i32) {
+; CHECK-LABEL: define i32 @shl_nuw_nsw_ctlz_false(
+; CHECK-SAME: i32 [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[CTLZ:%.*]] = sub i32 9, [[TMP0]]
+; CHECK-NEXT:    ret i32 [[CTLZ]]
+;
+  %shl = shl nuw nsw i32 8387584, %0 ; both wrap flags present; 0x7FFC00 has 9 leading zeros
+  %ctlz = call i32 @llvm.ctlz.i32(i32 %shl, i1 false) ; expected to fold to 9 - %0, same as the single-flag variants
+  ret i32 %ctlz
+}
+
+define i32 @lshr_exact_cttz_false(i32) {
+; CHECK-LABEL: define i32 @lshr_exact_cttz_false(
+; CHECK-SAME: i32 [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[CTTZ:%.*]] = sub i32 10, [[TMP0]]
+; CHECK-NEXT:    ret i32 [[CTTZ]]
+;
+  %lshr = lshr exact i32 8387584, %0 ; exact: no set bit is shifted out; 0x7FFC00 has 10 trailing zeros
+  %cttz = call i32 @llvm.cttz.i32(i32 %lshr, i1 false) ; expected to fold to 10 - %0
+  ret i32 %cttz
+}
+
+define i32 @shl_cttz_true(i32) {
+; CHECK-LABEL: define i32 @shl_cttz_true(
+; CHECK-SAME: i32 [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[CTTZ:%.*]] = add i32 [[TMP0]], 10
+; CHECK-NEXT:    ret i32 [[CTTZ]]
+;
+  %shl = shl i32 8387584, %0 ; 8387584 = 0x7FFC00: 10 trailing zeros
+  %cttz = call i32 @llvm.cttz.i32(i32 %shl, i1 true) ; i1 true: a zero input is poison, so the fold to %0 + 10 is expected
+  ret i32 %cttz
+}
+
+define <2 x i32> @vec2_lshr_ctlz_true(<2 x i32>) {
+; CHECK-LABEL: define <2 x i32> @vec2_lshr_ctlz_true(
+; CHECK-SAME: <2 x i32> [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[CTLZ:%.*]] = add <2 x i32> [[TMP0]], <i32 9, i32 9>
+; CHECK-NEXT:    ret <2 x i32> [[CTLZ]]
+;
+  %div = lshr <2 x i32> <i32 8387584, i32 4276440>, %0 ; lanes are 0x7FFC00 and 0x4140D8: 9 leading zeros each
+  %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %div, i1 true) ; i1 true: a zero lane is poison, so the per-lane fold to %0 + 9 is expected
+  ret <2 x i32> %ctlz
+}
+
+define <2 x i32> @vec2_shl_nuw_ctlz_false(<2 x i32>) {
+; CHECK-LABEL: define <2 x i32> @vec2_shl_nuw_ctlz_false(
+; CHECK-SAME: <2 x i32> [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[CTLZ:%.*]] = sub <2 x i32> <i32 9, i32 9>, [[TMP0]]
+; CHECK-NEXT:    ret <2 x i32> [[CTLZ]]
+;
+  %shl = shl nuw <2 x i32> <i32 8387584, i32 4276440>, %0 ; nuw: no set bit is shifted out; both lanes have 9 leading zeros
+  %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %shl, i1 false) ; expected to fold per lane to 9 - %0
+  ret <2 x i32> %ctlz
+}
+
+define <2 x i32> @vec2_shl_nsw_ctlz_false(<2 x i32>) {
+; CHECK-LABEL: define <2 x i32> @vec2_shl_nsw_ctlz_false(
+; CHECK-SAME: <2 x i32> [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[CTLZ:%.*]] = sub <2 x i32> <i32 9, i32 9>, [[TMP0]]
+; CHECK-NEXT:    ret <2 x i32> [[CTLZ]]
+;
+  %shl = shl nsw <2 x i32> <i32 8387584, i32 4276440>, %0 ; nsw on positive lanes: shifting a set bit out or into the sign bit is poison
+  %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %shl, i1 false) ; expected to fold per lane to 9 - %0
+  ret <2 x i32> %ctlz
+}
+
+define <2 x i32> @vec2_shl_nuw_nsw_ctlz_false(<2 x i32>) {
+; CHECK-LABEL: define <2 x i32> @vec2_shl_nuw_nsw_ctlz_false(
+; CHECK-SAME: <2 x i32> [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[CTLZ:%.*]] = sub <2 x i32> <i32 9, i32 9>, [[TMP0]]
+; CHECK-NEXT:    ret <2 x i32> [[CTLZ]]
+;
+  %shl = shl nuw nsw <2 x i32> <i32 8387584, i32 4276440>, %0 ; both wrap flags present; both lanes have 9 leading zeros
+  %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %shl, i1 false) ; expected to fold per lane to 9 - %0
+  ret <2 x i32> %ctlz
+}
+
+define <2 x i32> @vec2_lshr_exact_cttz_false(<2 x i32>) {
+; CHECK-LABEL: define <2 x i32> @vec2_lshr_exact_cttz_false(
+; CHECK-SAME: <2 x i32> [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[CTTZ:%.*]] = sub <2 x i32> <i32 10, i32 3>, [[TMP0]]
+; CHECK-NEXT:    ret <2 x i32> [[CTTZ]]
+;
+  %lshr = lshr exact <2 x i32> <i32 8387584, i32 4276440>, %0 ; exact: no set bit is shifted out; lanes have 10 and 3 trailing zeros
+  %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %lshr, i1 false) ; expected to fold per lane to <10, 3> - %0
+  ret <2 x i32> %cttz
+}
+
+define <2 x i32> @vec2_shl_cttz_true(<2 x i32>) {
+; CHECK-LABEL: define <2 x i32> @vec2_shl_cttz_true(
+; CHECK-SAME: <2 x i32> [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[CTTZ:%.*]] = add <2 x i32> [[TMP0]], <i32 10, i32 3>
+; CHECK-NEXT:    ret <2 x i32> [[CTTZ]]
+;
+  %shl = shl <2 x i32> <i32 8387584, i32 4276440>, %0 ; lanes are 0x7FFC00 and 0x4140D8: 10 and 3 trailing zeros
+  %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %shl, i1 true) ; i1 true: a zero lane is poison, so the per-lane fold to %0 + <10, 3> is expected
+  ret <2 x i32> %cttz
+}
+
+; negative tests:
+
+define <2 x i32> @vec2_lshr_ctlz_false_neg(<2 x i32>) {
+; CHECK-LABEL: define <2 x i32> @vec2_lshr_ctlz_false_neg(
+; CHECK-SAME: <2 x i32> [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[DIV:%.*]] = lshr <2 x i32> <i32 8387584, i32 4276440>, [[TMP0]]
+; CHECK-NEXT:    [[CTLZ:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[DIV]], i1 false), !range [[RNG0:![0-9]+]]
+; CHECK-NEXT:    ret <2 x i32> [[CTLZ]]
+;
+  %div = lshr <2 x i32> <i32 8387584, i32 4276440>, %0 ; a plain lshr can shift every set bit out, leaving a zero lane
+  %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %div, i1 false) ; i1 false: ctlz(0) is 32, not %0 + 9, so the call must stay
+  ret <2 x i32> %ctlz
+}
+
+define <2 x i32> @vec2_shl_ctlz_false_neg(<2 x i32>) {
+; CHECK-LABEL: define <2 x i32> @vec2_shl_ctlz_false_neg(
+; CHECK-SAME: <2 x i32> [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i32> <i32 8387584, i32 4276440>, [[TMP0]]
+; CHECK-NEXT:    [[CTLZ:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[SHL]], i1 false), !range [[RNG1:![0-9]+]]
+; CHECK-NEXT:    ret <2 x i32> [[CTLZ]]
+;
+  %shl = shl <2 x i32> <i32 8387584, i32 4276440>, %0 ; no nuw/nsw: set bits may be shifted out of the top
+  %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %shl, i1 false) ; so ctlz is not simply 9 - %0 and the call must stay
+  ret <2 x i32> %ctlz
+}
+
+define <2 x i32> @vec2_lshr_cttz_false_neg(<2 x i32>) {
+; CHECK-LABEL: define <2 x i32> @vec2_lshr_cttz_false_neg(
+; CHECK-SAME: <2 x i32> [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[LSHR:%.*]] = lshr <2 x i32> <i32 8387584, i32 4276440>, [[TMP0]]
+; CHECK-NEXT:    [[CTTZ:%.*]] = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[LSHR]], i1 false), !range [[RNG1]]
+; CHECK-NEXT:    ret <2 x i32> [[CTTZ]]
+;
+  %lshr = lshr <2 x i32> <i32 8387584, i32 4276440>, %0 ; no exact: set bits may be shifted out of the bottom
+  %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %lshr, i1 false) ; so cttz is not simply <10, 3> - %0 and the call must stay
+  ret <2 x i32> %cttz
+}
+
+define <2 x i32> @vec2_shl_cttz_false_neg(<2 x i32>) {
+; CHECK-LABEL: define <2 x i32> @vec2_shl_cttz_false_neg(
+; CHECK-SAME: <2 x i32> [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i32> <i32 8387584, i32 4276440>, [[TMP0]]
+; CHECK-NEXT:    [[CTTZ:%.*]] = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[SHL]], i1 false), !range [[RNG2:![0-9]+]]
+; CHECK-NEXT:    ret <2 x i32> [[CTTZ]]
+;
+  %shl = shl <2 x i32> <i32 8387584, i32 4276440>, %0 ; a plain shl can shift every set bit out, leaving a zero lane
+  %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %shl, i1 false) ; i1 false: cttz(0) is 32, not %0 + <10, 3>, so the call must stay
+  ret <2 x i32> %cttz
+}
+
+define i32 @lshr_ctlz_faslse_neg(i32) {
+; CHECK-LABEL: define i32 @lshr_ctlz_faslse_neg(
+; CHECK-SAME: i32 [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[LSHR:%.*]] = lshr i32 8387584, [[TMP0]]
+; CHECK-NEXT:    [[CTLZ:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LSHR]], i1 false), !range [[RNG0]]
+; CHECK-NEXT:    ret i32 [[CTLZ]]
+;
+  %lshr = lshr i32 8387584, %0 ; a plain lshr can shift every set bit of 0x7FFC00 out, giving 0
+  %ctlz = call i32 @llvm.ctlz.i32(i32 %lshr, i1 false) ; i1 false: ctlz(0) is 32, not %0 + 9, so the call must stay
+  ret i32 %ctlz
+}
+
+define i32 @shl_ctlz_false_neg(i32) {
+; CHECK-LABEL: define i32 @shl_ctlz_false_neg(
+; CHECK-SAME: i32 [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[SHL:%.*]] = shl i32 8387584, [[TMP0]]
+; CHECK-NEXT:    [[CTLZ:%.*]] = call i32 @llvm.ctlz.i32(i32 [[SHL]], i1 false), !range [[RNG1]]
+; CHECK-NEXT:    ret i32 [[CTLZ]]
+;
+  %shl = shl i32 8387584, %0 ; no nuw/nsw: set bits of 0x7FFC00 may be shifted out of the top
+  %ctlz = call i32 @llvm.ctlz.i32(i32 %shl, i1 false) ; so ctlz is not simply 9 - %0 and the call must stay
+  ret i32 %ctlz
+}
+
+define i32 @lshr_cttz_false_neg(i32) {
+; CHECK-LABEL: define i32 @lshr_cttz_false_neg(
+; CHECK-SAME: i32 [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[LSHR:%.*]] = lshr i32 8387584, [[TMP0]]
+; CHECK-NEXT:    [[CTTZ:%.*]] = call i32 @llvm.cttz.i32(i32 [[LSHR]], i1 false), !range [[RNG1]]
+; CHECK-NEXT:    ret i32 [[CTTZ]]
+;
+  %lshr = lshr i32 8387584, %0 ; no exact: set bits of 0x7FFC00 may be shifted out of the bottom
+  %cttz = call i32 @llvm.cttz.i32(i32 %lshr, i1 false) ; so cttz is not simply 10 - %0 and the call must stay
+  ret i32 %cttz
+}
+
+define i32 @shl_cttz_false_neg(i32) {
+; CHECK-LABEL: define i32 @shl_cttz_false_neg(
+; CHECK-SAME: i32 [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[SHL:%.*]] = shl i32 8387584, [[TMP0]]
+; CHECK-NEXT:    [[CTTZ:%.*]] = call i32 @llvm.cttz.i32(i32 [[SHL]], i1 false), !range [[RNG3:![0-9]+]]
+; CHECK-NEXT:    ret i32 [[CTTZ]]
+;
+  %shl = shl i32 8387584, %0 ; a plain shl can shift every set bit of 0x7FFC00 out, giving 0
+  %cttz = call i32 @llvm.cttz.i32(i32 %shl, i1 false) ; i1 false: cttz(0) is 32, not %0 + 10, so the call must stay
+  ret i32 %cttz
+}
+;.
+; CHECK: [[RNG0]] = !{i32 9, i32 33}
+; CHECK: [[RNG1]] = !{i32 0, i32 33}
+; CHECK: [[RNG2]] = !{i32 3, i32 33}
+; CHECK: [[RNG3]] = !{i32 10, i32 33}
+;.



More information about the llvm-commits mailing list