[llvm] Missing opt with ctlz and shifts of power of 2 constants (#41333) (PR #74175)
Sizov Nikita via llvm-commits
llvm-commits at lists.llvm.org
Sat Dec 2 14:29:16 PST 2023
https://github.com/snikitav updated https://github.com/llvm/llvm-project/pull/74175
>From 3d48115fe7309f6d65bedc27fd89a9c7a4f49a15 Mon Sep 17 00:00:00 2001
From: Sizov Nikita <s.nikita.v at gmail.com>
Date: Sat, 2 Dec 2023 04:53:32 +0300
Subject: [PATCH] Missing opt with ctlz and shifts of power of 2 constants
(#41333)
---
.../InstCombine/InstCombineCalls.cpp | 33 +++
.../InstCombine/ctlz-cttz-shifts.ll | 243 ++++++++++++++++++
2 files changed, 276 insertions(+)
create mode 100644 llvm/test/Transforms/InstCombine/ctlz-cttz-shifts.ll
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index a991f0906052a..f6322b3f4f415 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -514,6 +514,8 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {
return IC.replaceInstUsesWith(II, ConstantInt::getNullValue(II.getType()));
}
+ Constant *C;
+
if (IsTZ) {
// cttz(-x) -> cttz(x)
if (match(Op0, m_Neg(m_Value(X))))
@@ -549,6 +551,37 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {
if (match(Op0, m_Intrinsic<Intrinsic::abs>(m_Value(X))))
return IC.replaceOperand(II, 0, X);
+
+ // cttz(shl(%const, %val), 1) --> add(cttz(%const, 1), %val)
+ if (match(Op0, m_Shl(m_Constant(C), m_Value(X))) && match(Op1, m_One())) {
+ Value *ConstCttz =
+ IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, C, Op1);
+ return BinaryOperator::CreateAdd(ConstCttz, X);
+ }
+
+ // cttz(lshr exact (%const, %val), 0) --> sub(cttz(%const, 0), %val)
+ if (match(Op0, m_Exact(m_LShr(m_Constant(C), m_Value(X))))) {
+ Value *ConstCttz =
+ IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, C, Op1);
+ return BinaryOperator::CreateSub(ConstCttz, X);
+ }
+ } else {
+ // ctlz(lshr(%const, %val), 1) --> add(ctlz(%const, 1), %val)
+ if (match(Op0, m_LShr(m_Constant(C), m_Value(X))) && match(Op1, m_One())) {
+ Value *ConstCtlz =
+ IC.Builder.CreateBinaryIntrinsic(Intrinsic::ctlz, C, Op1);
+ return BinaryOperator::CreateAdd(ConstCtlz, X);
+ }
+
+ // ctlz(shl nuw (%const, %val), 0) |
+ // ctlz(shl nsw (%const, %val), 0) |--> sub(ctlz(%const, 0), %val)
+ // ctlz(shl nuw nsw (%const, %val), 0) |
+ if (match(Op0, m_NUWShl(m_Constant(C), m_Value(X))) ||
+ match(Op0, m_NSWShl(m_Constant(C), m_Value(X)))) {
+ Value *ConstCtlz =
+ IC.Builder.CreateBinaryIntrinsic(Intrinsic::ctlz, C, Op1);
+ return BinaryOperator::CreateSub(ConstCtlz, X);
+ }
}
KnownBits Known = IC.computeKnownBits(Op0, 0, &II);
diff --git a/llvm/test/Transforms/InstCombine/ctlz-cttz-shifts.ll b/llvm/test/Transforms/InstCombine/ctlz-cttz-shifts.ll
new file mode 100644
index 0000000000000..5abe5ab6c9310
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/ctlz-cttz-shifts.ll
@@ -0,0 +1,243 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -passes=instcombine -S < %s | FileCheck %s
+
+declare i32 @llvm.ctlz.i32(i32, i1)
+declare i32 @llvm.cttz.i32(i32, i1)
+declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1)
+declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>, i1)
+
+define i32 @lshr_ctlz_true(i32) {
+; CHECK-LABEL: define i32 @lshr_ctlz_true(
+; CHECK-SAME: i32 [[TMP0:%.*]]) {
+; CHECK-NEXT: [[CTLZ:%.*]] = add i32 [[TMP0]], 9
+; CHECK-NEXT: ret i32 [[CTLZ]]
+;
+ %lshr = lshr i32 8387584, %0
+ %ctlz = call i32 @llvm.ctlz.i32(i32 %lshr, i1 true)
+ ret i32 %ctlz
+}
+
+define i32 @shl_nuw_ctlz_false(i32) {
+; CHECK-LABEL: define i32 @shl_nuw_ctlz_false(
+; CHECK-SAME: i32 [[TMP0:%.*]]) {
+; CHECK-NEXT: [[CTLZ:%.*]] = sub i32 9, [[TMP0]]
+; CHECK-NEXT: ret i32 [[CTLZ]]
+;
+ %shl = shl nuw i32 8387584, %0
+ %ctlz = call i32 @llvm.ctlz.i32(i32 %shl, i1 false)
+ ret i32 %ctlz
+}
+
+define i32 @shl_nsw_ctlz_false(i32) {
+; CHECK-LABEL: define i32 @shl_nsw_ctlz_false(
+; CHECK-SAME: i32 [[TMP0:%.*]]) {
+; CHECK-NEXT: [[CTLZ:%.*]] = sub i32 9, [[TMP0]]
+; CHECK-NEXT: ret i32 [[CTLZ]]
+;
+ %shl = shl nsw i32 8387584, %0
+ %ctlz = call i32 @llvm.ctlz.i32(i32 %shl, i1 false)
+ ret i32 %ctlz
+}
+
+define i32 @shl_nuw_nsw_ctlz_false(i32) {
+; CHECK-LABEL: define i32 @shl_nuw_nsw_ctlz_false(
+; CHECK-SAME: i32 [[TMP0:%.*]]) {
+; CHECK-NEXT: [[CTLZ:%.*]] = sub i32 9, [[TMP0]]
+; CHECK-NEXT: ret i32 [[CTLZ]]
+;
+ %shl = shl nuw nsw i32 8387584, %0
+ %ctlz = call i32 @llvm.ctlz.i32(i32 %shl, i1 false)
+ ret i32 %ctlz
+}
+
+define i32 @lshr_exact_cttz_false(i32) {
+; CHECK-LABEL: define i32 @lshr_exact_cttz_false(
+; CHECK-SAME: i32 [[TMP0:%.*]]) {
+; CHECK-NEXT: [[CTTZ:%.*]] = sub i32 10, [[TMP0]]
+; CHECK-NEXT: ret i32 [[CTTZ]]
+;
+ %lshr = lshr exact i32 8387584, %0
+ %cttz = call i32 @llvm.cttz.i32(i32 %lshr, i1 false)
+ ret i32 %cttz
+}
+
+define i32 @shl_cttz_true(i32) {
+; CHECK-LABEL: define i32 @shl_cttz_true(
+; CHECK-SAME: i32 [[TMP0:%.*]]) {
+; CHECK-NEXT: [[CTTZ:%.*]] = add i32 [[TMP0]], 10
+; CHECK-NEXT: ret i32 [[CTTZ]]
+;
+ %shl = shl i32 8387584, %0
+ %cttz = call i32 @llvm.cttz.i32(i32 %shl, i1 true)
+ ret i32 %cttz
+}
+
+define <2 x i32> @vec2_lshr_ctlz_true(<2 x i32>) {
+; CHECK-LABEL: define <2 x i32> @vec2_lshr_ctlz_true(
+; CHECK-SAME: <2 x i32> [[TMP0:%.*]]) {
+; CHECK-NEXT: [[CTLZ:%.*]] = add <2 x i32> [[TMP0]], <i32 9, i32 9>
+; CHECK-NEXT: ret <2 x i32> [[CTLZ]]
+;
+ %div = lshr <2 x i32> <i32 8387584, i32 4276440>, %0
+ %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %div, i1 true)
+ ret <2 x i32> %ctlz
+}
+
+define <2 x i32> @vec2_shl_nuw_ctlz_false(<2 x i32>) {
+; CHECK-LABEL: define <2 x i32> @vec2_shl_nuw_ctlz_false(
+; CHECK-SAME: <2 x i32> [[TMP0:%.*]]) {
+; CHECK-NEXT: [[CTLZ:%.*]] = sub <2 x i32> <i32 9, i32 9>, [[TMP0]]
+; CHECK-NEXT: ret <2 x i32> [[CTLZ]]
+;
+ %shl = shl nuw <2 x i32> <i32 8387584, i32 4276440>, %0
+ %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %shl, i1 false)
+ ret <2 x i32> %ctlz
+}
+
+define <2 x i32> @vec2_shl_nsw_ctlz_false(<2 x i32>) {
+; CHECK-LABEL: define <2 x i32> @vec2_shl_nsw_ctlz_false(
+; CHECK-SAME: <2 x i32> [[TMP0:%.*]]) {
+; CHECK-NEXT: [[CTLZ:%.*]] = sub <2 x i32> <i32 9, i32 9>, [[TMP0]]
+; CHECK-NEXT: ret <2 x i32> [[CTLZ]]
+;
+ %shl = shl nsw <2 x i32> <i32 8387584, i32 4276440>, %0
+ %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %shl, i1 false)
+ ret <2 x i32> %ctlz
+}
+
+define <2 x i32> @vec2_shl_nuw_nsw_ctlz_false(<2 x i32>) {
+; CHECK-LABEL: define <2 x i32> @vec2_shl_nuw_nsw_ctlz_false(
+; CHECK-SAME: <2 x i32> [[TMP0:%.*]]) {
+; CHECK-NEXT: [[CTLZ:%.*]] = sub <2 x i32> <i32 9, i32 9>, [[TMP0]]
+; CHECK-NEXT: ret <2 x i32> [[CTLZ]]
+;
+ %shl = shl nuw nsw <2 x i32> <i32 8387584, i32 4276440>, %0
+ %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %shl, i1 false)
+ ret <2 x i32> %ctlz
+}
+
+define <2 x i32> @vec2_lshr_exact_cttz_false(<2 x i32>) {
+; CHECK-LABEL: define <2 x i32> @vec2_lshr_exact_cttz_false(
+; CHECK-SAME: <2 x i32> [[TMP0:%.*]]) {
+; CHECK-NEXT: [[CTTZ:%.*]] = sub <2 x i32> <i32 10, i32 3>, [[TMP0]]
+; CHECK-NEXT: ret <2 x i32> [[CTTZ]]
+;
+ %lshr = lshr exact <2 x i32> <i32 8387584, i32 4276440>, %0
+ %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %lshr, i1 false)
+ ret <2 x i32> %cttz
+}
+
+define <2 x i32> @vec2_shl_cttz_true(<2 x i32>) {
+; CHECK-LABEL: define <2 x i32> @vec2_shl_cttz_true(
+; CHECK-SAME: <2 x i32> [[TMP0:%.*]]) {
+; CHECK-NEXT: [[CTTZ:%.*]] = add <2 x i32> [[TMP0]], <i32 10, i32 3>
+; CHECK-NEXT: ret <2 x i32> [[CTTZ]]
+;
+ %shl = shl <2 x i32> <i32 8387584, i32 4276440>, %0
+ %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %shl, i1 true)
+ ret <2 x i32> %cttz
+}
+
+; negative tests:
+
+define <2 x i32> @vec2_lshr_ctlz_false_neg(<2 x i32>) {
+; CHECK-LABEL: define <2 x i32> @vec2_lshr_ctlz_false_neg(
+; CHECK-SAME: <2 x i32> [[TMP0:%.*]]) {
+; CHECK-NEXT: [[DIV:%.*]] = lshr <2 x i32> <i32 8387584, i32 4276440>, [[TMP0]]
+; CHECK-NEXT: [[CTLZ:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[DIV]], i1 false), !range [[RNG0:![0-9]+]]
+; CHECK-NEXT: ret <2 x i32> [[CTLZ]]
+;
+ %div = lshr <2 x i32> <i32 8387584, i32 4276440>, %0
+ %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %div, i1 false)
+ ret <2 x i32> %ctlz
+}
+
+define <2 x i32> @vec2_shl_ctlz_false_neg(<2 x i32>) {
+; CHECK-LABEL: define <2 x i32> @vec2_shl_ctlz_false_neg(
+; CHECK-SAME: <2 x i32> [[TMP0:%.*]]) {
+; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i32> <i32 8387584, i32 4276440>, [[TMP0]]
+; CHECK-NEXT: [[CTLZ:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[SHL]], i1 false), !range [[RNG1:![0-9]+]]
+; CHECK-NEXT: ret <2 x i32> [[CTLZ]]
+;
+ %shl = shl <2 x i32> <i32 8387584, i32 4276440>, %0
+ %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %shl, i1 false)
+ ret <2 x i32> %ctlz
+}
+
+define <2 x i32> @vec2_lshr_cttz_false_neg(<2 x i32>) {
+; CHECK-LABEL: define <2 x i32> @vec2_lshr_cttz_false_neg(
+; CHECK-SAME: <2 x i32> [[TMP0:%.*]]) {
+; CHECK-NEXT: [[LSHR:%.*]] = lshr <2 x i32> <i32 8387584, i32 4276440>, [[TMP0]]
+; CHECK-NEXT: [[CTTZ:%.*]] = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[LSHR]], i1 false), !range [[RNG1]]
+; CHECK-NEXT: ret <2 x i32> [[CTTZ]]
+;
+ %lshr = lshr <2 x i32> <i32 8387584, i32 4276440>, %0
+ %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %lshr, i1 false)
+ ret <2 x i32> %cttz
+}
+
+define <2 x i32> @vec2_shl_cttz_false_neg(<2 x i32>) {
+; CHECK-LABEL: define <2 x i32> @vec2_shl_cttz_false_neg(
+; CHECK-SAME: <2 x i32> [[TMP0:%.*]]) {
+; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i32> <i32 8387584, i32 4276440>, [[TMP0]]
+; CHECK-NEXT: [[CTTZ:%.*]] = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[SHL]], i1 false), !range [[RNG2:![0-9]+]]
+; CHECK-NEXT: ret <2 x i32> [[CTTZ]]
+;
+ %shl = shl <2 x i32> <i32 8387584, i32 4276440>, %0
+ %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %shl, i1 false)
+ ret <2 x i32> %cttz
+}
+
+define i32 @lshr_ctlz_faslse_neg(i32) {
+; CHECK-LABEL: define i32 @lshr_ctlz_faslse_neg(
+; CHECK-SAME: i32 [[TMP0:%.*]]) {
+; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 8387584, [[TMP0]]
+; CHECK-NEXT: [[CTLZ:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LSHR]], i1 false), !range [[RNG0]]
+; CHECK-NEXT: ret i32 [[CTLZ]]
+;
+ %lshr = lshr i32 8387584, %0
+ %ctlz = call i32 @llvm.ctlz.i32(i32 %lshr, i1 false)
+ ret i32 %ctlz
+}
+
+define i32 @shl_ctlz_false_neg(i32) {
+; CHECK-LABEL: define i32 @shl_ctlz_false_neg(
+; CHECK-SAME: i32 [[TMP0:%.*]]) {
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 8387584, [[TMP0]]
+; CHECK-NEXT: [[CTLZ:%.*]] = call i32 @llvm.ctlz.i32(i32 [[SHL]], i1 false), !range [[RNG1]]
+; CHECK-NEXT: ret i32 [[CTLZ]]
+;
+ %shl = shl i32 8387584, %0
+ %ctlz = call i32 @llvm.ctlz.i32(i32 %shl, i1 false)
+ ret i32 %ctlz
+}
+
+define i32 @lshr_cttz_false_neg(i32) {
+; CHECK-LABEL: define i32 @lshr_cttz_false_neg(
+; CHECK-SAME: i32 [[TMP0:%.*]]) {
+; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 8387584, [[TMP0]]
+; CHECK-NEXT: [[CTTZ:%.*]] = call i32 @llvm.cttz.i32(i32 [[LSHR]], i1 false), !range [[RNG1]]
+; CHECK-NEXT: ret i32 [[CTTZ]]
+;
+ %lshr = lshr i32 8387584, %0
+ %cttz = call i32 @llvm.cttz.i32(i32 %lshr, i1 false)
+ ret i32 %cttz
+}
+
+define i32 @shl_cttz_false_neg(i32) {
+; CHECK-LABEL: define i32 @shl_cttz_false_neg(
+; CHECK-SAME: i32 [[TMP0:%.*]]) {
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 8387584, [[TMP0]]
+; CHECK-NEXT: [[CTTZ:%.*]] = call i32 @llvm.cttz.i32(i32 [[SHL]], i1 false), !range [[RNG3:![0-9]+]]
+; CHECK-NEXT: ret i32 [[CTTZ]]
+;
+ %shl = shl i32 8387584, %0
+ %cttz = call i32 @llvm.cttz.i32(i32 %shl, i1 false)
+ ret i32 %cttz
+}
+;.
+; CHECK: [[RNG0]] = !{i32 9, i32 33}
+; CHECK: [[RNG1]] = !{i32 0, i32 33}
+; CHECK: [[RNG2]] = !{i32 3, i32 33}
+; CHECK: [[RNG3]] = !{i32 10, i32 33}
+;.
More information about the llvm-commits
mailing list