[llvm] [InstCombine] Fold Minimum over Trailing/Leading Bits Counts (PR #90402)

via llvm-commits llvm-commits at lists.llvm.org
Sun Apr 28 09:07:18 PDT 2024


https://github.com/mskamp created https://github.com/llvm/llvm-project/pull/90402

Fixes #90000

>From 9fdd0b1822a1f54b1c14ce567aeefdf7108ecafe Mon Sep 17 00:00:00 2001
From: Marius Kamp <msk at posteo.org>
Date: Sat, 27 Apr 2024 17:51:44 +0200
Subject: [PATCH 1/2] [InstCombine][NFC] Add Tests for Minimum over
 Trailing/Leading Bits Counts

---
 .../Transforms/InstCombine/umin_cttz_ctlz.ll  | 355 ++++++++++++++++++
 1 file changed, 355 insertions(+)
 create mode 100644 llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll

diff --git a/llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll b/llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll
new file mode 100644
index 00000000000000..25c9d75c2bbdc2
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll
@@ -0,0 +1,355 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+declare i1 @llvm.umin.i1(i1 %a, i1 %b)
+declare i8 @llvm.umin.i8(i8 %a, i8 %b)
+declare i16 @llvm.umin.i16(i16 %a, i16 %b)
+declare i32 @llvm.umin.i32(i32 %a, i32 %b)
+declare i64 @llvm.umin.i64(i64 %a, i64 %b)
+declare <2 x i32> @llvm.umin.v2i32(<2 x i32> %a, <2 x i32> %b)
+
+declare i1 @llvm.cttz.i1(i1, i1)
+declare i8 @llvm.cttz.i8(i8, i1)
+declare i16 @llvm.cttz.i16(i16, i1)
+declare i32 @llvm.cttz.i32(i32, i1)
+declare i64 @llvm.cttz.i64(i64, i1)
+declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>, i1)
+
+declare i1 @llvm.ctlz.i1(i1, i1)
+declare i8 @llvm.ctlz.i8(i8, i1)
+declare i16 @llvm.ctlz.i16(i16, i1)
+declare i32 @llvm.ctlz.i32(i32, i1)
+declare i64 @llvm.ctlz.i64(i64, i1)
+declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1)
+
+define i8 @umin_cttz_i8_zero_undefined(i8 %X) {
+; CHECK-LABEL: define i8 @umin_cttz_i8_zero_undefined(
+; CHECK-SAME: i8 [[X:%.*]]) {
+; CHECK-NEXT:    [[CTTZ:%.*]] = call range(i8 0, 9) i8 @llvm.cttz.i8(i8 [[X]], i1 true)
+; CHECK-NEXT:    [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTTZ]], i8 6)
+; CHECK-NEXT:    ret i8 [[RET]]
+;
+  %cttz = call i8 @llvm.cttz.i8(i8 %X, i1 true)
+  %ret = call i8 @llvm.umin.i8(i8 %cttz, i8 6)
+  ret i8 %ret
+}
+
+define i8 @umin_cttz_i8_zero_defined(i8 %X) {
+; CHECK-LABEL: define i8 @umin_cttz_i8_zero_defined(
+; CHECK-SAME: i8 [[X:%.*]]) {
+; CHECK-NEXT:    [[CTTZ:%.*]] = call range(i8 0, 9) i8 @llvm.cttz.i8(i8 [[X]], i1 false)
+; CHECK-NEXT:    [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTTZ]], i8 6)
+; CHECK-NEXT:    ret i8 [[RET]]
+;
+  %cttz = call i8 @llvm.cttz.i8(i8 %X, i1 false)
+  %ret = call i8 @llvm.umin.i8(i8 %cttz, i8 6)
+  ret i8 %ret
+}
+
+define i8 @umin_cttz_i8_commuted_zero_undefined(i8 %X) {
+; CHECK-LABEL: define i8 @umin_cttz_i8_commuted_zero_undefined(
+; CHECK-SAME: i8 [[X:%.*]]) {
+; CHECK-NEXT:    [[CTTZ:%.*]] = call range(i8 0, 9) i8 @llvm.cttz.i8(i8 [[X]], i1 true)
+; CHECK-NEXT:    [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTTZ]], i8 6)
+; CHECK-NEXT:    ret i8 [[RET]]
+;
+  %cttz = call i8 @llvm.cttz.i8(i8 %X, i1 true)
+  %ret = call i8 @llvm.umin.i8(i8 6, i8 %cttz)
+  ret i8 %ret
+}
+
+define i8 @umin_cttz_i8_ge_bitwidth_zero_undefined(i8 %X) {
+; CHECK-LABEL: define i8 @umin_cttz_i8_ge_bitwidth_zero_undefined(
+; CHECK-SAME: i8 [[X:%.*]]) {
+; CHECK-NEXT:    [[CTTZ:%.*]] = call range(i8 0, 9) i8 @llvm.cttz.i8(i8 [[X]], i1 true)
+; CHECK-NEXT:    ret i8 [[CTTZ]]
+;
+  %cttz = call i8 @llvm.cttz.i8(i8 %X, i1 true)
+  %ret = call i8 @llvm.umin.i8(i8 %cttz, i8 10)
+  ret i8 %ret
+}
+
+define i16 @umin_cttz_i16_zero_undefined(i16 %X) {
+; CHECK-LABEL: define i16 @umin_cttz_i16_zero_undefined(
+; CHECK-SAME: i16 [[X:%.*]]) {
+; CHECK-NEXT:    [[CTTZ:%.*]] = call range(i16 0, 17) i16 @llvm.cttz.i16(i16 [[X]], i1 true)
+; CHECK-NEXT:    [[RET:%.*]] = call i16 @llvm.umin.i16(i16 [[CTTZ]], i16 6)
+; CHECK-NEXT:    ret i16 [[RET]]
+;
+  %cttz = call i16 @llvm.cttz.i16(i16 %X, i1 true)
+  %ret = call i16 @llvm.umin.i16(i16 %cttz, i16 6)
+  ret i16 %ret
+}
+
+define i32 @umin_cttz_i32_zero_undefined(i32 %X) {
+; CHECK-LABEL: define i32 @umin_cttz_i32_zero_undefined(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT:    [[CTTZ:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[X]], i1 true)
+; CHECK-NEXT:    [[RET:%.*]] = call i32 @llvm.umin.i32(i32 [[CTTZ]], i32 6)
+; CHECK-NEXT:    ret i32 [[RET]]
+;
+  %cttz = call i32 @llvm.cttz.i32(i32 %X, i1 true)
+  %ret = call i32 @llvm.umin.i32(i32 %cttz, i32 6)
+  ret i32 %ret
+}
+
+define i64 @umin_cttz_i64_zero_undefined(i64 %X) {
+; CHECK-LABEL: define i64 @umin_cttz_i64_zero_undefined(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT:    [[CTTZ:%.*]] = call range(i64 0, 65) i64 @llvm.cttz.i64(i64 [[X]], i1 true)
+; CHECK-NEXT:    [[RET:%.*]] = call i64 @llvm.umin.i64(i64 [[CTTZ]], i64 6)
+; CHECK-NEXT:    ret i64 [[RET]]
+;
+  %cttz = call i64 @llvm.cttz.i64(i64 %X, i1 true)
+  %ret = call i64 @llvm.umin.i64(i64 %cttz, i64 6)
+  ret i64 %ret
+}
+
+define i1 @umin_cttz_i1_zero_undefined(i1 %X) {
+; CHECK-LABEL: define i1 @umin_cttz_i1_zero_undefined(
+; CHECK-SAME: i1 [[X:%.*]]) {
+; CHECK-NEXT:    ret i1 false
+;
+  %cttz = call i1 @llvm.cttz.i1(i1 %X, i1 true)
+  %ret = call i1 @llvm.umin.i1(i1 %cttz, i1 1)
+  ret i1 %ret
+}
+
+define i1 @umin_cttz_i1_zero_defined(i1 %X) {
+; CHECK-LABEL: define i1 @umin_cttz_i1_zero_defined(
+; CHECK-SAME: i1 [[X:%.*]]) {
+; CHECK-NEXT:    [[CTTZ:%.*]] = xor i1 [[X]], true
+; CHECK-NEXT:    ret i1 [[CTTZ]]
+;
+  %cttz = call i1 @llvm.cttz.i1(i1 %X, i1 false)
+  %ret = call i1 @llvm.umin.i1(i1 %cttz, i1 1)
+  ret i1 %ret
+}
+
+define <2 x i32> @umin_cttz_2xi32_splat_zero_undefined(<2 x i32> %X) {
+; CHECK-LABEL: define <2 x i32> @umin_cttz_2xi32_splat_zero_undefined(
+; CHECK-SAME: <2 x i32> [[X:%.*]]) {
+; CHECK-NEXT:    [[CTTZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X]], i1 true)
+; CHECK-NEXT:    [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTTZ]], <2 x i32> <i32 6, i32 6>)
+; CHECK-NEXT:    ret <2 x i32> [[RET]]
+;
+  %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %X, i1 true)
+  %ret = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %cttz, <2 x i32> <i32 6, i32 6>)
+  ret <2 x i32> %ret
+}
+
+define <2 x i32> @umin_cttz_2xi32_splat_poison_zero_undefined(<2 x i32> %X) {
+; CHECK-LABEL: define <2 x i32> @umin_cttz_2xi32_splat_poison_zero_undefined(
+; CHECK-SAME: <2 x i32> [[X:%.*]]) {
+; CHECK-NEXT:    [[CTTZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X]], i1 true)
+; CHECK-NEXT:    [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTTZ]], <2 x i32> <i32 6, i32 poison>)
+; CHECK-NEXT:    ret <2 x i32> [[RET]]
+;
+  %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %X, i1 true)
+  %ret = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %cttz, <2 x i32> <i32 6, i32 poison>)
+  ret <2 x i32> %ret
+}
+
+define <2 x i32> @umin_cttz_2xi32_no_splat_negative_zero_undefined(<2 x i32> %X) {
+; CHECK-LABEL: define <2 x i32> @umin_cttz_2xi32_no_splat_negative_zero_undefined(
+; CHECK-SAME: <2 x i32> [[X:%.*]]) {
+; CHECK-NEXT:    [[CTTZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X]], i1 true)
+; CHECK-NEXT:    [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTTZ]], <2 x i32> <i32 6, i32 0>)
+; CHECK-NEXT:    ret <2 x i32> [[RET]]
+;
+  %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %X, i1 true)
+  %ret = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %cttz, <2 x i32> <i32 6, i32 0>)
+  ret <2 x i32> %ret
+}
+
+define i16 @umin_cttz_i16_negative_non_constant(i16 %X, i16 %Y) {
+; CHECK-LABEL: define i16 @umin_cttz_i16_negative_non_constant(
+; CHECK-SAME: i16 [[X:%.*]], i16 [[Y:%.*]]) {
+; CHECK-NEXT:    [[CTTZ:%.*]] = call range(i16 0, 17) i16 @llvm.cttz.i16(i16 [[X]], i1 true)
+; CHECK-NEXT:    [[RET:%.*]] = call i16 @llvm.umin.i16(i16 [[CTTZ]], i16 [[Y]])
+; CHECK-NEXT:    ret i16 [[RET]]
+;
+  %cttz = call i16 @llvm.cttz.i16(i16 %X, i1 true)
+  %ret = call i16 @llvm.umin.i16(i16 %cttz, i16 %Y)
+  ret i16 %ret
+}
+
+define i16 @umin_cttz_i16_negative_two_uses(i16 %X) {
+; CHECK-LABEL: define i16 @umin_cttz_i16_negative_two_uses(
+; CHECK-SAME: i16 [[X:%.*]]) {
+; CHECK-NEXT:    [[CTTZ:%.*]] = call range(i16 0, 17) i16 @llvm.cttz.i16(i16 [[X]], i1 true)
+; CHECK-NEXT:    [[OP0:%.*]] = call i16 @llvm.umin.i16(i16 [[CTTZ]], i16 6)
+; CHECK-NEXT:    [[RET:%.*]] = add nuw nsw i16 [[CTTZ]], [[OP0]]
+; CHECK-NEXT:    ret i16 [[RET]]
+;
+  %cttz = call i16 @llvm.cttz.i16(i16 %X, i1 true)
+  %op0 = call i16 @llvm.umin.i16(i16 %cttz, i16 6)
+  %ret = add i16 %cttz, %op0
+  ret i16 %ret
+}
+
+define i8 @umin_ctlz_i8_zero_undefined(i8 %X) {
+; CHECK-LABEL: define i8 @umin_ctlz_i8_zero_undefined(
+; CHECK-SAME: i8 [[X:%.*]]) {
+; CHECK-NEXT:    [[CTLZ:%.*]] = call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[X]], i1 true)
+; CHECK-NEXT:    [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTLZ]], i8 6)
+; CHECK-NEXT:    ret i8 [[RET]]
+;
+  %ctlz = call i8 @llvm.ctlz.i8(i8 %X, i1 true)
+  %ret = call i8 @llvm.umin.i8(i8 %ctlz, i8 6)
+  ret i8 %ret
+}
+
+define i8 @umin_ctlz_i8_zero_defined(i8 %X) {
+; CHECK-LABEL: define i8 @umin_ctlz_i8_zero_defined(
+; CHECK-SAME: i8 [[X:%.*]]) {
+; CHECK-NEXT:    [[CTLZ:%.*]] = call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[X]], i1 false)
+; CHECK-NEXT:    [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTLZ]], i8 6)
+; CHECK-NEXT:    ret i8 [[RET]]
+;
+  %ctlz = call i8 @llvm.ctlz.i8(i8 %X, i1 false)
+  %ret = call i8 @llvm.umin.i8(i8 %ctlz, i8 6)
+  ret i8 %ret
+}
+
+define i8 @umin_ctlz_i8_commuted_zero_undefined(i8 %X) {
+; CHECK-LABEL: define i8 @umin_ctlz_i8_commuted_zero_undefined(
+; CHECK-SAME: i8 [[X:%.*]]) {
+; CHECK-NEXT:    [[CTLZ:%.*]] = call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[X]], i1 true)
+; CHECK-NEXT:    [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTLZ]], i8 6)
+; CHECK-NEXT:    ret i8 [[RET]]
+;
+  %ctlz = call i8 @llvm.ctlz.i8(i8 %X, i1 true)
+  %ret = call i8 @llvm.umin.i8(i8 6, i8 %ctlz)
+  ret i8 %ret
+}
+
+define i8 @umin_ctlz_i8_ge_bitwidth_zero_undefined(i8 %X) {
+; CHECK-LABEL: define i8 @umin_ctlz_i8_ge_bitwidth_zero_undefined(
+; CHECK-SAME: i8 [[X:%.*]]) {
+; CHECK-NEXT:    [[CTLZ:%.*]] = call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[X]], i1 true)
+; CHECK-NEXT:    ret i8 [[CTLZ]]
+;
+  %ctlz = call i8 @llvm.ctlz.i8(i8 %X, i1 true)
+  %ret = call i8 @llvm.umin.i8(i8 %ctlz, i8 10)
+  ret i8 %ret
+}
+
+define i16 @umin_ctlz_i16_zero_undefined(i16 %X) {
+; CHECK-LABEL: define i16 @umin_ctlz_i16_zero_undefined(
+; CHECK-SAME: i16 [[X:%.*]]) {
+; CHECK-NEXT:    [[CTLZ:%.*]] = call range(i16 0, 17) i16 @llvm.ctlz.i16(i16 [[X]], i1 true)
+; CHECK-NEXT:    [[RET:%.*]] = call i16 @llvm.umin.i16(i16 [[CTLZ]], i16 6)
+; CHECK-NEXT:    ret i16 [[RET]]
+;
+  %ctlz = call i16 @llvm.ctlz.i16(i16 %X, i1 true)
+  %ret = call i16 @llvm.umin.i16(i16 %ctlz, i16 6)
+  ret i16 %ret
+}
+
+define i32 @umin_ctlz_i32_zero_undefined(i32 %X) {
+; CHECK-LABEL: define i32 @umin_ctlz_i32_zero_undefined(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT:    [[CTLZ:%.*]] = call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[X]], i1 true)
+; CHECK-NEXT:    [[RET:%.*]] = call i32 @llvm.umin.i32(i32 [[CTLZ]], i32 6)
+; CHECK-NEXT:    ret i32 [[RET]]
+;
+  %ctlz = call i32 @llvm.ctlz.i32(i32 %X, i1 true)
+  %ret = call i32 @llvm.umin.i32(i32 %ctlz, i32 6)
+  ret i32 %ret
+}
+
+define i64 @umin_ctlz_i64_zero_undefined(i64 %X) {
+; CHECK-LABEL: define i64 @umin_ctlz_i64_zero_undefined(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT:    [[CTLZ:%.*]] = call range(i64 0, 65) i64 @llvm.ctlz.i64(i64 [[X]], i1 true)
+; CHECK-NEXT:    [[RET:%.*]] = call i64 @llvm.umin.i64(i64 [[CTLZ]], i64 6)
+; CHECK-NEXT:    ret i64 [[RET]]
+;
+  %ctlz = call i64 @llvm.ctlz.i64(i64 %X, i1 true)
+  %ret = call i64 @llvm.umin.i64(i64 %ctlz, i64 6)
+  ret i64 %ret
+}
+
+define i1 @umin_ctlz_i1_zero_undefined(i1 %X) {
+; CHECK-LABEL: define i1 @umin_ctlz_i1_zero_undefined(
+; CHECK-SAME: i1 [[X:%.*]]) {
+; CHECK-NEXT:    ret i1 false
+;
+  %ctlz = call i1 @llvm.ctlz.i1(i1 %X, i1 true)
+  %ret = call i1 @llvm.umin.i1(i1 %ctlz, i1 1)
+  ret i1 %ret
+}
+
+define i1 @umin_ctlz_i1_zero_defined(i1 %X) {
+; CHECK-LABEL: define i1 @umin_ctlz_i1_zero_defined(
+; CHECK-SAME: i1 [[X:%.*]]) {
+; CHECK-NEXT:    [[CTLZ:%.*]] = xor i1 [[X]], true
+; CHECK-NEXT:    ret i1 [[CTLZ]]
+;
+  %ctlz = call i1 @llvm.ctlz.i1(i1 %X, i1 false)
+  %ret = call i1 @llvm.umin.i1(i1 %ctlz, i1 1)
+  ret i1 %ret
+}
+
+define <2 x i32> @umin_ctlz_2xi32_splat_zero_undefined(<2 x i32> %X) {
+; CHECK-LABEL: define <2 x i32> @umin_ctlz_2xi32_splat_zero_undefined(
+; CHECK-SAME: <2 x i32> [[X:%.*]]) {
+; CHECK-NEXT:    [[CTLZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X]], i1 true)
+; CHECK-NEXT:    [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTLZ]], <2 x i32> <i32 6, i32 6>)
+; CHECK-NEXT:    ret <2 x i32> [[RET]]
+;
+  %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %X, i1 true)
+  %ret = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %ctlz, <2 x i32> <i32 6, i32 6>)
+  ret <2 x i32> %ret
+}
+
+define <2 x i32> @umin_ctlz_2xi32_splat_poison_zero_undefined(<2 x i32> %X) {
+; CHECK-LABEL: define <2 x i32> @umin_ctlz_2xi32_splat_poison_zero_undefined(
+; CHECK-SAME: <2 x i32> [[X:%.*]]) {
+; CHECK-NEXT:    [[CTLZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X]], i1 true)
+; CHECK-NEXT:    [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTLZ]], <2 x i32> <i32 6, i32 poison>)
+; CHECK-NEXT:    ret <2 x i32> [[RET]]
+;
+  %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %X, i1 true)
+  %ret = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %ctlz, <2 x i32> <i32 6, i32 poison>)
+  ret <2 x i32> %ret
+}
+
+define <2 x i32> @umin_ctlz_2xi32_no_splat_negative_zero_undefined(<2 x i32> %X) {
+; CHECK-LABEL: define <2 x i32> @umin_ctlz_2xi32_no_splat_negative_zero_undefined(
+; CHECK-SAME: <2 x i32> [[X:%.*]]) {
+; CHECK-NEXT:    [[CTLZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X]], i1 true)
+; CHECK-NEXT:    [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTLZ]], <2 x i32> <i32 6, i32 0>)
+; CHECK-NEXT:    ret <2 x i32> [[RET]]
+;
+  %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %X, i1 true)
+  %ret = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %ctlz, <2 x i32> <i32 6, i32 0>)
+  ret <2 x i32> %ret
+}
+
+define i16 @umin_ctlz_i16_negative_non_constant(i16 %X, i16 %Y) {
+; CHECK-LABEL: define i16 @umin_ctlz_i16_negative_non_constant(
+; CHECK-SAME: i16 [[X:%.*]], i16 [[Y:%.*]]) {
+; CHECK-NEXT:    [[CTLZ:%.*]] = call range(i16 0, 17) i16 @llvm.ctlz.i16(i16 [[X]], i1 true)
+; CHECK-NEXT:    [[RET:%.*]] = call i16 @llvm.umin.i16(i16 [[CTLZ]], i16 [[Y]])
+; CHECK-NEXT:    ret i16 [[RET]]
+;
+  %ctlz = call i16 @llvm.ctlz.i16(i16 %X, i1 true)
+  %ret = call i16 @llvm.umin.i16(i16 %ctlz, i16 %Y)
+  ret i16 %ret
+}
+
+define i16 @umin_ctlz_i16_negative_two_uses(i16 %X) {
+; CHECK-LABEL: define i16 @umin_ctlz_i16_negative_two_uses(
+; CHECK-SAME: i16 [[X:%.*]]) {
+; CHECK-NEXT:    [[CTLZ:%.*]] = call range(i16 0, 17) i16 @llvm.ctlz.i16(i16 [[X]], i1 true)
+; CHECK-NEXT:    [[OP0:%.*]] = call i16 @llvm.umin.i16(i16 [[CTLZ]], i16 6)
+; CHECK-NEXT:    [[RET:%.*]] = add nuw nsw i16 [[CTLZ]], [[OP0]]
+; CHECK-NEXT:    ret i16 [[RET]]
+;
+  %ctlz = call i16 @llvm.ctlz.i16(i16 %X, i1 true)
+  %op0 = call i16 @llvm.umin.i16(i16 %ctlz, i16 6)
+  %ret = add i16 %ctlz, %op0
+  ret i16 %ret
+}

>From c727b10c8f22c5e7e6ad007cff083564b9639906 Mon Sep 17 00:00:00 2001
From: Marius Kamp <msk at posteo.org>
Date: Sat, 27 Apr 2024 19:46:42 +0200
Subject: [PATCH 2/2] [InstCombine] Fold Minimum over Trailing/Leading Bits
 Counts (#90000)

The new transformation folds `umin(cttz(x), c)` to `cttz(x | (1 << c))`
and `umin(ctlz(x), c)` to `ctlz(x | ((1 << (bitwidth - 1)) >> c))`. The
transformation is only implemented for constant `c` to not increase the
number of instructions.

The idea of the transformation is to set the c-th lowest (for `cttz`) or
highest (for `ctlz`) bit in the operand. In this way, the `cttz` or
`ctlz` instruction always returns at most `c`.

Alive2 proofs: https://alive2.llvm.org/ce/z/xRZTE7
---
 llvm/include/llvm/IR/PatternMatch.h           | 12 ++++
 .../InstCombine/InstCombineCalls.cpp          | 33 +++++++++++
 .../Transforms/InstCombine/umin_cttz_ctlz.ll  | 56 +++++++++----------
 3 files changed, 73 insertions(+), 28 deletions(-)

diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h
index 0b13b4aad9c326..36d64c88427883 100644
--- a/llvm/include/llvm/IR/PatternMatch.h
+++ b/llvm/include/llvm/IR/PatternMatch.h
@@ -2466,6 +2466,18 @@ inline typename m_Intrinsic_Ty<Opnd0>::Ty m_BSwap(const Opnd0 &Op0) {
   return m_Intrinsic<Intrinsic::bswap>(Op0);
 }
 
+template <typename Opnd0, typename Opnd1>
+inline typename m_Intrinsic_Ty<Opnd0, Opnd1>::Ty m_Ctlz(const Opnd0 &Op0,
+                                                        const Opnd1 &Op1) {
+  return m_Intrinsic<Intrinsic::ctlz>(Op0, Op1);
+}
+
+template <typename Opnd0, typename Opnd1>
+inline typename m_Intrinsic_Ty<Opnd0, Opnd1>::Ty m_Cttz(const Opnd0 &Op0,
+                                                        const Opnd1 &Op1) {
+  return m_Intrinsic<Intrinsic::cttz>(Op0, Op1);
+}
+
 template <typename Opnd0>
 inline typename m_Intrinsic_Ty<Opnd0>::Ty m_FAbs(const Opnd0 &Op0) {
   return m_Intrinsic<Intrinsic::fabs>(Op0);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index e5652458f150b5..db742fbe668cc3 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1633,6 +1633,39 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
       Value *Cmp = Builder.CreateICmpNE(I0, Zero);
       return CastInst::Create(Instruction::ZExt, Cmp, II->getType());
     }
+    // umin(cttz(x), const) --> cttz(x | (1 << const))
+    Value *X;
+    const APInt *Y;
+    Value *Z;
+    if (match(I0, m_OneUse(m_Cttz(m_Value(X), m_Value(Z)))) &&
+        match(I1, m_APInt(Y))) {
+      Value *CttzOp = X;
+      if (Y->ult(I1->getType()->getScalarType()->getIntegerBitWidth())) {
+        auto One = APInt::getOneBitSet(
+            I1->getType()->getScalarType()->getIntegerBitWidth(), 0);
+        Value *NewConst = ConstantInt::get(I1->getType(), One << *Y);
+        CttzOp = Builder.CreateOr(X, NewConst);
+      }
+      return CallInst::Create(Intrinsic::getDeclaration(II->getModule(),
+                                                        Intrinsic::cttz,
+                                                        II->getType()),
+                              {CttzOp, Z});
+    }
+    // umin(ctlz(x), const) --> ctlz(x | ((1 << (bitwidth - 1)) >> const))
+    if (match(I0, m_OneUse(m_Ctlz(m_Value(X), m_Value(Z)))) &&
+        match(I1, m_APInt(Y))) {
+      Value *CtlzOp = X;
+      if (Y->ult(I1->getType()->getScalarType()->getIntegerBitWidth())) {
+        auto Min = APInt::getSignedMinValue(
+            I1->getType()->getScalarType()->getIntegerBitWidth());
+        Value *NewConst = ConstantInt::get(I1->getType(), Min.lshr(*Y));
+        CtlzOp = Builder.CreateOr(X, NewConst);
+      }
+      return CallInst::Create(Intrinsic::getDeclaration(II->getModule(),
+                                                        Intrinsic::ctlz,
+                                                        II->getType()),
+                              {CtlzOp, Z});
+    }
     [[fallthrough]];
   }
   case Intrinsic::umax: {
diff --git a/llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll b/llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll
index 25c9d75c2bbdc2..91f5b818c7ff9a 100644
--- a/llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll
+++ b/llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll
@@ -25,8 +25,8 @@ declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1)
 define i8 @umin_cttz_i8_zero_undefined(i8 %X) {
 ; CHECK-LABEL: define i8 @umin_cttz_i8_zero_undefined(
 ; CHECK-SAME: i8 [[X:%.*]]) {
-; CHECK-NEXT:    [[CTTZ:%.*]] = call range(i8 0, 9) i8 @llvm.cttz.i8(i8 [[X]], i1 true)
-; CHECK-NEXT:    [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTTZ]], i8 6)
+; CHECK-NEXT:    [[TMP1:%.*]] = or i8 [[X]], 64
+; CHECK-NEXT:    [[RET:%.*]] = call range(i8 0, 7) i8 @llvm.cttz.i8(i8 [[TMP1]], i1 true)
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
   %cttz = call i8 @llvm.cttz.i8(i8 %X, i1 true)
@@ -37,8 +37,8 @@ define i8 @umin_cttz_i8_zero_undefined(i8 %X) {
 define i8 @umin_cttz_i8_zero_defined(i8 %X) {
 ; CHECK-LABEL: define i8 @umin_cttz_i8_zero_defined(
 ; CHECK-SAME: i8 [[X:%.*]]) {
-; CHECK-NEXT:    [[CTTZ:%.*]] = call range(i8 0, 9) i8 @llvm.cttz.i8(i8 [[X]], i1 false)
-; CHECK-NEXT:    [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTTZ]], i8 6)
+; CHECK-NEXT:    [[TMP1:%.*]] = or i8 [[X]], 64
+; CHECK-NEXT:    [[RET:%.*]] = call range(i8 0, 7) i8 @llvm.cttz.i8(i8 [[TMP1]], i1 true)
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
   %cttz = call i8 @llvm.cttz.i8(i8 %X, i1 false)
@@ -49,8 +49,8 @@ define i8 @umin_cttz_i8_zero_defined(i8 %X) {
 define i8 @umin_cttz_i8_commuted_zero_undefined(i8 %X) {
 ; CHECK-LABEL: define i8 @umin_cttz_i8_commuted_zero_undefined(
 ; CHECK-SAME: i8 [[X:%.*]]) {
-; CHECK-NEXT:    [[CTTZ:%.*]] = call range(i8 0, 9) i8 @llvm.cttz.i8(i8 [[X]], i1 true)
-; CHECK-NEXT:    [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTTZ]], i8 6)
+; CHECK-NEXT:    [[TMP1:%.*]] = or i8 [[X]], 64
+; CHECK-NEXT:    [[RET:%.*]] = call range(i8 0, 7) i8 @llvm.cttz.i8(i8 [[TMP1]], i1 true)
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
   %cttz = call i8 @llvm.cttz.i8(i8 %X, i1 true)
@@ -72,8 +72,8 @@ define i8 @umin_cttz_i8_ge_bitwidth_zero_undefined(i8 %X) {
 define i16 @umin_cttz_i16_zero_undefined(i16 %X) {
 ; CHECK-LABEL: define i16 @umin_cttz_i16_zero_undefined(
 ; CHECK-SAME: i16 [[X:%.*]]) {
-; CHECK-NEXT:    [[CTTZ:%.*]] = call range(i16 0, 17) i16 @llvm.cttz.i16(i16 [[X]], i1 true)
-; CHECK-NEXT:    [[RET:%.*]] = call i16 @llvm.umin.i16(i16 [[CTTZ]], i16 6)
+; CHECK-NEXT:    [[TMP1:%.*]] = or i16 [[X]], 64
+; CHECK-NEXT:    [[RET:%.*]] = call range(i16 0, 7) i16 @llvm.cttz.i16(i16 [[TMP1]], i1 true)
 ; CHECK-NEXT:    ret i16 [[RET]]
 ;
   %cttz = call i16 @llvm.cttz.i16(i16 %X, i1 true)
@@ -84,8 +84,8 @@ define i16 @umin_cttz_i16_zero_undefined(i16 %X) {
 define i32 @umin_cttz_i32_zero_undefined(i32 %X) {
 ; CHECK-LABEL: define i32 @umin_cttz_i32_zero_undefined(
 ; CHECK-SAME: i32 [[X:%.*]]) {
-; CHECK-NEXT:    [[CTTZ:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[X]], i1 true)
-; CHECK-NEXT:    [[RET:%.*]] = call i32 @llvm.umin.i32(i32 [[CTTZ]], i32 6)
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[X]], 64
+; CHECK-NEXT:    [[RET:%.*]] = call range(i32 0, 7) i32 @llvm.cttz.i32(i32 [[TMP1]], i1 true)
 ; CHECK-NEXT:    ret i32 [[RET]]
 ;
   %cttz = call i32 @llvm.cttz.i32(i32 %X, i1 true)
@@ -96,8 +96,8 @@ define i32 @umin_cttz_i32_zero_undefined(i32 %X) {
 define i64 @umin_cttz_i64_zero_undefined(i64 %X) {
 ; CHECK-LABEL: define i64 @umin_cttz_i64_zero_undefined(
 ; CHECK-SAME: i64 [[X:%.*]]) {
-; CHECK-NEXT:    [[CTTZ:%.*]] = call range(i64 0, 65) i64 @llvm.cttz.i64(i64 [[X]], i1 true)
-; CHECK-NEXT:    [[RET:%.*]] = call i64 @llvm.umin.i64(i64 [[CTTZ]], i64 6)
+; CHECK-NEXT:    [[TMP1:%.*]] = or i64 [[X]], 64
+; CHECK-NEXT:    [[RET:%.*]] = call range(i64 0, 7) i64 @llvm.cttz.i64(i64 [[TMP1]], i1 true)
 ; CHECK-NEXT:    ret i64 [[RET]]
 ;
   %cttz = call i64 @llvm.cttz.i64(i64 %X, i1 true)
@@ -129,8 +129,8 @@ define i1 @umin_cttz_i1_zero_defined(i1 %X) {
 define <2 x i32> @umin_cttz_2xi32_splat_zero_undefined(<2 x i32> %X) {
 ; CHECK-LABEL: define <2 x i32> @umin_cttz_2xi32_splat_zero_undefined(
 ; CHECK-SAME: <2 x i32> [[X:%.*]]) {
-; CHECK-NEXT:    [[CTTZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X]], i1 true)
-; CHECK-NEXT:    [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTTZ]], <2 x i32> <i32 6, i32 6>)
+; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i32> [[X]], <i32 64, i32 64>
+; CHECK-NEXT:    [[RET:%.*]] = call range(i32 0, 7) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[TMP1]], i1 true)
 ; CHECK-NEXT:    ret <2 x i32> [[RET]]
 ;
   %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %X, i1 true)
@@ -191,8 +191,8 @@ define i16 @umin_cttz_i16_negative_two_uses(i16 %X) {
 define i8 @umin_ctlz_i8_zero_undefined(i8 %X) {
 ; CHECK-LABEL: define i8 @umin_ctlz_i8_zero_undefined(
 ; CHECK-SAME: i8 [[X:%.*]]) {
-; CHECK-NEXT:    [[CTLZ:%.*]] = call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[X]], i1 true)
-; CHECK-NEXT:    [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTLZ]], i8 6)
+; CHECK-NEXT:    [[TMP1:%.*]] = or i8 [[X]], 2
+; CHECK-NEXT:    [[RET:%.*]] = call range(i8 0, 7) i8 @llvm.ctlz.i8(i8 [[TMP1]], i1 true)
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
   %ctlz = call i8 @llvm.ctlz.i8(i8 %X, i1 true)
@@ -203,8 +203,8 @@ define i8 @umin_ctlz_i8_zero_undefined(i8 %X) {
 define i8 @umin_ctlz_i8_zero_defined(i8 %X) {
 ; CHECK-LABEL: define i8 @umin_ctlz_i8_zero_defined(
 ; CHECK-SAME: i8 [[X:%.*]]) {
-; CHECK-NEXT:    [[CTLZ:%.*]] = call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[X]], i1 false)
-; CHECK-NEXT:    [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTLZ]], i8 6)
+; CHECK-NEXT:    [[TMP1:%.*]] = or i8 [[X]], 2
+; CHECK-NEXT:    [[RET:%.*]] = call range(i8 0, 7) i8 @llvm.ctlz.i8(i8 [[TMP1]], i1 true)
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
   %ctlz = call i8 @llvm.ctlz.i8(i8 %X, i1 false)
@@ -215,8 +215,8 @@ define i8 @umin_ctlz_i8_zero_defined(i8 %X) {
 define i8 @umin_ctlz_i8_commuted_zero_undefined(i8 %X) {
 ; CHECK-LABEL: define i8 @umin_ctlz_i8_commuted_zero_undefined(
 ; CHECK-SAME: i8 [[X:%.*]]) {
-; CHECK-NEXT:    [[CTLZ:%.*]] = call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[X]], i1 true)
-; CHECK-NEXT:    [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTLZ]], i8 6)
+; CHECK-NEXT:    [[TMP1:%.*]] = or i8 [[X]], 2
+; CHECK-NEXT:    [[RET:%.*]] = call range(i8 0, 7) i8 @llvm.ctlz.i8(i8 [[TMP1]], i1 true)
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
   %ctlz = call i8 @llvm.ctlz.i8(i8 %X, i1 true)
@@ -238,8 +238,8 @@ define i8 @umin_ctlz_i8_ge_bitwidth_zero_undefined(i8 %X) {
 define i16 @umin_ctlz_i16_zero_undefined(i16 %X) {
 ; CHECK-LABEL: define i16 @umin_ctlz_i16_zero_undefined(
 ; CHECK-SAME: i16 [[X:%.*]]) {
-; CHECK-NEXT:    [[CTLZ:%.*]] = call range(i16 0, 17) i16 @llvm.ctlz.i16(i16 [[X]], i1 true)
-; CHECK-NEXT:    [[RET:%.*]] = call i16 @llvm.umin.i16(i16 [[CTLZ]], i16 6)
+; CHECK-NEXT:    [[TMP1:%.*]] = or i16 [[X]], 512
+; CHECK-NEXT:    [[RET:%.*]] = call range(i16 0, 7) i16 @llvm.ctlz.i16(i16 [[TMP1]], i1 true)
 ; CHECK-NEXT:    ret i16 [[RET]]
 ;
   %ctlz = call i16 @llvm.ctlz.i16(i16 %X, i1 true)
@@ -250,8 +250,8 @@ define i16 @umin_ctlz_i16_zero_undefined(i16 %X) {
 define i32 @umin_ctlz_i32_zero_undefined(i32 %X) {
 ; CHECK-LABEL: define i32 @umin_ctlz_i32_zero_undefined(
 ; CHECK-SAME: i32 [[X:%.*]]) {
-; CHECK-NEXT:    [[CTLZ:%.*]] = call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[X]], i1 true)
-; CHECK-NEXT:    [[RET:%.*]] = call i32 @llvm.umin.i32(i32 [[CTLZ]], i32 6)
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[X]], 33554432
+; CHECK-NEXT:    [[RET:%.*]] = call range(i32 0, 7) i32 @llvm.ctlz.i32(i32 [[TMP1]], i1 true)
 ; CHECK-NEXT:    ret i32 [[RET]]
 ;
   %ctlz = call i32 @llvm.ctlz.i32(i32 %X, i1 true)
@@ -262,8 +262,8 @@ define i32 @umin_ctlz_i32_zero_undefined(i32 %X) {
 define i64 @umin_ctlz_i64_zero_undefined(i64 %X) {
 ; CHECK-LABEL: define i64 @umin_ctlz_i64_zero_undefined(
 ; CHECK-SAME: i64 [[X:%.*]]) {
-; CHECK-NEXT:    [[CTLZ:%.*]] = call range(i64 0, 65) i64 @llvm.ctlz.i64(i64 [[X]], i1 true)
-; CHECK-NEXT:    [[RET:%.*]] = call i64 @llvm.umin.i64(i64 [[CTLZ]], i64 6)
+; CHECK-NEXT:    [[TMP1:%.*]] = or i64 [[X]], 144115188075855872
+; CHECK-NEXT:    [[RET:%.*]] = call range(i64 0, 7) i64 @llvm.ctlz.i64(i64 [[TMP1]], i1 true)
 ; CHECK-NEXT:    ret i64 [[RET]]
 ;
   %ctlz = call i64 @llvm.ctlz.i64(i64 %X, i1 true)
@@ -295,8 +295,8 @@ define i1 @umin_ctlz_i1_zero_defined(i1 %X) {
 define <2 x i32> @umin_ctlz_2xi32_splat_zero_undefined(<2 x i32> %X) {
 ; CHECK-LABEL: define <2 x i32> @umin_ctlz_2xi32_splat_zero_undefined(
 ; CHECK-SAME: <2 x i32> [[X:%.*]]) {
-; CHECK-NEXT:    [[CTLZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X]], i1 true)
-; CHECK-NEXT:    [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTLZ]], <2 x i32> <i32 6, i32 6>)
+; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i32> [[X]], <i32 33554432, i32 33554432>
+; CHECK-NEXT:    [[RET:%.*]] = call range(i32 0, 7) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[TMP1]], i1 true)
 ; CHECK-NEXT:    ret <2 x i32> [[RET]]
 ;
   %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %X, i1 true)



More information about the llvm-commits mailing list