[llvm] [InstCombine] Fold Minimum over Trailing/Leading Bits Counts (PR #90402)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 21 22:39:12 PDT 2024
https://github.com/mskamp updated https://github.com/llvm/llvm-project/pull/90402
>From a2bce7f4fab05382ce02bce44a025b3b380e55eb Mon Sep 17 00:00:00 2001
From: Marius Kamp <msk at posteo.org>
Date: Sat, 27 Apr 2024 17:51:44 +0200
Subject: [PATCH 1/2] [InstCombine][NFC] Add Tests for Minimum over
Trailing/Leading Bits Counts
---
.../Transforms/InstCombine/umin_cttz_ctlz.ll | 382 ++++++++++++++++++
1 file changed, 382 insertions(+)
create mode 100644 llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll
diff --git a/llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll b/llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll
new file mode 100644
index 0000000000000..0ebe22fe64062
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll
@@ -0,0 +1,382 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+define i8 @umin_cttz_i8_zero_undefined(i8 %X) {
+; CHECK-LABEL: define i8 @umin_cttz_i8_zero_undefined(
+; CHECK-SAME: i8 [[X:%.*]]) {
+; CHECK-NEXT: [[CTTZ:%.*]] = call range(i8 0, 9) i8 @llvm.cttz.i8(i8 [[X]], i1 true)
+; CHECK-NEXT: [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTTZ]], i8 6)
+; CHECK-NEXT: ret i8 [[RET]]
+;
+ %cttz = call i8 @llvm.cttz.i8(i8 %X, i1 true)
+ %ret = call i8 @llvm.umin.i8(i8 %cttz, i8 6)
+ ret i8 %ret
+}
+
+define i8 @umin_cttz_i8_zero_defined(i8 %X) {
+; CHECK-LABEL: define i8 @umin_cttz_i8_zero_defined(
+; CHECK-SAME: i8 [[X:%.*]]) {
+; CHECK-NEXT: [[CTTZ:%.*]] = call range(i8 0, 9) i8 @llvm.cttz.i8(i8 [[X]], i1 false)
+; CHECK-NEXT: [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTTZ]], i8 6)
+; CHECK-NEXT: ret i8 [[RET]]
+;
+ %cttz = call i8 @llvm.cttz.i8(i8 %X, i1 false)
+ %ret = call i8 @llvm.umin.i8(i8 %cttz, i8 6)
+ ret i8 %ret
+}
+
+define i8 @umin_cttz_i8_commuted_zero_undefined(i8 %X) {
+; CHECK-LABEL: define i8 @umin_cttz_i8_commuted_zero_undefined(
+; CHECK-SAME: i8 [[X:%.*]]) {
+; CHECK-NEXT: [[CTTZ:%.*]] = call range(i8 0, 9) i8 @llvm.cttz.i8(i8 [[X]], i1 true)
+; CHECK-NEXT: [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTTZ]], i8 6)
+; CHECK-NEXT: ret i8 [[RET]]
+;
+ %cttz = call i8 @llvm.cttz.i8(i8 %X, i1 true)
+ %ret = call i8 @llvm.umin.i8(i8 6, i8 %cttz)
+ ret i8 %ret
+}
+
+define i8 @umin_cttz_i8_ge_bitwidth_zero_undefined(i8 %X) {
+; CHECK-LABEL: define i8 @umin_cttz_i8_ge_bitwidth_zero_undefined(
+; CHECK-SAME: i8 [[X:%.*]]) {
+; CHECK-NEXT: [[CTTZ:%.*]] = call range(i8 0, 9) i8 @llvm.cttz.i8(i8 [[X]], i1 true)
+; CHECK-NEXT: ret i8 [[CTTZ]]
+;
+ %cttz = call i8 @llvm.cttz.i8(i8 %X, i1 true)
+ %ret = call i8 @llvm.umin.i8(i8 %cttz, i8 10)
+ ret i8 %ret
+}
+
+define i16 @umin_cttz_i16_zero_undefined(i16 %X) {
+; CHECK-LABEL: define i16 @umin_cttz_i16_zero_undefined(
+; CHECK-SAME: i16 [[X:%.*]]) {
+; CHECK-NEXT: [[CTTZ:%.*]] = call range(i16 0, 17) i16 @llvm.cttz.i16(i16 [[X]], i1 true)
+; CHECK-NEXT: [[RET:%.*]] = call i16 @llvm.umin.i16(i16 [[CTTZ]], i16 6)
+; CHECK-NEXT: ret i16 [[RET]]
+;
+ %cttz = call i16 @llvm.cttz.i16(i16 %X, i1 true)
+ %ret = call i16 @llvm.umin.i16(i16 %cttz, i16 6)
+ ret i16 %ret
+}
+
+define i32 @umin_cttz_i32_zero_undefined(i32 %X) {
+; CHECK-LABEL: define i32 @umin_cttz_i32_zero_undefined(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT: [[CTTZ:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[X]], i1 true)
+; CHECK-NEXT: [[RET:%.*]] = call i32 @llvm.umin.i32(i32 [[CTTZ]], i32 6)
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %cttz = call i32 @llvm.cttz.i32(i32 %X, i1 true)
+ %ret = call i32 @llvm.umin.i32(i32 %cttz, i32 6)
+ ret i32 %ret
+}
+
+define i64 @umin_cttz_i64_zero_undefined(i64 %X) {
+; CHECK-LABEL: define i64 @umin_cttz_i64_zero_undefined(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT: [[CTTZ:%.*]] = call range(i64 0, 65) i64 @llvm.cttz.i64(i64 [[X]], i1 true)
+; CHECK-NEXT: [[RET:%.*]] = call i64 @llvm.umin.i64(i64 [[CTTZ]], i64 6)
+; CHECK-NEXT: ret i64 [[RET]]
+;
+ %cttz = call i64 @llvm.cttz.i64(i64 %X, i1 true)
+ %ret = call i64 @llvm.umin.i64(i64 %cttz, i64 6)
+ ret i64 %ret
+}
+
+define i1 @umin_cttz_i1_zero_undefined(i1 %X) {
+; CHECK-LABEL: define i1 @umin_cttz_i1_zero_undefined(
+; CHECK-SAME: i1 [[X:%.*]]) {
+; CHECK-NEXT: ret i1 false
+;
+ %cttz = call i1 @llvm.cttz.i1(i1 %X, i1 true)
+ %ret = call i1 @llvm.umin.i1(i1 %cttz, i1 1)
+ ret i1 %ret
+}
+
+define i1 @umin_cttz_i1_zero_defined(i1 %X) {
+; CHECK-LABEL: define i1 @umin_cttz_i1_zero_defined(
+; CHECK-SAME: i1 [[X:%.*]]) {
+; CHECK-NEXT: [[CTTZ:%.*]] = xor i1 [[X]], true
+; CHECK-NEXT: ret i1 [[CTTZ]]
+;
+ %cttz = call i1 @llvm.cttz.i1(i1 %X, i1 false)
+ %ret = call i1 @llvm.umin.i1(i1 %cttz, i1 1)
+ ret i1 %ret
+}
+
+define <2 x i32> @umin_cttz_2xi32_splat_zero_undefined(<2 x i32> %X) {
+; CHECK-LABEL: define <2 x i32> @umin_cttz_2xi32_splat_zero_undefined(
+; CHECK-SAME: <2 x i32> [[X:%.*]]) {
+; CHECK-NEXT: [[CTTZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X]], i1 true)
+; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTTZ]], <2 x i32> <i32 6, i32 6>)
+; CHECK-NEXT: ret <2 x i32> [[RET]]
+;
+ %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %X, i1 true)
+ %ret = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %cttz, <2 x i32> <i32 6, i32 6>)
+ ret <2 x i32> %ret
+}
+
+define <2 x i32> @umin_cttz_2xi32_splat_poison_zero_undefined(<2 x i32> %X) {
+; CHECK-LABEL: define <2 x i32> @umin_cttz_2xi32_splat_poison_zero_undefined(
+; CHECK-SAME: <2 x i32> [[X:%.*]]) {
+; CHECK-NEXT: [[CTTZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X]], i1 true)
+; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTTZ]], <2 x i32> <i32 6, i32 poison>)
+; CHECK-NEXT: ret <2 x i32> [[RET]]
+;
+ %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %X, i1 true)
+ %ret = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %cttz, <2 x i32> <i32 6, i32 poison>)
+ ret <2 x i32> %ret
+}
+
+define <2 x i32> @umin_cttz_2xi32_no_splat_all_lt_bitwidth_zero_undefined(<2 x i32> %X) {
+; CHECK-LABEL: define <2 x i32> @umin_cttz_2xi32_no_splat_all_lt_bitwidth_zero_undefined(
+; CHECK-SAME: <2 x i32> [[X:%.*]]) {
+; CHECK-NEXT: [[CTTZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X]], i1 true)
+; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTTZ]], <2 x i32> <i32 6, i32 0>)
+; CHECK-NEXT: ret <2 x i32> [[RET]]
+;
+ %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %X, i1 true)
+ %ret = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %cttz, <2 x i32> <i32 6, i32 0>)
+ ret <2 x i32> %ret
+}
+
+define <2 x i32> @umin_cttz_2xi32_negative_no_splat_some_lt_bitwidth_zero_undefined(<2 x i32> %X) {
+; CHECK-LABEL: define <2 x i32> @umin_cttz_2xi32_negative_no_splat_some_lt_bitwidth_zero_undefined(
+; CHECK-SAME: <2 x i32> [[X:%.*]]) {
+; CHECK-NEXT: [[CTTZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X]], i1 true)
+; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTTZ]], <2 x i32> <i32 6, i32 64>)
+; CHECK-NEXT: ret <2 x i32> [[RET]]
+;
+ %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %X, i1 true)
+ %ret = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %cttz, <2 x i32> <i32 6, i32 64>)
+ ret <2 x i32> %ret
+}
+
+define <2 x i32> @umin_cttz_2xi32_no_splat_none_lt_bitwidth_zero_undefined(<2 x i32> %X) {
+; CHECK-LABEL: define <2 x i32> @umin_cttz_2xi32_no_splat_none_lt_bitwidth_zero_undefined(
+; CHECK-SAME: <2 x i32> [[X:%.*]]) {
+; CHECK-NEXT: [[CTTZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X]], i1 true)
+; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTTZ]], <2 x i32> <i32 32, i32 64>)
+; CHECK-NEXT: ret <2 x i32> [[RET]]
+;
+ %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %X, i1 true)
+ %ret = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %cttz, <2 x i32> <i32 32, i32 64>)
+ ret <2 x i32> %ret
+}
+
+define i16 @umin_cttz_i16_negative_non_constant(i16 %X, i16 %Y) {
+; CHECK-LABEL: define i16 @umin_cttz_i16_negative_non_constant(
+; CHECK-SAME: i16 [[X:%.*]], i16 [[Y:%.*]]) {
+; CHECK-NEXT: [[CTTZ:%.*]] = call range(i16 0, 17) i16 @llvm.cttz.i16(i16 [[X]], i1 true)
+; CHECK-NEXT: [[RET:%.*]] = call i16 @llvm.umin.i16(i16 [[CTTZ]], i16 [[Y]])
+; CHECK-NEXT: ret i16 [[RET]]
+;
+ %cttz = call i16 @llvm.cttz.i16(i16 %X, i1 true)
+ %ret = call i16 @llvm.umin.i16(i16 %cttz, i16 %Y)
+ ret i16 %ret
+}
+
+define i16 @umin_cttz_i16_negative_two_uses(i16 %X) {
+; CHECK-LABEL: define i16 @umin_cttz_i16_negative_two_uses(
+; CHECK-SAME: i16 [[X:%.*]]) {
+; CHECK-NEXT: [[CTTZ:%.*]] = call range(i16 0, 17) i16 @llvm.cttz.i16(i16 [[X]], i1 true)
+; CHECK-NEXT: [[OP0:%.*]] = call i16 @llvm.umin.i16(i16 [[CTTZ]], i16 6)
+; CHECK-NEXT: [[RET:%.*]] = add nuw nsw i16 [[CTTZ]], [[OP0]]
+; CHECK-NEXT: ret i16 [[RET]]
+;
+ %cttz = call i16 @llvm.cttz.i16(i16 %X, i1 true)
+ %op0 = call i16 @llvm.umin.i16(i16 %cttz, i16 6)
+ %ret = add i16 %cttz, %op0
+ ret i16 %ret
+}
+
+define i8 @umin_ctlz_i8_zero_undefined(i8 %X) {
+; CHECK-LABEL: define i8 @umin_ctlz_i8_zero_undefined(
+; CHECK-SAME: i8 [[X:%.*]]) {
+; CHECK-NEXT: [[CTLZ:%.*]] = call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[X]], i1 true)
+; CHECK-NEXT: [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTLZ]], i8 6)
+; CHECK-NEXT: ret i8 [[RET]]
+;
+ %ctlz = call i8 @llvm.ctlz.i8(i8 %X, i1 true)
+ %ret = call i8 @llvm.umin.i8(i8 %ctlz, i8 6)
+ ret i8 %ret
+}
+
+define i8 @umin_ctlz_i8_zero_defined(i8 %X) {
+; CHECK-LABEL: define i8 @umin_ctlz_i8_zero_defined(
+; CHECK-SAME: i8 [[X:%.*]]) {
+; CHECK-NEXT: [[CTLZ:%.*]] = call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[X]], i1 false)
+; CHECK-NEXT: [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTLZ]], i8 6)
+; CHECK-NEXT: ret i8 [[RET]]
+;
+ %ctlz = call i8 @llvm.ctlz.i8(i8 %X, i1 false)
+ %ret = call i8 @llvm.umin.i8(i8 %ctlz, i8 6)
+ ret i8 %ret
+}
+
+define i8 @umin_ctlz_i8_commuted_zero_undefined(i8 %X) {
+; CHECK-LABEL: define i8 @umin_ctlz_i8_commuted_zero_undefined(
+; CHECK-SAME: i8 [[X:%.*]]) {
+; CHECK-NEXT: [[CTLZ:%.*]] = call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[X]], i1 true)
+; CHECK-NEXT: [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTLZ]], i8 6)
+; CHECK-NEXT: ret i8 [[RET]]
+;
+ %ctlz = call i8 @llvm.ctlz.i8(i8 %X, i1 true)
+ %ret = call i8 @llvm.umin.i8(i8 6, i8 %ctlz)
+ ret i8 %ret
+}
+
+define i8 @umin_ctlz_i8_ge_bitwidth_zero_undefined(i8 %X) {
+; CHECK-LABEL: define i8 @umin_ctlz_i8_ge_bitwidth_zero_undefined(
+; CHECK-SAME: i8 [[X:%.*]]) {
+; CHECK-NEXT: [[CTLZ:%.*]] = call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[X]], i1 true)
+; CHECK-NEXT: ret i8 [[CTLZ]]
+;
+ %ctlz = call i8 @llvm.ctlz.i8(i8 %X, i1 true)
+ %ret = call i8 @llvm.umin.i8(i8 %ctlz, i8 10)
+ ret i8 %ret
+}
+
+define i16 @umin_ctlz_i16_zero_undefined(i16 %X) {
+; CHECK-LABEL: define i16 @umin_ctlz_i16_zero_undefined(
+; CHECK-SAME: i16 [[X:%.*]]) {
+; CHECK-NEXT: [[CTLZ:%.*]] = call range(i16 0, 17) i16 @llvm.ctlz.i16(i16 [[X]], i1 true)
+; CHECK-NEXT: [[RET:%.*]] = call i16 @llvm.umin.i16(i16 [[CTLZ]], i16 6)
+; CHECK-NEXT: ret i16 [[RET]]
+;
+ %ctlz = call i16 @llvm.ctlz.i16(i16 %X, i1 true)
+ %ret = call i16 @llvm.umin.i16(i16 %ctlz, i16 6)
+ ret i16 %ret
+}
+
+define i32 @umin_ctlz_i32_zero_undefined(i32 %X) {
+; CHECK-LABEL: define i32 @umin_ctlz_i32_zero_undefined(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT: [[CTLZ:%.*]] = call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[X]], i1 true)
+; CHECK-NEXT: [[RET:%.*]] = call i32 @llvm.umin.i32(i32 [[CTLZ]], i32 6)
+; CHECK-NEXT: ret i32 [[RET]]
+;
+ %ctlz = call i32 @llvm.ctlz.i32(i32 %X, i1 true)
+ %ret = call i32 @llvm.umin.i32(i32 %ctlz, i32 6)
+ ret i32 %ret
+}
+
+define i64 @umin_ctlz_i64_zero_undefined(i64 %X) {
+; CHECK-LABEL: define i64 @umin_ctlz_i64_zero_undefined(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT: [[CTLZ:%.*]] = call range(i64 0, 65) i64 @llvm.ctlz.i64(i64 [[X]], i1 true)
+; CHECK-NEXT: [[RET:%.*]] = call i64 @llvm.umin.i64(i64 [[CTLZ]], i64 6)
+; CHECK-NEXT: ret i64 [[RET]]
+;
+ %ctlz = call i64 @llvm.ctlz.i64(i64 %X, i1 true)
+ %ret = call i64 @llvm.umin.i64(i64 %ctlz, i64 6)
+ ret i64 %ret
+}
+
+define i1 @umin_ctlz_i1_zero_undefined(i1 %X) {
+; CHECK-LABEL: define i1 @umin_ctlz_i1_zero_undefined(
+; CHECK-SAME: i1 [[X:%.*]]) {
+; CHECK-NEXT: ret i1 false
+;
+ %ctlz = call i1 @llvm.ctlz.i1(i1 %X, i1 true)
+ %ret = call i1 @llvm.umin.i1(i1 %ctlz, i1 1)
+ ret i1 %ret
+}
+
+define i1 @umin_ctlz_i1_zero_defined(i1 %X) {
+; CHECK-LABEL: define i1 @umin_ctlz_i1_zero_defined(
+; CHECK-SAME: i1 [[X:%.*]]) {
+; CHECK-NEXT: [[CTLZ:%.*]] = xor i1 [[X]], true
+; CHECK-NEXT: ret i1 [[CTLZ]]
+;
+ %ctlz = call i1 @llvm.ctlz.i1(i1 %X, i1 false)
+ %ret = call i1 @llvm.umin.i1(i1 %ctlz, i1 1)
+ ret i1 %ret
+}
+
+define <2 x i32> @umin_ctlz_2xi32_splat_zero_undefined(<2 x i32> %X) {
+; CHECK-LABEL: define <2 x i32> @umin_ctlz_2xi32_splat_zero_undefined(
+; CHECK-SAME: <2 x i32> [[X:%.*]]) {
+; CHECK-NEXT: [[CTLZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X]], i1 true)
+; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTLZ]], <2 x i32> <i32 6, i32 6>)
+; CHECK-NEXT: ret <2 x i32> [[RET]]
+;
+ %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %X, i1 true)
+ %ret = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %ctlz, <2 x i32> <i32 6, i32 6>)
+ ret <2 x i32> %ret
+}
+
+define <2 x i32> @umin_ctlz_2xi32_splat_poison_zero_undefined(<2 x i32> %X) {
+; CHECK-LABEL: define <2 x i32> @umin_ctlz_2xi32_splat_poison_zero_undefined(
+; CHECK-SAME: <2 x i32> [[X:%.*]]) {
+; CHECK-NEXT: [[CTLZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X]], i1 true)
+; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTLZ]], <2 x i32> <i32 6, i32 poison>)
+; CHECK-NEXT: ret <2 x i32> [[RET]]
+;
+ %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %X, i1 true)
+ %ret = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %ctlz, <2 x i32> <i32 6, i32 poison>)
+ ret <2 x i32> %ret
+}
+
+define <2 x i32> @umin_ctlz_2xi32_no_splat_all_lt_bitwidth_zero_undefined(<2 x i32> %X) {
+; CHECK-LABEL: define <2 x i32> @umin_ctlz_2xi32_no_splat_all_lt_bitwidth_zero_undefined(
+; CHECK-SAME: <2 x i32> [[X:%.*]]) {
+; CHECK-NEXT: [[CTLZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X]], i1 true)
+; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTLZ]], <2 x i32> <i32 6, i32 0>)
+; CHECK-NEXT: ret <2 x i32> [[RET]]
+;
+ %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %X, i1 true)
+ %ret = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %ctlz, <2 x i32> <i32 6, i32 0>)
+ ret <2 x i32> %ret
+}
+
+define <2 x i32> @umin_ctlz_2xi32_negative_no_splat_some_lt_bitwidth_zero_undefined(<2 x i32> %X) {
+; CHECK-LABEL: define <2 x i32> @umin_ctlz_2xi32_negative_no_splat_some_lt_bitwidth_zero_undefined(
+; CHECK-SAME: <2 x i32> [[X:%.*]]) {
+; CHECK-NEXT: [[CTLZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X]], i1 true)
+; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTLZ]], <2 x i32> <i32 6, i32 64>)
+; CHECK-NEXT: ret <2 x i32> [[RET]]
+;
+ %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %X, i1 true)
+ %ret = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %ctlz, <2 x i32> <i32 6, i32 64>)
+ ret <2 x i32> %ret
+}
+
+define <2 x i32> @umin_ctlz_2xi32_no_splat_none_lt_bitwidth_zero_undefined(<2 x i32> %X) {
+; CHECK-LABEL: define <2 x i32> @umin_ctlz_2xi32_no_splat_none_lt_bitwidth_zero_undefined(
+; CHECK-SAME: <2 x i32> [[X:%.*]]) {
+; CHECK-NEXT: [[CTLZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X]], i1 true)
+; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTLZ]], <2 x i32> <i32 32, i32 64>)
+; CHECK-NEXT: ret <2 x i32> [[RET]]
+;
+ %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %X, i1 true)
+ %ret = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %ctlz, <2 x i32> <i32 32, i32 64>)
+ ret <2 x i32> %ret
+}
+
+define i16 @umin_ctlz_i16_negative_non_constant(i16 %X, i16 %Y) {
+; CHECK-LABEL: define i16 @umin_ctlz_i16_negative_non_constant(
+; CHECK-SAME: i16 [[X:%.*]], i16 [[Y:%.*]]) {
+; CHECK-NEXT: [[CTLZ:%.*]] = call range(i16 0, 17) i16 @llvm.ctlz.i16(i16 [[X]], i1 true)
+; CHECK-NEXT: [[RET:%.*]] = call i16 @llvm.umin.i16(i16 [[CTLZ]], i16 [[Y]])
+; CHECK-NEXT: ret i16 [[RET]]
+;
+ %ctlz = call i16 @llvm.ctlz.i16(i16 %X, i1 true)
+ %ret = call i16 @llvm.umin.i16(i16 %ctlz, i16 %Y)
+ ret i16 %ret
+}
+
+define i16 @umin_ctlz_i16_negative_two_uses(i16 %X) {
+; CHECK-LABEL: define i16 @umin_ctlz_i16_negative_two_uses(
+; CHECK-SAME: i16 [[X:%.*]]) {
+; CHECK-NEXT: [[CTLZ:%.*]] = call range(i16 0, 17) i16 @llvm.ctlz.i16(i16 [[X]], i1 true)
+; CHECK-NEXT: [[OP0:%.*]] = call i16 @llvm.umin.i16(i16 [[CTLZ]], i16 6)
+; CHECK-NEXT: [[RET:%.*]] = add nuw nsw i16 [[CTLZ]], [[OP0]]
+; CHECK-NEXT: ret i16 [[RET]]
+;
+ %ctlz = call i16 @llvm.ctlz.i16(i16 %X, i1 true)
+ %op0 = call i16 @llvm.umin.i16(i16 %ctlz, i16 6)
+ %ret = add i16 %ctlz, %op0
+ ret i16 %ret
+}
>From abec8ec2be7a8690bd34ccbea36df9e5d75fc0c6 Mon Sep 17 00:00:00 2001
From: Marius Kamp <msk at posteo.org>
Date: Sat, 27 Apr 2024 19:46:42 +0200
Subject: [PATCH 2/2] [InstCombine] Fold Minimum over Trailing/Leading Bits
Counts (#90000)
The new transformation folds `umin(cttz(x), c)` to `cttz(x | (1 << c))`
and `umin(ctlz(x), c)` to `ctlz(x | ((1 << (bitwidth - 1)) >> c))`. The
transformation is only implemented for constant `c` to not increase the
number of instructions.
The idea of the transformation is to set the bit at zero-based position `c`,
counted from the least significant end (for `cttz`) or from the most
significant end (for `ctlz`), in the operand. In this way, the `cttz` or
`ctlz` instruction always returns at most `c`.
Alive2 proofs: https://alive2.llvm.org/ce/z/y8Hdb8
---
.../InstCombine/InstCombineCalls.cpp | 50 +++++++++++
.../Transforms/InstCombine/umin_cttz_ctlz.ll | 90 +++++++++----------
2 files changed, 94 insertions(+), 46 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 1913ef92c16c0..b352db0b71908 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1428,6 +1428,46 @@ static Instruction *foldBitOrderCrossLogicOp(Value *V,
return nullptr;
}
+/// Try to fold an unsigned minimum of a zero-bit count and a constant:
+///   umin(cttz(CtOp, ZeroUndef), ConstOp) --> cttz(CtOp | (1 << ConstOp))
+///   umin(ctlz(CtOp, ZeroUndef), ConstOp) --> ctlz(CtOp | (SignBit >> ConstOp))
+/// where SignBit is 1 << (bitwidth - 1). \p I0 is the candidate count call and
+/// \p I1 the other umin operand; \p IntrID selects cttz or ctlz.
+///
+/// Returns the newly built count call when every element of \p I1 is below
+/// the bit width, \p I0 itself when every element of \p I1 is >= the bit
+/// width, and nullptr when no fold is performed.
+static Value *
+foldMinimumOverTrailingOrLeadingZeroCount(Intrinsic::ID IntrID, Value *I0,
+                                          Value *I1, const DataLayout &DL,
+                                          InstCombiner::BuilderTy &Builder) {
+  assert((IntrID == Intrinsic::cttz || IntrID == Intrinsic::ctlz) &&
+         "This helper only supports cttz and ctlz intrinsics");
+
+  // The count must have no user other than the umin being folded.
+  if (!I0->hasOneUse())
+    return nullptr;
+
+  auto *CountCall = dyn_cast<IntrinsicInst>(I0);
+  if (!CountCall || CountCall->getIntrinsicID() != IntrID)
+    return nullptr;
+
+  Type *Ty = I1->getType();
+  unsigned BitWidth = Ty->getScalarSizeInBits();
+  auto IsBelowBitWidth = [BitWidth](auto &C) { return C.ult(BitWidth); };
+
+  if (match(I1, m_CheckedInt(IsBelowBitWidth))) {
+    // Start from the lowest bit (cttz) or the sign bit (ctlz) and shift it by
+    // the constant towards the opposite end of the value.
+    const bool IsCttz = IntrID == Intrinsic::cttz;
+    Constant *Seed =
+        IsCttz ? ConstantInt::get(Ty, 1)
+               : ConstantInt::get(Ty, APInt::getSignedMinValue(BitWidth));
+    Constant *Mask = ConstantFoldBinaryOpOperands(
+        IsCttz ? Instruction::Shl : Instruction::LShr, Seed,
+        cast<Constant>(I1), DL);
+
+    Value *CtOp = CountCall->getArgOperand(0);
+    Value *ZeroUndef = CountCall->getArgOperand(1);
+    return Builder.CreateBinaryIntrinsic(IntrID, Builder.CreateOr(CtOp, Mask),
+                                         ZeroUndef);
+  }
+
+  // No element of the constant is below the bit width: hand back the count.
+  if (match(I1, m_CheckedInt(std::not_fn(IsBelowBitWidth))))
+    return I0;
+
+  // Otherwise, we have a non-splat vector with elements < and >= BitWidth
+  return nullptr;
+}
+
/// CallInst simplification. This mostly only handles folding of intrinsic
/// instructions. For normal calls, it allows visitCallBase to do the heavy
/// lifting.
@@ -1633,6 +1673,16 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
Value *Cmp = Builder.CreateICmpNE(I0, Zero);
return CastInst::Create(Instruction::ZExt, Cmp, II->getType());
}
+ // umin(cttz(x), const) --> cttz(x | (1 << const))
+ if (Value *FoldedCttz = foldMinimumOverTrailingOrLeadingZeroCount(
+ Intrinsic::cttz, I0, I1, DL, Builder)) {
+ return replaceInstUsesWith(*II, FoldedCttz);
+ }
+ // umin(ctlz(x), const) --> ctlz(x | ((1 << (bitwidth - 1)) >> const))
+ if (Value *FoldedCtlz = foldMinimumOverTrailingOrLeadingZeroCount(
+ Intrinsic::ctlz, I0, I1, DL, Builder)) {
+ return replaceInstUsesWith(*II, FoldedCtlz);
+ }
[[fallthrough]];
}
case Intrinsic::umax: {
diff --git a/llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll b/llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll
index 0ebe22fe64062..a64b0f16f1496 100644
--- a/llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll
+++ b/llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll
@@ -4,8 +4,8 @@
define i8 @umin_cttz_i8_zero_undefined(i8 %X) {
; CHECK-LABEL: define i8 @umin_cttz_i8_zero_undefined(
; CHECK-SAME: i8 [[X:%.*]]) {
-; CHECK-NEXT: [[CTTZ:%.*]] = call range(i8 0, 9) i8 @llvm.cttz.i8(i8 [[X]], i1 true)
-; CHECK-NEXT: [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTTZ]], i8 6)
+; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X]], 64
+; CHECK-NEXT: [[RET:%.*]] = call range(i8 0, 7) i8 @llvm.cttz.i8(i8 [[TMP1]], i1 true)
; CHECK-NEXT: ret i8 [[RET]]
;
%cttz = call i8 @llvm.cttz.i8(i8 %X, i1 true)
@@ -16,8 +16,8 @@ define i8 @umin_cttz_i8_zero_undefined(i8 %X) {
define i8 @umin_cttz_i8_zero_defined(i8 %X) {
; CHECK-LABEL: define i8 @umin_cttz_i8_zero_defined(
; CHECK-SAME: i8 [[X:%.*]]) {
-; CHECK-NEXT: [[CTTZ:%.*]] = call range(i8 0, 9) i8 @llvm.cttz.i8(i8 [[X]], i1 false)
-; CHECK-NEXT: [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTTZ]], i8 6)
+; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X]], 64
+; CHECK-NEXT: [[RET:%.*]] = call range(i8 0, 7) i8 @llvm.cttz.i8(i8 [[TMP1]], i1 true)
; CHECK-NEXT: ret i8 [[RET]]
;
%cttz = call i8 @llvm.cttz.i8(i8 %X, i1 false)
@@ -28,8 +28,8 @@ define i8 @umin_cttz_i8_zero_defined(i8 %X) {
define i8 @umin_cttz_i8_commuted_zero_undefined(i8 %X) {
; CHECK-LABEL: define i8 @umin_cttz_i8_commuted_zero_undefined(
; CHECK-SAME: i8 [[X:%.*]]) {
-; CHECK-NEXT: [[CTTZ:%.*]] = call range(i8 0, 9) i8 @llvm.cttz.i8(i8 [[X]], i1 true)
-; CHECK-NEXT: [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTTZ]], i8 6)
+; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X]], 64
+; CHECK-NEXT: [[RET:%.*]] = call range(i8 0, 7) i8 @llvm.cttz.i8(i8 [[TMP1]], i1 true)
; CHECK-NEXT: ret i8 [[RET]]
;
%cttz = call i8 @llvm.cttz.i8(i8 %X, i1 true)
@@ -51,8 +51,8 @@ define i8 @umin_cttz_i8_ge_bitwidth_zero_undefined(i8 %X) {
define i16 @umin_cttz_i16_zero_undefined(i16 %X) {
; CHECK-LABEL: define i16 @umin_cttz_i16_zero_undefined(
; CHECK-SAME: i16 [[X:%.*]]) {
-; CHECK-NEXT: [[CTTZ:%.*]] = call range(i16 0, 17) i16 @llvm.cttz.i16(i16 [[X]], i1 true)
-; CHECK-NEXT: [[RET:%.*]] = call i16 @llvm.umin.i16(i16 [[CTTZ]], i16 6)
+; CHECK-NEXT: [[TMP1:%.*]] = or i16 [[X]], 64
+; CHECK-NEXT: [[RET:%.*]] = call range(i16 0, 7) i16 @llvm.cttz.i16(i16 [[TMP1]], i1 true)
; CHECK-NEXT: ret i16 [[RET]]
;
%cttz = call i16 @llvm.cttz.i16(i16 %X, i1 true)
@@ -63,8 +63,8 @@ define i16 @umin_cttz_i16_zero_undefined(i16 %X) {
define i32 @umin_cttz_i32_zero_undefined(i32 %X) {
; CHECK-LABEL: define i32 @umin_cttz_i32_zero_undefined(
; CHECK-SAME: i32 [[X:%.*]]) {
-; CHECK-NEXT: [[CTTZ:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[X]], i1 true)
-; CHECK-NEXT: [[RET:%.*]] = call i32 @llvm.umin.i32(i32 [[CTTZ]], i32 6)
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[X]], 64
+; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 7) i32 @llvm.cttz.i32(i32 [[TMP1]], i1 true)
; CHECK-NEXT: ret i32 [[RET]]
;
%cttz = call i32 @llvm.cttz.i32(i32 %X, i1 true)
@@ -75,8 +75,8 @@ define i32 @umin_cttz_i32_zero_undefined(i32 %X) {
define i64 @umin_cttz_i64_zero_undefined(i64 %X) {
; CHECK-LABEL: define i64 @umin_cttz_i64_zero_undefined(
; CHECK-SAME: i64 [[X:%.*]]) {
-; CHECK-NEXT: [[CTTZ:%.*]] = call range(i64 0, 65) i64 @llvm.cttz.i64(i64 [[X]], i1 true)
-; CHECK-NEXT: [[RET:%.*]] = call i64 @llvm.umin.i64(i64 [[CTTZ]], i64 6)
+; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[X]], 64
+; CHECK-NEXT: [[RET:%.*]] = call range(i64 0, 7) i64 @llvm.cttz.i64(i64 [[TMP1]], i1 true)
; CHECK-NEXT: ret i64 [[RET]]
;
%cttz = call i64 @llvm.cttz.i64(i64 %X, i1 true)
@@ -108,8 +108,8 @@ define i1 @umin_cttz_i1_zero_defined(i1 %X) {
define <2 x i32> @umin_cttz_2xi32_splat_zero_undefined(<2 x i32> %X) {
; CHECK-LABEL: define <2 x i32> @umin_cttz_2xi32_splat_zero_undefined(
; CHECK-SAME: <2 x i32> [[X:%.*]]) {
-; CHECK-NEXT: [[CTTZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X]], i1 true)
-; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTTZ]], <2 x i32> <i32 6, i32 6>)
+; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X]], <i32 64, i32 64>
+; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 7) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[TMP1]], i1 true)
; CHECK-NEXT: ret <2 x i32> [[RET]]
;
%cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %X, i1 true)
@@ -120,8 +120,8 @@ define <2 x i32> @umin_cttz_2xi32_splat_zero_undefined(<2 x i32> %X) {
define <2 x i32> @umin_cttz_2xi32_splat_poison_zero_undefined(<2 x i32> %X) {
; CHECK-LABEL: define <2 x i32> @umin_cttz_2xi32_splat_poison_zero_undefined(
; CHECK-SAME: <2 x i32> [[X:%.*]]) {
-; CHECK-NEXT: [[CTTZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X]], i1 true)
-; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTTZ]], <2 x i32> <i32 6, i32 poison>)
+; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X]], <i32 64, i32 poison>
+; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 7) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[TMP1]], i1 true)
; CHECK-NEXT: ret <2 x i32> [[RET]]
;
%cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %X, i1 true)
@@ -132,8 +132,8 @@ define <2 x i32> @umin_cttz_2xi32_splat_poison_zero_undefined(<2 x i32> %X) {
define <2 x i32> @umin_cttz_2xi32_no_splat_all_lt_bitwidth_zero_undefined(<2 x i32> %X) {
; CHECK-LABEL: define <2 x i32> @umin_cttz_2xi32_no_splat_all_lt_bitwidth_zero_undefined(
; CHECK-SAME: <2 x i32> [[X:%.*]]) {
-; CHECK-NEXT: [[CTTZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X]], i1 true)
-; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTTZ]], <2 x i32> <i32 6, i32 0>)
+; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X]], <i32 64, i32 1>
+; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[TMP1]], i1 true)
; CHECK-NEXT: ret <2 x i32> [[RET]]
;
%cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %X, i1 true)
@@ -144,9 +144,9 @@ define <2 x i32> @umin_cttz_2xi32_no_splat_all_lt_bitwidth_zero_undefined(<2 x i
define <2 x i32> @umin_cttz_2xi32_negative_no_splat_some_lt_bitwidth_zero_undefined(<2 x i32> %X) {
; CHECK-LABEL: define <2 x i32> @umin_cttz_2xi32_negative_no_splat_some_lt_bitwidth_zero_undefined(
; CHECK-SAME: <2 x i32> [[X:%.*]]) {
-; CHECK-NEXT: [[CTTZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X]], i1 true)
-; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTTZ]], <2 x i32> <i32 6, i32 64>)
-; CHECK-NEXT: ret <2 x i32> [[RET]]
+; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X]], i1 true)
+; CHECK-NEXT: [[RET1:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[RET]], <2 x i32> <i32 6, i32 64>)
+; CHECK-NEXT: ret <2 x i32> [[RET1]]
;
%cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %X, i1 true)
%ret = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %cttz, <2 x i32> <i32 6, i32 64>)
@@ -156,8 +156,7 @@ define <2 x i32> @umin_cttz_2xi32_negative_no_splat_some_lt_bitwidth_zero_undefi
define <2 x i32> @umin_cttz_2xi32_no_splat_none_lt_bitwidth_zero_undefined(<2 x i32> %X) {
; CHECK-LABEL: define <2 x i32> @umin_cttz_2xi32_no_splat_none_lt_bitwidth_zero_undefined(
; CHECK-SAME: <2 x i32> [[X:%.*]]) {
-; CHECK-NEXT: [[CTTZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X]], i1 true)
-; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTTZ]], <2 x i32> <i32 32, i32 64>)
+; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X]], i1 true)
; CHECK-NEXT: ret <2 x i32> [[RET]]
;
%cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %X, i1 true)
@@ -194,8 +193,8 @@ define i16 @umin_cttz_i16_negative_two_uses(i16 %X) {
define i8 @umin_ctlz_i8_zero_undefined(i8 %X) {
; CHECK-LABEL: define i8 @umin_ctlz_i8_zero_undefined(
; CHECK-SAME: i8 [[X:%.*]]) {
-; CHECK-NEXT: [[CTLZ:%.*]] = call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[X]], i1 true)
-; CHECK-NEXT: [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTLZ]], i8 6)
+; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X]], 2
+; CHECK-NEXT: [[RET:%.*]] = call range(i8 0, 7) i8 @llvm.ctlz.i8(i8 [[TMP1]], i1 true)
; CHECK-NEXT: ret i8 [[RET]]
;
%ctlz = call i8 @llvm.ctlz.i8(i8 %X, i1 true)
@@ -206,8 +205,8 @@ define i8 @umin_ctlz_i8_zero_undefined(i8 %X) {
define i8 @umin_ctlz_i8_zero_defined(i8 %X) {
; CHECK-LABEL: define i8 @umin_ctlz_i8_zero_defined(
; CHECK-SAME: i8 [[X:%.*]]) {
-; CHECK-NEXT: [[CTLZ:%.*]] = call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[X]], i1 false)
-; CHECK-NEXT: [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTLZ]], i8 6)
+; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X]], 2
+; CHECK-NEXT: [[RET:%.*]] = call range(i8 0, 7) i8 @llvm.ctlz.i8(i8 [[TMP1]], i1 true)
; CHECK-NEXT: ret i8 [[RET]]
;
%ctlz = call i8 @llvm.ctlz.i8(i8 %X, i1 false)
@@ -218,8 +217,8 @@ define i8 @umin_ctlz_i8_zero_defined(i8 %X) {
define i8 @umin_ctlz_i8_commuted_zero_undefined(i8 %X) {
; CHECK-LABEL: define i8 @umin_ctlz_i8_commuted_zero_undefined(
; CHECK-SAME: i8 [[X:%.*]]) {
-; CHECK-NEXT: [[CTLZ:%.*]] = call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[X]], i1 true)
-; CHECK-NEXT: [[RET:%.*]] = call i8 @llvm.umin.i8(i8 [[CTLZ]], i8 6)
+; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X]], 2
+; CHECK-NEXT: [[RET:%.*]] = call range(i8 0, 7) i8 @llvm.ctlz.i8(i8 [[TMP1]], i1 true)
; CHECK-NEXT: ret i8 [[RET]]
;
%ctlz = call i8 @llvm.ctlz.i8(i8 %X, i1 true)
@@ -241,8 +240,8 @@ define i8 @umin_ctlz_i8_ge_bitwidth_zero_undefined(i8 %X) {
define i16 @umin_ctlz_i16_zero_undefined(i16 %X) {
; CHECK-LABEL: define i16 @umin_ctlz_i16_zero_undefined(
; CHECK-SAME: i16 [[X:%.*]]) {
-; CHECK-NEXT: [[CTLZ:%.*]] = call range(i16 0, 17) i16 @llvm.ctlz.i16(i16 [[X]], i1 true)
-; CHECK-NEXT: [[RET:%.*]] = call i16 @llvm.umin.i16(i16 [[CTLZ]], i16 6)
+; CHECK-NEXT: [[TMP1:%.*]] = or i16 [[X]], 512
+; CHECK-NEXT: [[RET:%.*]] = call range(i16 0, 7) i16 @llvm.ctlz.i16(i16 [[TMP1]], i1 true)
; CHECK-NEXT: ret i16 [[RET]]
;
%ctlz = call i16 @llvm.ctlz.i16(i16 %X, i1 true)
@@ -253,8 +252,8 @@ define i16 @umin_ctlz_i16_zero_undefined(i16 %X) {
define i32 @umin_ctlz_i32_zero_undefined(i32 %X) {
; CHECK-LABEL: define i32 @umin_ctlz_i32_zero_undefined(
; CHECK-SAME: i32 [[X:%.*]]) {
-; CHECK-NEXT: [[CTLZ:%.*]] = call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[X]], i1 true)
-; CHECK-NEXT: [[RET:%.*]] = call i32 @llvm.umin.i32(i32 [[CTLZ]], i32 6)
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[X]], 33554432
+; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 7) i32 @llvm.ctlz.i32(i32 [[TMP1]], i1 true)
; CHECK-NEXT: ret i32 [[RET]]
;
%ctlz = call i32 @llvm.ctlz.i32(i32 %X, i1 true)
@@ -265,8 +264,8 @@ define i32 @umin_ctlz_i32_zero_undefined(i32 %X) {
define i64 @umin_ctlz_i64_zero_undefined(i64 %X) {
; CHECK-LABEL: define i64 @umin_ctlz_i64_zero_undefined(
; CHECK-SAME: i64 [[X:%.*]]) {
-; CHECK-NEXT: [[CTLZ:%.*]] = call range(i64 0, 65) i64 @llvm.ctlz.i64(i64 [[X]], i1 true)
-; CHECK-NEXT: [[RET:%.*]] = call i64 @llvm.umin.i64(i64 [[CTLZ]], i64 6)
+; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[X]], 144115188075855872
+; CHECK-NEXT: [[RET:%.*]] = call range(i64 0, 7) i64 @llvm.ctlz.i64(i64 [[TMP1]], i1 true)
; CHECK-NEXT: ret i64 [[RET]]
;
%ctlz = call i64 @llvm.ctlz.i64(i64 %X, i1 true)
@@ -298,8 +297,8 @@ define i1 @umin_ctlz_i1_zero_defined(i1 %X) {
define <2 x i32> @umin_ctlz_2xi32_splat_zero_undefined(<2 x i32> %X) {
; CHECK-LABEL: define <2 x i32> @umin_ctlz_2xi32_splat_zero_undefined(
; CHECK-SAME: <2 x i32> [[X:%.*]]) {
-; CHECK-NEXT: [[CTLZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X]], i1 true)
-; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTLZ]], <2 x i32> <i32 6, i32 6>)
+; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X]], <i32 33554432, i32 33554432>
+; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 7) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[TMP1]], i1 true)
; CHECK-NEXT: ret <2 x i32> [[RET]]
;
%ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %X, i1 true)
@@ -310,8 +309,8 @@ define <2 x i32> @umin_ctlz_2xi32_splat_zero_undefined(<2 x i32> %X) {
define <2 x i32> @umin_ctlz_2xi32_splat_poison_zero_undefined(<2 x i32> %X) {
; CHECK-LABEL: define <2 x i32> @umin_ctlz_2xi32_splat_poison_zero_undefined(
; CHECK-SAME: <2 x i32> [[X:%.*]]) {
-; CHECK-NEXT: [[CTLZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X]], i1 true)
-; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTLZ]], <2 x i32> <i32 6, i32 poison>)
+; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X]], <i32 33554432, i32 poison>
+; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 7) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[TMP1]], i1 true)
; CHECK-NEXT: ret <2 x i32> [[RET]]
;
%ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %X, i1 true)
@@ -322,8 +321,8 @@ define <2 x i32> @umin_ctlz_2xi32_splat_poison_zero_undefined(<2 x i32> %X) {
define <2 x i32> @umin_ctlz_2xi32_no_splat_all_lt_bitwidth_zero_undefined(<2 x i32> %X) {
; CHECK-LABEL: define <2 x i32> @umin_ctlz_2xi32_no_splat_all_lt_bitwidth_zero_undefined(
; CHECK-SAME: <2 x i32> [[X:%.*]]) {
-; CHECK-NEXT: [[CTLZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X]], i1 true)
-; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTLZ]], <2 x i32> <i32 6, i32 0>)
+; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X]], <i32 33554432, i32 -2147483648>
+; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[TMP1]], i1 true)
; CHECK-NEXT: ret <2 x i32> [[RET]]
;
%ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %X, i1 true)
@@ -334,9 +333,9 @@ define <2 x i32> @umin_ctlz_2xi32_no_splat_all_lt_bitwidth_zero_undefined(<2 x i
define <2 x i32> @umin_ctlz_2xi32_negative_no_splat_some_lt_bitwidth_zero_undefined(<2 x i32> %X) {
; CHECK-LABEL: define <2 x i32> @umin_ctlz_2xi32_negative_no_splat_some_lt_bitwidth_zero_undefined(
; CHECK-SAME: <2 x i32> [[X:%.*]]) {
-; CHECK-NEXT: [[CTLZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X]], i1 true)
-; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTLZ]], <2 x i32> <i32 6, i32 64>)
-; CHECK-NEXT: ret <2 x i32> [[RET]]
+; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X]], i1 true)
+; CHECK-NEXT: [[RET1:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[RET]], <2 x i32> <i32 6, i32 64>)
+; CHECK-NEXT: ret <2 x i32> [[RET1]]
;
%ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %X, i1 true)
%ret = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %ctlz, <2 x i32> <i32 6, i32 64>)
@@ -346,8 +345,7 @@ define <2 x i32> @umin_ctlz_2xi32_negative_no_splat_some_lt_bitwidth_zero_undefi
define <2 x i32> @umin_ctlz_2xi32_no_splat_none_lt_bitwidth_zero_undefined(<2 x i32> %X) {
; CHECK-LABEL: define <2 x i32> @umin_ctlz_2xi32_no_splat_none_lt_bitwidth_zero_undefined(
; CHECK-SAME: <2 x i32> [[X:%.*]]) {
-; CHECK-NEXT: [[CTLZ:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X]], i1 true)
-; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[CTLZ]], <2 x i32> <i32 32, i32 64>)
+; CHECK-NEXT: [[RET:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X]], i1 true)
; CHECK-NEXT: ret <2 x i32> [[RET]]
;
%ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %X, i1 true)
More information about the llvm-commits mailing list