[llvm] [SelectionDAG]: Deduce KnownNeverZero from SMIN and SMAX (PR #85722)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 22 14:51:43 PDT 2024
https://github.com/AtariDreams updated https://github.com/llvm/llvm-project/pull/85722
From da746a74b423656e61ecc3d314e84eb11e304c61 Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Mon, 18 Mar 2024 20:40:32 -0400
Subject: [PATCH 1/2] [SelectionDAG]: Pre-commit tests (NFC)
---
llvm/test/CodeGen/X86/known-never-zero.ll | 1046 ++++++++++++++++++++-
1 file changed, 1002 insertions(+), 44 deletions(-)
diff --git a/llvm/test/CodeGen/X86/known-never-zero.ll b/llvm/test/CodeGen/X86/known-never-zero.ll
index cc9862769f2b66..826ce4619ceb30 100644
--- a/llvm/test/CodeGen/X86/known-never-zero.ll
+++ b/llvm/test/CodeGen/X86/known-never-zero.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=CHECK
+; RUN: llc < %s -mtriple=i386-unknown-unknown | FileCheck %s --check-prefix=X86
;; Use cttz to test if we properly prove never-zero. There is a very
;; simple transform from cttz -> cttz_zero_undef if its operand is
@@ -9,10 +10,13 @@ declare i32 @llvm.uadd.sat.i32(i32, i32)
declare i32 @llvm.umax.i32(i32, i32)
declare i32 @llvm.umin.i32(i32, i32)
declare i32 @llvm.smin.i32(i32, i32)
+declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>)
declare i32 @llvm.smax.i32(i32, i32)
+declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>)
declare i32 @llvm.bswap.i32(i32)
declare i32 @llvm.bitreverse.i32(i32)
declare i32 @llvm.ctpop.i32(i32)
+declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>)
declare i32 @llvm.abs.i32(i32, i1)
declare i32 @llvm.fshl.i32(i32, i32, i32)
declare i32 @llvm.fshr.i32(i32, i32, i32)
@@ -23,6 +27,13 @@ define i32 @or_known_nonzero(i32 %x) {
; CHECK-NEXT: orl $1, %edi
; CHECK-NEXT: rep bsfl %edi, %eax
; CHECK-NEXT: retq
+;
+; X86-LABEL: or_known_nonzero:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: orl $1, %eax
+; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: retl
%z = or i32 %x, 1
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
@@ -39,6 +50,18 @@ define i32 @or_maybe_zero(i32 %x, i32 %y) {
; CHECK-NEXT: .LBB1_1:
; CHECK-NEXT: movl $32, %eax
; CHECK-NEXT: retq
+;
+; X86-LABEL: or_maybe_zero:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: je .LBB1_1
+; X86-NEXT: # %bb.2: # %cond.false
+; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: retl
+; X86-NEXT: .LBB1_1:
+; X86-NEXT: movl $32, %eax
+; X86-NEXT: retl
%z = or i32 %x, %y
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
@@ -53,6 +76,20 @@ define i32 @select_known_nonzero(i1 %c, i32 %x) {
; CHECK-NEXT: cmovnel %esi, %eax
; CHECK-NEXT: rep bsfl %eax, %eax
; CHECK-NEXT: retq
+;
+; X86-LABEL: select_known_nonzero:
+; X86: # %bb.0:
+; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
+; X86-NEXT: jne .LBB2_1
+; X86-NEXT: # %bb.2:
+; X86-NEXT: movl $122, %eax
+; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: retl
+; X86-NEXT: .LBB2_1:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: orl $1, %eax
+; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: retl
%y = or i32 %x, 1
%z = select i1 %c, i32 %y, i32 122
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -74,6 +111,26 @@ define i32 @select_maybe_zero(i1 %c, i32 %x) {
; CHECK-NEXT: .LBB3_1:
; CHECK-NEXT: movl $32, %eax
; CHECK-NEXT: retq
+;
+; X86-LABEL: select_maybe_zero:
+; X86: # %bb.0:
+; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
+; X86-NEXT: jne .LBB3_1
+; X86-NEXT: # %bb.2:
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: je .LBB3_4
+; X86-NEXT: .LBB3_5: # %cond.false
+; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: retl
+; X86-NEXT: .LBB3_1:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: orl $1, %eax
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: jne .LBB3_5
+; X86-NEXT: .LBB3_4:
+; X86-NEXT: movl $32, %eax
+; X86-NEXT: retl
%y = or i32 %x, 1
%z = select i1 %c, i32 %y, i32 0
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -89,6 +146,14 @@ define i32 @shl_known_nonzero_1s_bit_set(i32 %x) {
; CHECK-NEXT: shll %cl, %eax
; CHECK-NEXT: rep bsfl %eax, %eax
; CHECK-NEXT: retq
+;
+; X86-LABEL: shl_known_nonzero_1s_bit_set:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl $123, %eax
+; X86-NEXT: shll %cl, %eax
+; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: retl
%z = shl i32 123, %x
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
@@ -103,6 +168,15 @@ define i32 @shl_known_nonzero_nsw(i32 %x, i32 %yy) {
; CHECK-NEXT: shll %cl, %esi
; CHECK-NEXT: rep bsfl %esi, %eax
; CHECK-NEXT: retq
+;
+; X86-LABEL: shl_known_nonzero_nsw:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl $256, %eax # imm = 0x100
+; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: shll %cl, %eax
+; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: retl
%y = or i32 %yy, 256
%z = shl nsw i32 %y, %x
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -118,6 +192,15 @@ define i32 @shl_known_nonzero_nuw(i32 %x, i32 %yy) {
; CHECK-NEXT: shll %cl, %esi
; CHECK-NEXT: rep bsfl %esi, %eax
; CHECK-NEXT: retq
+;
+; X86-LABEL: shl_known_nonzero_nuw:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl $256, %eax # imm = 0x100
+; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: shll %cl, %eax
+; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: retl
%y = or i32 %yy, 256
%z = shl nuw i32 %y, %x
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -138,6 +221,20 @@ define i32 @shl_maybe_zero(i32 %x, i32 %y) {
; CHECK-NEXT: .LBB7_1:
; CHECK-NEXT: movl $32, %eax
; CHECK-NEXT: retq
+;
+; X86-LABEL: shl_maybe_zero:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: shll %cl, %eax
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: je .LBB7_1
+; X86-NEXT: # %bb.2: # %cond.false
+; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: retl
+; X86-NEXT: .LBB7_1:
+; X86-NEXT: movl $32, %eax
+; X86-NEXT: retl
%z = shl nuw nsw i32 %y, %x
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
@@ -151,6 +248,18 @@ define i32 @uaddsat_known_nonzero(i32 %x) {
; CHECK-NEXT: cmovnel %edi, %eax
; CHECK-NEXT: rep bsfl %eax, %eax
; CHECK-NEXT: retq
+;
+; X86-LABEL: uaddsat_known_nonzero:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: addl $1, %eax
+; X86-NEXT: movl $-1, %ecx
+; X86-NEXT: je .LBB8_2
+; X86-NEXT: # %bb.1:
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: .LBB8_2:
+; X86-NEXT: rep bsfl %ecx, %eax
+; X86-NEXT: retl
%z = call i32 @llvm.uadd.sat.i32(i32 %x, i32 1)
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
@@ -170,6 +279,24 @@ define i32 @uaddsat_maybe_zero(i32 %x, i32 %y) {
; CHECK-NEXT: .LBB9_1:
; CHECK-NEXT: movl $32, %eax
; CHECK-NEXT: retq
+;
+; X86-LABEL: uaddsat_maybe_zero:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl $-1, %eax
+; X86-NEXT: jb .LBB9_2
+; X86-NEXT: # %bb.1:
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: .LBB9_2:
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: je .LBB9_3
+; X86-NEXT: # %bb.4: # %cond.false
+; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: retl
+; X86-NEXT: .LBB9_3:
+; X86-NEXT: movl $32, %eax
+; X86-NEXT: retl
%z = call i32 @llvm.uadd.sat.i32(i32 %x, i32 %y)
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
@@ -186,6 +313,20 @@ define i32 @umax_known_nonzero(i32 %x, i32 %y) {
; CHECK-NEXT: cmoval %edi, %eax
; CHECK-NEXT: rep bsfl %eax, %eax
; CHECK-NEXT: retq
+;
+; X86-LABEL: umax_known_nonzero:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl $4, %edx
+; X86-NEXT: shll %cl, %edx
+; X86-NEXT: cmpl %edx, %eax
+; X86-NEXT: ja .LBB10_2
+; X86-NEXT: # %bb.1:
+; X86-NEXT: movl %edx, %eax
+; X86-NEXT: .LBB10_2:
+; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: retl
%yy = shl nuw i32 4, %y
%z = call i32 @llvm.umax.i32(i32 %x, i32 %yy)
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -205,6 +346,24 @@ define i32 @umax_maybe_zero(i32 %x, i32 %y) {
; CHECK-NEXT: .LBB11_1:
; CHECK-NEXT: movl $32, %eax
; CHECK-NEXT: retq
+;
+; X86-LABEL: umax_maybe_zero:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: cmpl %ecx, %eax
+; X86-NEXT: ja .LBB11_2
+; X86-NEXT: # %bb.1:
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: .LBB11_2:
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: je .LBB11_3
+; X86-NEXT: # %bb.4: # %cond.false
+; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: retl
+; X86-NEXT: .LBB11_3:
+; X86-NEXT: movl $32, %eax
+; X86-NEXT: retl
%z = call i32 @llvm.umax.i32(i32 %x, i32 %y)
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
@@ -222,6 +381,21 @@ define i32 @umin_known_nonzero(i32 %xx, i32 %yy) {
; CHECK-NEXT: cmovbl %eax, %esi
; CHECK-NEXT: rep bsfl %esi, %eax
; CHECK-NEXT: retq
+;
+; X86-LABEL: umin_known_nonzero:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl $4, %edx
+; X86-NEXT: shll %cl, %edx
+; X86-NEXT: addl $4, %eax
+; X86-NEXT: cmpl %eax, %edx
+; X86-NEXT: jb .LBB12_2
+; X86-NEXT: # %bb.1:
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: .LBB12_2:
+; X86-NEXT: rep bsfl %edx, %eax
+; X86-NEXT: retl
%x = shl nuw i32 4, %xx
%y = add nuw nsw i32 %yy, 4
%z = call i32 @llvm.umin.i32(i32 %x, i32 %y)
@@ -243,6 +417,23 @@ define i32 @umin_maybe_zero(i32 %x, i32 %y) {
; CHECK-NEXT: .LBB13_1:
; CHECK-NEXT: movl $32, %eax
; CHECK-NEXT: retq
+;
+; X86-LABEL: umin_maybe_zero:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: cmpl $54, %eax
+; X86-NEXT: jb .LBB13_2
+; X86-NEXT: # %bb.1:
+; X86-NEXT: movl $54, %eax
+; X86-NEXT: .LBB13_2:
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: je .LBB13_3
+; X86-NEXT: # %bb.4: # %cond.false
+; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: retl
+; X86-NEXT: .LBB13_3:
+; X86-NEXT: movl $32, %eax
+; X86-NEXT: retl
%z = call i32 @llvm.umin.i32(i32 %x, i32 54)
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
@@ -260,6 +451,21 @@ define i32 @smin_known_nonzero(i32 %xx, i32 %yy) {
; CHECK-NEXT: cmovll %eax, %esi
; CHECK-NEXT: rep bsfl %esi, %eax
; CHECK-NEXT: retq
+;
+; X86-LABEL: smin_known_nonzero:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl $4, %edx
+; X86-NEXT: shll %cl, %edx
+; X86-NEXT: addl $4, %eax
+; X86-NEXT: cmpl %eax, %edx
+; X86-NEXT: jl .LBB14_2
+; X86-NEXT: # %bb.1:
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: .LBB14_2:
+; X86-NEXT: rep bsfl %edx, %eax
+; X86-NEXT: retl
%x = shl nuw i32 4, %xx
%y = add nuw nsw i32 %yy, 4
%z = call i32 @llvm.smin.i32(i32 %x, i32 %y)
@@ -267,6 +473,134 @@ define i32 @smin_known_nonzero(i32 %xx, i32 %yy) {
ret i32 %r
}
;; smin(%x, -54) is at most -54, so the cttz operand below can never be zero.
;; %y is unused. The assertions are the pre-commit baseline: both targets
;; still emit a plain bsfl and select 32 for a zero input instead of the
;; unguarded `rep bsfl` form seen in the *_known_nonzero tests.
;; NOTE(review): despite the "known_zero" name, the operand here is known
;; NON-zero - consider renaming to avoid confusion.
+define i32 @smin_known_zero(i32 %x, i32 %y) {
; CHECK-LABEL: smin_known_zero:
; CHECK: # %bb.0:
; CHECK-NEXT: cmpl $-54, %edi
; CHECK-NEXT: movl $-54, %eax
; CHECK-NEXT: cmovll %edi, %eax
; CHECK-NEXT: bsfl %eax, %ecx
; CHECK-NEXT: movl $32, %eax
; CHECK-NEXT: cmovnel %ecx, %eax
; CHECK-NEXT: retq
;
; X86-LABEL: smin_known_zero:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpl $-54, %eax
; X86-NEXT: jl .LBB15_2
; X86-NEXT: # %bb.1:
; X86-NEXT: movl $-54, %eax
; X86-NEXT: .LBB15_2:
; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: je .LBB15_4
; X86-NEXT: # %bb.3:
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: .LBB15_4:
; X86-NEXT: retl
+ %z = call i32 @llvm.smin.i32(i32 %x, i32 -54)
+ %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
+ ret i32 %r
+}
+
;; Vector variant: every lane of smin(%x, <-54, -23, -12, -1>) is at most a
;; negative constant, so no lane can be zero. %y is unused. Instead of cttz,
;; the probe is `ctpop(%z) == 1` zero-extended to i32 lanes. The baseline
;; output computes, per lane, (z ^ (z - 1)) >u (z - 1): paddd/pxor/pcmpgtd on
;; x86_64 and leal/xorl/cmpl/seta on i386.
;; NOTE(review): despite the "known_zero" name, the lanes here are known
;; NON-zero - consider renaming to avoid confusion.
+define <4 x i32> @smin_known_zero_vec(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: smin_known_zero_vec:
; CHECK: # %bb.0:
; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [4294967242,4294967273,4294967284,4294967295]
; CHECK-NEXT: movdqa %xmm1, %xmm2
; CHECK-NEXT: pcmpgtd %xmm0, %xmm2
; CHECK-NEXT: pand %xmm2, %xmm0
; CHECK-NEXT: pandn %xmm1, %xmm2
; CHECK-NEXT: por %xmm2, %xmm0
; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
; CHECK-NEXT: paddd %xmm0, %xmm1
; CHECK-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-NEXT: pxor %xmm1, %xmm0
; CHECK-NEXT: pcmpgtd %xmm1, %xmm0
; CHECK-NEXT: psrld $31, %xmm0
; CHECK-NEXT: retq
;
; X86-LABEL: smin_known_zero_vec:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: pushl %ebx
; X86-NEXT: .cfi_def_cfa_offset 12
; X86-NEXT: pushl %edi
; X86-NEXT: .cfi_def_cfa_offset 16
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 20
; X86-NEXT: .cfi_offset %esi, -20
; X86-NEXT: .cfi_offset %edi, -16
; X86-NEXT: .cfi_offset %ebx, -12
; X86-NEXT: .cfi_offset %ebp, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: cmpl $-1, %edi
; X86-NEXT: jl .LBB16_2
; X86-NEXT: # %bb.1:
; X86-NEXT: movl $-1, %edi
; X86-NEXT: .LBB16_2:
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: cmpl $-12, %ebp
; X86-NEXT: jl .LBB16_4
; X86-NEXT: # %bb.3:
; X86-NEXT: movl $-12, %ebp
; X86-NEXT: .LBB16_4:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmpl $-23, %esi
; X86-NEXT: jl .LBB16_6
; X86-NEXT: # %bb.5:
; X86-NEXT: movl $-23, %esi
; X86-NEXT: .LBB16_6:
; X86-NEXT: cmpl $-54, %ecx
; X86-NEXT: jl .LBB16_8
; X86-NEXT: # %bb.7:
; X86-NEXT: movl $-54, %ecx
; X86-NEXT: .LBB16_8:
; X86-NEXT: leal -1(%ecx), %edx
; X86-NEXT: xorl %edx, %ecx
; X86-NEXT: xorl %ebx, %ebx
; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: seta %bl
; X86-NEXT: leal -1(%esi), %edx
; X86-NEXT: xorl %edx, %esi
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: cmpl %edx, %esi
; X86-NEXT: seta %cl
; X86-NEXT: leal -1(%ebp), %esi
; X86-NEXT: xorl %esi, %ebp
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: cmpl %esi, %ebp
; X86-NEXT: seta %dl
; X86-NEXT: leal -1(%edi), %esi
; X86-NEXT: xorl %esi, %edi
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %esi, %edi
; X86-NEXT: seta %al
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %eax, 12(%esi)
; X86-NEXT: movl %edx, 8(%esi)
; X86-NEXT: movl %ecx, 4(%esi)
; X86-NEXT: movl %ebx, (%esi)
; X86-NEXT: movl %esi, %eax
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 16
; X86-NEXT: popl %edi
; X86-NEXT: .cfi_def_cfa_offset 12
; X86-NEXT: popl %ebx
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: popl %ebp
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl $4
+ %z = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %x, <4 x i32> <i32 -54, i32 -23, i32 -12, i32 -1>)
+ %r = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %z)
+ %3 = icmp eq <4 x i32> %r, <i32 1, i32 1, i32 1, i32 1>
+ %ret = zext <4 x i1> %3 to <4 x i32>
+ ret <4 x i32> %ret
+}
+
define i32 @smin_maybe_zero(i32 %x, i32 %y) {
; CHECK-LABEL: smin_maybe_zero:
; CHECK: # %bb.0:
@@ -274,13 +608,30 @@ define i32 @smin_maybe_zero(i32 %x, i32 %y) {
; CHECK-NEXT: movl $54, %eax
; CHECK-NEXT: cmovll %edi, %eax
; CHECK-NEXT: testl %eax, %eax
-; CHECK-NEXT: je .LBB15_1
+; CHECK-NEXT: je .LBB17_1
; CHECK-NEXT: # %bb.2: # %cond.false
; CHECK-NEXT: rep bsfl %eax, %eax
; CHECK-NEXT: retq
-; CHECK-NEXT: .LBB15_1:
+; CHECK-NEXT: .LBB17_1:
; CHECK-NEXT: movl $32, %eax
; CHECK-NEXT: retq
+;
+; X86-LABEL: smin_maybe_zero:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: cmpl $54, %eax
+; X86-NEXT: jl .LBB17_2
+; X86-NEXT: # %bb.1:
+; X86-NEXT: movl $54, %eax
+; X86-NEXT: .LBB17_2:
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: je .LBB17_3
+; X86-NEXT: # %bb.4: # %cond.false
+; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: retl
+; X86-NEXT: .LBB17_3:
+; X86-NEXT: movl $32, %eax
+; X86-NEXT: retl
%z = call i32 @llvm.smin.i32(i32 %x, i32 54)
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
@@ -298,6 +649,21 @@ define i32 @smax_known_nonzero(i32 %xx, i32 %yy) {
; CHECK-NEXT: cmovgl %eax, %esi
; CHECK-NEXT: rep bsfl %esi, %eax
; CHECK-NEXT: retq
+;
+; X86-LABEL: smax_known_nonzero:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl $4, %edx
+; X86-NEXT: shll %cl, %edx
+; X86-NEXT: addl $4, %eax
+; X86-NEXT: cmpl %eax, %edx
+; X86-NEXT: jg .LBB18_2
+; X86-NEXT: # %bb.1:
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: .LBB18_2:
+; X86-NEXT: rep bsfl %edx, %eax
+; X86-NEXT: retl
%x = shl nuw i32 4, %xx
%y = add nuw nsw i32 %yy, 4
%z = call i32 @llvm.smax.i32(i32 %x, i32 %y)
@@ -305,8 +671,8 @@ define i32 @smax_known_nonzero(i32 %xx, i32 %yy) {
ret i32 %r
}
-define i32 @smax_maybe_zero(i32 %x, i32 %y) {
-; CHECK-LABEL: smax_maybe_zero:
+define i32 @smax_known_nonzero_2(i32 %x, i32 %y) {
+; CHECK-LABEL: smax_known_nonzero_2:
; CHECK: # %bb.0:
; CHECK-NEXT: cmpl $55, %edi
; CHECK-NEXT: movl $54, %eax
@@ -315,11 +681,181 @@ define i32 @smax_maybe_zero(i32 %x, i32 %y) {
; CHECK-NEXT: movl $32, %eax
; CHECK-NEXT: cmovnel %ecx, %eax
; CHECK-NEXT: retq
+;
+; X86-LABEL: smax_known_nonzero_2:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: cmpl $55, %eax
+; X86-NEXT: jge .LBB19_2
+; X86-NEXT: # %bb.1:
+; X86-NEXT: movl $54, %eax
+; X86-NEXT: .LBB19_2:
+; X86-NEXT: bsfl %eax, %ecx
+; X86-NEXT: movl $32, %eax
+; X86-NEXT: je .LBB19_4
+; X86-NEXT: # %bb.3:
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: .LBB19_4:
+; X86-NEXT: retl
%z = call i32 @llvm.smax.i32(i32 %x, i32 54)
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}
;; Vector variant: every lane of smax(%x, <54, 23, 12, 1>) is at least a
;; positive constant, so no lane can be zero. %y is unused. The IR probe is
;; `ctpop(%z) == 1` zero-extended to i32 lanes.
;; NOTE(review): the recorded assertions do not appear to match the IR body.
;; The x86_64 sequence ends at the popcount (psadbw/packuswb) with no compare
;; against 1, and the i386 sequence is a per-lane bsfl with a fallback of 32,
;; which looks like a cttz lowering. They are presumably stale from an
;; earlier revision of the test - regenerate with
;; utils/update_llc_test_checks.py and confirm.
;; NOTE(review): despite the "known_zero" name, the lanes here are known
;; NON-zero - consider renaming to avoid confusion.
+define <4 x i32> @smax_known_zero_vec(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: smax_known_zero_vec:
; CHECK: # %bb.0:
; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [54,23,12,1]
; CHECK-NEXT: movdqa %xmm0, %xmm2
; CHECK-NEXT: pcmpgtd %xmm1, %xmm2
; CHECK-NEXT: pand %xmm2, %xmm0
; CHECK-NEXT: pandn %xmm1, %xmm2
; CHECK-NEXT: por %xmm2, %xmm0
; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
; CHECK-NEXT: paddd %xmm0, %xmm1
; CHECK-NEXT: pandn %xmm1, %xmm0
; CHECK-NEXT: movdqa %xmm0, %xmm1
; CHECK-NEXT: psrlw $1, %xmm1
; CHECK-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-NEXT: psubb %xmm1, %xmm0
; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; CHECK-NEXT: movdqa %xmm0, %xmm2
; CHECK-NEXT: pand %xmm1, %xmm2
; CHECK-NEXT: psrlw $2, %xmm0
; CHECK-NEXT: pand %xmm1, %xmm0
; CHECK-NEXT: paddb %xmm2, %xmm0
; CHECK-NEXT: movdqa %xmm0, %xmm1
; CHECK-NEXT: psrlw $4, %xmm1
; CHECK-NEXT: paddb %xmm1, %xmm0
; CHECK-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: pxor %xmm1, %xmm1
; CHECK-NEXT: movdqa %xmm0, %xmm2
; CHECK-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; CHECK-NEXT: psadbw %xmm1, %xmm2
; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT: psadbw %xmm1, %xmm0
; CHECK-NEXT: packuswb %xmm2, %xmm0
; CHECK-NEXT: retq
;
; X86-LABEL: smax_known_zero_vec:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: pushl %ebx
; X86-NEXT: .cfi_def_cfa_offset 12
; X86-NEXT: pushl %edi
; X86-NEXT: .cfi_def_cfa_offset 16
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 20
; X86-NEXT: .cfi_offset %esi, -20
; X86-NEXT: .cfi_offset %edi, -16
; X86-NEXT: .cfi_offset %ebx, -12
; X86-NEXT: .cfi_offset %ebp, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmpl $2, %ecx
; X86-NEXT: jge .LBB20_2
; X86-NEXT: # %bb.1:
; X86-NEXT: movl $1, %ecx
; X86-NEXT: .LBB20_2:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpl $13, %edx
; X86-NEXT: jge .LBB20_4
; X86-NEXT: # %bb.3:
; X86-NEXT: movl $12, %edx
; X86-NEXT: .LBB20_4:
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: cmpl $24, %eax
; X86-NEXT: jge .LBB20_6
; X86-NEXT: # %bb.5:
; X86-NEXT: movl $23, %eax
; X86-NEXT: .LBB20_6:
; X86-NEXT: cmpl $55, %esi
; X86-NEXT: jge .LBB20_8
; X86-NEXT: # %bb.7:
; X86-NEXT: movl $54, %esi
; X86-NEXT: .LBB20_8:
; X86-NEXT: bsfl %esi, %ebx
; X86-NEXT: movl $32, %edi
; X86-NEXT: movl $32, %esi
; X86-NEXT: je .LBB20_10
; X86-NEXT: # %bb.9:
; X86-NEXT: movl %ebx, %esi
; X86-NEXT: .LBB20_10:
; X86-NEXT: bsfl %eax, %eax
; X86-NEXT: movl $32, %ebx
; X86-NEXT: je .LBB20_12
; X86-NEXT: # %bb.11:
; X86-NEXT: movl %eax, %ebx
; X86-NEXT: .LBB20_12:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bsfl %edx, %ebp
; X86-NEXT: movl $32, %edx
; X86-NEXT: je .LBB20_14
; X86-NEXT: # %bb.13:
; X86-NEXT: movl %ebp, %edx
; X86-NEXT: .LBB20_14:
; X86-NEXT: bsfl %ecx, %ecx
; X86-NEXT: je .LBB20_16
; X86-NEXT: # %bb.15:
; X86-NEXT: movl %ecx, %edi
; X86-NEXT: .LBB20_16:
; X86-NEXT: movl %edi, 12(%eax)
; X86-NEXT: movl %edx, 8(%eax)
; X86-NEXT: movl %ebx, 4(%eax)
; X86-NEXT: movl %esi, (%eax)
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 16
; X86-NEXT: popl %edi
; X86-NEXT: .cfi_def_cfa_offset 12
; X86-NEXT: popl %ebx
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: popl %ebp
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl $4
+ %z = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %x, <4 x i32> <i32 54, i32 23, i32 12, i32 1>)
+ %r = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %z)
+ %3 = icmp eq <4 x i32> %r, <i32 1, i32 1, i32 1, i32 1>
+ %ret = zext <4 x i1> %3 to <4 x i32>
+ ret <4 x i32> %ret
+}
+
;; Negative test: smax(%x, -1) is only bounded below by -1 and equals 0 when
;; %x is 0, so the cttz operand may be zero. %y is unused. Both targets must
;; keep the explicit testl/je zero guard that returns 32 ahead of `rep bsfl`.
+define i32 @smax_maybe_zero(i32 %x, i32 %y) {
; CHECK-LABEL: smax_maybe_zero:
; CHECK: # %bb.0:
; CHECK-NEXT: testl %edi, %edi
; CHECK-NEXT: movl $-1, %eax
; CHECK-NEXT: cmovnsl %edi, %eax
; CHECK-NEXT: testl %eax, %eax
; CHECK-NEXT: je .LBB21_1
; CHECK-NEXT: # %bb.2: # %cond.false
; CHECK-NEXT: rep bsfl %eax, %eax
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB21_1:
; CHECK-NEXT: movl $32, %eax
; CHECK-NEXT: retq
;
; X86-LABEL: smax_maybe_zero:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: testl %eax, %eax
; X86-NEXT: jns .LBB21_2
; X86-NEXT: # %bb.1:
; X86-NEXT: movl $-1, %eax
; X86-NEXT: .LBB21_2:
; X86-NEXT: testl %eax, %eax
; X86-NEXT: je .LBB21_3
; X86-NEXT: # %bb.4: # %cond.false
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
; X86-NEXT: .LBB21_3:
; X86-NEXT: movl $32, %eax
; X86-NEXT: retl
+ %z = call i32 @llvm.smax.i32(i32 %x, i32 -1)
+ %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
+ ret i32 %r
+}
+
define i32 @rotr_known_nonzero(i32 %xx, i32 %y) {
; CHECK-LABEL: rotr_known_nonzero:
; CHECK: # %bb.0:
@@ -328,13 +864,28 @@ define i32 @rotr_known_nonzero(i32 %xx, i32 %y) {
; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
; CHECK-NEXT: rorl %cl, %edi
; CHECK-NEXT: testl %edi, %edi
-; CHECK-NEXT: je .LBB18_1
+; CHECK-NEXT: je .LBB22_1
; CHECK-NEXT: # %bb.2: # %cond.false
; CHECK-NEXT: rep bsfl %edi, %eax
; CHECK-NEXT: retq
-; CHECK-NEXT: .LBB18_1:
+; CHECK-NEXT: .LBB22_1:
; CHECK-NEXT: movl $32, %eax
; CHECK-NEXT: retq
+;
+; X86-LABEL: rotr_known_nonzero:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl $256, %eax # imm = 0x100
+; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: rorl %cl, %eax
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: je .LBB22_1
+; X86-NEXT: # %bb.2: # %cond.false
+; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: retl
+; X86-NEXT: .LBB22_1:
+; X86-NEXT: movl $32, %eax
+; X86-NEXT: retl
%x = or i32 %xx, 256
%shr = lshr i32 %x, %y
%sub = sub i32 32, %y
@@ -351,13 +902,27 @@ define i32 @rotr_maybe_zero(i32 %x, i32 %y) {
; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
; CHECK-NEXT: rorl %cl, %edi
; CHECK-NEXT: testl %edi, %edi
-; CHECK-NEXT: je .LBB19_1
+; CHECK-NEXT: je .LBB23_1
; CHECK-NEXT: # %bb.2: # %cond.false
; CHECK-NEXT: rep bsfl %edi, %eax
; CHECK-NEXT: retq
-; CHECK-NEXT: .LBB19_1:
+; CHECK-NEXT: .LBB23_1:
; CHECK-NEXT: movl $32, %eax
; CHECK-NEXT: retq
+;
+; X86-LABEL: rotr_maybe_zero:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: rorl %cl, %eax
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: je .LBB23_1
+; X86-NEXT: # %bb.2: # %cond.false
+; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: retl
+; X86-NEXT: .LBB23_1:
+; X86-NEXT: movl $32, %eax
+; X86-NEXT: retl
%shr = lshr i32 %x, %y
%sub = sub i32 32, %y
%shl = shl i32 %x, %sub
@@ -375,6 +940,15 @@ define i32 @rotr_with_fshr_known_nonzero(i32 %xx, i32 %y) {
; CHECK-NEXT: rorl %cl, %edi
; CHECK-NEXT: rep bsfl %edi, %eax
; CHECK-NEXT: retq
+;
+; X86-LABEL: rotr_with_fshr_known_nonzero:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl $256, %eax # imm = 0x100
+; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: rorl %cl, %eax
+; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: retl
%x = or i32 %xx, 256
%z = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 %y)
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -388,13 +962,27 @@ define i32 @rotr_with_fshr_maybe_zero(i32 %x, i32 %y) {
; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
; CHECK-NEXT: rorl %cl, %edi
; CHECK-NEXT: testl %edi, %edi
-; CHECK-NEXT: je .LBB21_1
+; CHECK-NEXT: je .LBB25_1
; CHECK-NEXT: # %bb.2: # %cond.false
; CHECK-NEXT: rep bsfl %edi, %eax
; CHECK-NEXT: retq
-; CHECK-NEXT: .LBB21_1:
+; CHECK-NEXT: .LBB25_1:
; CHECK-NEXT: movl $32, %eax
; CHECK-NEXT: retq
+;
+; X86-LABEL: rotr_with_fshr_maybe_zero:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: rorl %cl, %eax
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: je .LBB25_1
+; X86-NEXT: # %bb.2: # %cond.false
+; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: retl
+; X86-NEXT: .LBB25_1:
+; X86-NEXT: movl $32, %eax
+; X86-NEXT: retl
%z = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 %y)
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
@@ -408,13 +996,28 @@ define i32 @rotl_known_nonzero(i32 %xx, i32 %y) {
; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
; CHECK-NEXT: roll %cl, %edi
; CHECK-NEXT: testl %edi, %edi
-; CHECK-NEXT: je .LBB22_1
+; CHECK-NEXT: je .LBB26_1
; CHECK-NEXT: # %bb.2: # %cond.false
; CHECK-NEXT: rep bsfl %edi, %eax
; CHECK-NEXT: retq
-; CHECK-NEXT: .LBB22_1:
+; CHECK-NEXT: .LBB26_1:
; CHECK-NEXT: movl $32, %eax
; CHECK-NEXT: retq
+;
+; X86-LABEL: rotl_known_nonzero:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl $256, %eax # imm = 0x100
+; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: roll %cl, %eax
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: je .LBB26_1
+; X86-NEXT: # %bb.2: # %cond.false
+; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: retl
+; X86-NEXT: .LBB26_1:
+; X86-NEXT: movl $32, %eax
+; X86-NEXT: retl
%x = or i32 %xx, 256
%shl = shl i32 %x, %y
%sub = sub i32 32, %y
@@ -431,13 +1034,27 @@ define i32 @rotl_maybe_zero(i32 %x, i32 %y) {
; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
; CHECK-NEXT: roll %cl, %edi
; CHECK-NEXT: testl %edi, %edi
-; CHECK-NEXT: je .LBB23_1
+; CHECK-NEXT: je .LBB27_1
; CHECK-NEXT: # %bb.2: # %cond.false
; CHECK-NEXT: rep bsfl %edi, %eax
; CHECK-NEXT: retq
-; CHECK-NEXT: .LBB23_1:
+; CHECK-NEXT: .LBB27_1:
; CHECK-NEXT: movl $32, %eax
; CHECK-NEXT: retq
+;
+; X86-LABEL: rotl_maybe_zero:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: roll %cl, %eax
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: je .LBB27_1
+; X86-NEXT: # %bb.2: # %cond.false
+; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: retl
+; X86-NEXT: .LBB27_1:
+; X86-NEXT: movl $32, %eax
+; X86-NEXT: retl
%shl = shl i32 %x, %y
%sub = sub i32 32, %y
%shr = lshr i32 %x, %sub
@@ -455,6 +1072,15 @@ define i32 @rotl_with_fshl_known_nonzero(i32 %xx, i32 %y) {
; CHECK-NEXT: roll %cl, %edi
; CHECK-NEXT: rep bsfl %edi, %eax
; CHECK-NEXT: retq
+;
+; X86-LABEL: rotl_with_fshl_known_nonzero:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl $256, %eax # imm = 0x100
+; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: roll %cl, %eax
+; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: retl
%x = or i32 %xx, 256
%z = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %y)
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -468,13 +1094,27 @@ define i32 @rotl_with_fshl_maybe_zero(i32 %x, i32 %y) {
; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
; CHECK-NEXT: roll %cl, %edi
; CHECK-NEXT: testl %edi, %edi
-; CHECK-NEXT: je .LBB25_1
+; CHECK-NEXT: je .LBB29_1
; CHECK-NEXT: # %bb.2: # %cond.false
; CHECK-NEXT: rep bsfl %edi, %eax
; CHECK-NEXT: retq
-; CHECK-NEXT: .LBB25_1:
+; CHECK-NEXT: .LBB29_1:
; CHECK-NEXT: movl $32, %eax
; CHECK-NEXT: retq
+;
+; X86-LABEL: rotl_with_fshl_maybe_zero:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: roll %cl, %eax
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: je .LBB29_1
+; X86-NEXT: # %bb.2: # %cond.false
+; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: retl
+; X86-NEXT: .LBB29_1:
+; X86-NEXT: movl $32, %eax
+; X86-NEXT: retl
%z = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %y)
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
@@ -489,6 +1129,14 @@ define i32 @sra_known_nonzero_sign_bit_set(i32 %x) {
; CHECK-NEXT: sarl %cl, %eax
; CHECK-NEXT: rep bsfl %eax, %eax
; CHECK-NEXT: retq
+;
+; X86-LABEL: sra_known_nonzero_sign_bit_set:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl $-2147360405, %eax # imm = 0x8001E16B
+; X86-NEXT: sarl %cl, %eax
+; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: retl
%z = ashr i32 2147606891, %x
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
@@ -503,6 +1151,15 @@ define i32 @sra_known_nonzero_exact(i32 %x, i32 %yy) {
; CHECK-NEXT: sarl %cl, %esi
; CHECK-NEXT: rep bsfl %esi, %eax
; CHECK-NEXT: retq
+;
+; X86-LABEL: sra_known_nonzero_exact:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl $256, %eax # imm = 0x100
+; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: sarl %cl, %eax
+; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: retl
%y = or i32 %yy, 256
%z = ashr exact i32 %y, %x
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -516,13 +1173,27 @@ define i32 @sra_maybe_zero(i32 %x, i32 %y) {
; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
; CHECK-NEXT: sarl %cl, %esi
; CHECK-NEXT: testl %esi, %esi
-; CHECK-NEXT: je .LBB28_1
+; CHECK-NEXT: je .LBB32_1
; CHECK-NEXT: # %bb.2: # %cond.false
; CHECK-NEXT: rep bsfl %esi, %eax
; CHECK-NEXT: retq
-; CHECK-NEXT: .LBB28_1:
+; CHECK-NEXT: .LBB32_1:
; CHECK-NEXT: movl $32, %eax
; CHECK-NEXT: retq
+;
+; X86-LABEL: sra_maybe_zero:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: sarl %cl, %eax
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: je .LBB32_1
+; X86-NEXT: # %bb.2: # %cond.false
+; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: retl
+; X86-NEXT: .LBB32_1:
+; X86-NEXT: movl $32, %eax
+; X86-NEXT: retl
%z = ashr exact i32 %y, %x
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
@@ -537,6 +1208,14 @@ define i32 @srl_known_nonzero_sign_bit_set(i32 %x) {
; CHECK-NEXT: shrl %cl, %eax
; CHECK-NEXT: rep bsfl %eax, %eax
; CHECK-NEXT: retq
+;
+; X86-LABEL: srl_known_nonzero_sign_bit_set:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl $-2147360405, %eax # imm = 0x8001E16B
+; X86-NEXT: shrl %cl, %eax
+; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: retl
%z = lshr i32 2147606891, %x
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
@@ -551,6 +1230,15 @@ define i32 @srl_known_nonzero_exact(i32 %x, i32 %yy) {
; CHECK-NEXT: shrl %cl, %esi
; CHECK-NEXT: rep bsfl %esi, %eax
; CHECK-NEXT: retq
+;
+; X86-LABEL: srl_known_nonzero_exact:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl $256, %eax # imm = 0x100
+; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: shrl %cl, %eax
+; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: retl
%y = or i32 %yy, 256
%z = lshr exact i32 %y, %x
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -564,13 +1252,27 @@ define i32 @srl_maybe_zero(i32 %x, i32 %y) {
; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
; CHECK-NEXT: shrl %cl, %esi
; CHECK-NEXT: testl %esi, %esi
-; CHECK-NEXT: je .LBB31_1
+; CHECK-NEXT: je .LBB35_1
; CHECK-NEXT: # %bb.2: # %cond.false
; CHECK-NEXT: rep bsfl %esi, %eax
; CHECK-NEXT: retq
-; CHECK-NEXT: .LBB31_1:
+; CHECK-NEXT: .LBB35_1:
; CHECK-NEXT: movl $32, %eax
; CHECK-NEXT: retq
+;
+; X86-LABEL: srl_maybe_zero:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: shrl %cl, %eax
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: je .LBB35_1
+; X86-NEXT: # %bb.2: # %cond.false
+; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: retl
+; X86-NEXT: .LBB35_1:
+; X86-NEXT: movl $32, %eax
+; X86-NEXT: retl
%z = lshr exact i32 %y, %x
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
@@ -585,6 +1287,15 @@ define i32 @udiv_known_nonzero(i32 %xx, i32 %y) {
; CHECK-NEXT: divl %esi
; CHECK-NEXT: rep bsfl %eax, %eax
; CHECK-NEXT: retq
+;
+; X86-LABEL: udiv_known_nonzero:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: orl $64, %eax
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: divl {{[0-9]+}}(%esp)
+; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: retl
%x = or i32 %xx, 64
%z = udiv exact i32 %x, %y
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -598,13 +1309,27 @@ define i32 @udiv_maybe_zero(i32 %x, i32 %y) {
; CHECK-NEXT: xorl %edx, %edx
; CHECK-NEXT: divl %esi
; CHECK-NEXT: testl %eax, %eax
-; CHECK-NEXT: je .LBB33_1
+; CHECK-NEXT: je .LBB37_1
; CHECK-NEXT: # %bb.2: # %cond.false
; CHECK-NEXT: rep bsfl %eax, %eax
; CHECK-NEXT: retq
-; CHECK-NEXT: .LBB33_1:
+; CHECK-NEXT: .LBB37_1:
; CHECK-NEXT: movl $32, %eax
; CHECK-NEXT: retq
+;
+; X86-LABEL: udiv_maybe_zero:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: divl {{[0-9]+}}(%esp)
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: je .LBB37_1
+; X86-NEXT: # %bb.2: # %cond.false
+; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: retl
+; X86-NEXT: .LBB37_1:
+; X86-NEXT: movl $32, %eax
+; X86-NEXT: retl
%z = udiv exact i32 %x, %y
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
@@ -619,6 +1344,15 @@ define i32 @sdiv_known_nonzero(i32 %xx, i32 %y) {
; CHECK-NEXT: idivl %esi
; CHECK-NEXT: rep bsfl %eax, %eax
; CHECK-NEXT: retq
+;
+; X86-LABEL: sdiv_known_nonzero:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: orl $64, %eax
+; X86-NEXT: cltd
+; X86-NEXT: idivl {{[0-9]+}}(%esp)
+; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: retl
%x = or i32 %xx, 64
%z = sdiv exact i32 %x, %y
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -632,13 +1366,27 @@ define i32 @sdiv_maybe_zero(i32 %x, i32 %y) {
; CHECK-NEXT: cltd
; CHECK-NEXT: idivl %esi
; CHECK-NEXT: testl %eax, %eax
-; CHECK-NEXT: je .LBB35_1
+; CHECK-NEXT: je .LBB39_1
; CHECK-NEXT: # %bb.2: # %cond.false
; CHECK-NEXT: rep bsfl %eax, %eax
; CHECK-NEXT: retq
-; CHECK-NEXT: .LBB35_1:
+; CHECK-NEXT: .LBB39_1:
; CHECK-NEXT: movl $32, %eax
; CHECK-NEXT: retq
+;
+; X86-LABEL: sdiv_maybe_zero:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: cltd
+; X86-NEXT: idivl {{[0-9]+}}(%esp)
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: je .LBB39_1
+; X86-NEXT: # %bb.2: # %cond.false
+; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: retl
+; X86-NEXT: .LBB39_1:
+; X86-NEXT: movl $32, %eax
+; X86-NEXT: retl
%z = sdiv exact i32 %x, %y
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
@@ -651,6 +1399,14 @@ define i32 @add_known_nonzero(i32 %xx, i32 %y) {
; CHECK-NEXT: addl %esi, %edi
; CHECK-NEXT: rep bsfl %edi, %eax
; CHECK-NEXT: retq
+;
+; X86-LABEL: add_known_nonzero:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: orl $1, %eax
+; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: retl
%x = or i32 %xx, 1
%z = add nuw i32 %x, %y
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -662,13 +1418,26 @@ define i32 @add_maybe_zero(i32 %xx, i32 %y) {
; CHECK: # %bb.0:
; CHECK-NEXT: orl $1, %edi
; CHECK-NEXT: addl %esi, %edi
-; CHECK-NEXT: je .LBB37_1
+; CHECK-NEXT: je .LBB41_1
; CHECK-NEXT: # %bb.2: # %cond.false
; CHECK-NEXT: rep bsfl %edi, %eax
; CHECK-NEXT: retq
-; CHECK-NEXT: .LBB37_1:
+; CHECK-NEXT: .LBB41_1:
; CHECK-NEXT: movl $32, %eax
; CHECK-NEXT: retq
+;
+; X86-LABEL: add_maybe_zero:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: orl $1, %eax
+; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: je .LBB41_1
+; X86-NEXT: # %bb.2: # %cond.false
+; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: retl
+; X86-NEXT: .LBB41_1:
+; X86-NEXT: movl $32, %eax
+; X86-NEXT: retl
%x = or i32 %xx, 1
%z = add nsw i32 %x, %y
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -685,6 +1454,15 @@ define i32 @sub_known_nonzero_neg_case(i32 %xx) {
; CHECK-NEXT: negl %eax
; CHECK-NEXT: rep bsfl %eax, %eax
; CHECK-NEXT: retq
+;
+; X86-LABEL: sub_known_nonzero_neg_case:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl $256, %eax # imm = 0x100
+; X86-NEXT: shll %cl, %eax
+; X86-NEXT: negl %eax
+; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: retl
%x = shl nuw nsw i32 256, %xx
%z = sub i32 0, %x
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -700,6 +1478,16 @@ define i32 @sub_known_nonzero_ne_case(i32 %xx, i32 %yy) {
; CHECK-NEXT: subl %eax, %edi
; CHECK-NEXT: rep bsfl %edi, %eax
; CHECK-NEXT: retq
+;
+; X86-LABEL: sub_known_nonzero_ne_case:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: orl $64, %ecx
+; X86-NEXT: andl $-65, %eax
+; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: retl
%x = or i32 %xx, 64
%y = and i32 %xx, -65
%z = sub i32 %y, %x
@@ -713,13 +1501,27 @@ define i32 @sub_maybe_zero(i32 %x) {
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: orl $64, %eax
; CHECK-NEXT: subl %edi, %eax
-; CHECK-NEXT: je .LBB40_1
+; CHECK-NEXT: je .LBB44_1
; CHECK-NEXT: # %bb.2: # %cond.false
; CHECK-NEXT: rep bsfl %eax, %eax
; CHECK-NEXT: retq
-; CHECK-NEXT: .LBB40_1:
+; CHECK-NEXT: .LBB44_1:
; CHECK-NEXT: movl $32, %eax
; CHECK-NEXT: retq
+;
+; X86-LABEL: sub_maybe_zero:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: orl $64, %eax
+; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: je .LBB44_1
+; X86-NEXT: # %bb.2: # %cond.false
+; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: retl
+; X86-NEXT: .LBB44_1:
+; X86-NEXT: movl $32, %eax
+; X86-NEXT: retl
%y = or i32 %x, 64
%z = sub i32 %y, %x
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -730,13 +1532,25 @@ define i32 @sub_maybe_zero2(i32 %x) {
; CHECK-LABEL: sub_maybe_zero2:
; CHECK: # %bb.0:
; CHECK-NEXT: negl %edi
-; CHECK-NEXT: je .LBB41_1
+; CHECK-NEXT: je .LBB45_1
; CHECK-NEXT: # %bb.2: # %cond.false
; CHECK-NEXT: rep bsfl %edi, %eax
; CHECK-NEXT: retq
-; CHECK-NEXT: .LBB41_1:
+; CHECK-NEXT: .LBB45_1:
; CHECK-NEXT: movl $32, %eax
; CHECK-NEXT: retq
+;
+; X86-LABEL: sub_maybe_zero2:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: negl %eax
+; X86-NEXT: je .LBB45_1
+; X86-NEXT: # %bb.2: # %cond.false
+; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: retl
+; X86-NEXT: .LBB45_1:
+; X86-NEXT: movl $32, %eax
+; X86-NEXT: retl
%z = sub i32 0, %x
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
@@ -748,13 +1562,27 @@ define i32 @mul_known_nonzero_nsw(i32 %x, i32 %yy) {
; CHECK-NEXT: orl $256, %esi # imm = 0x100
; CHECK-NEXT: imull %edi, %esi
; CHECK-NEXT: testl %esi, %esi
-; CHECK-NEXT: je .LBB42_1
+; CHECK-NEXT: je .LBB46_1
; CHECK-NEXT: # %bb.2: # %cond.false
; CHECK-NEXT: rep bsfl %esi, %eax
; CHECK-NEXT: retq
-; CHECK-NEXT: .LBB42_1:
+; CHECK-NEXT: .LBB46_1:
; CHECK-NEXT: movl $32, %eax
; CHECK-NEXT: retq
+;
+; X86-LABEL: mul_known_nonzero_nsw:
+; X86: # %bb.0:
+; X86-NEXT: movl $256, %eax # imm = 0x100
+; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: imull {{[0-9]+}}(%esp), %eax
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: je .LBB46_1
+; X86-NEXT: # %bb.2: # %cond.false
+; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: retl
+; X86-NEXT: .LBB46_1:
+; X86-NEXT: movl $32, %eax
+; X86-NEXT: retl
%y = or i32 %yy, 256
%z = mul nsw i32 %y, %x
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -767,13 +1595,27 @@ define i32 @mul_known_nonzero_nuw(i32 %x, i32 %yy) {
; CHECK-NEXT: orl $256, %esi # imm = 0x100
; CHECK-NEXT: imull %edi, %esi
; CHECK-NEXT: testl %esi, %esi
-; CHECK-NEXT: je .LBB43_1
+; CHECK-NEXT: je .LBB47_1
; CHECK-NEXT: # %bb.2: # %cond.false
; CHECK-NEXT: rep bsfl %esi, %eax
; CHECK-NEXT: retq
-; CHECK-NEXT: .LBB43_1:
+; CHECK-NEXT: .LBB47_1:
; CHECK-NEXT: movl $32, %eax
; CHECK-NEXT: retq
+;
+; X86-LABEL: mul_known_nonzero_nuw:
+; X86: # %bb.0:
+; X86-NEXT: movl $256, %eax # imm = 0x100
+; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: imull {{[0-9]+}}(%esp), %eax
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: je .LBB47_1
+; X86-NEXT: # %bb.2: # %cond.false
+; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: retl
+; X86-NEXT: .LBB47_1:
+; X86-NEXT: movl $32, %eax
+; X86-NEXT: retl
%y = or i32 %yy, 256
%z = mul nuw i32 %y, %x
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -785,13 +1627,26 @@ define i32 @mul_maybe_zero(i32 %x, i32 %y) {
; CHECK: # %bb.0:
; CHECK-NEXT: imull %esi, %edi
; CHECK-NEXT: testl %edi, %edi
-; CHECK-NEXT: je .LBB44_1
+; CHECK-NEXT: je .LBB48_1
; CHECK-NEXT: # %bb.2: # %cond.false
; CHECK-NEXT: rep bsfl %edi, %eax
; CHECK-NEXT: retq
-; CHECK-NEXT: .LBB44_1:
+; CHECK-NEXT: .LBB48_1:
; CHECK-NEXT: movl $32, %eax
; CHECK-NEXT: retq
+;
+; X86-LABEL: mul_maybe_zero:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: imull {{[0-9]+}}(%esp), %eax
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: je .LBB48_1
+; X86-NEXT: # %bb.2: # %cond.false
+; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: retl
+; X86-NEXT: .LBB48_1:
+; X86-NEXT: movl $32, %eax
+; X86-NEXT: retl
%z = mul nuw nsw i32 %y, %x
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
@@ -811,6 +1666,31 @@ define i32 @bitcast_known_nonzero(<2 x i16> %xx) {
; CHECK-NEXT: movl $32, %eax
; CHECK-NEXT: cmovnel %ecx, %eax
; CHECK-NEXT: retq
+;
+; X86-LABEL: bitcast_known_nonzero:
+; X86: # %bb.0:
+; X86-NEXT: pushl %esi
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .cfi_offset %esi, -8
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl $256, %edx # imm = 0x100
+; X86-NEXT: movl $256, %esi # imm = 0x100
+; X86-NEXT: shll %cl, %esi
+; X86-NEXT: shll $16, %esi
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: shll %cl, %edx
+; X86-NEXT: movzwl %dx, %eax
+; X86-NEXT: orl %esi, %eax
+; X86-NEXT: bsfl %eax, %ecx
+; X86-NEXT: movl $32, %eax
+; X86-NEXT: je .LBB49_2
+; X86-NEXT: # %bb.1:
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: .LBB49_2:
+; X86-NEXT: popl %esi
+; X86-NEXT: .cfi_def_cfa_offset 4
+; X86-NEXT: retl
%x = shl nuw nsw <2 x i16> <i16 256, i16 256>, %xx
%z = bitcast <2 x i16> %x to i32
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -822,13 +1702,27 @@ define i32 @bitcast_maybe_zero(<2 x i16> %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: movd %xmm0, %eax
; CHECK-NEXT: testl %eax, %eax
-; CHECK-NEXT: je .LBB46_1
+; CHECK-NEXT: je .LBB50_1
; CHECK-NEXT: # %bb.2: # %cond.false
; CHECK-NEXT: rep bsfl %eax, %eax
; CHECK-NEXT: retq
-; CHECK-NEXT: .LBB46_1:
+; CHECK-NEXT: .LBB50_1:
; CHECK-NEXT: movl $32, %eax
; CHECK-NEXT: retq
+;
+; X86-LABEL: bitcast_maybe_zero:
+; X86: # %bb.0:
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: shll $16, %ecx
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: orl %ecx, %eax
+; X86-NEXT: je .LBB50_1
+; X86-NEXT: # %bb.2: # %cond.false
+; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: retl
+; X86-NEXT: .LBB50_1:
+; X86-NEXT: movl $32, %eax
+; X86-NEXT: retl
%z = bitcast <2 x i16> %x to i32
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
@@ -839,13 +1733,34 @@ define i32 @bitcast_from_float(float %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: movd %xmm0, %eax
; CHECK-NEXT: testl %eax, %eax
-; CHECK-NEXT: je .LBB47_1
+; CHECK-NEXT: je .LBB51_1
; CHECK-NEXT: # %bb.2: # %cond.false
; CHECK-NEXT: rep bsfl %eax, %eax
; CHECK-NEXT: retq
-; CHECK-NEXT: .LBB47_1:
+; CHECK-NEXT: .LBB51_1:
; CHECK-NEXT: movl $32, %eax
; CHECK-NEXT: retq
+;
+; X86-LABEL: bitcast_from_float:
+; X86: # %bb.0:
+; X86-NEXT: pushl %eax
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: fstps (%esp)
+; X86-NEXT: movl (%esp), %eax
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: je .LBB51_1
+; X86-NEXT: # %bb.2: # %cond.false
+; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: popl %ecx
+; X86-NEXT: .cfi_def_cfa_offset 4
+; X86-NEXT: retl
+; X86-NEXT: .LBB51_1:
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: movl $32, %eax
+; X86-NEXT: popl %ecx
+; X86-NEXT: .cfi_def_cfa_offset 4
+; X86-NEXT: retl
%z = bitcast float %x to i32
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
@@ -861,6 +1776,15 @@ define i32 @zext_known_nonzero(i16 %xx) {
; CHECK-NEXT: movzwl %ax, %eax
; CHECK-NEXT: rep bsfl %eax, %eax
; CHECK-NEXT: retq
+;
+; X86-LABEL: zext_known_nonzero:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl $256, %eax # imm = 0x100
+; X86-NEXT: shll %cl, %eax
+; X86-NEXT: movzwl %ax, %eax
+; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: retl
%x = shl nuw nsw i16 256, %xx
%z = zext i16 %x to i32
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -871,14 +1795,27 @@ define i32 @zext_maybe_zero(i16 %x) {
; CHECK-LABEL: zext_maybe_zero:
; CHECK: # %bb.0:
; CHECK-NEXT: testw %di, %di
-; CHECK-NEXT: je .LBB49_1
+; CHECK-NEXT: je .LBB53_1
; CHECK-NEXT: # %bb.2: # %cond.false
; CHECK-NEXT: movzwl %di, %eax
; CHECK-NEXT: rep bsfl %eax, %eax
; CHECK-NEXT: retq
-; CHECK-NEXT: .LBB49_1:
+; CHECK-NEXT: .LBB53_1:
; CHECK-NEXT: movl $32, %eax
; CHECK-NEXT: retq
+;
+; X86-LABEL: zext_maybe_zero:
+; X86: # %bb.0:
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: testw %ax, %ax
+; X86-NEXT: je .LBB53_1
+; X86-NEXT: # %bb.2: # %cond.false
+; X86-NEXT: movzwl %ax, %eax
+; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: retl
+; X86-NEXT: .LBB53_1:
+; X86-NEXT: movl $32, %eax
+; X86-NEXT: retl
%z = zext i16 %x to i32
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
@@ -894,6 +1831,15 @@ define i32 @sext_known_nonzero(i16 %xx) {
; CHECK-NEXT: cwtl
; CHECK-NEXT: rep bsfl %eax, %eax
; CHECK-NEXT: retq
+;
+; X86-LABEL: sext_known_nonzero:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl $256, %eax # imm = 0x100
+; X86-NEXT: shll %cl, %eax
+; X86-NEXT: cwtl
+; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: retl
%x = shl nuw nsw i16 256, %xx
%z = sext i16 %x to i32
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -904,14 +1850,26 @@ define i32 @sext_maybe_zero(i16 %x) {
; CHECK-LABEL: sext_maybe_zero:
; CHECK: # %bb.0:
; CHECK-NEXT: testw %di, %di
-; CHECK-NEXT: je .LBB51_1
+; CHECK-NEXT: je .LBB55_1
; CHECK-NEXT: # %bb.2: # %cond.false
; CHECK-NEXT: movswl %di, %eax
; CHECK-NEXT: rep bsfl %eax, %eax
; CHECK-NEXT: retq
-; CHECK-NEXT: .LBB51_1:
+; CHECK-NEXT: .LBB55_1:
; CHECK-NEXT: movl $32, %eax
; CHECK-NEXT: retq
+;
+; X86-LABEL: sext_maybe_zero:
+; X86: # %bb.0:
+; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: je .LBB55_1
+; X86-NEXT: # %bb.2: # %cond.false
+; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: retl
+; X86-NEXT: .LBB55_1:
+; X86-NEXT: movl $32, %eax
+; X86-NEXT: retl
%z = sext i16 %x to i32
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
>From 48978cea1b31d407b2a6c08ad1bae107d47b4317 Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Thu, 21 Mar 2024 09:59:38 -0400
Subject: [PATCH 2/2] [SelectionDAG]: Deduce KnownNeverZero from SMIN and SMAX
---
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 34 +++-
llvm/test/CodeGen/X86/known-never-zero.ll | 153 +++++++-----------
2 files changed, 88 insertions(+), 99 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index cd6f083243d09c..61816c8491bbf1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5361,10 +5361,38 @@ bool SelectionDAG::isKnownNeverZero(SDValue Op, unsigned Depth) const {
return isKnownNeverZero(Op.getOperand(1), Depth + 1) ||
isKnownNeverZero(Op.getOperand(0), Depth + 1);
- // TODO for smin/smax: If either operand is known negative/positive
+ // For smin/smax: If either operand is known negative/strictly positive
// respectively we don't need the other to be known at all.
- case ISD::SMAX:
- case ISD::SMIN:
+ case ISD::SMAX: {
+ KnownBits Op1 = computeKnownBits(Op.getOperand(1), Depth + 1);
+ if (Op1.isStrictlyPositive())
+ return true;
+
+ KnownBits Op0 = computeKnownBits(Op.getOperand(0), Depth + 1);
+ if (Op0.isStrictlyPositive())
+ return true;
+
+ if (Op1.isNonZero() && Op0.isNonZero())
+ return true;
+
+ return isKnownNeverZero(Op.getOperand(1), Depth + 1) &&
+ isKnownNeverZero(Op.getOperand(0), Depth + 1);
+ }
+ case ISD::SMIN: {
+ KnownBits Op1 = computeKnownBits(Op.getOperand(1), Depth + 1);
+ if (Op1.isNegative())
+ return true;
+
+ KnownBits Op0 = computeKnownBits(Op.getOperand(0), Depth + 1);
+ if (Op0.isNegative())
+ return true;
+
+ if (Op1.isNonZero() && Op0.isNonZero())
+ return true;
+
+ return isKnownNeverZero(Op.getOperand(1), Depth + 1) &&
+ isKnownNeverZero(Op.getOperand(0), Depth + 1);
+ }
case ISD::UMIN:
return isKnownNeverZero(Op.getOperand(1), Depth + 1) &&
isKnownNeverZero(Op.getOperand(0), Depth + 1);
diff --git a/llvm/test/CodeGen/X86/known-never-zero.ll b/llvm/test/CodeGen/X86/known-never-zero.ll
index 826ce4619ceb30..7143fd17bb4db5 100644
--- a/llvm/test/CodeGen/X86/known-never-zero.ll
+++ b/llvm/test/CodeGen/X86/known-never-zero.ll
@@ -479,9 +479,7 @@ define i32 @smin_known_zero(i32 %x, i32 %y) {
; CHECK-NEXT: cmpl $-54, %edi
; CHECK-NEXT: movl $-54, %eax
; CHECK-NEXT: cmovll %edi, %eax
-; CHECK-NEXT: bsfl %eax, %ecx
-; CHECK-NEXT: movl $32, %eax
-; CHECK-NEXT: cmovnel %ecx, %eax
+; CHECK-NEXT: rep bsfl %eax, %eax
; CHECK-NEXT: retq
;
; X86-LABEL: smin_known_zero:
@@ -492,12 +490,7 @@ define i32 @smin_known_zero(i32 %x, i32 %y) {
; X86-NEXT: # %bb.1:
; X86-NEXT: movl $-54, %eax
; X86-NEXT: .LBB15_2:
-; X86-NEXT: bsfl %eax, %ecx
-; X86-NEXT: movl $32, %eax
-; X86-NEXT: je .LBB15_4
-; X86-NEXT: # %bb.3:
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: .LBB15_4:
+; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
%z = call i32 @llvm.smin.i32(i32 %x, i32 -54)
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -515,9 +508,9 @@ define <4 x i32> @smin_known_zero_vec(<4 x i32> %x, <4 x i32> %y) {
; CHECK-NEXT: por %xmm2, %xmm0
; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
; CHECK-NEXT: paddd %xmm0, %xmm1
-; CHECK-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; CHECK-NEXT: pxor %xmm1, %xmm0
-; CHECK-NEXT: pcmpgtd %xmm1, %xmm0
+; CHECK-NEXT: pand %xmm1, %xmm0
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: pcmpeqd %xmm1, %xmm0
; CHECK-NEXT: psrld $31, %xmm0
; CHECK-NEXT: retq
;
@@ -560,25 +553,21 @@ define <4 x i32> @smin_known_zero_vec(<4 x i32> %x, <4 x i32> %y) {
; X86-NEXT: movl $-54, %ecx
; X86-NEXT: .LBB16_8:
; X86-NEXT: leal -1(%ecx), %edx
-; X86-NEXT: xorl %edx, %ecx
; X86-NEXT: xorl %ebx, %ebx
-; X86-NEXT: cmpl %edx, %ecx
-; X86-NEXT: seta %bl
+; X86-NEXT: testl %edx, %ecx
+; X86-NEXT: sete %bl
; X86-NEXT: leal -1(%esi), %edx
-; X86-NEXT: xorl %edx, %esi
; X86-NEXT: xorl %ecx, %ecx
-; X86-NEXT: cmpl %edx, %esi
-; X86-NEXT: seta %cl
+; X86-NEXT: testl %edx, %esi
+; X86-NEXT: sete %cl
; X86-NEXT: leal -1(%ebp), %esi
-; X86-NEXT: xorl %esi, %ebp
; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: cmpl %esi, %ebp
-; X86-NEXT: seta %dl
+; X86-NEXT: testl %esi, %ebp
+; X86-NEXT: sete %dl
; X86-NEXT: leal -1(%edi), %esi
-; X86-NEXT: xorl %esi, %edi
; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpl %esi, %edi
-; X86-NEXT: seta %al
+; X86-NEXT: testl %esi, %edi
+; X86-NEXT: sete %al
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %eax, 12(%esi)
; X86-NEXT: movl %edx, 8(%esi)
@@ -677,9 +666,7 @@ define i32 @smax_known_nonzero_2(i32 %x, i32 %y) {
; CHECK-NEXT: cmpl $55, %edi
; CHECK-NEXT: movl $54, %eax
; CHECK-NEXT: cmovgel %edi, %eax
-; CHECK-NEXT: bsfl %eax, %ecx
-; CHECK-NEXT: movl $32, %eax
-; CHECK-NEXT: cmovnel %ecx, %eax
+; CHECK-NEXT: rep bsfl %eax, %eax
; CHECK-NEXT: retq
;
; X86-LABEL: smax_known_nonzero_2:
@@ -690,12 +677,7 @@ define i32 @smax_known_nonzero_2(i32 %x, i32 %y) {
; X86-NEXT: # %bb.1:
; X86-NEXT: movl $54, %eax
; X86-NEXT: .LBB19_2:
-; X86-NEXT: bsfl %eax, %ecx
-; X86-NEXT: movl $32, %eax
-; X86-NEXT: je .LBB19_4
-; X86-NEXT: # %bb.3:
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: .LBB19_4:
+; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
%z = call i32 @llvm.smax.i32(i32 %x, i32 54)
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -713,28 +695,10 @@ define <4 x i32> @smax_known_zero_vec(<4 x i32> %x, <4 x i32> %y) {
; CHECK-NEXT: por %xmm2, %xmm0
; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
; CHECK-NEXT: paddd %xmm0, %xmm1
-; CHECK-NEXT: pandn %xmm1, %xmm0
-; CHECK-NEXT: movdqa %xmm0, %xmm1
-; CHECK-NEXT: psrlw $1, %xmm1
-; CHECK-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; CHECK-NEXT: psubb %xmm1, %xmm0
-; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
-; CHECK-NEXT: movdqa %xmm0, %xmm2
-; CHECK-NEXT: pand %xmm1, %xmm2
-; CHECK-NEXT: psrlw $2, %xmm0
-; CHECK-NEXT: pand %xmm1, %xmm0
-; CHECK-NEXT: paddb %xmm2, %xmm0
-; CHECK-NEXT: movdqa %xmm0, %xmm1
-; CHECK-NEXT: psrlw $4, %xmm1
-; CHECK-NEXT: paddb %xmm1, %xmm0
-; CHECK-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: pxor %xmm1, %xmm1
-; CHECK-NEXT: movdqa %xmm0, %xmm2
-; CHECK-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; CHECK-NEXT: psadbw %xmm1, %xmm2
-; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; CHECK-NEXT: psadbw %xmm1, %xmm0
-; CHECK-NEXT: packuswb %xmm2, %xmm0
+; CHECK-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-NEXT: pxor %xmm1, %xmm0
+; CHECK-NEXT: pcmpgtd %xmm1, %xmm0
+; CHECK-NEXT: psrld $31, %xmm0
; CHECK-NEXT: retq
;
; X86-LABEL: smax_known_zero_vec:
@@ -751,59 +715,56 @@ define <4 x i32> @smax_known_zero_vec(<4 x i32> %x, <4 x i32> %y) {
; X86-NEXT: .cfi_offset %edi, -16
; X86-NEXT: .cfi_offset %ebx, -12
; X86-NEXT: .cfi_offset %ebp, -8
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: cmpl $2, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: cmpl $2, %edi
; X86-NEXT: jge .LBB20_2
; X86-NEXT: # %bb.1:
-; X86-NEXT: movl $1, %ecx
+; X86-NEXT: movl $1, %edi
; X86-NEXT: .LBB20_2:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpl $13, %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: cmpl $13, %ebp
; X86-NEXT: jge .LBB20_4
; X86-NEXT: # %bb.3:
-; X86-NEXT: movl $12, %edx
+; X86-NEXT: movl $12, %ebp
; X86-NEXT: .LBB20_4:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: cmpl $24, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: cmpl $24, %esi
; X86-NEXT: jge .LBB20_6
; X86-NEXT: # %bb.5:
-; X86-NEXT: movl $23, %eax
+; X86-NEXT: movl $23, %esi
; X86-NEXT: .LBB20_6:
-; X86-NEXT: cmpl $55, %esi
+; X86-NEXT: cmpl $55, %ecx
; X86-NEXT: jge .LBB20_8
; X86-NEXT: # %bb.7:
-; X86-NEXT: movl $54, %esi
+; X86-NEXT: movl $54, %ecx
; X86-NEXT: .LBB20_8:
-; X86-NEXT: bsfl %esi, %ebx
-; X86-NEXT: movl $32, %edi
-; X86-NEXT: movl $32, %esi
-; X86-NEXT: je .LBB20_10
-; X86-NEXT: # %bb.9:
-; X86-NEXT: movl %ebx, %esi
-; X86-NEXT: .LBB20_10:
-; X86-NEXT: bsfl %eax, %eax
-; X86-NEXT: movl $32, %ebx
-; X86-NEXT: je .LBB20_12
-; X86-NEXT: # %bb.11:
-; X86-NEXT: movl %eax, %ebx
-; X86-NEXT: .LBB20_12:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: bsfl %edx, %ebp
-; X86-NEXT: movl $32, %edx
-; X86-NEXT: je .LBB20_14
-; X86-NEXT: # %bb.13:
-; X86-NEXT: movl %ebp, %edx
-; X86-NEXT: .LBB20_14:
-; X86-NEXT: bsfl %ecx, %ecx
-; X86-NEXT: je .LBB20_16
-; X86-NEXT: # %bb.15:
-; X86-NEXT: movl %ecx, %edi
-; X86-NEXT: .LBB20_16:
-; X86-NEXT: movl %edi, 12(%eax)
-; X86-NEXT: movl %edx, 8(%eax)
-; X86-NEXT: movl %ebx, 4(%eax)
-; X86-NEXT: movl %esi, (%eax)
+; X86-NEXT: leal -1(%ecx), %edx
+; X86-NEXT: xorl %edx, %ecx
+; X86-NEXT: xorl %ebx, %ebx
+; X86-NEXT: cmpl %edx, %ecx
+; X86-NEXT: seta %bl
+; X86-NEXT: leal -1(%esi), %edx
+; X86-NEXT: xorl %edx, %esi
+; X86-NEXT: xorl %ecx, %ecx
+; X86-NEXT: cmpl %edx, %esi
+; X86-NEXT: seta %cl
+; X86-NEXT: leal -1(%ebp), %esi
+; X86-NEXT: xorl %esi, %ebp
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: cmpl %esi, %ebp
+; X86-NEXT: seta %dl
+; X86-NEXT: leal -1(%edi), %esi
+; X86-NEXT: xorl %esi, %edi
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: cmpl %esi, %edi
+; X86-NEXT: seta %al
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %eax, 12(%esi)
+; X86-NEXT: movl %edx, 8(%esi)
+; X86-NEXT: movl %ecx, 4(%esi)
+; X86-NEXT: movl %ebx, (%esi)
+; X86-NEXT: movl %esi, %eax
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 16
; X86-NEXT: popl %edi
More information about the llvm-commits
mailing list