[llvm] [X86] Promote cttz_i32(x) -> cttz_i64((i64)x | (1 << 32)) (PR #102900)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 12 06:34:50 PDT 2024
https://github.com/RKSimon created https://github.com/llvm/llvm-project/pull/102900
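On 64-bit targets we can promote i32 CTTZ nodes to i64 by setting bit 32 (i.e. OR-ing in 1 << 32) of the widened value, so that a zero i32 input still produces the expected count of 32.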
#57811 also asked whether we should use BTS instead of MOVABS+OR to avoid an i64 immediate. I'm willing to add a DAGToDAG isel peephole fix for these cases if reviewers think it worthwhile (I'm not sure we want to introduce an entire X86ISD::BTS node type, given its complexity).
Fixes #57811
>From 62b98af72e06f44e29425e92b2f4cf348f1bfafb Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Mon, 12 Aug 2024 14:32:39 +0100
Subject: [PATCH] [X86] Promote cttz_i32(x) -> cttz_i64((i64)x | (1 << 32))
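On 64-bit targets we can promote i32 CTTZ nodes to i64 by setting bit 32 (i.e. OR-ing in 1 << 32) of the widened value, so that a zero i32 input still produces the expected count of 32.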
#57811 also asked whether we should use BTS instead of MOVABS+OR to avoid an i64 immediate. I'm willing to add a DAGToDAG isel fix for these cases if people think it worthwhile (I'm not sure we want to introduce an entire X86ISD::BTS node type, given its complexity).
Fixes #57811
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 6 +-
llvm/test/CodeGen/X86/cttz.ll | 12 +-
llvm/test/CodeGen/X86/known-never-zero.ll | 339 +++++++++-------------
llvm/test/CodeGen/X86/pr89877.ll | 13 +-
llvm/test/CodeGen/X86/pr90847.ll | 24 +-
5 files changed, 154 insertions(+), 240 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2074fac8578914..04dfd0ea0d893a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -412,6 +412,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Legal);
if (Subtarget.is64Bit()) {
+ setOperationPromotedToType(ISD::CTTZ , MVT::i32, MVT::i64);
setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Legal);
}
@@ -3237,9 +3238,10 @@ bool X86TargetLowering::shouldFormOverflowOp(unsigned Opcode, EVT VT,
}
bool X86TargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
- // Speculate cttz only if we can directly use TZCNT or can promote to i32.
+ // Speculate cttz only if we can directly use TZCNT or can promote to i32/i64.
return Subtarget.hasBMI() ||
- (!Ty->isVectorTy() && Ty->getScalarSizeInBits() < 32);
+ (!Ty->isVectorTy() &&
+ Ty->getScalarSizeInBits() < (Subtarget.is64Bit() ? 64u : 32u));
}
bool X86TargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
diff --git a/llvm/test/CodeGen/X86/cttz.ll b/llvm/test/CodeGen/X86/cttz.ll
index 6eb748a1afbab5..b35a1b72fcb6f1 100644
--- a/llvm/test/CodeGen/X86/cttz.ll
+++ b/llvm/test/CodeGen/X86/cttz.ll
@@ -317,13 +317,11 @@ define i32 @cttz_i32_zero_test(i32 %n) {
;
; X64-LABEL: cttz_i32_zero_test:
; X64: # %bb.0:
-; X64-NEXT: testl %edi, %edi
-; X64-NEXT: je .LBB6_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %edi, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB6_1:
-; X64-NEXT: movl $32, %eax
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000
+; X64-NEXT: orq %rdi, %rax
+; X64-NEXT: rep bsfq %rax, %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
;
; X86-CLZ-LABEL: cttz_i32_zero_test:
diff --git a/llvm/test/CodeGen/X86/known-never-zero.ll b/llvm/test/CodeGen/X86/known-never-zero.ll
index df11a44626e381..d5d604a138a719 100644
--- a/llvm/test/CodeGen/X86/known-never-zero.ll
+++ b/llvm/test/CodeGen/X86/known-never-zero.ll
@@ -54,13 +54,12 @@ define i32 @or_maybe_zero(i32 %x, i32 %y) {
;
; X64-LABEL: or_maybe_zero:
; X64: # %bb.0:
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: orl %esi, %edi
-; X64-NEXT: je .LBB1_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %edi, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB1_1:
-; X64-NEXT: movl $32, %eax
+; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000
+; X64-NEXT: orq %rdi, %rax
+; X64-NEXT: rep bsfq %rax, %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%z = or i32 %x, %y
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -115,13 +114,10 @@ define i32 @select_maybe_zero(i1 %c, i32 %x) {
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: testb $1, %dil
; X64-NEXT: cmovnel %esi, %eax
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: je .LBB3_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %eax, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB3_1:
-; X64-NEXT: movl $32, %eax
+; X64-NEXT: movabsq $4294967296, %rcx # imm = 0x100000000
+; X64-NEXT: orq %rax, %rcx
+; X64-NEXT: rep bsfq %rcx, %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%y = or i32 %x, 1
%z = select i1 %c, i32 %y, i32 0
@@ -216,16 +212,14 @@ define i32 @shl_maybe_zero(i32 %x, i32 %y) {
;
; X64-LABEL: shl_maybe_zero:
; X64: # %bb.0:
+; X64-NEXT: # kill: def $esi killed $esi def $rsi
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shll %cl, %esi
-; X64-NEXT: testl %esi, %esi
-; X64-NEXT: je .LBB7_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %esi, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB7_1:
-; X64-NEXT: movl $32, %eax
+; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000
+; X64-NEXT: orq %rsi, %rax
+; X64-NEXT: rep bsfq %rax, %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%z = shl nuw nsw i32 %y, %x
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -275,13 +269,10 @@ define i32 @uaddsat_maybe_zero(i32 %x, i32 %y) {
; X64-NEXT: addl %esi, %edi
; X64-NEXT: movl $-1, %eax
; X64-NEXT: cmovael %edi, %eax
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: je .LBB9_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %eax, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB9_1:
-; X64-NEXT: movl $32, %eax
+; X64-NEXT: movabsq $4294967296, %rcx # imm = 0x100000000
+; X64-NEXT: orq %rax, %rcx
+; X64-NEXT: rep bsfq %rcx, %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%z = call i32 @llvm.uadd.sat.i32(i32 %x, i32 %y)
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -334,15 +325,13 @@ define i32 @umax_maybe_zero(i32 %x, i32 %y) {
;
; X64-LABEL: umax_maybe_zero:
; X64: # %bb.0:
+; X64-NEXT: # kill: def $esi killed $esi def $rsi
; X64-NEXT: cmpl %esi, %edi
; X64-NEXT: cmoval %edi, %esi
-; X64-NEXT: testl %esi, %esi
-; X64-NEXT: je .LBB11_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %esi, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB11_1:
-; X64-NEXT: movl $32, %eax
+; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000
+; X64-NEXT: orq %rsi, %rax
+; X64-NEXT: rep bsfq %rax, %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%z = call i32 @llvm.umax.i32(i32 %x, i32 %y)
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -401,13 +390,10 @@ define i32 @umin_maybe_zero(i32 %x, i32 %y) {
; X64-NEXT: cmpl $54, %edi
; X64-NEXT: movl $54, %eax
; X64-NEXT: cmovbl %edi, %eax
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: je .LBB13_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %eax, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB13_1:
-; X64-NEXT: movl $32, %eax
+; X64-NEXT: movabsq $4294967296, %rcx # imm = 0x100000000
+; X64-NEXT: orq %rax, %rcx
+; X64-NEXT: rep bsfq %rcx, %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%z = call i32 @llvm.umin.i32(i32 %x, i32 54)
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -522,13 +508,10 @@ define i32 @smin_maybe_zero(i32 %x, i32 %y) {
; X64-NEXT: cmpl $54, %edi
; X64-NEXT: movl $54, %eax
; X64-NEXT: cmovll %edi, %eax
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: je .LBB17_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %eax, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB17_1:
-; X64-NEXT: movl $32, %eax
+; X64-NEXT: movabsq $4294967296, %rcx # imm = 0x100000000
+; X64-NEXT: orq %rax, %rcx
+; X64-NEXT: rep bsfq %rcx, %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%z = call i32 @llvm.smin.i32(i32 %x, i32 54)
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -643,13 +626,10 @@ define i32 @smax_known_zero(i32 %x, i32 %y) {
; X64-NEXT: testl %edi, %edi
; X64-NEXT: movl $-1, %eax
; X64-NEXT: cmovnsl %edi, %eax
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: je .LBB21_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %eax, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB21_1:
-; X64-NEXT: movl $32, %eax
+; X64-NEXT: movabsq $4294967296, %rcx # imm = 0x100000000
+; X64-NEXT: orq %rax, %rcx
+; X64-NEXT: rep bsfq %rcx, %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%z = call i32 @llvm.smax.i32(i32 %x, i32 -1)
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -676,16 +656,9 @@ define i32 @rotr_known_nonzero(i32 %xx, i32 %y) {
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: orl $256, %edi # imm = 0x100
-; X64-NEXT: movl %edi, %eax
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NEXT: rorl %cl, %eax
-; X64-NEXT: testl %edi, %edi
-; X64-NEXT: je .LBB22_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %eax, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB22_1:
-; X64-NEXT: movl $32, %eax
+; X64-NEXT: rorl %cl, %edi
+; X64-NEXT: rep bsfl %edi, %eax
; X64-NEXT: retq
%x = or i32 %xx, 256
%shr = lshr i32 %x, %y
@@ -714,16 +687,13 @@ define i32 @rotr_maybe_zero(i32 %x, i32 %y) {
; X64-LABEL: rotr_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: movl %edi, %eax
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NEXT: rorl %cl, %eax
-; X64-NEXT: testl %edi, %edi
-; X64-NEXT: je .LBB23_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %eax, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB23_1:
-; X64-NEXT: movl $32, %eax
+; X64-NEXT: rorl %cl, %edi
+; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000
+; X64-NEXT: orq %rdi, %rax
+; X64-NEXT: rep bsfq %rax, %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%shr = lshr i32 %x, %y
%sub = sub i32 32, %y
@@ -775,16 +745,13 @@ define i32 @rotr_with_fshr_maybe_zero(i32 %x, i32 %y) {
; X64-LABEL: rotr_with_fshr_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: movl %edi, %eax
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NEXT: rorl %cl, %eax
-; X64-NEXT: testl %edi, %edi
-; X64-NEXT: je .LBB25_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %eax, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB25_1:
-; X64-NEXT: movl $32, %eax
+; X64-NEXT: rorl %cl, %edi
+; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000
+; X64-NEXT: orq %rdi, %rax
+; X64-NEXT: rep bsfq %rax, %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%z = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 %y)
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -811,16 +778,9 @@ define i32 @rotl_known_nonzero(i32 %xx, i32 %y) {
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: orl $256, %edi # imm = 0x100
-; X64-NEXT: movl %edi, %eax
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NEXT: roll %cl, %eax
-; X64-NEXT: testl %edi, %edi
-; X64-NEXT: je .LBB26_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %eax, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB26_1:
-; X64-NEXT: movl $32, %eax
+; X64-NEXT: roll %cl, %edi
+; X64-NEXT: rep bsfl %edi, %eax
; X64-NEXT: retq
%x = or i32 %xx, 256
%shl = shl i32 %x, %y
@@ -849,16 +809,13 @@ define i32 @rotl_maybe_zero(i32 %x, i32 %y) {
; X64-LABEL: rotl_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: movl %edi, %eax
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NEXT: roll %cl, %eax
-; X64-NEXT: testl %edi, %edi
-; X64-NEXT: je .LBB27_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %eax, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB27_1:
-; X64-NEXT: movl $32, %eax
+; X64-NEXT: roll %cl, %edi
+; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000
+; X64-NEXT: orq %rdi, %rax
+; X64-NEXT: rep bsfq %rax, %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%shl = shl i32 %x, %y
%sub = sub i32 32, %y
@@ -910,16 +867,13 @@ define i32 @rotl_with_fshl_maybe_zero(i32 %x, i32 %y) {
; X64-LABEL: rotl_with_fshl_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: movl %edi, %eax
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NEXT: roll %cl, %eax
-; X64-NEXT: testl %edi, %edi
-; X64-NEXT: je .LBB29_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %eax, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB29_1:
-; X64-NEXT: movl $32, %eax
+; X64-NEXT: roll %cl, %edi
+; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000
+; X64-NEXT: orq %rdi, %rax
+; X64-NEXT: rep bsfq %rax, %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%z = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %y)
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -989,16 +943,14 @@ define i32 @sra_maybe_zero(i32 %x, i32 %y) {
;
; X64-LABEL: sra_maybe_zero:
; X64: # %bb.0:
+; X64-NEXT: # kill: def $esi killed $esi def $rsi
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: sarl %cl, %esi
-; X64-NEXT: testl %esi, %esi
-; X64-NEXT: je .LBB32_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %esi, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB32_1:
-; X64-NEXT: movl $32, %eax
+; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000
+; X64-NEXT: orq %rsi, %rax
+; X64-NEXT: rep bsfq %rax, %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%z = ashr exact i32 %y, %x
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -1068,16 +1020,14 @@ define i32 @srl_maybe_zero(i32 %x, i32 %y) {
;
; X64-LABEL: srl_maybe_zero:
; X64: # %bb.0:
+; X64-NEXT: # kill: def $esi killed $esi def $rsi
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shrl %cl, %esi
-; X64-NEXT: testl %esi, %esi
-; X64-NEXT: je .LBB35_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %esi, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB35_1:
-; X64-NEXT: movl $32, %eax
+; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000
+; X64-NEXT: orq %rsi, %rax
+; X64-NEXT: rep bsfq %rax, %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%z = lshr exact i32 %y, %x
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -1128,13 +1078,11 @@ define i32 @udiv_maybe_zero(i32 %x, i32 %y) {
; X64-NEXT: movl %edi, %eax
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: divl %esi
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: je .LBB37_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %eax, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB37_1:
-; X64-NEXT: movl $32, %eax
+; X64-NEXT: # kill: def $eax killed $eax def $rax
+; X64-NEXT: movabsq $4294967296, %rcx # imm = 0x100000000
+; X64-NEXT: orq %rax, %rcx
+; X64-NEXT: rep bsfq %rcx, %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%z = udiv exact i32 %x, %y
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -1185,13 +1133,11 @@ define i32 @sdiv_maybe_zero(i32 %x, i32 %y) {
; X64-NEXT: movl %edi, %eax
; X64-NEXT: cltd
; X64-NEXT: idivl %esi
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: je .LBB39_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %eax, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB39_1:
-; X64-NEXT: movl $32, %eax
+; X64-NEXT: # kill: def $eax killed $eax def $rax
+; X64-NEXT: movabsq $4294967296, %rcx # imm = 0x100000000
+; X64-NEXT: orq %rax, %rcx
+; X64-NEXT: rep bsfq %rcx, %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%z = sdiv exact i32 %x, %y
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -1235,14 +1181,13 @@ define i32 @add_maybe_zero(i32 %xx, i32 %y) {
;
; X64-LABEL: add_maybe_zero:
; X64: # %bb.0:
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: orl $1, %edi
; X64-NEXT: addl %esi, %edi
-; X64-NEXT: je .LBB41_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %edi, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB41_1:
-; X64-NEXT: movl $32, %eax
+; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000
+; X64-NEXT: orq %rdi, %rax
+; X64-NEXT: rep bsfq %rax, %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%x = or i32 %xx, 1
%z = add nsw i32 %x, %y
@@ -1321,12 +1266,10 @@ define i32 @sub_maybe_zero(i32 %x) {
; X64-NEXT: movl %edi, %eax
; X64-NEXT: orl $64, %eax
; X64-NEXT: subl %edi, %eax
-; X64-NEXT: je .LBB44_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %eax, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB44_1:
-; X64-NEXT: movl $32, %eax
+; X64-NEXT: movabsq $4294967296, %rcx # imm = 0x100000000
+; X64-NEXT: orq %rax, %rcx
+; X64-NEXT: rep bsfq %rcx, %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%y = or i32 %x, 64
%z = sub i32 %y, %x
@@ -1349,13 +1292,12 @@ define i32 @sub_maybe_zero2(i32 %x) {
;
; X64-LABEL: sub_maybe_zero2:
; X64: # %bb.0:
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: negl %edi
-; X64-NEXT: je .LBB45_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %edi, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB45_1:
-; X64-NEXT: movl $32, %eax
+; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000
+; X64-NEXT: orq %rdi, %rax
+; X64-NEXT: rep bsfq %rax, %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%z = sub i32 0, %x
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -1379,15 +1321,13 @@ define i32 @mul_known_nonzero_nsw(i32 %x, i32 %yy) {
;
; X64-LABEL: mul_known_nonzero_nsw:
; X64: # %bb.0:
+; X64-NEXT: # kill: def $esi killed $esi def $rsi
; X64-NEXT: orl $256, %esi # imm = 0x100
; X64-NEXT: imull %edi, %esi
-; X64-NEXT: testl %esi, %esi
-; X64-NEXT: je .LBB46_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %esi, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB46_1:
-; X64-NEXT: movl $32, %eax
+; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000
+; X64-NEXT: orq %rsi, %rax
+; X64-NEXT: rep bsfq %rax, %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%y = or i32 %yy, 256
%z = mul nsw i32 %y, %x
@@ -1412,15 +1352,13 @@ define i32 @mul_known_nonzero_nuw(i32 %x, i32 %yy) {
;
; X64-LABEL: mul_known_nonzero_nuw:
; X64: # %bb.0:
+; X64-NEXT: # kill: def $esi killed $esi def $rsi
; X64-NEXT: orl $256, %esi # imm = 0x100
; X64-NEXT: imull %edi, %esi
-; X64-NEXT: testl %esi, %esi
-; X64-NEXT: je .LBB47_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %esi, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB47_1:
-; X64-NEXT: movl $32, %eax
+; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000
+; X64-NEXT: orq %rsi, %rax
+; X64-NEXT: rep bsfq %rax, %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%y = or i32 %yy, 256
%z = mul nuw i32 %y, %x
@@ -1444,14 +1382,12 @@ define i32 @mul_maybe_zero(i32 %x, i32 %y) {
;
; X64-LABEL: mul_maybe_zero:
; X64: # %bb.0:
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: imull %esi, %edi
-; X64-NEXT: testl %edi, %edi
-; X64-NEXT: je .LBB48_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %edi, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB48_1:
-; X64-NEXT: movl $32, %eax
+; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000
+; X64-NEXT: orq %rdi, %rax
+; X64-NEXT: rep bsfq %rax, %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%z = mul nuw nsw i32 %y, %x
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -1482,9 +1418,10 @@ define i32 @bitcast_known_nonzero(<2 x i16> %xx) {
; X64-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; X64-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [256,256,u,u,u,u,u,u]
; X64-NEXT: vmovd %xmm0, %eax
-; X64-NEXT: bsfl %eax, %ecx
-; X64-NEXT: movl $32, %eax
-; X64-NEXT: cmovnel %ecx, %eax
+; X64-NEXT: movabsq $4294967296, %rcx # imm = 0x100000000
+; X64-NEXT: orq %rax, %rcx
+; X64-NEXT: rep bsfq %rcx, %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%x = shl nuw nsw <2 x i16> <i16 256, i16 256>, %xx
%z = bitcast <2 x i16> %x to i32
@@ -1508,13 +1445,10 @@ define i32 @bitcast_maybe_zero(<2 x i16> %x) {
; X64-LABEL: bitcast_maybe_zero:
; X64: # %bb.0:
; X64-NEXT: vmovd %xmm0, %eax
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: je .LBB50_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %eax, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB50_1:
-; X64-NEXT: movl $32, %eax
+; X64-NEXT: movabsq $4294967296, %rcx # imm = 0x100000000
+; X64-NEXT: orq %rax, %rcx
+; X64-NEXT: rep bsfq %rcx, %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%z = bitcast <2 x i16> %x to i32
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -1538,13 +1472,10 @@ define i32 @bitcast_from_float(float %x) {
; X64-LABEL: bitcast_from_float:
; X64: # %bb.0:
; X64-NEXT: vmovd %xmm0, %eax
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: je .LBB51_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %eax, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB51_1:
-; X64-NEXT: movl $32, %eax
+; X64-NEXT: movabsq $4294967296, %rcx # imm = 0x100000000
+; X64-NEXT: orq %rax, %rcx
+; X64-NEXT: rep bsfq %rcx, %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%z = bitcast float %x to i32
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -1592,14 +1523,11 @@ define i32 @zext_maybe_zero(i16 %x) {
;
; X64-LABEL: zext_maybe_zero:
; X64: # %bb.0:
-; X64-NEXT: testw %di, %di
-; X64-NEXT: je .LBB53_1
-; X64-NEXT: # %bb.2: # %cond.false
; X64-NEXT: movzwl %di, %eax
-; X64-NEXT: rep bsfl %eax, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB53_1:
-; X64-NEXT: movl $32, %eax
+; X64-NEXT: movabsq $4294967296, %rcx # imm = 0x100000000
+; X64-NEXT: orq %rax, %rcx
+; X64-NEXT: rep bsfq %rcx, %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%z = zext i16 %x to i32
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
@@ -1646,14 +1574,11 @@ define i32 @sext_maybe_zero(i16 %x) {
;
; X64-LABEL: sext_maybe_zero:
; X64: # %bb.0:
-; X64-NEXT: testw %di, %di
-; X64-NEXT: je .LBB55_1
-; X64-NEXT: # %bb.2: # %cond.false
; X64-NEXT: movswl %di, %eax
-; X64-NEXT: rep bsfl %eax, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB55_1:
-; X64-NEXT: movl $32, %eax
+; X64-NEXT: movabsq $4294967296, %rcx # imm = 0x100000000
+; X64-NEXT: orq %rax, %rcx
+; X64-NEXT: rep bsfq %rcx, %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%z = sext i16 %x to i32
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
diff --git a/llvm/test/CodeGen/X86/pr89877.ll b/llvm/test/CodeGen/X86/pr89877.ll
index 9820ec42f5b8cc..fdbe75b467d992 100644
--- a/llvm/test/CodeGen/X86/pr89877.ll
+++ b/llvm/test/CodeGen/X86/pr89877.ll
@@ -24,14 +24,11 @@ define i32 @sext_known_nonzero(i16 %xx) {
; X64-NEXT: movl $256, %eax # imm = 0x100
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shll %cl, %eax
-; X64-NEXT: cwtl
-; X64-NEXT: testl %eax, %eax
-; X64-NEXT: je .LBB0_1
-; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfl %eax, %eax
-; X64-NEXT: retq
-; X64-NEXT: .LBB0_1:
-; X64-NEXT: movl $32, %eax
+; X64-NEXT: movswq %ax, %rax
+; X64-NEXT: movabsq $4294967296, %rcx # imm = 0x100000000
+; X64-NEXT: orq %rax, %rcx
+; X64-NEXT: rep bsfq %rcx, %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
%x = shl i16 256, %xx
%z = sext i16 %x to i32
diff --git a/llvm/test/CodeGen/X86/pr90847.ll b/llvm/test/CodeGen/X86/pr90847.ll
index 7aa0ceb26e1acb..f2d43c3ed8d5bd 100644
--- a/llvm/test/CodeGen/X86/pr90847.ll
+++ b/llvm/test/CodeGen/X86/pr90847.ll
@@ -15,14 +15,10 @@ define i32 @PR90847(<8 x float> %x) nounwind {
; AVX1-NEXT: vminps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: vcmpeqps %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vmovmskps %ymm0, %eax
-; AVX1-NEXT: testl %eax, %eax
-; AVX1-NEXT: je .LBB0_1
-; AVX1-NEXT: # %bb.2: # %cond.false
-; AVX1-NEXT: rep bsfl %eax, %eax
-; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: retq
-; AVX1-NEXT: .LBB0_1:
-; AVX1-NEXT: movl $32, %eax
+; AVX1-NEXT: movabsq $4294967296, %rcx # imm = 0x100000000
+; AVX1-NEXT: orq %rax, %rcx
+; AVX1-NEXT: rep bsfq %rcx, %rax
+; AVX1-NEXT: # kill: def $eax killed $eax killed $rax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
@@ -36,14 +32,10 @@ define i32 @PR90847(<8 x float> %x) nounwind {
; AVX2-NEXT: vminps %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vcmpeqps %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vmovmskps %ymm0, %eax
-; AVX2-NEXT: testl %eax, %eax
-; AVX2-NEXT: je .LBB0_1
-; AVX2-NEXT: # %bb.2: # %cond.false
-; AVX2-NEXT: rep bsfl %eax, %eax
-; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: retq
-; AVX2-NEXT: .LBB0_1:
-; AVX2-NEXT: movl $32, %eax
+; AVX2-NEXT: movabsq $4294967296, %rcx # imm = 0x100000000
+; AVX2-NEXT: orq %rax, %rcx
+; AVX2-NEXT: rep bsfq %rcx, %rax
+; AVX2-NEXT: # kill: def $eax killed $eax killed $rax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
entry:
More information about the llvm-commits
mailing list