[llvm] [X86] Use the standard cmp+cmov for select (X != 0), -1, Y if we will be setting sbb to 0 anyway (PR #149672)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Jul 20 07:01:00 PDT 2025
https://github.com/AZero13 updated https://github.com/llvm/llvm-project/pull/149672
>From bebcd9d49b4ff5dca6ae19b11321384f41160ff4 Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Sat, 19 Jul 2025 15:37:15 -0400
Subject: [PATCH 1/3] [X86] Use the standard cmp+cmov for select (X != 0), -1,
Y if we will be setting sbb to 0 anyway
If we have to zero the sbb destination register first anyway, we lose the point of using sbb; and it is not as though we use this idiom for any other cmp, only for comparisons against 0.
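
For example, for the i64 case in select.ll's test9 below, the change swaps the sbb idiom for a plain cmp+cmov (sketch taken from the test updates in this patch):

    # before (sbb idiom)
    xorl %eax, %eax
    cmpq $1, %rdi
    sbbq %rax, %rax
    orq %rsi, %rax

    # after (cmp+cmov)
    testq %rdi, %rdi
    movq $-1, %rax
    cmovneq %rsi, %rax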
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 24 +++
llvm/test/CodeGen/X86/bmi-select-distrib.ll | 161 +++++++-------------
llvm/test/CodeGen/X86/pr35972.ll | 7 +-
llvm/test/CodeGen/X86/sbb-false-dep.ll | 34 ++---
llvm/test/CodeGen/X86/select.ll | 88 +++++++----
llvm/test/CodeGen/X86/umul_fix_sat.ll | 24 ++-
6 files changed, 171 insertions(+), 167 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index d91ea1ea1bb1b..85238722103db 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -24917,6 +24917,30 @@ static SDValue LowerSELECTWithCmpZero(SDValue CmpVal, SDValue LHS, SDValue RHS,
if ((X86CC == X86::COND_E || X86CC == X86::COND_NE) &&
(isAllOnesConstant(LHS) || isAllOnesConstant(RHS))) {
SDValue Y = isAllOnesConstant(RHS) ? LHS : RHS;
+
+ // If CMOV is available, use it instead. Only prefer CMOV when SBB
+ // dependency breaking is not available or when CMOV is likely to be more
+ // efficient
+ if (Subtarget.canUseCMOV() &&
+ (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) &&
+ !Subtarget.hasSBBDepBreaking()) {
+ // Create comparison against zero to set EFLAGS
+ SDValue Zero = DAG.getConstant(0, DL, CmpVT);
+ SDValue Cmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, CmpVal, Zero);
+
+ // For CMOV: FalseVal is used when condition is false, TrueVal when
+ // condition is true We want: when X==0 return -1, when X!=0 return Y So
+ // condition should be (X == 0), TrueVal = -1, FalseVal = Y The SBB
+ // pattern implements: (CmpVal X86CC 0) ? LHS : RHS We need to implement
+ // exactly the same select operation with CMOV CMOV semantics: CMOV
+ // condition, TrueVal, FalseVal Returns TrueVal if condition is true,
+ // FalseVal if condition is false
+
+ return DAG.getNode(X86ISD::CMOV, DL, VT, RHS, LHS,
+ DAG.getTargetConstant(X86CC, DL, MVT::i8), Cmp);
+ }
+
+ // Fall back to SBB pattern for older processors or unsupported types
SDVTList CmpVTs = DAG.getVTList(CmpVT, MVT::i32);
// 'X - 1' sets the carry flag if X == 0.
diff --git a/llvm/test/CodeGen/X86/bmi-select-distrib.ll b/llvm/test/CodeGen/X86/bmi-select-distrib.ll
index e5696ded4fbf1..dc98d338cc382 100644
--- a/llvm/test/CodeGen/X86/bmi-select-distrib.ll
+++ b/llvm/test/CodeGen/X86/bmi-select-distrib.ll
@@ -128,30 +128,23 @@ define i32 @and_neg_select_pos_i32(i1 %a0, i32 inreg %a1) nounwind {
define i16 @and_select_neg_i16(i1 %a0, i16 %a1) nounwind {
; X86-LABEL: and_select_neg_i16:
; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: andb $1, %cl
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl %edx, %esi
-; X86-NEXT: negl %esi
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpb $1, %cl
-; X86-NEXT: sbbl %eax, %eax
-; X86-NEXT: orl %esi, %eax
-; X86-NEXT: andl %edx, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, %edx
+; X86-NEXT: negl %edx
+; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
+; X86-NEXT: movw $-1, %ax
+; X86-NEXT: cmovnew %dx, %ax
+; X86-NEXT: andl %ecx, %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
-; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: and_select_neg_i16:
; X64: # %bb.0:
-; X64-NEXT: andb $1, %dil
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: negl %ecx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpb $1, %dil
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl %ecx, %eax
+; X64-NEXT: testb $1, %dil
+; X64-NEXT: movw $-1, %ax
+; X64-NEXT: cmovnew %cx, %ax
; X64-NEXT: andl %esi, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
@@ -200,22 +193,17 @@ define <4 x i32> @and_select_neg_v4xi32(i1 %a0, <4 x i32> %a1) nounwind {
define i32 @and_select_no_neg(i1 %a0, i32 inreg %a1) nounwind {
; X86-LABEL: and_select_no_neg:
; X86: # %bb.0:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: andb $1, %cl
-; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: cmpb $1, %cl
-; X86-NEXT: sbbl %edx, %edx
-; X86-NEXT: orl %eax, %edx
-; X86-NEXT: andl %edx, %eax
+; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $-1, %ecx
+; X86-NEXT: cmovnel %eax, %ecx
+; X86-NEXT: andl %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: and_select_no_neg:
; X64: # %bb.0:
-; X64-NEXT: andb $1, %dil
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpb $1, %dil
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl %esi, %eax
+; X64-NEXT: testb $1, %dil
+; X64-NEXT: movl $-1, %eax
+; X64-NEXT: cmovnel %esi, %eax
; X64-NEXT: andl %esi, %eax
; X64-NEXT: retq
%sub = sub i32 %a1, 0
@@ -255,26 +243,19 @@ define i32 @and_select_neg_wrong_const(i1 %a0, i32 inreg %a1) nounwind {
define i32 @and_select_neg_different_op(i1 %a0, i32 inreg %a1, i32 inreg %a2) nounwind {
; X86-LABEL: and_select_neg_different_op:
; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: andb $1, %cl
; X86-NEXT: negl %edx
-; X86-NEXT: xorl %esi, %esi
-; X86-NEXT: cmpb $1, %cl
-; X86-NEXT: sbbl %esi, %esi
-; X86-NEXT: orl %edx, %esi
-; X86-NEXT: andl %esi, %eax
-; X86-NEXT: popl %esi
+; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $-1, %ecx
+; X86-NEXT: cmovnel %edx, %ecx
+; X86-NEXT: andl %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: and_select_neg_different_op:
; X64: # %bb.0:
-; X64-NEXT: andb $1, %dil
; X64-NEXT: negl %edx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpb $1, %dil
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl %edx, %eax
+; X64-NEXT: testb $1, %dil
+; X64-NEXT: movl $-1, %eax
+; X64-NEXT: cmovnel %edx, %eax
; X64-NEXT: andl %esi, %eax
; X64-NEXT: retq
%sub = sub i32 0, %a2
@@ -427,29 +408,22 @@ define i64 @and_select_sub_1_to_blsr_i64(i1 %a0, i64 %a1) nounwind {
define i16 @and_select_sub_1_i16(i1 %a0, i16 %a1) nounwind {
; X86-LABEL: and_select_sub_1_i16:
; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: andb $1, %cl
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: leal -1(%edx), %esi
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpb $1, %cl
-; X86-NEXT: sbbl %eax, %eax
-; X86-NEXT: orl %esi, %eax
-; X86-NEXT: andl %edx, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: leal -1(%ecx), %edx
+; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
+; X86-NEXT: movw $-1, %ax
+; X86-NEXT: cmovnew %dx, %ax
+; X86-NEXT: andl %ecx, %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
-; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: and_select_sub_1_i16:
; X64: # %bb.0:
; X64-NEXT: # kill: def $esi killed $esi def $rsi
-; X64-NEXT: andb $1, %dil
; X64-NEXT: leal -1(%rsi), %ecx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpb $1, %dil
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl %ecx, %eax
+; X64-NEXT: testb $1, %dil
+; X64-NEXT: movw $-1, %ax
+; X64-NEXT: cmovnew %cx, %ax
; X64-NEXT: andl %esi, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
@@ -492,27 +466,20 @@ define <4 x i32> @and_select_sub_1_v4xi32(i1 %a0, <4 x i32> %a1) nounwind {
define i32 @and_select_no_sub_1(i1 %a0, i32 inreg %a1) nounwind {
; X86-LABEL: and_select_no_sub_1:
; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: andb $1, %cl
-; X86-NEXT: leal -2(%eax), %edx
-; X86-NEXT: xorl %esi, %esi
-; X86-NEXT: cmpb $1, %cl
-; X86-NEXT: sbbl %esi, %esi
-; X86-NEXT: orl %edx, %esi
-; X86-NEXT: andl %esi, %eax
-; X86-NEXT: popl %esi
+; X86-NEXT: leal -2(%eax), %ecx
+; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $-1, %edx
+; X86-NEXT: cmovnel %ecx, %edx
+; X86-NEXT: andl %edx, %eax
; X86-NEXT: retl
;
; X64-LABEL: and_select_no_sub_1:
; X64: # %bb.0:
; X64-NEXT: # kill: def $esi killed $esi def $rsi
-; X64-NEXT: andb $1, %dil
; X64-NEXT: leal -2(%rsi), %ecx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpb $1, %dil
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl %ecx, %eax
+; X64-NEXT: testb $1, %dil
+; X64-NEXT: movl $-1, %eax
+; X64-NEXT: cmovnel %ecx, %eax
; X64-NEXT: andl %esi, %eax
; X64-NEXT: retq
%sub = add i32 %a1, -2
@@ -551,27 +518,20 @@ define i32 @and_select_sub_1_wrong_const(i1 %a0, i32 inreg %a1) nounwind {
define i32 @and_select_sub_1_different_op(i1 %a0, i32 inreg %a1, i32 inreg %a2) nounwind {
; X86-LABEL: and_select_sub_1_different_op:
; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: andb $1, %cl
-; X86-NEXT: decl %edx
-; X86-NEXT: xorl %esi, %esi
-; X86-NEXT: cmpb $1, %cl
-; X86-NEXT: sbbl %esi, %esi
-; X86-NEXT: orl %edx, %esi
-; X86-NEXT: andl %esi, %eax
-; X86-NEXT: popl %esi
+; X86-NEXT: leal -1(%edx), %ecx
+; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $-1, %edx
+; X86-NEXT: cmovnel %ecx, %edx
+; X86-NEXT: andl %edx, %eax
; X86-NEXT: retl
;
; X64-LABEL: and_select_sub_1_different_op:
; X64: # %bb.0:
; X64-NEXT: # kill: def $edx killed $edx def $rdx
-; X64-NEXT: andb $1, %dil
; X64-NEXT: leal -1(%rdx), %ecx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpb $1, %dil
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl %ecx, %eax
+; X64-NEXT: testb $1, %dil
+; X64-NEXT: movl $-1, %eax
+; X64-NEXT: cmovnel %ecx, %eax
; X64-NEXT: andl %esi, %eax
; X64-NEXT: retq
%sub = add i32 %a2, -1
@@ -809,27 +769,20 @@ define i32 @xor_select_no_sub_1(i1 %a0, i32 inreg %a1) nounwind {
define i32 @xor_select_sub_1_wrong_const(i1 %a0, i32 inreg %a1) nounwind {
; X86-LABEL: xor_select_sub_1_wrong_const:
; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: andb $1, %cl
-; X86-NEXT: leal -1(%eax), %edx
-; X86-NEXT: xorl %esi, %esi
-; X86-NEXT: cmpb $1, %cl
-; X86-NEXT: sbbl %esi, %esi
-; X86-NEXT: orl %edx, %esi
-; X86-NEXT: xorl %esi, %eax
-; X86-NEXT: popl %esi
+; X86-NEXT: leal -1(%eax), %ecx
+; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $-1, %edx
+; X86-NEXT: cmovnel %ecx, %edx
+; X86-NEXT: xorl %edx, %eax
; X86-NEXT: retl
;
; X64-LABEL: xor_select_sub_1_wrong_const:
; X64: # %bb.0:
; X64-NEXT: # kill: def $esi killed $esi def $rsi
-; X64-NEXT: andb $1, %dil
; X64-NEXT: leal -1(%rsi), %ecx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpb $1, %dil
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl %ecx, %eax
+; X64-NEXT: testb $1, %dil
+; X64-NEXT: movl $-1, %eax
+; X64-NEXT: cmovnel %ecx, %eax
; X64-NEXT: xorl %esi, %eax
; X64-NEXT: retq
%sub = add i32 %a1, -1
diff --git a/llvm/test/CodeGen/X86/pr35972.ll b/llvm/test/CodeGen/X86/pr35972.ll
index 981c47800c0f3..e609981c2e752 100644
--- a/llvm/test/CodeGen/X86/pr35972.ll
+++ b/llvm/test/CodeGen/X86/pr35972.ll
@@ -6,9 +6,10 @@ define void @test3(i32 %c, ptr %ptr) {
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: xorl %ecx, %ecx
-; CHECK-NEXT: cmpl $1, {{[0-9]+}}(%esp)
-; CHECK-NEXT: sbbl %ecx, %ecx
-; CHECK-NEXT: kmovd %ecx, %k0
+; CHECK-NEXT: cmpl $0, {{[0-9]+}}(%esp)
+; CHECK-NEXT: movl $-1, %edx
+; CHECK-NEXT: cmovnel %ecx, %edx
+; CHECK-NEXT: kmovd %edx, %k0
; CHECK-NEXT: kunpckdq %k0, %k0, %k0
; CHECK-NEXT: kmovq %k0, (%eax)
; CHECK-NEXT: retl
diff --git a/llvm/test/CodeGen/X86/sbb-false-dep.ll b/llvm/test/CodeGen/X86/sbb-false-dep.ll
index 34a92cb58692b..f53f362289c45 100644
--- a/llvm/test/CodeGen/X86/sbb-false-dep.ll
+++ b/llvm/test/CodeGen/X86/sbb-false-dep.ll
@@ -12,36 +12,32 @@ define i32 @mallocbench_gs(ptr noundef %0, ptr noundef %1, i32 noundef %2, i32 n
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: pushq %r12
; CHECK-NEXT: pushq %rbx
-; CHECK-NEXT: movl %r8d, %ebp
-; CHECK-NEXT: movl %ecx, %r14d
-; CHECK-NEXT: movl %edx, %r15d
-; CHECK-NEXT: movq %rsi, %rbx
+; CHECK-NEXT: movl %r8d, %ebx
+; CHECK-NEXT: movl %ecx, %ebp
+; CHECK-NEXT: movl %edx, %r14d
+; CHECK-NEXT: movq %rsi, %r15
; CHECK-NEXT: movq %rdi, %r12
; CHECK-NEXT: movq (%rsi), %rdi
; CHECK-NEXT: movq 8(%rsi), %rsi
-; CHECK-NEXT: movq %rbx, %rdx
+; CHECK-NEXT: movq %r15, %rdx
; CHECK-NEXT: callq foo1 at PLT
-; CHECK-NEXT: movq 8(%rbx), %rax
+; CHECK-NEXT: testl %ebx, %ebx
+; CHECK-NEXT: movq 8(%r15), %rax
; CHECK-NEXT: movq (%rax), %rax
-; CHECK-NEXT: xorl %r10d, %r10d
-; CHECK-NEXT: movl %ebp, %ecx
-; CHECK-NEXT: negl %ecx
-; CHECK-NEXT: movl $0, %r11d
-; CHECK-NEXT: sbbq %r11, %r11
-; CHECK-NEXT: orq %rax, %r11
-; CHECK-NEXT: cmpl $1, %ebp
-; CHECK-NEXT: sbbq %r10, %r10
-; CHECK-NEXT: orq %rax, %r10
+; CHECK-NEXT: movq $-1, %rcx
+; CHECK-NEXT: movq $-1, %r10
+; CHECK-NEXT: cmoveq %rax, %r10
+; CHECK-NEXT: cmoveq %rcx, %rax
; CHECK-NEXT: subq $8, %rsp
; CHECK-NEXT: movq %r12, %rdi
-; CHECK-NEXT: movl %r15d, %esi
-; CHECK-NEXT: movl %r14d, %edx
+; CHECK-NEXT: movl %r14d, %esi
+; CHECK-NEXT: movl %ebp, %edx
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: xorl %r8d, %r8d
; CHECK-NEXT: xorl %r9d, %r9d
+; CHECK-NEXT: pushq %rax
; CHECK-NEXT: pushq %r10
-; CHECK-NEXT: pushq %r11
-; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: pushq %r15
; CHECK-NEXT: callq foo2 at PLT
; CHECK-NEXT: addq $32, %rsp
; CHECK-NEXT: popq %rbx
diff --git a/llvm/test/CodeGen/X86/select.ll b/llvm/test/CodeGen/X86/select.ll
index 4e31b48ec5cec..f5ac941fda930 100644
--- a/llvm/test/CodeGen/X86/select.ll
+++ b/llvm/test/CodeGen/X86/select.ll
@@ -627,13 +627,21 @@ define void @test8(i1 %c, ptr %dst.addr, <6 x i32> %src1,<6 x i32> %src2) nounwi
;; Test integer select between values and constants.
define i64 @test9(i64 %x, i64 %y) nounwind readnone ssp noredzone {
-; CHECK-LABEL: test9:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: cmpq $1, %rdi
-; CHECK-NEXT: sbbq %rax, %rax
-; CHECK-NEXT: orq %rsi, %rax
-; CHECK-NEXT: retq
+; GENERIC-LABEL: test9:
+; GENERIC: ## %bb.0:
+; GENERIC-NEXT: testq %rdi, %rdi
+; GENERIC-NEXT: movq $-1, %rax
+; GENERIC-NEXT: cmovneq %rsi, %rax
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test9:
+; ATOM: ## %bb.0:
+; ATOM-NEXT: testq %rdi, %rdi
+; ATOM-NEXT: movq $-1, %rax
+; ATOM-NEXT: cmovneq %rsi, %rax
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
;
; ATHLON-LABEL: test9:
; ATHLON: ## %bb.0:
@@ -667,13 +675,21 @@ define i64 @test9(i64 %x, i64 %y) nounwind readnone ssp noredzone {
;; Same as test9
define i64 @test9a(i64 %x, i64 %y) nounwind readnone ssp noredzone {
-; CHECK-LABEL: test9a:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: cmpq $1, %rdi
-; CHECK-NEXT: sbbq %rax, %rax
-; CHECK-NEXT: orq %rsi, %rax
-; CHECK-NEXT: retq
+; GENERIC-LABEL: test9a:
+; GENERIC: ## %bb.0:
+; GENERIC-NEXT: testq %rdi, %rdi
+; GENERIC-NEXT: movq $-1, %rax
+; GENERIC-NEXT: cmovneq %rsi, %rax
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test9a:
+; ATOM: ## %bb.0:
+; ATOM-NEXT: testq %rdi, %rdi
+; ATOM-NEXT: movq $-1, %rax
+; ATOM-NEXT: cmovneq %rsi, %rax
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
;
; ATHLON-LABEL: test9a:
; ATHLON: ## %bb.0:
@@ -779,13 +795,21 @@ define i64 @test10(i64 %x, i64 %y) nounwind readnone ssp noredzone {
}
define i64 @test11(i64 %x, i64 %y) nounwind readnone ssp noredzone {
-; CHECK-LABEL: test11:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: negq %rdi
-; CHECK-NEXT: sbbq %rax, %rax
-; CHECK-NEXT: orq %rsi, %rax
-; CHECK-NEXT: retq
+; GENERIC-LABEL: test11:
+; GENERIC: ## %bb.0:
+; GENERIC-NEXT: testq %rdi, %rdi
+; GENERIC-NEXT: movq $-1, %rax
+; GENERIC-NEXT: cmoveq %rsi, %rax
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test11:
+; ATOM: ## %bb.0:
+; ATOM-NEXT: testq %rdi, %rdi
+; ATOM-NEXT: movq $-1, %rax
+; ATOM-NEXT: cmoveq %rsi, %rax
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
;
; ATHLON-LABEL: test11:
; ATHLON: ## %bb.0:
@@ -818,13 +842,21 @@ define i64 @test11(i64 %x, i64 %y) nounwind readnone ssp noredzone {
}
define i64 @test11a(i64 %x, i64 %y) nounwind readnone ssp noredzone {
-; CHECK-LABEL: test11a:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: negq %rdi
-; CHECK-NEXT: sbbq %rax, %rax
-; CHECK-NEXT: orq %rsi, %rax
-; CHECK-NEXT: retq
+; GENERIC-LABEL: test11a:
+; GENERIC: ## %bb.0:
+; GENERIC-NEXT: testq %rdi, %rdi
+; GENERIC-NEXT: movq $-1, %rax
+; GENERIC-NEXT: cmoveq %rsi, %rax
+; GENERIC-NEXT: retq
+;
+; ATOM-LABEL: test11a:
+; ATOM: ## %bb.0:
+; ATOM-NEXT: testq %rdi, %rdi
+; ATOM-NEXT: movq $-1, %rax
+; ATOM-NEXT: cmoveq %rsi, %rax
+; ATOM-NEXT: nop
+; ATOM-NEXT: nop
+; ATOM-NEXT: retq
;
; ATHLON-LABEL: test11a:
; ATHLON: ## %bb.0:
diff --git a/llvm/test/CodeGen/X86/umul_fix_sat.ll b/llvm/test/CodeGen/X86/umul_fix_sat.ll
index 8c7078c726328..6728d25abf1b6 100644
--- a/llvm/test/CodeGen/X86/umul_fix_sat.ll
+++ b/llvm/test/CodeGen/X86/umul_fix_sat.ll
@@ -441,33 +441,31 @@ define i64 @func7(i64 %x, i64 %y) nounwind {
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl %edi, %eax
+; X86-NEXT: movl %ebx, %eax
; X86-NEXT: mull %ebp
; X86-NEXT: movl %edx, %ecx
; X86-NEXT: movl %eax, %esi
-; X86-NEXT: movl %edi, %eax
-; X86-NEXT: mull %ebx
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: mull %edi
; X86-NEXT: addl %edx, %esi
; X86-NEXT: adcl $0, %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mull %ebp
-; X86-NEXT: movl %edx, %edi
+; X86-NEXT: movl %edx, %ebx
; X86-NEXT: movl %eax, %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: mull %ebx
+; X86-NEXT: mull %edi
; X86-NEXT: addl %esi, %eax
; X86-NEXT: adcl %ecx, %edx
-; X86-NEXT: adcl $0, %edi
+; X86-NEXT: adcl $0, %ebx
; X86-NEXT: addl %ebp, %edx
-; X86-NEXT: adcl $0, %edi
-; X86-NEXT: xorl %ecx, %ecx
-; X86-NEXT: negl %edi
-; X86-NEXT: sbbl %ecx, %ecx
-; X86-NEXT: orl %ecx, %eax
-; X86-NEXT: orl %ecx, %edx
+; X86-NEXT: adcl $0, %ebx
+; X86-NEXT: movl $-1, %ecx
+; X86-NEXT: cmovnel %ecx, %eax
+; X86-NEXT: cmovnel %ecx, %edx
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
>From 2ce9ac3acecde41939890ceb3702d4ac0c223115 Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Sun, 20 Jul 2025 10:00:14 -0400
Subject: [PATCH 2/3] Fix regression and review concerns
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 20 +++-----------------
llvm/test/CodeGen/X86/pr35972.ll | 7 +++----
2 files changed, 6 insertions(+), 21 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 85238722103db..47e86d1ad87c3 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -24921,24 +24921,10 @@ static SDValue LowerSELECTWithCmpZero(SDValue CmpVal, SDValue LHS, SDValue RHS,
// If CMOV is available, use it instead. Only prefer CMOV when SBB
// dependency breaking is not available or when CMOV is likely to be more
// efficient
- if (Subtarget.canUseCMOV() &&
+ if (!isNullConstant(Y) && Subtarget.canUseCMOV() &&
(VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) &&
- !Subtarget.hasSBBDepBreaking()) {
- // Create comparison against zero to set EFLAGS
- SDValue Zero = DAG.getConstant(0, DL, CmpVT);
- SDValue Cmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, CmpVal, Zero);
-
- // For CMOV: FalseVal is used when condition is false, TrueVal when
- // condition is true We want: when X==0 return -1, when X!=0 return Y So
- // condition should be (X == 0), TrueVal = -1, FalseVal = Y The SBB
- // pattern implements: (CmpVal X86CC 0) ? LHS : RHS We need to implement
- // exactly the same select operation with CMOV CMOV semantics: CMOV
- // condition, TrueVal, FalseVal Returns TrueVal if condition is true,
- // FalseVal if condition is false
-
- return DAG.getNode(X86ISD::CMOV, DL, VT, RHS, LHS,
- DAG.getTargetConstant(X86CC, DL, MVT::i8), Cmp);
- }
+ !Subtarget.hasSBBDepBreaking())
+ return SDValue();
// Fall back to SBB pattern for older processors or unsupported types
SDVTList CmpVTs = DAG.getVTList(CmpVT, MVT::i32);
diff --git a/llvm/test/CodeGen/X86/pr35972.ll b/llvm/test/CodeGen/X86/pr35972.ll
index e609981c2e752..981c47800c0f3 100644
--- a/llvm/test/CodeGen/X86/pr35972.ll
+++ b/llvm/test/CodeGen/X86/pr35972.ll
@@ -6,10 +6,9 @@ define void @test3(i32 %c, ptr %ptr) {
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: xorl %ecx, %ecx
-; CHECK-NEXT: cmpl $0, {{[0-9]+}}(%esp)
-; CHECK-NEXT: movl $-1, %edx
-; CHECK-NEXT: cmovnel %ecx, %edx
-; CHECK-NEXT: kmovd %edx, %k0
+; CHECK-NEXT: cmpl $1, {{[0-9]+}}(%esp)
+; CHECK-NEXT: sbbl %ecx, %ecx
+; CHECK-NEXT: kmovd %ecx, %k0
; CHECK-NEXT: kunpckdq %k0, %k0, %k0
; CHECK-NEXT: kmovq %k0, (%eax)
; CHECK-NEXT: retl
>From 74a7e698db7bc7078f8424bab68799baa3cfde7c Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Sun, 20 Jul 2025 10:00:48 -0400
Subject: [PATCH 3/3] Grammar (NFC)
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 47e86d1ad87c3..d206f3bba0b54 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -24920,13 +24920,13 @@ static SDValue LowerSELECTWithCmpZero(SDValue CmpVal, SDValue LHS, SDValue RHS,
// If CMOV is available, use it instead. Only prefer CMOV when SBB
// dependency breaking is not available or when CMOV is likely to be more
- // efficient
+ // efficient.
if (!isNullConstant(Y) && Subtarget.canUseCMOV() &&
(VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) &&
!Subtarget.hasSBBDepBreaking())
return SDValue();
- // Fall back to SBB pattern for older processors or unsupported types
+ // Fall back to SBB pattern for older processors or unsupported types.
SDVTList CmpVTs = DAG.getVTList(CmpVT, MVT::i32);
// 'X - 1' sets the carry flag if X == 0.