[llvm] [X86] Use the standard cmp+cmov for select (X != 0), -1, Y if we will be setting sbb to 0 anyway (PR #149672)

via llvm-commits <llvm-commits at lists.llvm.org>
Sun Jul 20 07:01:00 PDT 2025


https://github.com/AZero13 updated https://github.com/llvm/llvm-project/pull/149672

>From bebcd9d49b4ff5dca6ae19b11321384f41160ff4 Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Sat, 19 Jul 2025 15:37:15 -0400
Subject: [PATCH 1/3] [X86] Use the standard cmp+cmov for select (X != 0), -1,
 Y if we will be setting sbb to 0 anyway

If we have to zero the destination register for the sbb anyway, the sbb idiom loses its point; and we only use this trick for compares against 0 in the first place, not for any other cmp.
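
For reference, here is a minimal sketch of the pattern being changed (hypothetical
function name, not one of the tests touched below), with the before/after x86-64
lowering taken from the test9 update in llvm/test/CodeGen/X86/select.ll:

  ; select (X == 0) ? -1 : Y
  define i64 @select_allones_or_y(i64 %x, i64 %y) {
    %cmp = icmp eq i64 %x, 0
    %sel = select i1 %cmp, i64 -1, i64 %y
    ret i64 %sel
  }

  ; Old lowering (sbb idiom):        New lowering (cmp+cmov):
  ;   xorl  %eax, %eax                 testq   %rdi, %rdi
  ;   cmpq  $1, %rdi                   movq    $-1, %rax
  ;   sbbq  %rax, %rax                 cmovneq %rsi, %rax
  ;   orq   %rsi, %rax                 retq
  ;   retq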
---
 llvm/lib/Target/X86/X86ISelLowering.cpp     |  24 +++
 llvm/test/CodeGen/X86/bmi-select-distrib.ll | 161 +++++++-------------
 llvm/test/CodeGen/X86/pr35972.ll            |   7 +-
 llvm/test/CodeGen/X86/sbb-false-dep.ll      |  34 ++---
 llvm/test/CodeGen/X86/select.ll             |  88 +++++++----
 llvm/test/CodeGen/X86/umul_fix_sat.ll       |  24 ++-
 6 files changed, 171 insertions(+), 167 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index d91ea1ea1bb1b..85238722103db 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -24917,6 +24917,30 @@ static SDValue LowerSELECTWithCmpZero(SDValue CmpVal, SDValue LHS, SDValue RHS,
   if ((X86CC == X86::COND_E || X86CC == X86::COND_NE) &&
       (isAllOnesConstant(LHS) || isAllOnesConstant(RHS))) {
     SDValue Y = isAllOnesConstant(RHS) ? LHS : RHS;
+
+    // If CMOV is available, use it instead. Only prefer CMOV when SBB
+    // dependency breaking is not available or when CMOV is likely to be more
+    // efficient
+    if (Subtarget.canUseCMOV() &&
+        (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) &&
+        !Subtarget.hasSBBDepBreaking()) {
+      // Create comparison against zero to set EFLAGS
+      SDValue Zero = DAG.getConstant(0, DL, CmpVT);
+      SDValue Cmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, CmpVal, Zero);
+
+      // For CMOV: FalseVal is used when condition is false, TrueVal when
+      // condition is true We want: when X==0 return -1, when X!=0 return Y So
+      // condition should be (X == 0), TrueVal = -1, FalseVal = Y The SBB
+      // pattern implements: (CmpVal X86CC 0) ? LHS : RHS We need to implement
+      // exactly the same select operation with CMOV CMOV semantics: CMOV
+      // condition, TrueVal, FalseVal Returns TrueVal if condition is true,
+      // FalseVal if condition is false
+
+      return DAG.getNode(X86ISD::CMOV, DL, VT, RHS, LHS,
+                         DAG.getTargetConstant(X86CC, DL, MVT::i8), Cmp);
+    }
+
+    // Fall back to SBB pattern for older processors or unsupported types
     SDVTList CmpVTs = DAG.getVTList(CmpVT, MVT::i32);
 
     // 'X - 1' sets the carry flag if X == 0.
diff --git a/llvm/test/CodeGen/X86/bmi-select-distrib.ll b/llvm/test/CodeGen/X86/bmi-select-distrib.ll
index e5696ded4fbf1..dc98d338cc382 100644
--- a/llvm/test/CodeGen/X86/bmi-select-distrib.ll
+++ b/llvm/test/CodeGen/X86/bmi-select-distrib.ll
@@ -128,30 +128,23 @@ define i32 @and_neg_select_pos_i32(i1 %a0, i32 inreg %a1) nounwind {
 define i16 @and_select_neg_i16(i1 %a0, i16 %a1) nounwind {
 ; X86-LABEL: and_select_neg_i16:
 ; X86:       # %bb.0:
-; X86-NEXT:    pushl %esi
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    andb $1, %cl
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl %edx, %esi
-; X86-NEXT:    negl %esi
-; X86-NEXT:    xorl %eax, %eax
-; X86-NEXT:    cmpb $1, %cl
-; X86-NEXT:    sbbl %eax, %eax
-; X86-NEXT:    orl %esi, %eax
-; X86-NEXT:    andl %edx, %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:    negl %edx
+; X86-NEXT:    testb $1, {{[0-9]+}}(%esp)
+; X86-NEXT:    movw $-1, %ax
+; X86-NEXT:    cmovnew %dx, %ax
+; X86-NEXT:    andl %ecx, %eax
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
-; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: and_select_neg_i16:
 ; X64:       # %bb.0:
-; X64-NEXT:    andb $1, %dil
 ; X64-NEXT:    movl %esi, %ecx
 ; X64-NEXT:    negl %ecx
-; X64-NEXT:    xorl %eax, %eax
-; X64-NEXT:    cmpb $1, %dil
-; X64-NEXT:    sbbl %eax, %eax
-; X64-NEXT:    orl %ecx, %eax
+; X64-NEXT:    testb $1, %dil
+; X64-NEXT:    movw $-1, %ax
+; X64-NEXT:    cmovnew %cx, %ax
 ; X64-NEXT:    andl %esi, %eax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
@@ -200,22 +193,17 @@ define <4 x i32> @and_select_neg_v4xi32(i1 %a0, <4 x i32> %a1) nounwind {
 define i32 @and_select_no_neg(i1 %a0, i32 inreg %a1) nounwind {
 ; X86-LABEL: and_select_no_neg:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    andb $1, %cl
-; X86-NEXT:    xorl %edx, %edx
-; X86-NEXT:    cmpb $1, %cl
-; X86-NEXT:    sbbl %edx, %edx
-; X86-NEXT:    orl %eax, %edx
-; X86-NEXT:    andl %edx, %eax
+; X86-NEXT:    testb $1, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $-1, %ecx
+; X86-NEXT:    cmovnel %eax, %ecx
+; X86-NEXT:    andl %ecx, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: and_select_no_neg:
 ; X64:       # %bb.0:
-; X64-NEXT:    andb $1, %dil
-; X64-NEXT:    xorl %eax, %eax
-; X64-NEXT:    cmpb $1, %dil
-; X64-NEXT:    sbbl %eax, %eax
-; X64-NEXT:    orl %esi, %eax
+; X64-NEXT:    testb $1, %dil
+; X64-NEXT:    movl $-1, %eax
+; X64-NEXT:    cmovnel %esi, %eax
 ; X64-NEXT:    andl %esi, %eax
 ; X64-NEXT:    retq
   %sub = sub i32 %a1, 0
@@ -255,26 +243,19 @@ define i32 @and_select_neg_wrong_const(i1 %a0, i32 inreg %a1) nounwind {
 define i32 @and_select_neg_different_op(i1 %a0, i32 inreg %a1, i32 inreg %a2) nounwind {
 ; X86-LABEL: and_select_neg_different_op:
 ; X86:       # %bb.0:
-; X86-NEXT:    pushl %esi
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    andb $1, %cl
 ; X86-NEXT:    negl %edx
-; X86-NEXT:    xorl %esi, %esi
-; X86-NEXT:    cmpb $1, %cl
-; X86-NEXT:    sbbl %esi, %esi
-; X86-NEXT:    orl %edx, %esi
-; X86-NEXT:    andl %esi, %eax
-; X86-NEXT:    popl %esi
+; X86-NEXT:    testb $1, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $-1, %ecx
+; X86-NEXT:    cmovnel %edx, %ecx
+; X86-NEXT:    andl %ecx, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: and_select_neg_different_op:
 ; X64:       # %bb.0:
-; X64-NEXT:    andb $1, %dil
 ; X64-NEXT:    negl %edx
-; X64-NEXT:    xorl %eax, %eax
-; X64-NEXT:    cmpb $1, %dil
-; X64-NEXT:    sbbl %eax, %eax
-; X64-NEXT:    orl %edx, %eax
+; X64-NEXT:    testb $1, %dil
+; X64-NEXT:    movl $-1, %eax
+; X64-NEXT:    cmovnel %edx, %eax
 ; X64-NEXT:    andl %esi, %eax
 ; X64-NEXT:    retq
   %sub = sub i32 0, %a2
@@ -427,29 +408,22 @@ define i64 @and_select_sub_1_to_blsr_i64(i1 %a0, i64 %a1) nounwind {
 define i16 @and_select_sub_1_i16(i1 %a0, i16 %a1) nounwind {
 ; X86-LABEL: and_select_sub_1_i16:
 ; X86:       # %bb.0:
-; X86-NEXT:    pushl %esi
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    andb $1, %cl
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    leal -1(%edx), %esi
-; X86-NEXT:    xorl %eax, %eax
-; X86-NEXT:    cmpb $1, %cl
-; X86-NEXT:    sbbl %eax, %eax
-; X86-NEXT:    orl %esi, %eax
-; X86-NEXT:    andl %edx, %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    leal -1(%ecx), %edx
+; X86-NEXT:    testb $1, {{[0-9]+}}(%esp)
+; X86-NEXT:    movw $-1, %ax
+; X86-NEXT:    cmovnew %dx, %ax
+; X86-NEXT:    andl %ecx, %eax
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
-; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: and_select_sub_1_i16:
 ; X64:       # %bb.0:
 ; X64-NEXT:    # kill: def $esi killed $esi def $rsi
-; X64-NEXT:    andb $1, %dil
 ; X64-NEXT:    leal -1(%rsi), %ecx
-; X64-NEXT:    xorl %eax, %eax
-; X64-NEXT:    cmpb $1, %dil
-; X64-NEXT:    sbbl %eax, %eax
-; X64-NEXT:    orl %ecx, %eax
+; X64-NEXT:    testb $1, %dil
+; X64-NEXT:    movw $-1, %ax
+; X64-NEXT:    cmovnew %cx, %ax
 ; X64-NEXT:    andl %esi, %eax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
@@ -492,27 +466,20 @@ define <4 x i32> @and_select_sub_1_v4xi32(i1 %a0, <4 x i32> %a1) nounwind {
 define i32 @and_select_no_sub_1(i1 %a0, i32 inreg %a1) nounwind {
 ; X86-LABEL: and_select_no_sub_1:
 ; X86:       # %bb.0:
-; X86-NEXT:    pushl %esi
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    andb $1, %cl
-; X86-NEXT:    leal -2(%eax), %edx
-; X86-NEXT:    xorl %esi, %esi
-; X86-NEXT:    cmpb $1, %cl
-; X86-NEXT:    sbbl %esi, %esi
-; X86-NEXT:    orl %edx, %esi
-; X86-NEXT:    andl %esi, %eax
-; X86-NEXT:    popl %esi
+; X86-NEXT:    leal -2(%eax), %ecx
+; X86-NEXT:    testb $1, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $-1, %edx
+; X86-NEXT:    cmovnel %ecx, %edx
+; X86-NEXT:    andl %edx, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: and_select_no_sub_1:
 ; X64:       # %bb.0:
 ; X64-NEXT:    # kill: def $esi killed $esi def $rsi
-; X64-NEXT:    andb $1, %dil
 ; X64-NEXT:    leal -2(%rsi), %ecx
-; X64-NEXT:    xorl %eax, %eax
-; X64-NEXT:    cmpb $1, %dil
-; X64-NEXT:    sbbl %eax, %eax
-; X64-NEXT:    orl %ecx, %eax
+; X64-NEXT:    testb $1, %dil
+; X64-NEXT:    movl $-1, %eax
+; X64-NEXT:    cmovnel %ecx, %eax
 ; X64-NEXT:    andl %esi, %eax
 ; X64-NEXT:    retq
   %sub = add i32 %a1, -2
@@ -551,27 +518,20 @@ define i32 @and_select_sub_1_wrong_const(i1 %a0, i32 inreg %a1) nounwind {
 define i32 @and_select_sub_1_different_op(i1 %a0, i32 inreg %a1, i32 inreg %a2) nounwind {
 ; X86-LABEL: and_select_sub_1_different_op:
 ; X86:       # %bb.0:
-; X86-NEXT:    pushl %esi
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    andb $1, %cl
-; X86-NEXT:    decl %edx
-; X86-NEXT:    xorl %esi, %esi
-; X86-NEXT:    cmpb $1, %cl
-; X86-NEXT:    sbbl %esi, %esi
-; X86-NEXT:    orl %edx, %esi
-; X86-NEXT:    andl %esi, %eax
-; X86-NEXT:    popl %esi
+; X86-NEXT:    leal -1(%edx), %ecx
+; X86-NEXT:    testb $1, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $-1, %edx
+; X86-NEXT:    cmovnel %ecx, %edx
+; X86-NEXT:    andl %edx, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: and_select_sub_1_different_op:
 ; X64:       # %bb.0:
 ; X64-NEXT:    # kill: def $edx killed $edx def $rdx
-; X64-NEXT:    andb $1, %dil
 ; X64-NEXT:    leal -1(%rdx), %ecx
-; X64-NEXT:    xorl %eax, %eax
-; X64-NEXT:    cmpb $1, %dil
-; X64-NEXT:    sbbl %eax, %eax
-; X64-NEXT:    orl %ecx, %eax
+; X64-NEXT:    testb $1, %dil
+; X64-NEXT:    movl $-1, %eax
+; X64-NEXT:    cmovnel %ecx, %eax
 ; X64-NEXT:    andl %esi, %eax
 ; X64-NEXT:    retq
   %sub = add i32 %a2, -1
@@ -809,27 +769,20 @@ define i32 @xor_select_no_sub_1(i1 %a0, i32 inreg %a1) nounwind {
 define i32 @xor_select_sub_1_wrong_const(i1 %a0, i32 inreg %a1) nounwind {
 ; X86-LABEL: xor_select_sub_1_wrong_const:
 ; X86:       # %bb.0:
-; X86-NEXT:    pushl %esi
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    andb $1, %cl
-; X86-NEXT:    leal -1(%eax), %edx
-; X86-NEXT:    xorl %esi, %esi
-; X86-NEXT:    cmpb $1, %cl
-; X86-NEXT:    sbbl %esi, %esi
-; X86-NEXT:    orl %edx, %esi
-; X86-NEXT:    xorl %esi, %eax
-; X86-NEXT:    popl %esi
+; X86-NEXT:    leal -1(%eax), %ecx
+; X86-NEXT:    testb $1, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $-1, %edx
+; X86-NEXT:    cmovnel %ecx, %edx
+; X86-NEXT:    xorl %edx, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: xor_select_sub_1_wrong_const:
 ; X64:       # %bb.0:
 ; X64-NEXT:    # kill: def $esi killed $esi def $rsi
-; X64-NEXT:    andb $1, %dil
 ; X64-NEXT:    leal -1(%rsi), %ecx
-; X64-NEXT:    xorl %eax, %eax
-; X64-NEXT:    cmpb $1, %dil
-; X64-NEXT:    sbbl %eax, %eax
-; X64-NEXT:    orl %ecx, %eax
+; X64-NEXT:    testb $1, %dil
+; X64-NEXT:    movl $-1, %eax
+; X64-NEXT:    cmovnel %ecx, %eax
 ; X64-NEXT:    xorl %esi, %eax
 ; X64-NEXT:    retq
   %sub = add i32 %a1, -1
diff --git a/llvm/test/CodeGen/X86/pr35972.ll b/llvm/test/CodeGen/X86/pr35972.ll
index 981c47800c0f3..e609981c2e752 100644
--- a/llvm/test/CodeGen/X86/pr35972.ll
+++ b/llvm/test/CodeGen/X86/pr35972.ll
@@ -6,9 +6,10 @@ define void @test3(i32 %c, ptr %ptr) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT:    xorl %ecx, %ecx
-; CHECK-NEXT:    cmpl $1, {{[0-9]+}}(%esp)
-; CHECK-NEXT:    sbbl %ecx, %ecx
-; CHECK-NEXT:    kmovd %ecx, %k0
+; CHECK-NEXT:    cmpl $0, {{[0-9]+}}(%esp)
+; CHECK-NEXT:    movl $-1, %edx
+; CHECK-NEXT:    cmovnel %ecx, %edx
+; CHECK-NEXT:    kmovd %edx, %k0
 ; CHECK-NEXT:    kunpckdq %k0, %k0, %k0
 ; CHECK-NEXT:    kmovq %k0, (%eax)
 ; CHECK-NEXT:    retl
diff --git a/llvm/test/CodeGen/X86/sbb-false-dep.ll b/llvm/test/CodeGen/X86/sbb-false-dep.ll
index 34a92cb58692b..f53f362289c45 100644
--- a/llvm/test/CodeGen/X86/sbb-false-dep.ll
+++ b/llvm/test/CodeGen/X86/sbb-false-dep.ll
@@ -12,36 +12,32 @@ define i32 @mallocbench_gs(ptr noundef %0, ptr noundef %1, i32 noundef %2, i32 n
 ; CHECK-NEXT:    pushq %r14
 ; CHECK-NEXT:    pushq %r12
 ; CHECK-NEXT:    pushq %rbx
-; CHECK-NEXT:    movl %r8d, %ebp
-; CHECK-NEXT:    movl %ecx, %r14d
-; CHECK-NEXT:    movl %edx, %r15d
-; CHECK-NEXT:    movq %rsi, %rbx
+; CHECK-NEXT:    movl %r8d, %ebx
+; CHECK-NEXT:    movl %ecx, %ebp
+; CHECK-NEXT:    movl %edx, %r14d
+; CHECK-NEXT:    movq %rsi, %r15
 ; CHECK-NEXT:    movq %rdi, %r12
 ; CHECK-NEXT:    movq (%rsi), %rdi
 ; CHECK-NEXT:    movq 8(%rsi), %rsi
-; CHECK-NEXT:    movq %rbx, %rdx
+; CHECK-NEXT:    movq %r15, %rdx
 ; CHECK-NEXT:    callq foo1 at PLT
-; CHECK-NEXT:    movq 8(%rbx), %rax
+; CHECK-NEXT:    testl %ebx, %ebx
+; CHECK-NEXT:    movq 8(%r15), %rax
 ; CHECK-NEXT:    movq (%rax), %rax
-; CHECK-NEXT:    xorl %r10d, %r10d
-; CHECK-NEXT:    movl %ebp, %ecx
-; CHECK-NEXT:    negl %ecx
-; CHECK-NEXT:    movl $0, %r11d
-; CHECK-NEXT:    sbbq %r11, %r11
-; CHECK-NEXT:    orq %rax, %r11
-; CHECK-NEXT:    cmpl $1, %ebp
-; CHECK-NEXT:    sbbq %r10, %r10
-; CHECK-NEXT:    orq %rax, %r10
+; CHECK-NEXT:    movq $-1, %rcx
+; CHECK-NEXT:    movq $-1, %r10
+; CHECK-NEXT:    cmoveq %rax, %r10
+; CHECK-NEXT:    cmoveq %rcx, %rax
 ; CHECK-NEXT:    subq $8, %rsp
 ; CHECK-NEXT:    movq %r12, %rdi
-; CHECK-NEXT:    movl %r15d, %esi
-; CHECK-NEXT:    movl %r14d, %edx
+; CHECK-NEXT:    movl %r14d, %esi
+; CHECK-NEXT:    movl %ebp, %edx
 ; CHECK-NEXT:    xorl %ecx, %ecx
 ; CHECK-NEXT:    xorl %r8d, %r8d
 ; CHECK-NEXT:    xorl %r9d, %r9d
+; CHECK-NEXT:    pushq %rax
 ; CHECK-NEXT:    pushq %r10
-; CHECK-NEXT:    pushq %r11
-; CHECK-NEXT:    pushq %rbx
+; CHECK-NEXT:    pushq %r15
 ; CHECK-NEXT:    callq foo2 at PLT
 ; CHECK-NEXT:    addq $32, %rsp
 ; CHECK-NEXT:    popq %rbx
diff --git a/llvm/test/CodeGen/X86/select.ll b/llvm/test/CodeGen/X86/select.ll
index 4e31b48ec5cec..f5ac941fda930 100644
--- a/llvm/test/CodeGen/X86/select.ll
+++ b/llvm/test/CodeGen/X86/select.ll
@@ -627,13 +627,21 @@ define void @test8(i1 %c, ptr %dst.addr, <6 x i32> %src1,<6 x i32> %src2) nounwi
 ;; Test integer select between values and constants.
 
 define i64 @test9(i64 %x, i64 %y) nounwind readnone ssp noredzone {
-; CHECK-LABEL: test9:
-; CHECK:       ## %bb.0:
-; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    cmpq $1, %rdi
-; CHECK-NEXT:    sbbq %rax, %rax
-; CHECK-NEXT:    orq %rsi, %rax
-; CHECK-NEXT:    retq
+; GENERIC-LABEL: test9:
+; GENERIC:       ## %bb.0:
+; GENERIC-NEXT:    testq %rdi, %rdi
+; GENERIC-NEXT:    movq $-1, %rax
+; GENERIC-NEXT:    cmovneq %rsi, %rax
+; GENERIC-NEXT:    retq
+;
+; ATOM-LABEL: test9:
+; ATOM:       ## %bb.0:
+; ATOM-NEXT:    testq %rdi, %rdi
+; ATOM-NEXT:    movq $-1, %rax
+; ATOM-NEXT:    cmovneq %rsi, %rax
+; ATOM-NEXT:    nop
+; ATOM-NEXT:    nop
+; ATOM-NEXT:    retq
 ;
 ; ATHLON-LABEL: test9:
 ; ATHLON:       ## %bb.0:
@@ -667,13 +675,21 @@ define i64 @test9(i64 %x, i64 %y) nounwind readnone ssp noredzone {
 
 ;; Same as test9
 define i64 @test9a(i64 %x, i64 %y) nounwind readnone ssp noredzone {
-; CHECK-LABEL: test9a:
-; CHECK:       ## %bb.0:
-; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    cmpq $1, %rdi
-; CHECK-NEXT:    sbbq %rax, %rax
-; CHECK-NEXT:    orq %rsi, %rax
-; CHECK-NEXT:    retq
+; GENERIC-LABEL: test9a:
+; GENERIC:       ## %bb.0:
+; GENERIC-NEXT:    testq %rdi, %rdi
+; GENERIC-NEXT:    movq $-1, %rax
+; GENERIC-NEXT:    cmovneq %rsi, %rax
+; GENERIC-NEXT:    retq
+;
+; ATOM-LABEL: test9a:
+; ATOM:       ## %bb.0:
+; ATOM-NEXT:    testq %rdi, %rdi
+; ATOM-NEXT:    movq $-1, %rax
+; ATOM-NEXT:    cmovneq %rsi, %rax
+; ATOM-NEXT:    nop
+; ATOM-NEXT:    nop
+; ATOM-NEXT:    retq
 ;
 ; ATHLON-LABEL: test9a:
 ; ATHLON:       ## %bb.0:
@@ -779,13 +795,21 @@ define i64 @test10(i64 %x, i64 %y) nounwind readnone ssp noredzone {
 }
 
 define i64 @test11(i64 %x, i64 %y) nounwind readnone ssp noredzone {
-; CHECK-LABEL: test11:
-; CHECK:       ## %bb.0:
-; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    negq %rdi
-; CHECK-NEXT:    sbbq %rax, %rax
-; CHECK-NEXT:    orq %rsi, %rax
-; CHECK-NEXT:    retq
+; GENERIC-LABEL: test11:
+; GENERIC:       ## %bb.0:
+; GENERIC-NEXT:    testq %rdi, %rdi
+; GENERIC-NEXT:    movq $-1, %rax
+; GENERIC-NEXT:    cmoveq %rsi, %rax
+; GENERIC-NEXT:    retq
+;
+; ATOM-LABEL: test11:
+; ATOM:       ## %bb.0:
+; ATOM-NEXT:    testq %rdi, %rdi
+; ATOM-NEXT:    movq $-1, %rax
+; ATOM-NEXT:    cmoveq %rsi, %rax
+; ATOM-NEXT:    nop
+; ATOM-NEXT:    nop
+; ATOM-NEXT:    retq
 ;
 ; ATHLON-LABEL: test11:
 ; ATHLON:       ## %bb.0:
@@ -818,13 +842,21 @@ define i64 @test11(i64 %x, i64 %y) nounwind readnone ssp noredzone {
 }
 
 define i64 @test11a(i64 %x, i64 %y) nounwind readnone ssp noredzone {
-; CHECK-LABEL: test11a:
-; CHECK:       ## %bb.0:
-; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    negq %rdi
-; CHECK-NEXT:    sbbq %rax, %rax
-; CHECK-NEXT:    orq %rsi, %rax
-; CHECK-NEXT:    retq
+; GENERIC-LABEL: test11a:
+; GENERIC:       ## %bb.0:
+; GENERIC-NEXT:    testq %rdi, %rdi
+; GENERIC-NEXT:    movq $-1, %rax
+; GENERIC-NEXT:    cmoveq %rsi, %rax
+; GENERIC-NEXT:    retq
+;
+; ATOM-LABEL: test11a:
+; ATOM:       ## %bb.0:
+; ATOM-NEXT:    testq %rdi, %rdi
+; ATOM-NEXT:    movq $-1, %rax
+; ATOM-NEXT:    cmoveq %rsi, %rax
+; ATOM-NEXT:    nop
+; ATOM-NEXT:    nop
+; ATOM-NEXT:    retq
 ;
 ; ATHLON-LABEL: test11a:
 ; ATHLON:       ## %bb.0:
diff --git a/llvm/test/CodeGen/X86/umul_fix_sat.ll b/llvm/test/CodeGen/X86/umul_fix_sat.ll
index 8c7078c726328..6728d25abf1b6 100644
--- a/llvm/test/CodeGen/X86/umul_fix_sat.ll
+++ b/llvm/test/CodeGen/X86/umul_fix_sat.ll
@@ -441,33 +441,31 @@ define i64 @func7(i64 %x, i64 %y) nounwind {
 ; X86-NEXT:    pushl %ebx
 ; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT:    movl %edi, %eax
+; X86-NEXT:    movl %ebx, %eax
 ; X86-NEXT:    mull %ebp
 ; X86-NEXT:    movl %edx, %ecx
 ; X86-NEXT:    movl %eax, %esi
-; X86-NEXT:    movl %edi, %eax
-; X86-NEXT:    mull %ebx
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    mull %edi
 ; X86-NEXT:    addl %edx, %esi
 ; X86-NEXT:    adcl $0, %ecx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    mull %ebp
-; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    movl %edx, %ebx
 ; X86-NEXT:    movl %eax, %ebp
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    mull %ebx
+; X86-NEXT:    mull %edi
 ; X86-NEXT:    addl %esi, %eax
 ; X86-NEXT:    adcl %ecx, %edx
-; X86-NEXT:    adcl $0, %edi
+; X86-NEXT:    adcl $0, %ebx
 ; X86-NEXT:    addl %ebp, %edx
-; X86-NEXT:    adcl $0, %edi
-; X86-NEXT:    xorl %ecx, %ecx
-; X86-NEXT:    negl %edi
-; X86-NEXT:    sbbl %ecx, %ecx
-; X86-NEXT:    orl %ecx, %eax
-; X86-NEXT:    orl %ecx, %edx
+; X86-NEXT:    adcl $0, %ebx
+; X86-NEXT:    movl $-1, %ecx
+; X86-NEXT:    cmovnel %ecx, %eax
+; X86-NEXT:    cmovnel %ecx, %edx
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    popl %edi
 ; X86-NEXT:    popl %ebx

>From 2ce9ac3acecde41939890ceb3702d4ac0c223115 Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Sun, 20 Jul 2025 10:00:14 -0400
Subject: [PATCH 2/3] Fix regression and review concerns

---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 20 +++-----------------
 llvm/test/CodeGen/X86/pr35972.ll        |  7 +++----
 2 files changed, 6 insertions(+), 21 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 85238722103db..47e86d1ad87c3 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -24921,24 +24921,10 @@ static SDValue LowerSELECTWithCmpZero(SDValue CmpVal, SDValue LHS, SDValue RHS,
     // If CMOV is available, use it instead. Only prefer CMOV when SBB
     // dependency breaking is not available or when CMOV is likely to be more
     // efficient
-    if (Subtarget.canUseCMOV() &&
+    if (!isNullConstant(Y) && Subtarget.canUseCMOV() &&
         (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) &&
-        !Subtarget.hasSBBDepBreaking()) {
-      // Create comparison against zero to set EFLAGS
-      SDValue Zero = DAG.getConstant(0, DL, CmpVT);
-      SDValue Cmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, CmpVal, Zero);
-
-      // For CMOV: FalseVal is used when condition is false, TrueVal when
-      // condition is true We want: when X==0 return -1, when X!=0 return Y So
-      // condition should be (X == 0), TrueVal = -1, FalseVal = Y The SBB
-      // pattern implements: (CmpVal X86CC 0) ? LHS : RHS We need to implement
-      // exactly the same select operation with CMOV CMOV semantics: CMOV
-      // condition, TrueVal, FalseVal Returns TrueVal if condition is true,
-      // FalseVal if condition is false
-
-      return DAG.getNode(X86ISD::CMOV, DL, VT, RHS, LHS,
-                         DAG.getTargetConstant(X86CC, DL, MVT::i8), Cmp);
-    }
+        !Subtarget.hasSBBDepBreaking())
+      return SDValue();
 
     // Fall back to SBB pattern for older processors or unsupported types
     SDVTList CmpVTs = DAG.getVTList(CmpVT, MVT::i32);
diff --git a/llvm/test/CodeGen/X86/pr35972.ll b/llvm/test/CodeGen/X86/pr35972.ll
index e609981c2e752..981c47800c0f3 100644
--- a/llvm/test/CodeGen/X86/pr35972.ll
+++ b/llvm/test/CodeGen/X86/pr35972.ll
@@ -6,10 +6,9 @@ define void @test3(i32 %c, ptr %ptr) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT:    xorl %ecx, %ecx
-; CHECK-NEXT:    cmpl $0, {{[0-9]+}}(%esp)
-; CHECK-NEXT:    movl $-1, %edx
-; CHECK-NEXT:    cmovnel %ecx, %edx
-; CHECK-NEXT:    kmovd %edx, %k0
+; CHECK-NEXT:    cmpl $1, {{[0-9]+}}(%esp)
+; CHECK-NEXT:    sbbl %ecx, %ecx
+; CHECK-NEXT:    kmovd %ecx, %k0
 ; CHECK-NEXT:    kunpckdq %k0, %k0, %k0
 ; CHECK-NEXT:    kmovq %k0, (%eax)
 ; CHECK-NEXT:    retl
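
A note on the pr35972.ll reversion just above: when Y is the zero constant, the
sbb form already materializes the whole 0/-1 mask, so forcing cmp+cmov costs an
extra constant and an extra register. That appears to be why this patch adds the
!isNullConstant(Y) check and keeps sbb for that case. A minimal sketch of the
kept case (hypothetical function, simplified from the mask pattern in pr35972.ll):

  ; select (X == 0) ? -1 : 0, i.e. a 0/-1 mask of the compare
  define i32 @zero_or_allones_mask(i32 %x) {
    %cmp = icmp eq i32 %x, 0
    %mask = select i1 %cmp, i32 -1, i32 0
    ret i32 %mask
  }

  ; The restored CHECK lines in pr35972.ll keep the sbb mask:
  ;   xorl %ecx, %ecx
  ;   cmpl $1, {{[0-9]+}}(%esp)
  ;   sbbl %ecx, %ecx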

>From 74a7e698db7bc7078f8424bab68799baa3cfde7c Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Sun, 20 Jul 2025 10:00:48 -0400
Subject: [PATCH 3/3] Grammar (NFC)

---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 47e86d1ad87c3..d206f3bba0b54 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -24920,13 +24920,13 @@ static SDValue LowerSELECTWithCmpZero(SDValue CmpVal, SDValue LHS, SDValue RHS,
 
     // If CMOV is available, use it instead. Only prefer CMOV when SBB
     // dependency breaking is not available or when CMOV is likely to be more
-    // efficient
+    // efficient.
     if (!isNullConstant(Y) && Subtarget.canUseCMOV() &&
         (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) &&
         !Subtarget.hasSBBDepBreaking())
       return SDValue();
 
-    // Fall back to SBB pattern for older processors or unsupported types
+    // Fall back to SBB pattern for older processors or unsupported types.
     SDVTList CmpVTs = DAG.getVTList(CmpVT, MVT::i32);
 
     // 'X - 1' sets the carry flag if X == 0.


