[llvm] a21a7dd - [X86] Optimize umax(X,1)

Mon Mar 6 10:19:13 PST 2023

Author: Kazu Hirata
Date: 2023-03-06T10:18:57-08:00
New Revision: a21a7ddf5ad1f34874cddb4d10cbd40b8ce1bef8

URL: https://github.com/llvm/llvm-project/commit/a21a7ddf5ad1f34874cddb4d10cbd40b8ce1bef8
DIFF: https://github.com/llvm/llvm-project/commit/a21a7ddf5ad1f34874cddb4d10cbd40b8ce1bef8.diff

LOG: [X86] Optimize umax(X,1)

Without this patch:

  %cond = call i32 @llvm.umax.i32(i32 %X, i32 1)

is compiled as:

  83 ff 02                   cmp    $0x2,%edi
  b8 01 00 00 00             mov    $0x1,%eax
  0f 43 c7                   cmovae %edi,%eax

With this patch, the compiler generates:

  89 f8                      mov    %edi,%eax
  83 ff 01                   cmp    $0x1,%edi
  83 d0 00                   adc    $0x0,%eax

saving 3 bytes.  We should be able to save 5 bytes in larger functions
where the mov is unnecessary.

This patch converts the specific cmov pattern to cmp $1 followed by
adc $0.

This patch partially fixes:

https://github.com/llvm/llvm-project/issues/60374

The LLVM IR optimizer has yet to canonicalize max expressions to
actual @llvm.umax calls.

Differential Revision: https://reviews.llvm.org/D144451

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/umax.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 26b6866cf476..3862f95d91d2 100644

--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -47428,6 +47428,32 @@ static SDValue combineCMov(SDNode *N, SelectionDAG &DAG,
     }
   }
 
+  // Transform:
+  //
+  //   (cmov 1 T (uge T 2))
+  //
+  // to:
+  //
+  //   (adc T 0 (sub T 1))
+  if (CC == X86::COND_AE && isOneConstant(FalseOp) &&
+      Cond.getOpcode() == X86ISD::SUB && Cond->hasOneUse()) {
+    SDValue Cond0 = Cond.getOperand(0);
+    if (Cond0.getOpcode() == ISD::TRUNCATE)
+      Cond0 = Cond0.getOperand(0);
+    auto *Sub1C = dyn_cast<ConstantSDNode>(Cond.getOperand(1));
+    if (Cond0 == TrueOp && Sub1C && Sub1C->getZExtValue() == 2) {
+      EVT CondVT = Cond->getValueType(0);
+      EVT OuterVT = N->getValueType(0);
+      // Subtract 1 and generate a carry.
+      SDValue NewSub =
+          DAG.getNode(X86ISD::SUB, DL, Cond->getVTList(), Cond.getOperand(0),
+                      DAG.getConstant(1, DL, CondVT));
+      SDValue EFLAGS(NewSub.getNode(), 1);
+      return DAG.getNode(X86ISD::ADC, DL, DAG.getVTList(OuterVT, MVT::i32),
+                         TrueOp, DAG.getConstant(0, DL, OuterVT), EFLAGS);
+    }
+  }
+
   // Fold and/or of setcc's to double CMOV:
   //   (CMOV F, T, ((cc1 | cc2) != 0)) -> (CMOV (CMOV F, T, cc1), T, cc2)
   //   (CMOV F, T, ((cc1 & cc2) != 0)) -> (CMOV (CMOV T, F, !cc1), F, !cc2)

diff  --git a/llvm/test/CodeGen/X86/umax.ll b/llvm/test/CodeGen/X86/umax.ll
index e4b465449e25..ce71a891f45e 100644
--- a/llvm/test/CodeGen/X86/umax.ll
+++ b/llvm/test/CodeGen/X86/umax.ll
@@ -44,18 +44,17 @@ define i8 @test_i8(i8 %a, i8 %b) nounwind {
 define i8 @test_i8_1(i8 %a) nounwind {
 ; X64-LABEL: test_i8_1:
 ; X64:       # %bb.0:
-; X64-NEXT:    cmpb $2, %dil
-; X64-NEXT:    movl $1, %eax
-; X64-NEXT:    cmovael %edi, %eax
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    cmpb $1, %al
+; X64-NEXT:    adcl $0, %eax
 ; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: test_i8_1:
 ; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    cmpb $2, %cl
-; X86-NEXT:    movl $1, %eax
-; X86-NEXT:    cmovael %ecx, %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    cmpb $1, %al
+; X86-NEXT:    adcl $0, %eax
 ; X86-NEXT:    # kill: def $al killed $al killed $eax
 ; X86-NEXT:    retl
   %r = call i8 @llvm.umax.i8(i8 %a, i8 1)
@@ -86,18 +85,17 @@ define i16 @test_i16(i16 %a, i16 %b) nounwind {
 define i16 @test_i16_1(i16 %a) nounwind {
 ; X64-LABEL: test_i16_1:
 ; X64:       # %bb.0:
-; X64-NEXT:    cmpw $2, %di
-; X64-NEXT:    movl $1, %eax
-; X64-NEXT:    cmovael %edi, %eax
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    cmpw $1, %ax
+; X64-NEXT:    adcl $0, %eax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: test_i16_1:
 ; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    cmpw $2, %cx
-; X86-NEXT:    movl $1, %eax
-; X86-NEXT:    cmovael %ecx, %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    cmpw $1, %ax
+; X86-NEXT:    adcl $0, %eax
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    retl
   %r = call i16 @llvm.umax.i16(i16 %a, i16 1)
@@ -149,17 +147,16 @@ define i32 @test_i32(i32 %a, i32 %b) nounwind {
 define i32 @test_i32_1(i32 %a) nounwind {
 ; X64-LABEL: test_i32_1:
 ; X64:       # %bb.0:
-; X64-NEXT:    cmpl $2, %edi
-; X64-NEXT:    movl $1, %eax
-; X64-NEXT:    cmovael %edi, %eax
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    cmpl $1, %edi
+; X64-NEXT:    adcl $0, %eax
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: test_i32_1:
 ; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    cmpl $2, %ecx
-; X86-NEXT:    movl $1, %eax
-; X86-NEXT:    cmovael %ecx, %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    cmpl $1, %eax
+; X86-NEXT:    adcl $0, %eax
 ; X86-NEXT:    retl
   %r = call i32 @llvm.umax.i32(i32 %a, i32 1)
   ret i32 %r
@@ -198,9 +195,9 @@ define i64 @test_i64(i64 %a, i64 %b) nounwind {
 define i64 @test_i64_1(i64 %a) nounwind {
 ; X64-LABEL: test_i64_1:
 ; X64:       # %bb.0:
-; X64-NEXT:    cmpq $2, %rdi
-; X64-NEXT:    movl $1, %eax
-; X64-NEXT:    cmovaeq %rdi, %rax
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    cmpq $1, %rdi
+; X64-NEXT:    adcq $0, %rax
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: test_i64_1:
@@ -208,11 +205,11 @@ define i64 @test_i64_1(i64 %a) nounwind {
 ; X86-NEXT:    pushl %esi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    cmpl $2, %ecx
-; X86-NEXT:    movl $1, %eax
-; X86-NEXT:    movl $1, %esi
-; X86-NEXT:    cmovael %ecx, %esi
+; X86-NEXT:    cmpl $1, %ecx
+; X86-NEXT:    movl %ecx, %esi
+; X86-NEXT:    adcl $0, %esi
 ; X86-NEXT:    testl %edx, %edx
+; X86-NEXT:    movl $1, %eax
 ; X86-NEXT:    cmovnel %ecx, %eax
 ; X86-NEXT:    cmovel %esi, %eax
 ; X86-NEXT:    popl %esi