[PATCH] D144451: [X86] Optimize umax(X,1)

Tue Feb 21 00:16:44 PST 2023

kazu created this revision.
Herald added subscribers: pengfei, hiraditya.
Herald added a project: All.
kazu requested review of this revision.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

Without this patch:

  %cond = call i32 @llvm.umax.i32(i32 %X, i32 1)

is compiled as:

  83 ff 02                   cmp    $0x2,%edi
  b8 01 00 00 00             mov    $0x1,%eax
  0f 43 c7                   cmovae %edi,%eax

With this patch, the compiler generates:

  89 f8                      mov    %edi,%eax
  83 ff 01                   cmp    $0x1,%edi
  83 d0 00                   adc    $0x0,%eax

This saves 3 bytes (11 bytes down to 8).  In larger functions where the
mov is unnecessary, the saving should be 5 bytes.

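This patch converts the specific cmov pattern to cmp $1 followed by
adc $0.  The cmp $1 sets the carry flag only when X is 0 (unsigned
borrow), so the adc produces 1 when X is 0 and leaves X unchanged
otherwise, which is exactly umax(X, 1).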

This patch partially fixes:

https://github.com/llvm/llvm-project/issues/60374

The fix is only partial because the LLVM IR optimizer does not yet
canonicalize max expressions into actual @llvm.umax calls.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D144451

Files:
  llvm/lib/Target/X86/X86ISelLowering.cpp
  llvm/test/CodeGen/X86/umax.ll


Index: llvm/test/CodeGen/X86/umax.ll
===================================================================

--- llvm/test/CodeGen/X86/umax.ll
+++ llvm/test/CodeGen/X86/umax.ll
@@ -107,17 +107,16 @@
 define i32 @test_i32_1(i32 %a) nounwind {
 ; X64-LABEL: test_i32_1:
 ; X64:       # %bb.0:
-; X64-NEXT:    cmpl $2, %edi
-; X64-NEXT:    movl $1, %eax
-; X64-NEXT:    cmovael %edi, %eax
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    cmpl $1, %edi
+; X64-NEXT:    adcl $0, %eax
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: test_i32_1:
 ; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    cmpl $2, %ecx
-; X86-NEXT:    movl $1, %eax
-; X86-NEXT:    cmovael %ecx, %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    cmpl $1, %eax
+; X86-NEXT:    adcl $0, %eax
 ; X86-NEXT:    retl
   %r = call i32 @llvm.umax.i32(i32 %a, i32 1)
   ret i32 %r
@@ -156,9 +155,9 @@
 define i64 @test_i64_1(i64 %a) nounwind {
 ; X64-LABEL: test_i64_1:
 ; X64:       # %bb.0:
-; X64-NEXT:    cmpq $2, %rdi
-; X64-NEXT:    movl $1, %eax
-; X64-NEXT:    cmovaeq %rdi, %rax
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    cmpq $1, %rdi
+; X64-NEXT:    adcq $0, %rax
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: test_i64_1:
@@ -166,11 +165,11 @@
 ; X86-NEXT:    pushl %esi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    cmpl $2, %ecx
-; X86-NEXT:    movl $1, %eax
-; X86-NEXT:    movl $1, %esi
-; X86-NEXT:    cmovael %ecx, %esi
+; X86-NEXT:    cmpl $1, %ecx
+; X86-NEXT:    movl %ecx, %esi
+; X86-NEXT:    adcl $0, %esi
 ; X86-NEXT:    testl %edx, %edx
+; X86-NEXT:    movl $1, %eax
 ; X86-NEXT:    cmovnel %ecx, %eax
 ; X86-NEXT:    cmovel %esi, %eax
 ; X86-NEXT:    popl %esi
Index: llvm/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -47266,6 +47266,28 @@
     }
   }
 
+  // Transform:
+  //
+  //   (cmov 1 T (uge T 2))
+  //
+  // to:
+  //
+  //   (adc T 0 (sub T 1))
+  if (CC == X86::COND_AE && isOneConstant(FalseOp) &&
+      Cond.getOpcode() == X86ISD::SUB && Cond.getOperand(0) == TrueOp &&
+      Cond.hasOneUse()) {
+    ConstantSDNode *Sub1C = dyn_cast<ConstantSDNode>(Cond.getOperand(1));
+    if (Sub1C && Sub1C->getZExtValue() == 2) {
+      EVT VT = Cond->getValueType(0);
+      // Subtract 1 and generate a carry.
+      SDValue NewSub = DAG.getNode(X86ISD::SUB, DL, Cond->getVTList(), TrueOp,
+                                   DAG.getConstant(1, DL, VT));
+      SDValue EFLAGS(NewSub.getNode(), 1);
+      return DAG.getNode(X86ISD::ADC, DL, DAG.getVTList(VT, MVT::i32), TrueOp,
+                         DAG.getConstant(0, DL, VT), EFLAGS);
+    }
+  }
+
   // Fold and/or of setcc's to double CMOV:
   //   (CMOV F, T, ((cc1 | cc2) != 0)) -> (CMOV (CMOV F, T, cc1), T, cc2)
   //   (CMOV F, T, ((cc1 & cc2) != 0)) -> (CMOV (CMOV T, F, !cc1), F, !cc2)


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D144451.499053.patch
Type: text/x-patch
Size: 2981 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230221/84ed29c2/attachment.bin>