[llvm] a21a7dd - [X86] Optimize umax(X,1) (NFC)
Kazu Hirata via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 6 10:19:13 PST 2023
Author: Kazu Hirata
Date: 2023-03-06T10:18:57-08:00
New Revision: a21a7ddf5ad1f34874cddb4d10cbd40b8ce1bef8
URL: https://github.com/llvm/llvm-project/commit/a21a7ddf5ad1f34874cddb4d10cbd40b8ce1bef8
DIFF: https://github.com/llvm/llvm-project/commit/a21a7ddf5ad1f34874cddb4d10cbd40b8ce1bef8.diff
LOG: [X86] Optimize umax(X,1) (NFC)
Without this patch:
%cond = call i32 @llvm.umax.i32(i32 %X, i32 1)
is compiled as:
83 ff 02 cmp $0x2,%edi
b8 01 00 00 00 mov $0x1,%eax
0f 43 c7 cmovae %edi,%eax
With this patch, the compiler generates:
89 f8 mov %edi,%eax
83 ff 01 cmp $0x1,%edi
83 d0 00 adc $0x0,%eax
saving 3 bytes. We should be able to save 5 bytes in larger functions
where the mov is unnecessary.
This patch converts the specific cmov pattern to cmp $1 followed by
adc $0.
This patch partially fixes:
https://github.com/llvm/llvm-project/issues/60374
The LLVM IR optimizer has yet to canonicalize max expressions to
@llvm.umax.
Differential Revision: https://reviews.llvm.org/D144451
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/umax.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 26b6866cf476..3862f95d91d2 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -47428,6 +47428,32 @@ static SDValue combineCMov(SDNode *N, SelectionDAG &DAG,
}
}
+ // Transform:
+ //
+ // (cmov 1 T (uge T 2))
+ //
+ // to:
+ //
+ // (adc T 0 (sub T 1))
+ if (CC == X86::COND_AE && isOneConstant(FalseOp) &&
+ Cond.getOpcode() == X86ISD::SUB && Cond->hasOneUse()) {
+ SDValue Cond0 = Cond.getOperand(0);
+ if (Cond0.getOpcode() == ISD::TRUNCATE)
+ Cond0 = Cond0.getOperand(0);
+ auto *Sub1C = dyn_cast<ConstantSDNode>(Cond.getOperand(1));
+ if (Cond0 == TrueOp && Sub1C && Sub1C->getZExtValue() == 2) {
+ EVT CondVT = Cond->getValueType(0);
+ EVT OuterVT = N->getValueType(0);
+ // Subtract 1 and generate a carry.
+ SDValue NewSub =
+ DAG.getNode(X86ISD::SUB, DL, Cond->getVTList(), Cond.getOperand(0),
+ DAG.getConstant(1, DL, CondVT));
+ SDValue EFLAGS(NewSub.getNode(), 1);
+ return DAG.getNode(X86ISD::ADC, DL, DAG.getVTList(OuterVT, MVT::i32),
+ TrueOp, DAG.getConstant(0, DL, OuterVT), EFLAGS);
+ }
+ }
+
// Fold and/or of setcc's to double CMOV:
// (CMOV F, T, ((cc1 | cc2) != 0)) -> (CMOV (CMOV F, T, cc1), T, cc2)
// (CMOV F, T, ((cc1 & cc2) != 0)) -> (CMOV (CMOV T, F, !cc1), F, !cc2)
diff --git a/llvm/test/CodeGen/X86/umax.ll b/llvm/test/CodeGen/X86/umax.ll
index e4b465449e25..ce71a891f45e 100644
--- a/llvm/test/CodeGen/X86/umax.ll
+++ b/llvm/test/CodeGen/X86/umax.ll
@@ -44,18 +44,17 @@ define i8 @test_i8(i8 %a, i8 %b) nounwind {
define i8 @test_i8_1(i8 %a) nounwind {
; X64-LABEL: test_i8_1:
; X64: # %bb.0:
-; X64-NEXT: cmpb $2, %dil
-; X64-NEXT: movl $1, %eax
-; X64-NEXT: cmovael %edi, %eax
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: cmpb $1, %al
+; X64-NEXT: adcl $0, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
;
; X86-LABEL: test_i8_1:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: cmpb $2, %cl
-; X86-NEXT: movl $1, %eax
-; X86-NEXT: cmovael %ecx, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: cmpb $1, %al
+; X86-NEXT: adcl $0, %eax
; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl
%r = call i8 @llvm.umax.i8(i8 %a, i8 1)
@@ -86,18 +85,17 @@ define i16 @test_i16(i16 %a, i16 %b) nounwind {
define i16 @test_i16_1(i16 %a) nounwind {
; X64-LABEL: test_i16_1:
; X64: # %bb.0:
-; X64-NEXT: cmpw $2, %di
-; X64-NEXT: movl $1, %eax
-; X64-NEXT: cmovael %edi, %eax
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: cmpw $1, %ax
+; X64-NEXT: adcl $0, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
;
; X86-LABEL: test_i16_1:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: cmpw $2, %cx
-; X86-NEXT: movl $1, %eax
-; X86-NEXT: cmovael %ecx, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: cmpw $1, %ax
+; X86-NEXT: adcl $0, %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
%r = call i16 @llvm.umax.i16(i16 %a, i16 1)
@@ -149,17 +147,16 @@ define i32 @test_i32(i32 %a, i32 %b) nounwind {
define i32 @test_i32_1(i32 %a) nounwind {
; X64-LABEL: test_i32_1:
; X64: # %bb.0:
-; X64-NEXT: cmpl $2, %edi
-; X64-NEXT: movl $1, %eax
-; X64-NEXT: cmovael %edi, %eax
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: cmpl $1, %edi
+; X64-NEXT: adcl $0, %eax
; X64-NEXT: retq
;
; X86-LABEL: test_i32_1:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: cmpl $2, %ecx
-; X86-NEXT: movl $1, %eax
-; X86-NEXT: cmovael %ecx, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: cmpl $1, %eax
+; X86-NEXT: adcl $0, %eax
; X86-NEXT: retl
%r = call i32 @llvm.umax.i32(i32 %a, i32 1)
ret i32 %r
@@ -198,9 +195,9 @@ define i64 @test_i64(i64 %a, i64 %b) nounwind {
define i64 @test_i64_1(i64 %a) nounwind {
; X64-LABEL: test_i64_1:
; X64: # %bb.0:
-; X64-NEXT: cmpq $2, %rdi
-; X64-NEXT: movl $1, %eax
-; X64-NEXT: cmovaeq %rdi, %rax
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: cmpq $1, %rdi
+; X64-NEXT: adcq $0, %rax
; X64-NEXT: retq
;
; X86-LABEL: test_i64_1:
@@ -208,11 +205,11 @@ define i64 @test_i64_1(i64 %a) nounwind {
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: cmpl $2, %ecx
-; X86-NEXT: movl $1, %eax
-; X86-NEXT: movl $1, %esi
-; X86-NEXT: cmovael %ecx, %esi
+; X86-NEXT: cmpl $1, %ecx
+; X86-NEXT: movl %ecx, %esi
+; X86-NEXT: adcl $0, %esi
; X86-NEXT: testl %edx, %edx
+; X86-NEXT: movl $1, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: cmovel %esi, %eax
; X86-NEXT: popl %esi
More information about the llvm-commits
mailing list