[llvm] [SelectionDAG] Expand [US]CMP using arithmetic on boolean values instead of selects (PR #98774)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Jul 13 14:29:15 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-selectiondag
Author: Volodymyr Vasylkun (Poseydon42)
<details>
<summary>Changes</summary>
The previous expansion of [US]CMP was done using two selects and two compares. It produced decent code, but on many platforms it is better to implement [US]CMP nodes by performing the following operation:
```
[us]cmp(x, y) = (x [us]> y) - (x [us]< y)
```
This patch adds this new expansion, as well as a hook in TargetLowering to allow some targets to still use the select-based approach. AArch64 is currently the only target to prefer the select-based approach, due to it having a `csinv` instruction that performs the job of a `select` + `not`, but other targets may also opt into the select-based expansion if it provides for better codegen.
---
Patch is 239.50 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/98774.diff
5 Files Affected:
- (modified) llvm/include/llvm/CodeGen/TargetLowering.h (+4)
- (modified) llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp (+16-6)
- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.h (+2)
- (modified) llvm/test/CodeGen/X86/scmp.ll (+1164-1315)
- (modified) llvm/test/CodeGen/X86/ucmp.ll (+1188-1532)
``````````diff
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 55b60b01e5827..92deae43a252e 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -3390,6 +3390,10 @@ class TargetLoweringBase {
return isOperationLegalOrCustom(Op, VT);
}
+ /// Should we expand [US]CMP nodes using two selects and two compares, or by
+ /// doing arithmetic on boolean types
+ virtual bool shouldExpandCmpUsingSelects() const { return false; }
+
/// Does this target support complex deinterleaving
virtual bool isComplexDeinterleavingSupported() const { return false; }
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 690a86bd4606c..e38f71bd59234 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -10381,14 +10381,24 @@ SDValue TargetLowering::expandCMP(SDNode *Node, SelectionDAG &DAG) const {
auto LTPredicate = (Opcode == ISD::UCMP ? ISD::SETULT : ISD::SETLT);
auto GTPredicate = (Opcode == ISD::UCMP ? ISD::SETUGT : ISD::SETGT);
-
SDValue IsLT = DAG.getSetCC(dl, BoolVT, LHS, RHS, LTPredicate);
SDValue IsGT = DAG.getSetCC(dl, BoolVT, LHS, RHS, GTPredicate);
- SDValue SelectZeroOrOne =
- DAG.getSelect(dl, ResVT, IsGT, DAG.getConstant(1, dl, ResVT),
- DAG.getConstant(0, dl, ResVT));
- return DAG.getSelect(dl, ResVT, IsLT, DAG.getConstant(-1, dl, ResVT),
- SelectZeroOrOne);
+
+ // We can't perform arithmetic on i1 values. Extending them would
+ // probably result in worse codegen, so let's just use two selects instead.
+ // Some targets are also just better off using selects rather than subtraction
+ // because one of the conditions can be merged with one of the selects
+ EVT BoolElVT = BoolVT.isVector() ? BoolVT.getVectorElementType() : BoolVT;
+ if (shouldExpandCmpUsingSelects() || !BoolElVT.knownBitsGT(MVT::i1)) {
+ SDValue SelectZeroOrOne =
+ DAG.getSelect(dl, ResVT, IsGT, DAG.getConstant(1, dl, ResVT),
+ DAG.getConstant(0, dl, ResVT));
+ return DAG.getSelect(dl, ResVT, IsLT, DAG.getConstant(-1, dl, ResVT),
+ SelectZeroOrOne);
+ }
+
+ return DAG.getSExtOrTrunc(DAG.getNode(ISD::SUB, dl, BoolVT, IsGT, IsLT), dl,
+ ResVT);
}
SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const {
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 047c852bb01d2..fcdd47541be82 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -907,6 +907,8 @@ class AArch64TargetLowering : public TargetLowering {
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;
+ bool shouldExpandCmpUsingSelects() const override { return true; }
+
bool isComplexDeinterleavingSupported() const override;
bool isComplexDeinterleavingOperationSupported(
ComplexDeinterleavingOperation Operation, Type *Ty) const override;
diff --git a/llvm/test/CodeGen/X86/scmp.ll b/llvm/test/CodeGen/X86/scmp.ll
index 55dc0d6059e05..7d4bbb06534e6 100644
--- a/llvm/test/CodeGen/X86/scmp.ll
+++ b/llvm/test/CodeGen/X86/scmp.ll
@@ -5,24 +5,19 @@
define i8 @scmp.8.8(i8 %x, i8 %y) nounwind {
; X64-LABEL: scmp.8.8:
; X64: # %bb.0:
-; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: cmpb %sil, %dil
-; X64-NEXT: setg %cl
-; X64-NEXT: movl $255, %eax
-; X64-NEXT: cmovgel %ecx, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: setl %cl
+; X64-NEXT: setg %al
+; X64-NEXT: subb %cl, %al
; X64-NEXT: retq
;
; X86-LABEL: scmp.8.8:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al
-; X86-NEXT: setg %cl
-; X86-NEXT: movb $-1, %al
-; X86-NEXT: jl .LBB0_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: .LBB0_2:
+; X86-NEXT: setl %cl
+; X86-NEXT: setg %al
+; X86-NEXT: subb %cl, %al
; X86-NEXT: retl
%1 = call i8 @llvm.scmp(i8 %x, i8 %y)
ret i8 %1
@@ -31,24 +26,19 @@ define i8 @scmp.8.8(i8 %x, i8 %y) nounwind {
define i8 @scmp.8.16(i16 %x, i16 %y) nounwind {
; X64-LABEL: scmp.8.16:
; X64: # %bb.0:
-; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: cmpw %si, %di
-; X64-NEXT: setg %cl
-; X64-NEXT: movl $255, %eax
-; X64-NEXT: cmovgel %ecx, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: setl %cl
+; X64-NEXT: setg %al
+; X64-NEXT: subb %cl, %al
; X64-NEXT: retq
;
; X86-LABEL: scmp.8.16:
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpw {{[0-9]+}}(%esp), %ax
-; X86-NEXT: setg %cl
-; X86-NEXT: movb $-1, %al
-; X86-NEXT: jl .LBB1_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: .LBB1_2:
+; X86-NEXT: setl %cl
+; X86-NEXT: setg %al
+; X86-NEXT: subb %cl, %al
; X86-NEXT: retl
%1 = call i8 @llvm.scmp(i16 %x, i16 %y)
ret i8 %1
@@ -57,24 +47,19 @@ define i8 @scmp.8.16(i16 %x, i16 %y) nounwind {
define i8 @scmp.8.32(i32 %x, i32 %y) nounwind {
; X64-LABEL: scmp.8.32:
; X64: # %bb.0:
-; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: cmpl %esi, %edi
-; X64-NEXT: setg %cl
-; X64-NEXT: movl $255, %eax
-; X64-NEXT: cmovgel %ecx, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: setl %cl
+; X64-NEXT: setg %al
+; X64-NEXT: subb %cl, %al
; X64-NEXT: retq
;
; X86-LABEL: scmp.8.32:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: setg %cl
-; X86-NEXT: movb $-1, %al
-; X86-NEXT: jl .LBB2_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: .LBB2_2:
+; X86-NEXT: setl %cl
+; X86-NEXT: setg %al
+; X86-NEXT: subb %cl, %al
; X86-NEXT: retl
%1 = call i8 @llvm.scmp(i32 %x, i32 %y)
ret i8 %1
@@ -83,35 +68,32 @@ define i8 @scmp.8.32(i32 %x, i32 %y) nounwind {
define i8 @scmp.8.64(i64 %x, i64 %y) nounwind {
; X64-LABEL: scmp.8.64:
; X64: # %bb.0:
-; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: cmpq %rsi, %rdi
-; X64-NEXT: setg %cl
-; X64-NEXT: movl $255, %eax
-; X64-NEXT: cmovgel %ecx, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: setl %cl
+; X64-NEXT: setg %al
+; X64-NEXT: subb %cl, %al
; X64-NEXT: retq
;
; X86-LABEL: scmp.8.64:
; X86: # %bb.0:
+; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: cmpl %eax, %esi
-; X86-NEXT: movl %edi, %ecx
-; X86-NEXT: sbbl %edx, %ecx
-; X86-NEXT: setl %cl
-; X86-NEXT: cmpl %esi, %eax
-; X86-NEXT: sbbl %edi, %edx
-; X86-NEXT: movb $-1, %al
-; X86-NEXT: jl .LBB3_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: .LBB3_2:
+; X86-NEXT: cmpl %eax, %edx
+; X86-NEXT: movl %esi, %edi
+; X86-NEXT: sbbl %ecx, %edi
+; X86-NEXT: setl %bl
+; X86-NEXT: cmpl %edx, %eax
+; X86-NEXT: sbbl %esi, %ecx
+; X86-NEXT: setl %al
+; X86-NEXT: subb %bl, %al
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
; X86-NEXT: retl
%1 = call i8 @llvm.scmp(i64 %x, i64 %y)
ret i8 %1
@@ -120,16 +102,14 @@ define i8 @scmp.8.64(i64 %x, i64 %y) nounwind {
define i8 @scmp.8.128(i128 %x, i128 %y) nounwind {
; X64-LABEL: scmp.8.128:
; X64: # %bb.0:
+; X64-NEXT: cmpq %rdx, %rdi
+; X64-NEXT: movq %rsi, %rax
+; X64-NEXT: sbbq %rcx, %rax
+; X64-NEXT: setl %r8b
; X64-NEXT: cmpq %rdi, %rdx
-; X64-NEXT: movq %rcx, %rax
-; X64-NEXT: sbbq %rsi, %rax
+; X64-NEXT: sbbq %rsi, %rcx
; X64-NEXT: setl %al
-; X64-NEXT: movzbl %al, %r8d
-; X64-NEXT: cmpq %rdx, %rdi
-; X64-NEXT: sbbq %rcx, %rsi
-; X64-NEXT: movl $255, %eax
-; X64-NEXT: cmovgel %r8d, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: subb %r8b, %al
; X64-NEXT: retq
;
; X86-LABEL: scmp.8.128:
@@ -142,26 +122,23 @@ define i8 @scmp.8.128(i128 %x, i128 %y) nounwind {
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: cmpl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %ebp, %ebx
-; X86-NEXT: sbbl %edx, %ebx
-; X86-NEXT: movl %ecx, %ebx
-; X86-NEXT: sbbl %eax, %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: cmpl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %ebx, %ebp
+; X86-NEXT: sbbl %edx, %ebp
+; X86-NEXT: movl %ecx, %ebp
+; X86-NEXT: sbbl %eax, %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %esi, %ecx
-; X86-NEXT: sbbl %ebx, %ecx
+; X86-NEXT: sbbl %ebp, %ecx
; X86-NEXT: setl %cl
; X86-NEXT: cmpl %edi, {{[0-9]+}}(%esp)
-; X86-NEXT: sbbl %ebp, %edx
+; X86-NEXT: sbbl %ebx, %edx
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: sbbl %esi, %ebx
-; X86-NEXT: movb $-1, %al
-; X86-NEXT: jl .LBB4_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: .LBB4_2:
+; X86-NEXT: sbbl %esi, %ebp
+; X86-NEXT: setl %al
+; X86-NEXT: subb %cl, %al
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -174,25 +151,21 @@ define i8 @scmp.8.128(i128 %x, i128 %y) nounwind {
define i32 @scmp.32.32(i32 %x, i32 %y) nounwind {
; X64-LABEL: scmp.32.32:
; X64: # %bb.0:
-; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: cmpl %esi, %edi
+; X64-NEXT: setl %al
; X64-NEXT: setg %cl
-; X64-NEXT: movl $-1, %eax
-; X64-NEXT: cmovgel %ecx, %eax
+; X64-NEXT: subb %al, %cl
+; X64-NEXT: movsbl %cl, %eax
; X64-NEXT: retq
;
; X86-LABEL: scmp.32.32:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: setg %dl
-; X86-NEXT: movl $-1, %eax
-; X86-NEXT: jl .LBB5_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movb %dl, %cl
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: .LBB5_2:
+; X86-NEXT: setl %al
+; X86-NEXT: setg %cl
+; X86-NEXT: subb %al, %cl
+; X86-NEXT: movsbl %cl, %eax
; X86-NEXT: retl
%1 = call i32 @llvm.scmp(i32 %x, i32 %y)
ret i32 %1
@@ -201,34 +174,34 @@ define i32 @scmp.32.32(i32 %x, i32 %y) nounwind {
define i32 @scmp.32.64(i64 %x, i64 %y) nounwind {
; X64-LABEL: scmp.32.64:
; X64: # %bb.0:
-; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: cmpq %rsi, %rdi
+; X64-NEXT: setl %al
; X64-NEXT: setg %cl
-; X64-NEXT: movl $-1, %eax
-; X64-NEXT: cmovgel %ecx, %eax
+; X64-NEXT: subb %al, %cl
+; X64-NEXT: movsbl %cl, %eax
; X64-NEXT: retq
;
; X86-LABEL: scmp.32.64:
; X86: # %bb.0:
+; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: cmpl %eax, %esi
-; X86-NEXT: movl %edi, %ecx
-; X86-NEXT: sbbl %edx, %ecx
-; X86-NEXT: setl %cl
-; X86-NEXT: cmpl %esi, %eax
-; X86-NEXT: sbbl %edi, %edx
-; X86-NEXT: movl $-1, %eax
-; X86-NEXT: jl .LBB6_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movzbl %cl, %eax
-; X86-NEXT: .LBB6_2:
+; X86-NEXT: cmpl %eax, %edx
+; X86-NEXT: movl %esi, %edi
+; X86-NEXT: sbbl %ecx, %edi
+; X86-NEXT: setl %bl
+; X86-NEXT: cmpl %edx, %eax
+; X86-NEXT: sbbl %esi, %ecx
+; X86-NEXT: setl %al
+; X86-NEXT: subb %bl, %al
+; X86-NEXT: movsbl %al, %eax
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
; X86-NEXT: retl
%1 = call i32 @llvm.scmp(i64 %x, i64 %y)
ret i32 %1
@@ -237,36 +210,36 @@ define i32 @scmp.32.64(i64 %x, i64 %y) nounwind {
define i64 @scmp.64.64(i64 %x, i64 %y) nounwind {
; X64-LABEL: scmp.64.64:
; X64: # %bb.0:
-; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: cmpq %rsi, %rdi
+; X64-NEXT: setl %al
; X64-NEXT: setg %cl
-; X64-NEXT: movq $-1, %rax
-; X64-NEXT: cmovgeq %rcx, %rax
+; X64-NEXT: subb %al, %cl
+; X64-NEXT: movsbq %cl, %rax
; X64-NEXT: retq
;
; X86-LABEL: scmp.64.64:
; X86: # %bb.0:
+; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: cmpl %eax, %esi
-; X86-NEXT: movl %edi, %ecx
-; X86-NEXT: sbbl %edx, %ecx
-; X86-NEXT: setl %cl
-; X86-NEXT: cmpl %esi, %eax
-; X86-NEXT: sbbl %edi, %edx
-; X86-NEXT: movl $-1, %eax
-; X86-NEXT: movl $-1, %edx
-; X86-NEXT: jl .LBB7_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movzbl %cl, %eax
-; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: .LBB7_2:
+; X86-NEXT: cmpl %eax, %edx
+; X86-NEXT: movl %esi, %edi
+; X86-NEXT: sbbl %ecx, %edi
+; X86-NEXT: setl %bl
+; X86-NEXT: cmpl %edx, %eax
+; X86-NEXT: sbbl %esi, %ecx
+; X86-NEXT: setl %al
+; X86-NEXT: subb %bl, %al
+; X86-NEXT: movsbl %al, %eax
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: sarl $31, %edx
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
; X86-NEXT: retl
%1 = call i64 @llvm.scmp(i64 %x, i64 %y)
ret i64 %1
@@ -275,24 +248,19 @@ define i64 @scmp.64.64(i64 %x, i64 %y) nounwind {
define i4 @scmp_narrow_result(i32 %x, i32 %y) nounwind {
; X64-LABEL: scmp_narrow_result:
; X64: # %bb.0:
-; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: cmpl %esi, %edi
-; X64-NEXT: setg %cl
-; X64-NEXT: movl $255, %eax
-; X64-NEXT: cmovgel %ecx, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: setl %cl
+; X64-NEXT: setg %al
+; X64-NEXT: subb %cl, %al
; X64-NEXT: retq
;
; X86-LABEL: scmp_narrow_result:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: setg %cl
-; X86-NEXT: movb $-1, %al
-; X86-NEXT: jl .LBB8_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: .LBB8_2:
+; X86-NEXT: setl %cl
+; X86-NEXT: setg %al
+; X86-NEXT: subb %cl, %al
; X86-NEXT: retl
%1 = call i4 @llvm.scmp(i32 %x, i32 %y)
ret i4 %1
@@ -305,39 +273,36 @@ define i8 @scmp_narrow_op(i62 %x, i62 %y) nounwind {
; X64-NEXT: sarq $2, %rsi
; X64-NEXT: shlq $2, %rdi
; X64-NEXT: sarq $2, %rdi
-; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: cmpq %rsi, %rdi
-; X64-NEXT: setg %cl
-; X64-NEXT: movl $255, %eax
-; X64-NEXT: cmovgel %ecx, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: setl %cl
+; X64-NEXT: setg %al
+; X64-NEXT: subb %cl, %al
; X64-NEXT: retq
;
; X86-LABEL: scmp_narrow_op:
; X86: # %bb.0:
+; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shll $2, %eax
; X86-NEXT: sarl $2, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: shll $2, %edi
-; X86-NEXT: sarl $2, %edi
-; X86-NEXT: cmpl %ecx, %esi
-; X86-NEXT: movl %edi, %edx
-; X86-NEXT: sbbl %eax, %edx
-; X86-NEXT: setl %dl
-; X86-NEXT: cmpl %esi, %ecx
-; X86-NEXT: sbbl %edi, %eax
-; X86-NEXT: movb $-1, %al
-; X86-NEXT: jl .LBB9_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movl %edx, %eax
-; X86-NEXT: .LBB9_2:
+; X86-NEXT: shll $2, %esi
+; X86-NEXT: sarl $2, %esi
+; X86-NEXT: cmpl %ecx, %edx
+; X86-NEXT: movl %esi, %edi
+; X86-NEXT: sbbl %eax, %edi
+; X86-NEXT: setl %bl
+; X86-NEXT: cmpl %edx, %ecx
+; X86-NEXT: sbbl %esi, %eax
+; X86-NEXT: setl %al
+; X86-NEXT: subb %bl, %al
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
; X86-NEXT: retl
%1 = call i8 @llvm.scmp(i62 %x, i62 %y)
ret i8 %1
@@ -346,39 +311,33 @@ define i8 @scmp_narrow_op(i62 %x, i62 %y) nounwind {
define i141 @scmp_wide_result(i32 %x, i32 %y) nounwind {
; X64-LABEL: scmp_wide_result:
; X64: # %bb.0:
-; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: cmpl %esi, %edi
+; X64-NEXT: setl %al
; X64-NEXT: setg %cl
-; X64-NEXT: movq $-1, %rax
-; X64-NEXT: cmovgeq %rcx, %rax
-; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: xorl %ecx, %ecx
+; X64-NEXT: subb %al, %cl
+; X64-NEXT: movsbq %cl, %rax
+; X64-NEXT: movq %rax, %rdx
+; X64-NEXT: sarq $63, %rdx
+; X64-NEXT: movl %edx, %ecx
+; X64-NEXT: andl $8191, %ecx # imm = 0x1FFF
; X64-NEXT: retq
;
; X86-LABEL: scmp_wide_result:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebx
-; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: xorl %ecx, %ecx
-; X86-NEXT: cmpl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: setg %bl
-; X86-NEXT: movl $-1, %edx
-; X86-NEXT: movl $-1, %esi
-; X86-NEXT: jl .LBB10_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: xorl %esi, %esi
-; X86-NEXT: movb %bl, %cl
-; X86-NEXT: movl %ecx, %edx
-; X86-NEXT: .LBB10_2:
-; X86-NEXT: movl %esi, 4(%eax)
-; X86-NEXT: movl %edx, (%eax)
-; X86-NEXT: movl $0, 12(%eax)
-; X86-NEXT: movl $0, 8(%eax)
-; X86-NEXT: movw $0, 16(%eax)
-; X86-NEXT: popl %esi
-; X86-NEXT: popl %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: setl %cl
+; X86-NEXT: setg %dl
+; X86-NEXT: subb %cl, %dl
+; X86-NEXT: movsbl %dl, %ecx
+; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: sarl $31, %ecx
+; X86-NEXT: movl %ecx, 12(%eax)
+; X86-NEXT: movl %ecx, 8(%eax)
+; X86-NEXT: movl %ecx, 4(%eax)
+; X86-NEXT: andl $8191, %ecx # imm = 0x1FFF
+; X86-NEXT: movw %cx, 16(%eax)
; X86-NEXT: retl $4
%1 = call i141 @llvm.scmp(i32 %x, i32 %y)
ret i141 %1
@@ -387,20 +346,18 @@ define i141 @scmp_wide_result(i32 %x, i32 %y) nounwind {
define i8 @scmp_wide_op(i109 %x, i109 %y) nounwind {
; X64-LABEL: scmp_wide_op:
; X64: # %bb.0:
-; X64-NEXT: shlq $19, %rsi
-; X64-NEXT: sarq $19, %rsi
; X64-NEXT: shlq $19, %rcx
; X64-NEXT: sarq $19, %rcx
+; X64-NEXT: shlq $19, %rsi
+; X64-NEXT: sarq $19, %rsi
+; X64-NEXT: cmpq %rdx, %rdi
+; X64-NEXT: movq %rsi, %rax
+; X64-NEXT: sbbq %rcx, %rax
+; X64-NEXT: setl %r8b
; X64-NEXT: cmpq %rdi, %rdx
-; X64-NEXT: movq %rcx, %rax
-; X64-NEXT: sbbq %rsi, %rax
+; X64-NEXT: sbbq %rsi, %rcx
; X64-NEXT: setl %al
-; X64-NEXT: movzbl %al, %r8d
-; X64-NEXT: cmpq %rdx, %rdi
-; X64-NEXT: sbbq %rcx, %rsi
-; X64-NEXT: movl $255, %eax
-; X64-NEXT: cmovgel %r8d, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: subb %r8b, %al
; X64-NEXT: retq
;
; X86-LABEL: scmp_wide_op:
@@ -409,35 +366,31 @@ define i8 @scmp_wide_op(i109 %x, i109 %y) nounwind {
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: pushl %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shll $19, %eax
; X86-NEXT: sarl $19, %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: shll $19, %ecx
; X86-NEXT: sarl $19, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: cmpl {{[0-9]+}}(%esp), %ebp
-; X86-NEXT: sbbl %edx, %ebx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: cmpl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: sbbl %edx, %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl %edi, %esi
-; X86-NEXT: sbbl %ebx, %esi
+; X86-NEXT: sbbl %ebp, %esi
...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/98774
More information about the llvm-commits
mailing list