[llvm] 1104056 - [X86] preferABDSToABSWithNSW - use ABDS for i32/i64 if we have CMOV
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 5 04:53:48 PDT 2024
Author: Simon Pilgrim
Date: 2024-09-05T12:53:37+01:00
New Revision: 11040560ba30381ed47c3089a2562a41b00dbb4b
URL: https://github.com/llvm/llvm-project/commit/11040560ba30381ed47c3089a2562a41b00dbb4b
DIFF: https://github.com/llvm/llvm-project/commit/11040560ba30381ed47c3089a2562a41b00dbb4b.diff
LOG: [X86] preferABDSToABSWithNSW - use ABDS for i32/i64 if we have CMOV
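Now that we have better ABDS lowering, prefer cmov(sub(x,y),sub(y,x)) to cmov(abs(sub(x,y)),sub(x,y)) to improve instruction-level parallelism (ILP): the two subtractions are independent of each other and can issue in parallel.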
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/abds-neg.ll
llvm/test/CodeGen/X86/abds.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 451881e1d61415..092a7192929fd5 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -58210,7 +58210,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
}
bool X86TargetLowering::preferABDSToABSWithNSW(EVT VT) const {
- return false;
+ return Subtarget.canUseCMOV() && (VT == MVT::i32 || VT == MVT::i64);
}
// Prefer (non-AVX512) vector TRUNCATE(SIGN_EXTEND_INREG(X)) to use of PACKSS.
diff --git a/llvm/test/CodeGen/X86/abds-neg.ll b/llvm/test/CodeGen/X86/abds-neg.ll
index 833273dc982438..f837f49abf7a4f 100644
--- a/llvm/test/CodeGen/X86/abds-neg.ll
+++ b/llvm/test/CodeGen/X86/abds-neg.ll
@@ -1027,19 +1027,22 @@ define i16 @abd_subnsw_i16_undef(i16 %a, i16 %b) nounwind {
define i32 @abd_subnsw_i32(i32 %a, i32 %b) nounwind {
; X86-LABEL: abd_subnsw_i32:
; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: subl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: movl %ecx, %edx
+; X86-NEXT: subl %eax, %edx
+; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: cmovll %edx, %eax
; X86-NEXT: negl %eax
-; X86-NEXT: cmovnsl %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: abd_subnsw_i32:
; X64: # %bb.0:
-; X64-NEXT: subl %esi, %edi
; X64-NEXT: movl %edi, %eax
+; X64-NEXT: subl %esi, %eax
+; X64-NEXT: subl %edi, %esi
+; X64-NEXT: cmovgel %esi, %eax
; X64-NEXT: negl %eax
-; X64-NEXT: cmovnsl %edi, %eax
; X64-NEXT: retq
%sub = sub nsw i32 %a, %b
%abs = call i32 @llvm.abs.i32(i32 %sub, i1 false)
@@ -1050,19 +1053,22 @@ define i32 @abd_subnsw_i32(i32 %a, i32 %b) nounwind {
define i32 @abd_subnsw_i32_undef(i32 %a, i32 %b) nounwind {
; X86-LABEL: abd_subnsw_i32_undef:
; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: subl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: movl %ecx, %edx
+; X86-NEXT: subl %eax, %edx
+; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: cmovll %edx, %eax
; X86-NEXT: negl %eax
-; X86-NEXT: cmovnsl %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: abd_subnsw_i32_undef:
; X64: # %bb.0:
-; X64-NEXT: subl %esi, %edi
; X64-NEXT: movl %edi, %eax
+; X64-NEXT: subl %esi, %eax
+; X64-NEXT: subl %edi, %esi
+; X64-NEXT: cmovgel %esi, %eax
; X64-NEXT: negl %eax
-; X64-NEXT: cmovnsl %edi, %eax
; X64-NEXT: retq
%sub = sub nsw i32 %a, %b
%abs = call i32 @llvm.abs.i32(i32 %sub, i1 true)
@@ -1090,10 +1096,11 @@ define i64 @abd_subnsw_i64(i64 %a, i64 %b) nounwind {
;
; X64-LABEL: abd_subnsw_i64:
; X64: # %bb.0:
-; X64-NEXT: subq %rsi, %rdi
; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: subq %rsi, %rax
+; X64-NEXT: subq %rdi, %rsi
+; X64-NEXT: cmovgeq %rsi, %rax
; X64-NEXT: negq %rax
-; X64-NEXT: cmovnsq %rdi, %rax
; X64-NEXT: retq
%sub = sub nsw i64 %a, %b
%abs = call i64 @llvm.abs.i64(i64 %sub, i1 false)
@@ -1121,10 +1128,11 @@ define i64 @abd_subnsw_i64_undef(i64 %a, i64 %b) nounwind {
;
; X64-LABEL: abd_subnsw_i64_undef:
; X64: # %bb.0:
-; X64-NEXT: subq %rsi, %rdi
; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: subq %rsi, %rax
+; X64-NEXT: subq %rdi, %rsi
+; X64-NEXT: cmovgeq %rsi, %rax
; X64-NEXT: negq %rax
-; X64-NEXT: cmovnsq %rdi, %rax
; X64-NEXT: retq
%sub = sub nsw i64 %a, %b
%abs = call i64 @llvm.abs.i64(i64 %sub, i1 true)
diff --git a/llvm/test/CodeGen/X86/abds.ll b/llvm/test/CodeGen/X86/abds.ll
index d9ba140032b31d..9476fd14306fe5 100644
--- a/llvm/test/CodeGen/X86/abds.ll
+++ b/llvm/test/CodeGen/X86/abds.ll
@@ -928,19 +928,20 @@ define i16 @abd_subnsw_i16_undef(i16 %a, i16 %b) nounwind {
define i32 @abd_subnsw_i32(i32 %a, i32 %b) nounwind {
; X86-LABEL: abd_subnsw_i32:
; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: subl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: negl %eax
-; X86-NEXT: cmovsl %ecx, %eax
+; X86-NEXT: movl %ecx, %edx
+; X86-NEXT: subl %eax, %edx
+; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: cmovll %edx, %eax
; X86-NEXT: retl
;
; X64-LABEL: abd_subnsw_i32:
; X64: # %bb.0:
-; X64-NEXT: subl %esi, %edi
; X64-NEXT: movl %edi, %eax
-; X64-NEXT: negl %eax
-; X64-NEXT: cmovsl %edi, %eax
+; X64-NEXT: subl %esi, %eax
+; X64-NEXT: subl %edi, %esi
+; X64-NEXT: cmovgel %esi, %eax
; X64-NEXT: retq
%sub = sub nsw i32 %a, %b
%abs = call i32 @llvm.abs.i32(i32 %sub, i1 false)
@@ -950,19 +951,20 @@ define i32 @abd_subnsw_i32(i32 %a, i32 %b) nounwind {
define i32 @abd_subnsw_i32_undef(i32 %a, i32 %b) nounwind {
; X86-LABEL: abd_subnsw_i32_undef:
; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: subl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: negl %eax
-; X86-NEXT: cmovsl %ecx, %eax
+; X86-NEXT: movl %ecx, %edx
+; X86-NEXT: subl %eax, %edx
+; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: cmovll %edx, %eax
; X86-NEXT: retl
;
; X64-LABEL: abd_subnsw_i32_undef:
; X64: # %bb.0:
-; X64-NEXT: subl %esi, %edi
; X64-NEXT: movl %edi, %eax
-; X64-NEXT: negl %eax
-; X64-NEXT: cmovsl %edi, %eax
+; X64-NEXT: subl %esi, %eax
+; X64-NEXT: subl %edi, %esi
+; X64-NEXT: cmovgel %esi, %eax
; X64-NEXT: retq
%sub = sub nsw i32 %a, %b
%abs = call i32 @llvm.abs.i32(i32 %sub, i1 true)
@@ -986,10 +988,10 @@ define i64 @abd_subnsw_i64(i64 %a, i64 %b) nounwind {
;
; X64-LABEL: abd_subnsw_i64:
; X64: # %bb.0:
-; X64-NEXT: subq %rsi, %rdi
; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: negq %rax
-; X64-NEXT: cmovsq %rdi, %rax
+; X64-NEXT: subq %rsi, %rax
+; X64-NEXT: subq %rdi, %rsi
+; X64-NEXT: cmovgeq %rsi, %rax
; X64-NEXT: retq
%sub = sub nsw i64 %a, %b
%abs = call i64 @llvm.abs.i64(i64 %sub, i1 false)
@@ -1013,10 +1015,10 @@ define i64 @abd_subnsw_i64_undef(i64 %a, i64 %b) nounwind {
;
; X64-LABEL: abd_subnsw_i64_undef:
; X64: # %bb.0:
-; X64-NEXT: subq %rsi, %rdi
; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: negq %rax
-; X64-NEXT: cmovsq %rdi, %rax
+; X64-NEXT: subq %rsi, %rax
+; X64-NEXT: subq %rdi, %rsi
+; X64-NEXT: cmovgeq %rsi, %rax
; X64-NEXT: retq
%sub = sub nsw i64 %a, %b
%abs = call i64 @llvm.abs.i64(i64 %sub, i1 true)
More information about the llvm-commits mailing list