[llvm] e4ab202 - [X86] convertIntLogicToFPLogic - enable fp-logic on pre-AVX targets for supported fp predicates (PR34563)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 8 10:09:37 PST 2022
Author: Simon Pilgrim
Date: 2022-03-08T18:06:27Z
New Revision: e4ab2024a65a2233d258fc3377bd6c84376376aa
URL: https://github.com/llvm/llvm-project/commit/e4ab2024a65a2233d258fc3377bd6c84376376aa
DIFF: https://github.com/llvm/llvm-project/commit/e4ab2024a65a2233d258fc3377bd6c84376376aa.diff
LOG: [X86] convertIntLogicToFPLogic - enable fp-logic on pre-AVX targets for supported fp predicates (PR34563)
If the SETCC fp-condcode is supported on SSE as a single CMPPS/PD op, then we can use convertIntLogicToFPLogic to reduce EFLAGS and XMM->GPR traffic, as we already do for AVX targets.
Differential Revision: https://reviews.llvm.org/D121210
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/fcmp-logic.ll
llvm/test/CodeGen/X86/pr40539.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index d58c572a783ff..371255172fcab 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -46813,16 +46813,19 @@ static SDValue convertIntLogicToFPLogic(SDNode *N, SelectionDAG &DAG,
return DAG.getBitcast(VT, FPLogic);
}
- // The vector ISA for FP predicates is incomplete before AVX, so converting
- // COMIS* to CMPS* may not be a win before AVX.
- // TODO: Check types/predicates to see if they are available with SSE/SSE2.
- if (!Subtarget.hasAVX() || VT != MVT::i1 || N0.getOpcode() != ISD::SETCC ||
- !N0.hasOneUse() || !N1.hasOneUse())
+ if (VT != MVT::i1 || N0.getOpcode() != ISD::SETCC || !N0.hasOneUse() ||
+ !N1.hasOneUse())
return SDValue();
ISD::CondCode CC0 = cast<CondCodeSDNode>(N0.getOperand(2))->get();
ISD::CondCode CC1 = cast<CondCodeSDNode>(N1.getOperand(2))->get();
+ // The vector ISA for FP predicates is incomplete before AVX, so converting
+ // COMIS* to CMPS* may not be a win before AVX.
+ if (!Subtarget.hasAVX() &&
+ !(cheapX86FSETCC_SSE(CC0) && cheapX86FSETCC_SSE(CC1)))
+ return SDValue();
+
// Convert scalar FP compares and logic to vector compares (COMIS* to CMPS*)
// and vector logic:
// logic (setcc N00, N01), (setcc N10, N11) -->
diff --git a/llvm/test/CodeGen/X86/fcmp-logic.ll b/llvm/test/CodeGen/X86/fcmp-logic.ll
index 8981bbe888573..67ca5b250f636 100644
--- a/llvm/test/CodeGen/X86/fcmp-logic.ll
+++ b/llvm/test/CodeGen/X86/fcmp-logic.ll
@@ -6,11 +6,11 @@
define i1 @olt_ole_and_f32(float %w, float %x, float %y, float %z) {
; SSE2-LABEL: olt_ole_and_f32:
; SSE2: # %bb.0:
-; SSE2-NEXT: ucomiss %xmm0, %xmm1
-; SSE2-NEXT: seta %cl
-; SSE2-NEXT: ucomiss %xmm2, %xmm3
-; SSE2-NEXT: setae %al
-; SSE2-NEXT: andb %cl, %al
+; SSE2-NEXT: cmpleps %xmm3, %xmm2
+; SSE2-NEXT: cmpltps %xmm1, %xmm0
+; SSE2-NEXT: andps %xmm2, %xmm0
+; SSE2-NEXT: movd %xmm0, %eax
+; SSE2-NEXT: # kill: def $al killed $al killed $eax
; SSE2-NEXT: retq
;
; AVX1-LABEL: olt_ole_and_f32:
@@ -43,13 +43,11 @@ define i1 @olt_ole_and_f32(float %w, float %x, float %y, float %z) {
define i1 @oge_oeq_or_f32(float %w, float %x, float %y, float %z) {
; SSE2-LABEL: oge_oeq_or_f32:
; SSE2: # %bb.0:
-; SSE2-NEXT: ucomiss %xmm1, %xmm0
-; SSE2-NEXT: setae %cl
-; SSE2-NEXT: ucomiss %xmm3, %xmm2
-; SSE2-NEXT: setnp %dl
-; SSE2-NEXT: sete %al
-; SSE2-NEXT: andb %dl, %al
-; SSE2-NEXT: orb %cl, %al
+; SSE2-NEXT: cmpeqps %xmm3, %xmm2
+; SSE2-NEXT: cmpleps %xmm0, %xmm1
+; SSE2-NEXT: orps %xmm2, %xmm1
+; SSE2-NEXT: movd %xmm1, %eax
+; SSE2-NEXT: # kill: def $al killed $al killed $eax
; SSE2-NEXT: retq
;
; AVX1-LABEL: oge_oeq_or_f32:
@@ -121,13 +119,11 @@ define i1 @ord_one_xor_f32(float %w, float %x, float %y, float %z) {
define i1 @une_ugt_and_f64(double %w, double %x, double %y, double %z) {
; SSE2-LABEL: une_ugt_and_f64:
; SSE2: # %bb.0:
-; SSE2-NEXT: ucomisd %xmm1, %xmm0
-; SSE2-NEXT: setp %al
-; SSE2-NEXT: setne %cl
-; SSE2-NEXT: orb %al, %cl
-; SSE2-NEXT: ucomisd %xmm2, %xmm3
-; SSE2-NEXT: setb %al
-; SSE2-NEXT: andb %cl, %al
+; SSE2-NEXT: cmpnlepd %xmm3, %xmm2
+; SSE2-NEXT: cmpneqpd %xmm1, %xmm0
+; SSE2-NEXT: andpd %xmm2, %xmm0
+; SSE2-NEXT: movd %xmm0, %eax
+; SSE2-NEXT: # kill: def $al killed $al killed $eax
; SSE2-NEXT: retq
;
; AVX1-LABEL: une_ugt_and_f64:
@@ -160,11 +156,11 @@ define i1 @une_ugt_and_f64(double %w, double %x, double %y, double %z) {
define i1 @ult_uge_or_f64(double %w, double %x, double %y, double %z) {
; SSE2-LABEL: ult_uge_or_f64:
; SSE2: # %bb.0:
-; SSE2-NEXT: ucomisd %xmm1, %xmm0
-; SSE2-NEXT: setb %cl
-; SSE2-NEXT: ucomisd %xmm2, %xmm3
-; SSE2-NEXT: setbe %al
-; SSE2-NEXT: orb %cl, %al
+; SSE2-NEXT: cmpnltpd %xmm3, %xmm2
+; SSE2-NEXT: cmpnlepd %xmm0, %xmm1
+; SSE2-NEXT: orpd %xmm2, %xmm1
+; SSE2-NEXT: movd %xmm1, %eax
+; SSE2-NEXT: # kill: def $al killed $al killed $eax
; SSE2-NEXT: retq
;
; AVX1-LABEL: ult_uge_or_f64:
@@ -198,13 +194,11 @@ define i1 @ult_uge_or_f64(double %w, double %x, double %y, double %z) {
define i1 @une_uno_xor_f64(double %w, double %x, double %y, double %z) {
; SSE2-LABEL: une_uno_xor_f64:
; SSE2: # %bb.0:
-; SSE2-NEXT: ucomisd %xmm1, %xmm0
-; SSE2-NEXT: setp %al
-; SSE2-NEXT: setne %cl
-; SSE2-NEXT: orb %al, %cl
-; SSE2-NEXT: ucomisd %xmm3, %xmm2
-; SSE2-NEXT: setp %al
-; SSE2-NEXT: xorb %cl, %al
+; SSE2-NEXT: cmpunordpd %xmm3, %xmm2
+; SSE2-NEXT: cmpneqpd %xmm1, %xmm0
+; SSE2-NEXT: xorpd %xmm2, %xmm0
+; SSE2-NEXT: movd %xmm0, %eax
+; SSE2-NEXT: # kill: def $al killed $al killed $eax
; SSE2-NEXT: retq
;
; AVX1-LABEL: une_uno_xor_f64:
@@ -337,11 +331,11 @@ define i1 @f32cmp3(float %x, float %y, float %z, float %w) {
; SSE2-LABEL: f32cmp3:
; SSE2: # %bb.0:
; SSE2-NEXT: xorps %xmm4, %xmm4
-; SSE2-NEXT: ucomiss %xmm4, %xmm0
-; SSE2-NEXT: seta %al
-; SSE2-NEXT: ucomiss %xmm4, %xmm1
-; SSE2-NEXT: seta %cl
-; SSE2-NEXT: orb %al, %cl
+; SSE2-NEXT: xorps %xmm5, %xmm5
+; SSE2-NEXT: cmpltps %xmm1, %xmm5
+; SSE2-NEXT: cmpltps %xmm0, %xmm4
+; SSE2-NEXT: orps %xmm5, %xmm4
+; SSE2-NEXT: movd %xmm4, %ecx
; SSE2-NEXT: ucomiss %xmm2, %xmm3
; SSE2-NEXT: seta %al
; SSE2-NEXT: xorb %cl, %al
diff --git a/llvm/test/CodeGen/X86/pr40539.ll b/llvm/test/CodeGen/X86/pr40539.ll
index 85c329eafd1bb..d74967497a13b 100644
--- a/llvm/test/CodeGen/X86/pr40539.ll
+++ b/llvm/test/CodeGen/X86/pr40539.ll
@@ -40,20 +40,22 @@ define zeroext i1 @_Z8test_cosv() {
; CHECK-NEXT: subl $8, %esp
; CHECK-NEXT: .cfi_def_cfa_offset 12
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT: divss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; CHECK-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
+; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
; CHECK-NEXT: #APP
; CHECK-NEXT: fcos
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: fstps (%esp)
-; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT: ucomiss %xmm0, %xmm1
-; CHECK-NEXT: setae %cl
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; CHECK-NEXT: setae %al
-; CHECK-NEXT: andb %cl, %al
+; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT: cmpleps %xmm1, %xmm0
+; CHECK-NEXT: cmpleps %xmm2, %xmm1
+; CHECK-NEXT: andps %xmm0, %xmm1
+; CHECK-NEXT: movd %xmm1, %eax
+; CHECK-NEXT: andb $1, %al
+; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: addl $8, %esp
; CHECK-NEXT: .cfi_def_cfa_offset 4
; CHECK-NEXT: retl
More information about the llvm-commits mailing list