[llvm] [X86] SimplifyDemandedVectorEltsForTargetNode - replace packed fcmp node with scalar fcmp node if only element0 is demanded (PR #140563)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon May 19 13:27:36 PDT 2025
https://github.com/RKSimon updated https://github.com/llvm/llvm-project/pull/140563
From 0c23ac9161f117a061a4c270a18e2da725d99da4 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Mon, 19 May 2025 16:37:17 +0100
Subject: [PATCH] [X86] SimplifyDemandedVectorEltsForTargetNode - replace
packed fcmp node with scalar fcmp node if only element0 is demanded
This unnecessary vectorisation can appear because the fplogic opcodes are only available for 128-bit types, which can prevent folds that only work on the scalar source types and can also lead to fcmps of garbage data in the upper elements.
Fixes #140534
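For context, a minimal sketch of the kind of pattern this helps (assumed here to resemble the or_uno test updated below; the exact IR is illustrative):

  define i1 @or_uno(float %a, float %b) {
    ; both NaN checks feed a single logic op on i1
    %f1 = fcmp uno float %a, 0.0
    %f2 = fcmp uno float %b, 0.0
    %r = or i1 %f1, %f2
    ret i1 %r
  }

Both compares feed a logic op, so they were previously widened to packed compares (cmpunordps) even though only element 0 of each result is ever used; with this patch they lower to scalar cmpunordss instead.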
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 22 +++
llvm/test/CodeGen/X86/and-or-setcc.ll | 8 +-
.../test/CodeGen/X86/extract-vselect-setcc.ll | 3 +-
llvm/test/CodeGen/X86/extractelement-fp.ll | 4 +-
llvm/test/CodeGen/X86/fcmp-logic.ll | 145 +++++++-----------
llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll | 4 +-
llvm/test/CodeGen/X86/pr40539.ll | 13 +-
.../CodeGen/X86/vector-reduce-fmax-nnan.ll | 9 +-
.../CodeGen/X86/vector-reduce-fmin-nnan.ll | 9 +-
9 files changed, 99 insertions(+), 118 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 3b6b0d7b86c9c..012319197a65e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -43450,6 +43450,28 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
KnownZero = LHSZero;
break;
}
+ case X86ISD::CMPM:
+ case X86ISD::CMPP: {
+ // Scalarize packed fp comparison if we only require element 0.
+ if (DemandedElts == 1) {
+ SDLoc dl(Op);
+ MVT VT = Op.getSimpleValueType();
+ MVT OpSVT = Op.getOperand(0).getSimpleValueType().getScalarType();
+ SDValue LHS = TLO.DAG.getExtractVectorElt(dl, OpSVT, Op.getOperand(0), 0);
+ SDValue RHS = TLO.DAG.getExtractVectorElt(dl, OpSVT, Op.getOperand(1), 0);
+ SDValue CC = Op.getOperand(2);
+ if (Opc == X86ISD::CMPM) {
+ SDValue Cmp =
+ TLO.DAG.getNode(X86ISD::FSETCCM, dl, MVT::v1i1, LHS, RHS, CC);
+ return TLO.CombineTo(
+ Op, TLO.DAG.getInsertSubvector(dl, TLO.DAG.getUNDEF(VT), Cmp, 0));
+ }
+ SDValue Cmp = TLO.DAG.getNode(X86ISD::FSETCC, dl, OpSVT, LHS, RHS, CC);
+ return TLO.CombineTo(Op,
+ TLO.DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Cmp));
+ }
+ break;
+ }
case X86ISD::PCMPEQ:
case X86ISD::PCMPGT: {
APInt LHSUndef, LHSZero;
diff --git a/llvm/test/CodeGen/X86/and-or-setcc.ll b/llvm/test/CodeGen/X86/and-or-setcc.ll
index a6a9362908811..4484f23bbda36 100644
--- a/llvm/test/CodeGen/X86/and-or-setcc.ll
+++ b/llvm/test/CodeGen/X86/and-or-setcc.ll
@@ -17,8 +17,8 @@ define i1 @and_ord(float %a, float %b) {
; X64-LABEL: and_ord:
; X64: # %bb.0:
; X64-NEXT: xorps %xmm2, %xmm2
-; X64-NEXT: cmpordps %xmm2, %xmm1
-; X64-NEXT: cmpordps %xmm2, %xmm0
+; X64-NEXT: cmpordss %xmm2, %xmm1
+; X64-NEXT: cmpordss %xmm2, %xmm0
; X64-NEXT: andps %xmm1, %xmm0
; X64-NEXT: movd %xmm0, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
@@ -44,8 +44,8 @@ define i1 @or_uno(float %a, float %b) {
; X64-LABEL: or_uno:
; X64: # %bb.0:
; X64-NEXT: xorps %xmm2, %xmm2
-; X64-NEXT: cmpunordps %xmm2, %xmm1
-; X64-NEXT: cmpunordps %xmm2, %xmm0
+; X64-NEXT: cmpunordss %xmm2, %xmm1
+; X64-NEXT: cmpunordss %xmm2, %xmm0
; X64-NEXT: orps %xmm1, %xmm0
; X64-NEXT: movd %xmm0, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
diff --git a/llvm/test/CodeGen/X86/extract-vselect-setcc.ll b/llvm/test/CodeGen/X86/extract-vselect-setcc.ll
index 81ab104cab283..96c8e773d5edd 100644
--- a/llvm/test/CodeGen/X86/extract-vselect-setcc.ll
+++ b/llvm/test/CodeGen/X86/extract-vselect-setcc.ll
@@ -5,10 +5,9 @@ define void @PR117684(i1 %cond, <8 x float> %vec, ptr %ptr1, ptr %ptr2) #0 {
; CHECK-LABEL: PR117684:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; CHECK-NEXT: vcmpnltps %ymm1, %ymm0, %k1
+; CHECK-NEXT: vcmpnltss %xmm1, %xmm0, %k1
; CHECK-NEXT: vbroadcastss {{.*#+}} xmm0 = [NaN,NaN,NaN,NaN]
; CHECK-NEXT: vinsertf32x4 $0, %xmm0, %ymm0, %ymm0 {%k1} {z}
-; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vmulss %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
; CHECK-NEXT: vbroadcastss %xmm2, %ymm2
diff --git a/llvm/test/CodeGen/X86/extractelement-fp.ll b/llvm/test/CodeGen/X86/extractelement-fp.ll
index 944f6bbfd0bfb..1706f17eac165 100644
--- a/llvm/test/CodeGen/X86/extractelement-fp.ll
+++ b/llvm/test/CodeGen/X86/extractelement-fp.ll
@@ -319,7 +319,7 @@ define void @extsetcc(<4 x float> %x) {
define <3 x double> @extvselectsetcc_crash(<2 x double> %x) {
; X64-LABEL: extvselectsetcc_crash:
; X64: # %bb.0:
-; X64-NEXT: vcmpeqpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; X64-NEXT: vcmpeqsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; X64-NEXT: vmovsd {{.*#+}} xmm2 = [1.0E+0,0.0E+0]
; X64-NEXT: vandpd %xmm2, %xmm1, %xmm1
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
@@ -328,7 +328,7 @@ define <3 x double> @extvselectsetcc_crash(<2 x double> %x) {
;
; X86-LABEL: extvselectsetcc_crash:
; X86: # %bb.0:
-; X86-NEXT: vcmpeqpd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm1
+; X86-NEXT: vcmpeqsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm1
; X86-NEXT: vmovsd {{.*#+}} xmm2 = [1.0E+0,0.0E+0]
; X86-NEXT: vandpd %xmm2, %xmm1, %xmm1
; X86-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
diff --git a/llvm/test/CodeGen/X86/fcmp-logic.ll b/llvm/test/CodeGen/X86/fcmp-logic.ll
index 794b0ad92aef6..7b806bca43c2e 100644
--- a/llvm/test/CodeGen/X86/fcmp-logic.ll
+++ b/llvm/test/CodeGen/X86/fcmp-logic.ll
@@ -6,8 +6,8 @@
define i1 @olt_ole_and_f32(float %w, float %x, float %y, float %z) {
; SSE2-LABEL: olt_ole_and_f32:
; SSE2: # %bb.0:
-; SSE2-NEXT: cmpleps %xmm3, %xmm2
-; SSE2-NEXT: cmpltps %xmm1, %xmm0
+; SSE2-NEXT: cmpless %xmm3, %xmm2
+; SSE2-NEXT: cmpltss %xmm1, %xmm0
; SSE2-NEXT: andps %xmm2, %xmm0
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: # kill: def $al killed $al killed $eax
@@ -15,8 +15,8 @@ define i1 @olt_ole_and_f32(float %w, float %x, float %y, float %z) {
;
; AVX1-LABEL: olt_ole_and_f32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vcmpleps %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vcmpltps %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vcmpless %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vcmpltss %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vandps %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
@@ -24,15 +24,11 @@ define i1 @olt_ole_and_f32(float %w, float %x, float %y, float %z) {
;
; AVX512-LABEL: olt_ole_and_f32:
; AVX512: # %bb.0:
-; AVX512-NEXT: # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
-; AVX512-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512-NEXT: vcmpltps %zmm1, %zmm0, %k1
-; AVX512-NEXT: vcmpleps %zmm3, %zmm2, %k0 {%k1}
+; AVX512-NEXT: vcmpless %xmm3, %xmm2, %k0
+; AVX512-NEXT: vcmpltss %xmm1, %xmm0, %k1
+; AVX512-NEXT: kandw %k0, %k1, %k0
; AVX512-NEXT: kmovw %k0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
-; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%f1 = fcmp olt float %w, %x
%f2 = fcmp ole float %y, %z
@@ -43,8 +39,8 @@ define i1 @olt_ole_and_f32(float %w, float %x, float %y, float %z) {
define i1 @oge_oeq_or_f32(float %w, float %x, float %y, float %z) {
; SSE2-LABEL: oge_oeq_or_f32:
; SSE2: # %bb.0:
-; SSE2-NEXT: cmpeqps %xmm3, %xmm2
-; SSE2-NEXT: cmpleps %xmm0, %xmm1
+; SSE2-NEXT: cmpeqss %xmm3, %xmm2
+; SSE2-NEXT: cmpless %xmm0, %xmm1
; SSE2-NEXT: orps %xmm2, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: # kill: def $al killed $al killed $eax
@@ -52,8 +48,8 @@ define i1 @oge_oeq_or_f32(float %w, float %x, float %y, float %z) {
;
; AVX1-LABEL: oge_oeq_or_f32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vcmpeqps %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vcmpleps %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vcmpeqss %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vcmpless %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vorps %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
@@ -61,16 +57,11 @@ define i1 @oge_oeq_or_f32(float %w, float %x, float %y, float %z) {
;
; AVX512-LABEL: oge_oeq_or_f32:
; AVX512: # %bb.0:
-; AVX512-NEXT: # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
-; AVX512-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512-NEXT: vcmpeqps %zmm3, %zmm2, %k0
-; AVX512-NEXT: vcmpleps %zmm0, %zmm1, %k1
+; AVX512-NEXT: vcmpeqss %xmm3, %xmm2, %k0
+; AVX512-NEXT: vcmpless %xmm0, %xmm1, %k1
; AVX512-NEXT: korw %k0, %k1, %k0
; AVX512-NEXT: kmovw %k0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
-; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%f1 = fcmp oge float %w, %x
%f2 = fcmp oeq float %y, %z
@@ -90,8 +81,8 @@ define i1 @ord_one_xor_f32(float %w, float %x, float %y, float %z) {
;
; AVX1-LABEL: ord_one_xor_f32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vcmpneq_oqps %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vcmpordps %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vcmpneq_oqss %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vcmpordss %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vxorps %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
@@ -99,16 +90,11 @@ define i1 @ord_one_xor_f32(float %w, float %x, float %y, float %z) {
;
; AVX512-LABEL: ord_one_xor_f32:
; AVX512: # %bb.0:
-; AVX512-NEXT: # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
-; AVX512-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512-NEXT: vcmpneq_oqps %zmm3, %zmm2, %k0
-; AVX512-NEXT: vcmpordps %zmm1, %zmm0, %k1
+; AVX512-NEXT: vcmpneq_oqss %xmm3, %xmm2, %k0
+; AVX512-NEXT: vcmpordss %xmm1, %xmm0, %k1
; AVX512-NEXT: kxorw %k0, %k1, %k0
; AVX512-NEXT: kmovw %k0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
-; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%f1 = fcmp ord float %w, %x
%f2 = fcmp one float %y, %z
@@ -120,8 +106,8 @@ define i1 @ord_one_xor_f32(float %w, float %x, float %y, float %z) {
define i1 @une_oeq_xor_f32(float %w, float %x, float %y, float %z) {
; SSE2-LABEL: une_oeq_xor_f32:
; SSE2: # %bb.0:
-; SSE2-NEXT: cmpeqps %xmm3, %xmm2
-; SSE2-NEXT: cmpneqps %xmm1, %xmm0
+; SSE2-NEXT: cmpeqss %xmm3, %xmm2
+; SSE2-NEXT: cmpneqss %xmm1, %xmm0
; SSE2-NEXT: xorps %xmm2, %xmm0
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: # kill: def $al killed $al killed $eax
@@ -129,8 +115,8 @@ define i1 @une_oeq_xor_f32(float %w, float %x, float %y, float %z) {
;
; AVX1-LABEL: une_oeq_xor_f32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vcmpeqps %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vcmpneqps %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vcmpeqss %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vcmpneqss %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vxorps %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
@@ -138,16 +124,11 @@ define i1 @une_oeq_xor_f32(float %w, float %x, float %y, float %z) {
;
; AVX512-LABEL: une_oeq_xor_f32:
; AVX512: # %bb.0:
-; AVX512-NEXT: # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
-; AVX512-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512-NEXT: vcmpeqps %zmm3, %zmm2, %k0
-; AVX512-NEXT: vcmpneqps %zmm1, %zmm0, %k1
+; AVX512-NEXT: vcmpeqss %xmm3, %xmm2, %k0
+; AVX512-NEXT: vcmpneqss %xmm1, %xmm0, %k1
; AVX512-NEXT: kxorw %k0, %k1, %k0
; AVX512-NEXT: kmovw %k0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
-; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%f1 = fcmp une float %w, %x
%f2 = fcmp oeq float %y, %z
@@ -158,8 +139,8 @@ define i1 @une_oeq_xor_f32(float %w, float %x, float %y, float %z) {
define i1 @une_ugt_and_f64(double %w, double %x, double %y, double %z) {
; SSE2-LABEL: une_ugt_and_f64:
; SSE2: # %bb.0:
-; SSE2-NEXT: cmpnlepd %xmm3, %xmm2
-; SSE2-NEXT: cmpneqpd %xmm1, %xmm0
+; SSE2-NEXT: cmpnlesd %xmm3, %xmm2
+; SSE2-NEXT: cmpneqsd %xmm1, %xmm0
; SSE2-NEXT: andpd %xmm2, %xmm0
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: # kill: def $al killed $al killed $eax
@@ -167,8 +148,8 @@ define i1 @une_ugt_and_f64(double %w, double %x, double %y, double %z) {
;
; AVX1-LABEL: une_ugt_and_f64:
; AVX1: # %bb.0:
-; AVX1-NEXT: vcmpnlepd %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vcmpneqpd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vcmpnlesd %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vcmpneqsd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vandpd %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
@@ -176,15 +157,11 @@ define i1 @une_ugt_and_f64(double %w, double %x, double %y, double %z) {
;
; AVX512-LABEL: une_ugt_and_f64:
; AVX512: # %bb.0:
-; AVX512-NEXT: # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
-; AVX512-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512-NEXT: vcmpneqpd %zmm1, %zmm0, %k1
-; AVX512-NEXT: vcmpnlepd %zmm3, %zmm2, %k0 {%k1}
+; AVX512-NEXT: vcmpnlesd %xmm3, %xmm2, %k0
+; AVX512-NEXT: vcmpneqsd %xmm1, %xmm0, %k1
+; AVX512-NEXT: kandw %k0, %k1, %k0
; AVX512-NEXT: kmovw %k0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
-; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%f1 = fcmp une double %w, %x
%f2 = fcmp ugt double %y, %z
@@ -195,8 +172,8 @@ define i1 @une_ugt_and_f64(double %w, double %x, double %y, double %z) {
define i1 @ult_uge_or_f64(double %w, double %x, double %y, double %z) {
; SSE2-LABEL: ult_uge_or_f64:
; SSE2: # %bb.0:
-; SSE2-NEXT: cmpnltpd %xmm3, %xmm2
-; SSE2-NEXT: cmpnlepd %xmm0, %xmm1
+; SSE2-NEXT: cmpnltsd %xmm3, %xmm2
+; SSE2-NEXT: cmpnlesd %xmm0, %xmm1
; SSE2-NEXT: orpd %xmm2, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: # kill: def $al killed $al killed $eax
@@ -204,8 +181,8 @@ define i1 @ult_uge_or_f64(double %w, double %x, double %y, double %z) {
;
; AVX1-LABEL: ult_uge_or_f64:
; AVX1: # %bb.0:
-; AVX1-NEXT: vcmpnltpd %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vcmpnlepd %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vcmpnltsd %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vcmpnlesd %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vorpd %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
@@ -213,16 +190,11 @@ define i1 @ult_uge_or_f64(double %w, double %x, double %y, double %z) {
;
; AVX512-LABEL: ult_uge_or_f64:
; AVX512: # %bb.0:
-; AVX512-NEXT: # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
-; AVX512-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512-NEXT: vcmpnltpd %zmm3, %zmm2, %k0
-; AVX512-NEXT: vcmpnlepd %zmm0, %zmm1, %k1
+; AVX512-NEXT: vcmpnltsd %xmm3, %xmm2, %k0
+; AVX512-NEXT: vcmpnlesd %xmm0, %xmm1, %k1
; AVX512-NEXT: korw %k0, %k1, %k0
; AVX512-NEXT: kmovw %k0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
-; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%f1 = fcmp ult double %w, %x
%f2 = fcmp uge double %y, %z
@@ -233,8 +205,8 @@ define i1 @ult_uge_or_f64(double %w, double %x, double %y, double %z) {
define i1 @une_uno_xor_f64(double %w, double %x, double %y, double %z) {
; SSE2-LABEL: une_uno_xor_f64:
; SSE2: # %bb.0:
-; SSE2-NEXT: cmpunordpd %xmm3, %xmm2
-; SSE2-NEXT: cmpneqpd %xmm1, %xmm0
+; SSE2-NEXT: cmpunordsd %xmm3, %xmm2
+; SSE2-NEXT: cmpneqsd %xmm1, %xmm0
; SSE2-NEXT: xorpd %xmm2, %xmm0
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: # kill: def $al killed $al killed $eax
@@ -242,8 +214,8 @@ define i1 @une_uno_xor_f64(double %w, double %x, double %y, double %z) {
;
; AVX1-LABEL: une_uno_xor_f64:
; AVX1: # %bb.0:
-; AVX1-NEXT: vcmpunordpd %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vcmpneqpd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vcmpunordsd %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vcmpneqsd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vxorpd %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
@@ -251,16 +223,11 @@ define i1 @une_uno_xor_f64(double %w, double %x, double %y, double %z) {
;
; AVX512-LABEL: une_uno_xor_f64:
; AVX512: # %bb.0:
-; AVX512-NEXT: # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
-; AVX512-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512-NEXT: vcmpunordpd %zmm3, %zmm2, %k0
-; AVX512-NEXT: vcmpneqpd %zmm1, %zmm0, %k1
+; AVX512-NEXT: vcmpunordsd %xmm3, %xmm2, %k0
+; AVX512-NEXT: vcmpneqsd %xmm1, %xmm0, %k1
; AVX512-NEXT: kxorw %k0, %k1, %k0
; AVX512-NEXT: kmovw %k0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
-; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%f1 = fcmp une double %w, %x
%f2 = fcmp uno double %y, %z
@@ -371,8 +338,8 @@ define i1 @f32cmp3(float %x, float %y, float %z, float %w) {
; SSE2: # %bb.0:
; SSE2-NEXT: xorps %xmm4, %xmm4
; SSE2-NEXT: xorps %xmm5, %xmm5
-; SSE2-NEXT: cmpltps %xmm1, %xmm5
-; SSE2-NEXT: cmpltps %xmm0, %xmm4
+; SSE2-NEXT: cmpltss %xmm1, %xmm5
+; SSE2-NEXT: cmpltss %xmm0, %xmm4
; SSE2-NEXT: orps %xmm5, %xmm4
; SSE2-NEXT: movd %xmm4, %ecx
; SSE2-NEXT: ucomiss %xmm2, %xmm3
@@ -383,8 +350,8 @@ define i1 @f32cmp3(float %x, float %y, float %z, float %w) {
; AVX1-LABEL: f32cmp3:
; AVX1: # %bb.0:
; AVX1-NEXT: vxorps %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vcmpltps %xmm1, %xmm4, %xmm1
-; AVX1-NEXT: vcmpltps %xmm0, %xmm4, %xmm0
+; AVX1-NEXT: vcmpltss %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vcmpltss %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %ecx
; AVX1-NEXT: vucomiss %xmm2, %xmm3
@@ -394,17 +361,14 @@ define i1 @f32cmp3(float %x, float %y, float %z, float %w) {
;
; AVX512-LABEL: f32cmp3:
; AVX512: # %bb.0:
-; AVX512-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512-NEXT: vxorps %xmm4, %xmm4, %xmm4
-; AVX512-NEXT: vcmpltps %zmm1, %zmm4, %k0
-; AVX512-NEXT: vcmpltps %zmm0, %zmm4, %k1
+; AVX512-NEXT: vcmpltss %xmm1, %xmm4, %k0
+; AVX512-NEXT: vcmpltss %xmm0, %xmm4, %k1
; AVX512-NEXT: korw %k0, %k1, %k0
; AVX512-NEXT: kmovw %k0, %ecx
; AVX512-NEXT: vucomiss %xmm2, %xmm3
; AVX512-NEXT: seta %al
; AVX512-NEXT: xorb %cl, %al
-; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%cmpx = fcmp ogt float %x, 0.0
%cmpy = fcmp ogt float %y, 0.0
@@ -425,8 +389,8 @@ define i1 @PR140534(i32 %a0, i32 %a1, i32 %a2) {
; SSE2-NEXT: cvtsi2sd %rax, %xmm2
; SSE2-NEXT: mulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: movapd %xmm1, %xmm3
-; SSE2-NEXT: cmpltpd %xmm2, %xmm3
-; SSE2-NEXT: cmpltpd %xmm0, %xmm1
+; SSE2-NEXT: cmpltsd %xmm2, %xmm3
+; SSE2-NEXT: cmpltsd %xmm0, %xmm1
; SSE2-NEXT: orpd %xmm3, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: # kill: def $al killed $al killed $eax
@@ -441,8 +405,8 @@ define i1 @PR140534(i32 %a0, i32 %a1, i32 %a2) {
; AVX1-NEXT: movl %edx, %eax
; AVX1-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
; AVX1-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vcmpltpd %xmm2, %xmm1, %xmm2
-; AVX1-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vcmpltsd %xmm2, %xmm1, %xmm2
+; AVX1-NEXT: vcmpltsd %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vorpd %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
@@ -454,12 +418,11 @@ define i1 @PR140534(i32 %a0, i32 %a1, i32 %a2) {
; AVX512-NEXT: vcvtusi2sd %esi, %xmm1, %xmm1
; AVX512-NEXT: vcvtusi2sd %edx, %xmm2, %xmm2
; AVX512-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512-NEXT: vcmpltpd %zmm2, %zmm1, %k0
-; AVX512-NEXT: vcmpltpd %zmm0, %zmm1, %k1
+; AVX512-NEXT: vcmpltsd %xmm2, %xmm1, %k0
+; AVX512-NEXT: vcmpltsd %xmm0, %xmm1, %k1
; AVX512-NEXT: korw %k0, %k1, %k0
; AVX512-NEXT: kmovw %k0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
-; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%conv0 = uitofp i32 %a0 to double
%conv1 = uitofp i32 %a1 to double
diff --git a/llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll b/llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll
index d9d5e2846ed0f..43bac05988e29 100644
--- a/llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll
+++ b/llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll
@@ -322,8 +322,8 @@ define i32 @test_zext_cmp11(double %a, double %b) "no-nans-fp-math"="true" {
; ALL-LABEL: test_zext_cmp11:
; ALL: # %bb.0: # %entry
; ALL-NEXT: vxorpd %xmm2, %xmm2, %xmm2
-; ALL-NEXT: vcmpeqpd %xmm2, %xmm1, %xmm1
-; ALL-NEXT: vcmpeqpd %xmm2, %xmm0, %xmm0
+; ALL-NEXT: vcmpeqsd %xmm2, %xmm1, %xmm1
+; ALL-NEXT: vcmpeqsd %xmm2, %xmm0, %xmm0
; ALL-NEXT: vorpd %xmm1, %xmm0, %xmm0
; ALL-NEXT: vmovd %xmm0, %eax
; ALL-NEXT: andl $1, %eax
diff --git a/llvm/test/CodeGen/X86/pr40539.ll b/llvm/test/CodeGen/X86/pr40539.ll
index 56d80a025fa08..a920efbec59ea 100644
--- a/llvm/test/CodeGen/X86/pr40539.ll
+++ b/llvm/test/CodeGen/X86/pr40539.ll
@@ -40,20 +40,19 @@ define zeroext i1 @_Z8test_cosv() {
; CHECK-NEXT: subl $8, %esp
; CHECK-NEXT: .cfi_def_cfa_offset 12
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT: movss {{.*#+}} xmm2 = [8.70000004E-1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: movss {{.*#+}} xmm1 = [8.60000014E-1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT: divss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; CHECK-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
-; CHECK-NEXT: movss {{.*#+}} xmm0 = [8.60000014E-1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT: flds {{[0-9]+}}(%esp)
; CHECK-NEXT: #APP
; CHECK-NEXT: fcos
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: fstps (%esp)
-; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT: cmpleps %xmm1, %xmm0
-; CHECK-NEXT: cmpleps %xmm2, %xmm1
-; CHECK-NEXT: andps %xmm0, %xmm1
-; CHECK-NEXT: movd %xmm1, %eax
+; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: cmpless %xmm0, %xmm1
+; CHECK-NEXT: cmpless {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; CHECK-NEXT: andps %xmm1, %xmm0
+; CHECK-NEXT: movd %xmm0, %eax
; CHECK-NEXT: andb $1, %al
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: addl $8, %esp
diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll b/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll
index 060bd1764d3c4..179790c46f33c 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll
@@ -426,18 +426,17 @@ define half @test_v2f16(<2 x half> %a0) nounwind {
; AVX512VL-LABEL: test_v2f16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512VL-NEXT: vcvtph2ps %xmm0, %ymm2
-; AVX512VL-NEXT: vcvtph2ps %xmm1, %ymm3
-; AVX512VL-NEXT: vcmpltps %ymm2, %ymm3, %k1
+; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm2
+; AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm3
+; AVX512VL-NEXT: vcmpltss %xmm2, %xmm3, %k1
; AVX512VL-NEXT: vmovdqu16 %xmm0, %xmm1 {%k1}
; AVX512VL-NEXT: vmovdqa %xmm1, %xmm0
-; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; AVX512FP16-LABEL: test_v2f16:
; AVX512FP16: # %bb.0:
; AVX512FP16-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512FP16-NEXT: vcmpltph %xmm0, %xmm1, %k1
+; AVX512FP16-NEXT: vcmpltsh %xmm0, %xmm1, %k1
; AVX512FP16-NEXT: vmovsh %xmm0, %xmm0, %xmm1 {%k1}
; AVX512FP16-NEXT: vmovaps %xmm1, %xmm0
; AVX512FP16-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll b/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll
index 4d6daf3fb77f0..465988760d44a 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll
@@ -426,18 +426,17 @@ define half @test_v2f16(<2 x half> %a0) nounwind {
; AVX512VL-LABEL: test_v2f16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512VL-NEXT: vcvtph2ps %xmm0, %ymm2
-; AVX512VL-NEXT: vcvtph2ps %xmm1, %ymm3
-; AVX512VL-NEXT: vcmpltps %ymm3, %ymm2, %k1
+; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm2
+; AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm3
+; AVX512VL-NEXT: vcmpltss %xmm3, %xmm2, %k1
; AVX512VL-NEXT: vmovdqu16 %xmm0, %xmm1 {%k1}
; AVX512VL-NEXT: vmovdqa %xmm1, %xmm0
-; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; AVX512FP16-LABEL: test_v2f16:
; AVX512FP16: # %bb.0:
; AVX512FP16-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX512FP16-NEXT: vcmpltph %xmm1, %xmm0, %k1
+; AVX512FP16-NEXT: vcmpltsh %xmm1, %xmm0, %k1
; AVX512FP16-NEXT: vmovsh %xmm0, %xmm0, %xmm1 {%k1}
; AVX512FP16-NEXT: vmovaps %xmm1, %xmm0
; AVX512FP16-NEXT: retq