[llvm] 50cd2ff - [X86] Avoid usage constant -1 for fminimum/fmaximum lowering

Serguei Katkov via llvm-commits llvm-commits at lists.llvm.org
Fri May 5 02:24:49 PDT 2023


Author: Serguei Katkov
Date: 2023-05-05T16:24:33+07:00
New Revision: 50cd2ff7bc5af557d23fb306dddaff794e22bb01

URL: https://github.com/llvm/llvm-project/commit/50cd2ff7bc5af557d23fb306dddaff794e22bb01
DIFF: https://github.com/llvm/llvm-project/commit/50cd2ff7bc5af557d23fb306dddaff794e22bb01.diff

LOG: [X86] Avoid usage constant -1 for fminimum/fmaximum lowering

Instead of comparing the value for equality with the preferred zero, we can
just check the sign of the value: if the sign bit is set, we put this value as
the second operand for minimum and as the first operand for maximum.
In this case FMIN/FMAX will choose the right result when comparing 0.f and -0.f.

This allows us to:
1. avoid loading a big 64-bit constant for fminimum;
2. check only the high part of the value for double on non-64-bit platforms;
3. check the sign with a test against zero, which has a smaller instruction encoding.

Additionally, if either of the values is known to be non-zero,
we do not need to care about the 0.f vs. -0.f comparison at all.

Reviewed By: e-kud
Differential Revision: https://reviews.llvm.org/D149812

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/avx512fp16-fminimum-fmaximum.ll
    llvm/test/CodeGen/X86/extractelement-fp.ll
    llvm/test/CodeGen/X86/fminimum-fmaximum.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2706b0289987..0b1b9768f32e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -30276,13 +30276,15 @@ static SDValue LowerFMINIMUM_FMAXIMUM(SDValue Op, const X86Subtarget &Subtarget,
   bool IsXNeverNaN = DAG.isKnownNeverNaN(X);
   bool IsYNeverNaN = DAG.isKnownNeverNaN(Y);
   bool IgnoreSignedZero = DAG.getTarget().Options.NoSignedZerosFPMath ||
-                          Op->getFlags().hasNoSignedZeros();
+                          Op->getFlags().hasNoSignedZeros() ||
+                          DAG.isKnownNeverZeroFloat(X) ||
+                          DAG.isKnownNeverZeroFloat(Y);
   SDValue NewX, NewY;
-  if (IgnoreSignedZero || IsPreferredZero(Y) || DAG.isKnownNeverZeroFloat(X)) {
+  if (IgnoreSignedZero || IsPreferredZero(Y)) {
     // Operands are already in right order or order does not matter.
     NewX = X;
     NewY = Y;
-  } else if (IsPreferredZero(X) || DAG.isKnownNeverZeroFloat(Y)) {
+  } else if (IsPreferredZero(X)) {
     NewX = Y;
     NewY = X;
   } else if ((VT == MVT::f16 || Subtarget.hasDQI()) &&
@@ -30307,34 +30309,32 @@ static SDValue LowerFMINIMUM_FMAXIMUM(SDValue Op, const X86Subtarget &Subtarget,
     NewY = DAG.getSelect(DL, VT, NeedSwap, X, Y);
     return DAG.getNode(MinMaxOp, DL, VT, NewX, NewY, Op->getFlags());
   } else {
-    SDValue IsXZero;
+    SDValue IsXSigned;
     if (Subtarget.is64Bit() || VT != MVT::f64) {
       SDValue XInt = DAG.getNode(ISD::BITCAST, DL, IVT, X);
-      SDValue ZeroCst = DAG.getConstant(PreferredZero, DL, IVT);
-      IsXZero = DAG.getSetCC(DL, SetCCType, XInt, ZeroCst, ISD::SETEQ);
+      SDValue ZeroCst = DAG.getConstant(0, DL, IVT);
+      IsXSigned = DAG.getSetCC(DL, SetCCType, XInt, ZeroCst, ISD::SETLT);
     } else {
       assert(VT == MVT::f64);
       SDValue Ins = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2f64,
                                 DAG.getConstantFP(0, DL, MVT::v2f64), X,
                                 DAG.getIntPtrConstant(0, DL));
       SDValue VX = DAG.getNode(ISD::BITCAST, DL, MVT::v4f32, Ins);
-      SDValue Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VX,
-                               DAG.getIntPtrConstant(0, DL));
-      Lo = DAG.getBitcast(MVT::i32, Lo);
       SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VX,
                                DAG.getIntPtrConstant(1, DL));
       Hi = DAG.getBitcast(MVT::i32, Hi);
-      PreferredZero = APInt::getZero(SizeInBits / 2);
-      if (MinMaxOp == X86ISD::FMIN)
-        PreferredZero.setSignBit();
-      IsXZero = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
-                            DAG.getConstant(PreferredZero, DL, MVT::i32));
-      IsXZero = DAG.getNode(ISD::OR, DL, MVT::i32, Lo, IsXZero);
-      IsXZero = DAG.getSetCC(DL, SetCCType, IsXZero,
-                             DAG.getConstant(0, DL, MVT::i32), ISD::SETEQ);
-    }
-    NewX = DAG.getSelect(DL, VT, IsXZero, Y, X);
-    NewY = DAG.getSelect(DL, VT, IsXZero, X, Y);
+      SDValue ZeroCst = DAG.getConstant(0, DL, MVT::i32);
+      EVT SetCCType = TLI.getSetCCResultType(DAG.getDataLayout(),
+                                             *DAG.getContext(), MVT::i32);
+      IsXSigned = DAG.getSetCC(DL, SetCCType, Hi, ZeroCst, ISD::SETLT);
+    }
+    if (MinMaxOp == X86ISD::FMAX) {
+      NewX = DAG.getSelect(DL, VT, IsXSigned, X, Y);
+      NewY = DAG.getSelect(DL, VT, IsXSigned, Y, X);
+    } else {
+      NewX = DAG.getSelect(DL, VT, IsXSigned, Y, X);
+      NewY = DAG.getSelect(DL, VT, IsXSigned, X, Y);
+    }
   }
 
   bool IgnoreNaN = DAG.getTarget().Options.NoNaNsFPMath ||

diff  --git a/llvm/test/CodeGen/X86/avx512fp16-fminimum-fmaximum.ll b/llvm/test/CodeGen/X86/avx512fp16-fminimum-fmaximum.ll
index cc163954dc86..1782e5252870 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-fminimum-fmaximum.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-fminimum-fmaximum.ll
@@ -10,9 +10,8 @@ define half @test_fminimum(half %x, half %y) {
 ; CHECK-LABEL: test_fminimum:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovw %xmm0, %eax
-; CHECK-NEXT:    movzwl %ax, %eax
-; CHECK-NEXT:    cmpl $32768, %eax # imm = 0x8000
-; CHECK-NEXT:    sete %al
+; CHECK-NEXT:    testw %ax, %ax
+; CHECK-NEXT:    sets %al
 ; CHECK-NEXT:    kmovd %eax, %k1
 ; CHECK-NEXT:    vmovaps %xmm1, %xmm2
 ; CHECK-NEXT:    vmovsh %xmm0, %xmm0, %xmm2 {%k1}
@@ -119,15 +118,14 @@ define half @test_fmaximum(half %x, half %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vmovw %xmm0, %eax
 ; CHECK-NEXT:    testw %ax, %ax
-; CHECK-NEXT:    sete %al
+; CHECK-NEXT:    sets %al
 ; CHECK-NEXT:    kmovd %eax, %k1
-; CHECK-NEXT:    vmovaps %xmm1, %xmm2
-; CHECK-NEXT:    vmovsh %xmm0, %xmm0, %xmm2 {%k1}
-; CHECK-NEXT:    vmovsh %xmm1, %xmm0, %xmm0 {%k1}
-; CHECK-NEXT:    vmaxsh %xmm2, %xmm0, %xmm1
-; CHECK-NEXT:    vcmpunordsh %xmm0, %xmm0, %k1
+; CHECK-NEXT:    vmovaps %xmm0, %xmm2
+; CHECK-NEXT:    vmovsh %xmm1, %xmm0, %xmm2 {%k1}
 ; CHECK-NEXT:    vmovsh %xmm0, %xmm0, %xmm1 {%k1}
-; CHECK-NEXT:    vmovaps %xmm1, %xmm0
+; CHECK-NEXT:    vmaxsh %xmm2, %xmm1, %xmm0
+; CHECK-NEXT:    vcmpunordsh %xmm1, %xmm1, %k1
+; CHECK-NEXT:    vmovsh %xmm1, %xmm0, %xmm0 {%k1}
 ; CHECK-NEXT:    retq
   %r = call half @llvm.maximum.f16(half %x, half %y)
   ret half %r

diff  --git a/llvm/test/CodeGen/X86/extractelement-fp.ll b/llvm/test/CodeGen/X86/extractelement-fp.ll
index 407da784dd79..5bc6d00022e2 100644
--- a/llvm/test/CodeGen/X86/extractelement-fp.ll
+++ b/llvm/test/CodeGen/X86/extractelement-fp.ll
@@ -677,35 +677,35 @@ define float @fmaximum_v4f32(<4 x float> %x, <4 x float> %y) nounwind {
 ; X64:       # %bb.0:
 ; X64-NEXT:    vmovd %xmm0, %eax
 ; X64-NEXT:    testl %eax, %eax
-; X64-NEXT:    je .LBB30_1
+; X64-NEXT:    js .LBB30_1
 ; X64-NEXT:  # %bb.2:
-; X64-NEXT:    vmovdqa %xmm1, %xmm2
+; X64-NEXT:    vmovdqa %xmm0, %xmm2
 ; X64-NEXT:    jmp .LBB30_3
 ; X64-NEXT:  .LBB30_1:
-; X64-NEXT:    vmovdqa %xmm0, %xmm2
-; X64-NEXT:    vmovdqa %xmm1, %xmm0
+; X64-NEXT:    vmovdqa %xmm1, %xmm2
+; X64-NEXT:    vmovdqa %xmm0, %xmm1
 ; X64-NEXT:  .LBB30_3:
-; X64-NEXT:    vmaxss %xmm2, %xmm0, %xmm1
-; X64-NEXT:    vcmpunordss %xmm0, %xmm0, %xmm2
-; X64-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
+; X64-NEXT:    vmaxss %xmm2, %xmm1, %xmm0
+; X64-NEXT:    vcmpunordss %xmm1, %xmm1, %xmm2
+; X64-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm0
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: fmaximum_v4f32:
 ; X86:       # %bb.0:
 ; X86-NEXT:    vmovd %xmm0, %eax
 ; X86-NEXT:    testl %eax, %eax
-; X86-NEXT:    je .LBB30_1
+; X86-NEXT:    js .LBB30_1
 ; X86-NEXT:  # %bb.2:
-; X86-NEXT:    vmovdqa %xmm1, %xmm2
+; X86-NEXT:    vmovdqa %xmm0, %xmm2
 ; X86-NEXT:    jmp .LBB30_3
 ; X86-NEXT:  .LBB30_1:
-; X86-NEXT:    vmovdqa %xmm0, %xmm2
-; X86-NEXT:    vmovdqa %xmm1, %xmm0
+; X86-NEXT:    vmovdqa %xmm1, %xmm2
+; X86-NEXT:    vmovdqa %xmm0, %xmm1
 ; X86-NEXT:  .LBB30_3:
 ; X86-NEXT:    pushl %eax
-; X86-NEXT:    vmaxss %xmm2, %xmm0, %xmm1
-; X86-NEXT:    vcmpunordss %xmm0, %xmm0, %xmm2
-; X86-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
+; X86-NEXT:    vmaxss %xmm2, %xmm1, %xmm0
+; X86-NEXT:    vcmpunordss %xmm1, %xmm1, %xmm2
+; X86-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm0
 ; X86-NEXT:    vmovss %xmm0, (%esp)
 ; X86-NEXT:    flds (%esp)
 ; X86-NEXT:    popl %eax
@@ -720,40 +720,39 @@ define double @fmaximum_v4f64(<4 x double> %x, <4 x double> %y) nounwind {
 ; X64:       # %bb.0:
 ; X64-NEXT:    vmovq %xmm0, %rax
 ; X64-NEXT:    testq %rax, %rax
-; X64-NEXT:    je .LBB31_1
+; X64-NEXT:    js .LBB31_1
 ; X64-NEXT:  # %bb.2:
-; X64-NEXT:    vmovdqa %xmm1, %xmm2
+; X64-NEXT:    vmovdqa %xmm0, %xmm2
 ; X64-NEXT:    jmp .LBB31_3
 ; X64-NEXT:  .LBB31_1:
-; X64-NEXT:    vmovdqa %xmm0, %xmm2
-; X64-NEXT:    vmovdqa %xmm1, %xmm0
+; X64-NEXT:    vmovdqa %xmm1, %xmm2
+; X64-NEXT:    vmovdqa %xmm0, %xmm1
 ; X64-NEXT:  .LBB31_3:
-; X64-NEXT:    vmaxsd %xmm2, %xmm0, %xmm1
-; X64-NEXT:    vcmpunordsd %xmm0, %xmm0, %xmm2
-; X64-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; X64-NEXT:    vmaxsd %xmm2, %xmm1, %xmm0
+; X64-NEXT:    vcmpunordsd %xmm1, %xmm1, %xmm2
+; X64-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
 ; X64-NEXT:    vzeroupper
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: fmaximum_v4f64:
 ; X86:       # %bb.0:
-; X86-NEXT:    vpextrd $1, %xmm0, %eax
-; X86-NEXT:    vmovd %xmm0, %ecx
-; X86-NEXT:    orl %eax, %ecx
-; X86-NEXT:    je .LBB31_1
+; X86-NEXT:    vextractps $1, %xmm0, %eax
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    js .LBB31_1
 ; X86-NEXT:  # %bb.2:
-; X86-NEXT:    vmovdqa %xmm1, %xmm2
+; X86-NEXT:    vmovapd %xmm0, %xmm2
 ; X86-NEXT:    jmp .LBB31_3
 ; X86-NEXT:  .LBB31_1:
-; X86-NEXT:    vmovdqa %xmm0, %xmm2
-; X86-NEXT:    vmovdqa %xmm1, %xmm0
+; X86-NEXT:    vmovapd %xmm1, %xmm2
+; X86-NEXT:    vmovapd %xmm0, %xmm1
 ; X86-NEXT:  .LBB31_3:
 ; X86-NEXT:    pushl %ebp
 ; X86-NEXT:    movl %esp, %ebp
 ; X86-NEXT:    andl $-8, %esp
 ; X86-NEXT:    subl $8, %esp
-; X86-NEXT:    vmaxsd %xmm2, %xmm0, %xmm1
-; X86-NEXT:    vcmpunordsd %xmm0, %xmm0, %xmm2
-; X86-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; X86-NEXT:    vmaxsd %xmm2, %xmm1, %xmm0
+; X86-NEXT:    vcmpunordsd %xmm1, %xmm1, %xmm2
+; X86-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
 ; X86-NEXT:    vmovlpd %xmm0, (%esp)
 ; X86-NEXT:    fldl (%esp)
 ; X86-NEXT:    movl %ebp, %esp
@@ -769,8 +768,8 @@ define float @fminimum_v4f32(<4 x float> %x, <4 x float> %y) nounwind {
 ; X64-LABEL: fminimum_v4f32:
 ; X64:       # %bb.0:
 ; X64-NEXT:    vmovd %xmm0, %eax
-; X64-NEXT:    cmpl $-2147483648, %eax # imm = 0x80000000
-; X64-NEXT:    je .LBB32_1
+; X64-NEXT:    testl %eax, %eax
+; X64-NEXT:    js .LBB32_1
 ; X64-NEXT:  # %bb.2:
 ; X64-NEXT:    vmovdqa %xmm1, %xmm2
 ; X64-NEXT:    jmp .LBB32_3
@@ -786,8 +785,8 @@ define float @fminimum_v4f32(<4 x float> %x, <4 x float> %y) nounwind {
 ; X86-LABEL: fminimum_v4f32:
 ; X86:       # %bb.0:
 ; X86-NEXT:    vmovd %xmm0, %eax
-; X86-NEXT:    cmpl $-2147483648, %eax # imm = 0x80000000
-; X86-NEXT:    je .LBB32_1
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    js .LBB32_1
 ; X86-NEXT:  # %bb.2:
 ; X86-NEXT:    vmovdqa %xmm1, %xmm2
 ; X86-NEXT:    jmp .LBB32_3
@@ -812,9 +811,8 @@ define double @fminimum_v4f64(<4 x double> %x, <4 x double> %y) nounwind {
 ; X64-LABEL: fminimum_v4f64:
 ; X64:       # %bb.0:
 ; X64-NEXT:    vmovq %xmm0, %rax
-; X64-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
-; X64-NEXT:    cmpq %rcx, %rax
-; X64-NEXT:    je .LBB33_1
+; X64-NEXT:    testq %rax, %rax
+; X64-NEXT:    js .LBB33_1
 ; X64-NEXT:  # %bb.2:
 ; X64-NEXT:    vmovdqa %xmm1, %xmm2
 ; X64-NEXT:    jmp .LBB33_3
@@ -830,17 +828,15 @@ define double @fminimum_v4f64(<4 x double> %x, <4 x double> %y) nounwind {
 ;
 ; X86-LABEL: fminimum_v4f64:
 ; X86:       # %bb.0:
-; X86-NEXT:    vmovd %xmm0, %eax
-; X86-NEXT:    vpextrd $1, %xmm0, %ecx
-; X86-NEXT:    addl $-2147483648, %ecx # imm = 0x80000000
-; X86-NEXT:    orl %eax, %ecx
-; X86-NEXT:    je .LBB33_1
+; X86-NEXT:    vextractps $1, %xmm0, %eax
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    js .LBB33_1
 ; X86-NEXT:  # %bb.2:
-; X86-NEXT:    vmovdqa %xmm1, %xmm2
+; X86-NEXT:    vmovapd %xmm1, %xmm2
 ; X86-NEXT:    jmp .LBB33_3
 ; X86-NEXT:  .LBB33_1:
-; X86-NEXT:    vmovdqa %xmm0, %xmm2
-; X86-NEXT:    vmovdqa %xmm1, %xmm0
+; X86-NEXT:    vmovapd %xmm0, %xmm2
+; X86-NEXT:    vmovapd %xmm1, %xmm0
 ; X86-NEXT:  .LBB33_3:
 ; X86-NEXT:    pushl %ebp
 ; X86-NEXT:    movl %esp, %ebp

diff  --git a/llvm/test/CodeGen/X86/fminimum-fmaximum.ll b/llvm/test/CodeGen/X86/fminimum-fmaximum.ll
index e62a9cba6d95..94e3afdbbec6 100644
--- a/llvm/test/CodeGen/X86/fminimum-fmaximum.ll
+++ b/llvm/test/CodeGen/X86/fminimum-fmaximum.ll
@@ -19,75 +19,74 @@ declare <4 x float> @llvm.maximum.v4f32(<4 x float>, <4 x float>)
 define float @test_fmaximum(float %x, float %y) nounwind {
 ; SSE2-LABEL: test_fmaximum:
 ; SSE2:       # %bb.0:
+; SSE2-NEXT:    movdqa %xmm0, %xmm2
 ; SSE2-NEXT:    movd %xmm0, %eax
 ; SSE2-NEXT:    testl %eax, %eax
-; SSE2-NEXT:    movdqa %xmm1, %xmm3
-; SSE2-NEXT:    je .LBB0_2
-; SSE2-NEXT:  # %bb.1:
 ; SSE2-NEXT:    movdqa %xmm0, %xmm3
+; SSE2-NEXT:    js .LBB0_2
+; SSE2-NEXT:  # %bb.1:
+; SSE2-NEXT:    movdqa %xmm1, %xmm3
 ; SSE2-NEXT:  .LBB0_2:
-; SSE2-NEXT:    movdqa %xmm3, %xmm2
-; SSE2-NEXT:    cmpunordss %xmm3, %xmm2
-; SSE2-NEXT:    movaps %xmm2, %xmm4
+; SSE2-NEXT:    movdqa %xmm3, %xmm0
+; SSE2-NEXT:    cmpunordss %xmm3, %xmm0
+; SSE2-NEXT:    movaps %xmm0, %xmm4
 ; SSE2-NEXT:    andps %xmm3, %xmm4
-; SSE2-NEXT:    je .LBB0_4
+; SSE2-NEXT:    js .LBB0_4
 ; SSE2-NEXT:  # %bb.3:
-; SSE2-NEXT:    movdqa %xmm1, %xmm0
+; SSE2-NEXT:    movdqa %xmm2, %xmm1
 ; SSE2-NEXT:  .LBB0_4:
-; SSE2-NEXT:    maxss %xmm0, %xmm3
-; SSE2-NEXT:    andnps %xmm3, %xmm2
-; SSE2-NEXT:    orps %xmm4, %xmm2
-; SSE2-NEXT:    movaps %xmm2, %xmm0
+; SSE2-NEXT:    maxss %xmm1, %xmm3
+; SSE2-NEXT:    andnps %xmm3, %xmm0
+; SSE2-NEXT:    orps %xmm4, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; AVX1-LABEL: test_fmaximum:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vmovd %xmm0, %eax
 ; AVX1-NEXT:    testl %eax, %eax
-; AVX1-NEXT:    je .LBB0_1
+; AVX1-NEXT:    js .LBB0_1
 ; AVX1-NEXT:  # %bb.2:
-; AVX1-NEXT:    vmovdqa %xmm1, %xmm2
+; AVX1-NEXT:    vmovdqa %xmm0, %xmm2
 ; AVX1-NEXT:    jmp .LBB0_3
 ; AVX1-NEXT:  .LBB0_1:
-; AVX1-NEXT:    vmovdqa %xmm0, %xmm2
-; AVX1-NEXT:    vmovdqa %xmm1, %xmm0
+; AVX1-NEXT:    vmovdqa %xmm1, %xmm2
+; AVX1-NEXT:    vmovdqa %xmm0, %xmm1
 ; AVX1-NEXT:  .LBB0_3:
-; AVX1-NEXT:    vmaxss %xmm2, %xmm0, %xmm1
-; AVX1-NEXT:    vcmpunordss %xmm0, %xmm0, %xmm2
-; AVX1-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vmaxss %xmm2, %xmm1, %xmm0
+; AVX1-NEXT:    vcmpunordss %xmm1, %xmm1, %xmm2
+; AVX1-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX512-LABEL: test_fmaximum:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vmovd %xmm0, %eax
 ; AVX512-NEXT:    testl %eax, %eax
-; AVX512-NEXT:    sete %al
+; AVX512-NEXT:    sets %al
 ; AVX512-NEXT:    kmovw %eax, %k1
-; AVX512-NEXT:    vmovaps %xmm1, %xmm2
-; AVX512-NEXT:    vmovss %xmm0, %xmm2, %xmm2 {%k1}
-; AVX512-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1}
-; AVX512-NEXT:    vmaxss %xmm2, %xmm0, %xmm1
-; AVX512-NEXT:    vcmpunordss %xmm0, %xmm0, %k1
+; AVX512-NEXT:    vmovdqa %xmm0, %xmm2
+; AVX512-NEXT:    vmovss %xmm1, %xmm2, %xmm2 {%k1}
 ; AVX512-NEXT:    vmovss %xmm0, %xmm1, %xmm1 {%k1}
-; AVX512-NEXT:    vmovaps %xmm1, %xmm0
+; AVX512-NEXT:    vmaxss %xmm2, %xmm1, %xmm0
+; AVX512-NEXT:    vcmpunordss %xmm1, %xmm1, %k1
+; AVX512-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1}
 ; AVX512-NEXT:    retq
 ;
 ; X86-LABEL: test_fmaximum:
 ; X86:       # %bb.0:
 ; X86-NEXT:    pushl %eax
-; X86-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; X86-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-NEXT:    vmovd %xmm0, %eax
+; X86-NEXT:    vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X86-NEXT:    vmovd %xmm2, %eax
 ; X86-NEXT:    testl %eax, %eax
-; X86-NEXT:    je .LBB0_1
+; X86-NEXT:    js .LBB0_1
 ; X86-NEXT:  # %bb.2:
-; X86-NEXT:    vmovdqa %xmm1, %xmm2
+; X86-NEXT:    vmovdqa %xmm2, %xmm1
 ; X86-NEXT:    jmp .LBB0_3
 ; X86-NEXT:  .LBB0_1:
-; X86-NEXT:    vmovdqa %xmm0, %xmm2
-; X86-NEXT:    vmovdqa %xmm1, %xmm0
+; X86-NEXT:    vmovdqa %xmm0, %xmm1
+; X86-NEXT:    vmovdqa %xmm2, %xmm0
 ; X86-NEXT:  .LBB0_3:
-; X86-NEXT:    vmaxss %xmm2, %xmm0, %xmm1
+; X86-NEXT:    vmaxss %xmm1, %xmm0, %xmm1
 ; X86-NEXT:    vcmpunordss %xmm0, %xmm0, %xmm2
 ; X86-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
 ; X86-NEXT:    vmovss %xmm0, (%esp)
@@ -201,11 +200,11 @@ define float @test_fmaximum_nnan(float %x, float %y) nounwind {
 ; SSE2-LABEL: test_fmaximum_nnan:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    movaps %xmm0, %xmm2
-; SSE2-NEXT:    addss %xmm1, %xmm0
-; SSE2-NEXT:    subss %xmm1, %xmm2
-; SSE2-NEXT:    movd %xmm0, %eax
+; SSE2-NEXT:    addss %xmm1, %xmm2
+; SSE2-NEXT:    subss %xmm1, %xmm0
+; SSE2-NEXT:    movd %xmm2, %eax
 ; SSE2-NEXT:    testl %eax, %eax
-; SSE2-NEXT:    je .LBB4_1
+; SSE2-NEXT:    js .LBB4_1
 ; SSE2-NEXT:  # %bb.2:
 ; SSE2-NEXT:    maxss %xmm2, %xmm0
 ; SSE2-NEXT:    retq
@@ -218,16 +217,16 @@ define float @test_fmaximum_nnan(float %x, float %y) nounwind {
 ; AVX1-LABEL: test_fmaximum_nnan:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vaddss %xmm1, %xmm0, %xmm2
-; AVX1-NEXT:    vsubss %xmm1, %xmm0, %xmm1
+; AVX1-NEXT:    vsubss %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vmovd %xmm2, %eax
 ; AVX1-NEXT:    testl %eax, %eax
-; AVX1-NEXT:    je .LBB4_1
+; AVX1-NEXT:    js .LBB4_1
 ; AVX1-NEXT:  # %bb.2:
-; AVX1-NEXT:    vmaxss %xmm1, %xmm2, %xmm0
+; AVX1-NEXT:    vmaxss %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    retq
 ; AVX1-NEXT:  .LBB4_1:
-; AVX1-NEXT:    vmovaps %xmm2, %xmm0
-; AVX1-NEXT:    vmaxss %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vmovaps %xmm0, %xmm1
+; AVX1-NEXT:    vmaxss %xmm1, %xmm2, %xmm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX512F-LABEL: test_fmaximum_nnan:
@@ -236,12 +235,12 @@ define float @test_fmaximum_nnan(float %x, float %y) nounwind {
 ; AVX512F-NEXT:    vsubss %xmm1, %xmm0, %xmm0
 ; AVX512F-NEXT:    vmovd %xmm2, %eax
 ; AVX512F-NEXT:    testl %eax, %eax
-; AVX512F-NEXT:    sete %al
+; AVX512F-NEXT:    sets %al
 ; AVX512F-NEXT:    kmovw %eax, %k1
-; AVX512F-NEXT:    vmovaps %xmm0, %xmm1
-; AVX512F-NEXT:    vmovss %xmm2, %xmm1, %xmm1 {%k1}
-; AVX512F-NEXT:    vmovss %xmm0, %xmm2, %xmm2 {%k1}
-; AVX512F-NEXT:    vmaxss %xmm1, %xmm2, %xmm0
+; AVX512F-NEXT:    vmovaps %xmm2, %xmm1
+; AVX512F-NEXT:    vmovss %xmm0, %xmm1, %xmm1 {%k1}
+; AVX512F-NEXT:    vmovss %xmm2, %xmm0, %xmm0 {%k1}
+; AVX512F-NEXT:    vmaxss %xmm1, %xmm0, %xmm0
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512DQ-LABEL: test_fmaximum_nnan:
@@ -259,21 +258,21 @@ define float @test_fmaximum_nnan(float %x, float %y) nounwind {
 ; X86-LABEL: test_fmaximum_nnan:
 ; X86:       # %bb.0:
 ; X86-NEXT:    pushl %eax
-; X86-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X86-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X86-NEXT:    vaddss %xmm1, %xmm2, %xmm0
-; X86-NEXT:    vsubss %xmm1, %xmm2, %xmm2
-; X86-NEXT:    vmovd %xmm0, %eax
+; X86-NEXT:    vaddss %xmm0, %xmm2, %xmm1
+; X86-NEXT:    vsubss %xmm0, %xmm2, %xmm0
+; X86-NEXT:    vmovd %xmm1, %eax
 ; X86-NEXT:    testl %eax, %eax
-; X86-NEXT:    je .LBB4_1
+; X86-NEXT:    js .LBB4_1
 ; X86-NEXT:  # %bb.2:
-; X86-NEXT:    vmovaps %xmm2, %xmm1
+; X86-NEXT:    vmovaps %xmm1, %xmm2
 ; X86-NEXT:    jmp .LBB4_3
 ; X86-NEXT:  .LBB4_1:
-; X86-NEXT:    vmovaps %xmm0, %xmm1
-; X86-NEXT:    vmovaps %xmm2, %xmm0
+; X86-NEXT:    vmovaps %xmm0, %xmm2
+; X86-NEXT:    vmovaps %xmm1, %xmm0
 ; X86-NEXT:  .LBB4_3:
-; X86-NEXT:    vmaxss %xmm1, %xmm0, %xmm0
+; X86-NEXT:    vmaxss %xmm2, %xmm0, %xmm0
 ; X86-NEXT:    vmovss %xmm0, (%esp)
 ; X86-NEXT:    flds (%esp)
 ; X86-NEXT:    popl %eax
@@ -452,20 +451,20 @@ define float @test_fmaximum_combine_cmps(float %x, float %y) nounwind {
 ; SSE2-NEXT:    divss %xmm0, %xmm1
 ; SSE2-NEXT:    movd %xmm0, %eax
 ; SSE2-NEXT:    testl %eax, %eax
-; SSE2-NEXT:    movaps %xmm1, %xmm3
-; SSE2-NEXT:    je .LBB9_2
-; SSE2-NEXT:  # %bb.1:
 ; SSE2-NEXT:    movaps %xmm0, %xmm3
+; SSE2-NEXT:    js .LBB9_2
+; SSE2-NEXT:  # %bb.1:
+; SSE2-NEXT:    movaps %xmm1, %xmm3
 ; SSE2-NEXT:  .LBB9_2:
 ; SSE2-NEXT:    movaps %xmm3, %xmm2
 ; SSE2-NEXT:    cmpunordss %xmm3, %xmm2
 ; SSE2-NEXT:    movaps %xmm2, %xmm4
 ; SSE2-NEXT:    andps %xmm3, %xmm4
-; SSE2-NEXT:    je .LBB9_4
+; SSE2-NEXT:    js .LBB9_4
 ; SSE2-NEXT:  # %bb.3:
-; SSE2-NEXT:    movaps %xmm1, %xmm0
+; SSE2-NEXT:    movaps %xmm0, %xmm1
 ; SSE2-NEXT:  .LBB9_4:
-; SSE2-NEXT:    maxss %xmm0, %xmm3
+; SSE2-NEXT:    maxss %xmm1, %xmm3
 ; SSE2-NEXT:    andnps %xmm3, %xmm2
 ; SSE2-NEXT:    orps %xmm4, %xmm2
 ; SSE2-NEXT:    movaps %xmm2, %xmm0
@@ -473,20 +472,20 @@ define float @test_fmaximum_combine_cmps(float %x, float %y) nounwind {
 ;
 ; AVX1-LABEL: test_fmaximum_combine_cmps:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vdivss %xmm0, %xmm1, %xmm2
+; AVX1-NEXT:    vdivss %xmm0, %xmm1, %xmm1
 ; AVX1-NEXT:    vmovd %xmm0, %eax
 ; AVX1-NEXT:    testl %eax, %eax
-; AVX1-NEXT:    je .LBB9_1
+; AVX1-NEXT:    js .LBB9_1
 ; AVX1-NEXT:  # %bb.2:
-; AVX1-NEXT:    vmovaps %xmm2, %xmm1
+; AVX1-NEXT:    vmovaps %xmm0, %xmm2
 ; AVX1-NEXT:    jmp .LBB9_3
 ; AVX1-NEXT:  .LBB9_1:
+; AVX1-NEXT:    vmovaps %xmm1, %xmm2
 ; AVX1-NEXT:    vmovaps %xmm0, %xmm1
-; AVX1-NEXT:    vmovaps %xmm2, %xmm0
 ; AVX1-NEXT:  .LBB9_3:
-; AVX1-NEXT:    vmaxss %xmm1, %xmm0, %xmm1
-; AVX1-NEXT:    vcmpunordss %xmm0, %xmm0, %xmm2
-; AVX1-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vmaxss %xmm2, %xmm1, %xmm0
+; AVX1-NEXT:    vcmpunordss %xmm1, %xmm1, %xmm2
+; AVX1-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX512F-LABEL: test_fmaximum_combine_cmps:
@@ -494,15 +493,14 @@ define float @test_fmaximum_combine_cmps(float %x, float %y) nounwind {
 ; AVX512F-NEXT:    vdivss %xmm0, %xmm1, %xmm1
 ; AVX512F-NEXT:    vmovd %xmm0, %eax
 ; AVX512F-NEXT:    testl %eax, %eax
-; AVX512F-NEXT:    sete %al
+; AVX512F-NEXT:    sets %al
 ; AVX512F-NEXT:    kmovw %eax, %k1
-; AVX512F-NEXT:    vmovaps %xmm1, %xmm2
-; AVX512F-NEXT:    vmovss %xmm0, %xmm2, %xmm2 {%k1}
-; AVX512F-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1}
-; AVX512F-NEXT:    vmaxss %xmm2, %xmm0, %xmm1
-; AVX512F-NEXT:    vcmpunordss %xmm0, %xmm0, %k1
+; AVX512F-NEXT:    vmovaps %xmm0, %xmm2
+; AVX512F-NEXT:    vmovss %xmm1, %xmm2, %xmm2 {%k1}
 ; AVX512F-NEXT:    vmovss %xmm0, %xmm1, %xmm1 {%k1}
-; AVX512F-NEXT:    vmovaps %xmm1, %xmm0
+; AVX512F-NEXT:    vmaxss %xmm2, %xmm1, %xmm0
+; AVX512F-NEXT:    vcmpunordss %xmm1, %xmm1, %k1
+; AVX512F-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1}
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512DQ-LABEL: test_fmaximum_combine_cmps:
@@ -519,20 +517,20 @@ define float @test_fmaximum_combine_cmps(float %x, float %y) nounwind {
 ; X86-LABEL: test_fmaximum_combine_cmps:
 ; X86:       # %bb.0:
 ; X86-NEXT:    pushl %eax
-; X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X86-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X86-NEXT:    vdivss %xmm0, %xmm1, %xmm2
-; X86-NEXT:    vmovd %xmm0, %eax
+; X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-NEXT:    vdivss %xmm1, %xmm0, %xmm0
+; X86-NEXT:    vmovd %xmm1, %eax
 ; X86-NEXT:    testl %eax, %eax
-; X86-NEXT:    je .LBB9_1
+; X86-NEXT:    js .LBB9_1
 ; X86-NEXT:  # %bb.2:
-; X86-NEXT:    vmovaps %xmm2, %xmm1
+; X86-NEXT:    vmovaps %xmm1, %xmm2
 ; X86-NEXT:    jmp .LBB9_3
 ; X86-NEXT:  .LBB9_1:
-; X86-NEXT:    vmovaps %xmm0, %xmm1
-; X86-NEXT:    vmovaps %xmm2, %xmm0
+; X86-NEXT:    vmovaps %xmm0, %xmm2
+; X86-NEXT:    vmovaps %xmm1, %xmm0
 ; X86-NEXT:  .LBB9_3:
-; X86-NEXT:    vmaxss %xmm1, %xmm0, %xmm1
+; X86-NEXT:    vmaxss %xmm2, %xmm0, %xmm1
 ; X86-NEXT:    vcmpunordss %xmm0, %xmm0, %xmm2
 ; X86-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
 ; X86-NEXT:    vmovss %xmm0, (%esp)
@@ -552,9 +550,9 @@ define float @test_fminimum(float %x, float %y) nounwind {
 ; SSE2-LABEL: test_fminimum:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    movd %xmm0, %eax
-; SSE2-NEXT:    cmpl $-2147483648, %eax # imm = 0x80000000
+; SSE2-NEXT:    testl %eax, %eax
 ; SSE2-NEXT:    movdqa %xmm1, %xmm3
-; SSE2-NEXT:    je .LBB10_2
+; SSE2-NEXT:    js .LBB10_2
 ; SSE2-NEXT:  # %bb.1:
 ; SSE2-NEXT:    movdqa %xmm0, %xmm3
 ; SSE2-NEXT:  .LBB10_2:
@@ -562,7 +560,7 @@ define float @test_fminimum(float %x, float %y) nounwind {
 ; SSE2-NEXT:    cmpunordss %xmm3, %xmm2
 ; SSE2-NEXT:    movaps %xmm2, %xmm4
 ; SSE2-NEXT:    andps %xmm3, %xmm4
-; SSE2-NEXT:    je .LBB10_4
+; SSE2-NEXT:    js .LBB10_4
 ; SSE2-NEXT:  # %bb.3:
 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
 ; SSE2-NEXT:  .LBB10_4:
@@ -575,8 +573,8 @@ define float @test_fminimum(float %x, float %y) nounwind {
 ; AVX1-LABEL: test_fminimum:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vmovd %xmm0, %eax
-; AVX1-NEXT:    cmpl $-2147483648, %eax # imm = 0x80000000
-; AVX1-NEXT:    je .LBB10_1
+; AVX1-NEXT:    testl %eax, %eax
+; AVX1-NEXT:    js .LBB10_1
 ; AVX1-NEXT:  # %bb.2:
 ; AVX1-NEXT:    vmovdqa %xmm1, %xmm2
 ; AVX1-NEXT:    jmp .LBB10_3
@@ -592,8 +590,8 @@ define float @test_fminimum(float %x, float %y) nounwind {
 ; AVX512-LABEL: test_fminimum:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vmovd %xmm0, %eax
-; AVX512-NEXT:    cmpl $-2147483648, %eax # imm = 0x80000000
-; AVX512-NEXT:    sete %al
+; AVX512-NEXT:    testl %eax, %eax
+; AVX512-NEXT:    sets %al
 ; AVX512-NEXT:    kmovw %eax, %k1
 ; AVX512-NEXT:    vmovaps %xmm1, %xmm2
 ; AVX512-NEXT:    vmovss %xmm0, %xmm2, %xmm2 {%k1}
@@ -610,8 +608,8 @@ define float @test_fminimum(float %x, float %y) nounwind {
 ; X86-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; X86-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X86-NEXT:    vmovd %xmm0, %eax
-; X86-NEXT:    cmpl $-2147483648, %eax # imm = 0x80000000
-; X86-NEXT:    je .LBB10_1
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    js .LBB10_1
 ; X86-NEXT:  # %bb.2:
 ; X86-NEXT:    vmovdqa %xmm1, %xmm2
 ; X86-NEXT:    jmp .LBB10_3
@@ -705,9 +703,8 @@ define double @test_fminimum_nnan(double %x, double %y) "no-nans-fp-math"="true"
 ; SSE2-LABEL: test_fminimum_nnan:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    movq %xmm0, %rax
-; SSE2-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
-; SSE2-NEXT:    cmpq %rcx, %rax
-; SSE2-NEXT:    je .LBB14_1
+; SSE2-NEXT:    testq %rax, %rax
+; SSE2-NEXT:    js .LBB14_1
 ; SSE2-NEXT:  # %bb.2:
 ; SSE2-NEXT:    minsd %xmm1, %xmm0
 ; SSE2-NEXT:    retq
@@ -720,9 +717,8 @@ define double @test_fminimum_nnan(double %x, double %y) "no-nans-fp-math"="true"
 ; AVX1-LABEL: test_fminimum_nnan:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vmovq %xmm0, %rax
-; AVX1-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
-; AVX1-NEXT:    cmpq %rcx, %rax
-; AVX1-NEXT:    je .LBB14_1
+; AVX1-NEXT:    testq %rax, %rax
+; AVX1-NEXT:    js .LBB14_1
 ; AVX1-NEXT:  # %bb.2:
 ; AVX1-NEXT:    vminsd %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    retq
@@ -734,9 +730,8 @@ define double @test_fminimum_nnan(double %x, double %y) "no-nans-fp-math"="true"
 ; AVX512F-LABEL: test_fminimum_nnan:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vmovq %xmm0, %rax
-; AVX512F-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
-; AVX512F-NEXT:    cmpq %rcx, %rax
-; AVX512F-NEXT:    sete %al
+; AVX512F-NEXT:    testq %rax, %rax
+; AVX512F-NEXT:    sets %al
 ; AVX512F-NEXT:    kmovw %eax, %k1
 ; AVX512F-NEXT:    vmovapd %xmm1, %xmm2
 ; AVX512F-NEXT:    vmovsd %xmm0, %xmm2, %xmm2 {%k1}
@@ -762,12 +757,10 @@ define double @test_fminimum_nnan(double %x, double %y) "no-nans-fp-math"="true"
 ; X86-NEXT:    subl $8, %esp
 ; X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
 ; X86-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
-; X86-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
-; X86-NEXT:    vmovd %xmm2, %eax
-; X86-NEXT:    vpextrd $1, %xmm2, %ecx
-; X86-NEXT:    addl $-2147483648, %ecx # imm = 0x80000000
-; X86-NEXT:    orl %eax, %ecx
-; X86-NEXT:    je .LBB14_1
+; X86-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
+; X86-NEXT:    vextractps $1, %xmm2, %eax
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    js .LBB14_1
 ; X86-NEXT:  # %bb.2:
 ; X86-NEXT:    vmovapd %xmm1, %xmm2
 ; X86-NEXT:    jmp .LBB14_3
@@ -945,9 +938,9 @@ define float @test_fminimum_combine_cmps(float %x, float %y) nounwind {
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    divss %xmm0, %xmm1
 ; SSE2-NEXT:    movd %xmm0, %eax
-; SSE2-NEXT:    cmpl $-2147483648, %eax # imm = 0x80000000
+; SSE2-NEXT:    testl %eax, %eax
 ; SSE2-NEXT:    movaps %xmm1, %xmm3
-; SSE2-NEXT:    je .LBB19_2
+; SSE2-NEXT:    js .LBB19_2
 ; SSE2-NEXT:  # %bb.1:
 ; SSE2-NEXT:    movaps %xmm0, %xmm3
 ; SSE2-NEXT:  .LBB19_2:
@@ -955,7 +948,7 @@ define float @test_fminimum_combine_cmps(float %x, float %y) nounwind {
 ; SSE2-NEXT:    cmpunordss %xmm3, %xmm2
 ; SSE2-NEXT:    movaps %xmm2, %xmm4
 ; SSE2-NEXT:    andps %xmm3, %xmm4
-; SSE2-NEXT:    je .LBB19_4
+; SSE2-NEXT:    js .LBB19_4
 ; SSE2-NEXT:  # %bb.3:
 ; SSE2-NEXT:    movaps %xmm1, %xmm0
 ; SSE2-NEXT:  .LBB19_4:
@@ -969,8 +962,8 @@ define float @test_fminimum_combine_cmps(float %x, float %y) nounwind {
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vdivss %xmm0, %xmm1, %xmm2
 ; AVX1-NEXT:    vmovd %xmm0, %eax
-; AVX1-NEXT:    cmpl $-2147483648, %eax # imm = 0x80000000
-; AVX1-NEXT:    je .LBB19_1
+; AVX1-NEXT:    testl %eax, %eax
+; AVX1-NEXT:    js .LBB19_1
 ; AVX1-NEXT:  # %bb.2:
 ; AVX1-NEXT:    vmovaps %xmm2, %xmm1
 ; AVX1-NEXT:    jmp .LBB19_3
@@ -987,8 +980,8 @@ define float @test_fminimum_combine_cmps(float %x, float %y) nounwind {
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vdivss %xmm0, %xmm1, %xmm1
 ; AVX512F-NEXT:    vmovd %xmm0, %eax
-; AVX512F-NEXT:    cmpl $-2147483648, %eax # imm = 0x80000000
-; AVX512F-NEXT:    sete %al
+; AVX512F-NEXT:    testl %eax, %eax
+; AVX512F-NEXT:    sets %al
 ; AVX512F-NEXT:    kmovw %eax, %k1
 ; AVX512F-NEXT:    vmovaps %xmm1, %xmm2
 ; AVX512F-NEXT:    vmovss %xmm0, %xmm2, %xmm2 {%k1}
@@ -1017,8 +1010,8 @@ define float @test_fminimum_combine_cmps(float %x, float %y) nounwind {
 ; X86-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; X86-NEXT:    vdivss %xmm0, %xmm1, %xmm2
 ; X86-NEXT:    vmovd %xmm0, %eax
-; X86-NEXT:    cmpl $-2147483648, %eax # imm = 0x80000000
-; X86-NEXT:    je .LBB19_1
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    js .LBB19_1
 ; X86-NEXT:  # %bb.2:
 ; X86-NEXT:    vmovaps %xmm2, %xmm1
 ; X86-NEXT:    jmp .LBB19_3


        


More information about the llvm-commits mailing list