[llvm] 0d3f782 - [FPEnv][X86] More strict int <-> FP conversion fixes
Ulrich Weigand via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 23 12:15:37 PST 2019
Author: Ulrich Weigand
Date: 2019-12-23T21:11:45+01:00
New Revision: 0d3f782e413c1b13d407f67afbb6330b1091fef0
URL: https://github.com/llvm/llvm-project/commit/0d3f782e413c1b13d407f67afbb6330b1091fef0
DIFF: https://github.com/llvm/llvm-project/commit/0d3f782e413c1b13d407f67afbb6330b1091fef0.diff
LOG: [FPEnv][X86] More strict int <-> FP conversion fixes
Fix several additional problems with the int <-> FP conversion logic,
both in common code and in the X86 target. In particular:
- The STRICT_FP_TO_UINT expansion emits a floating-point compare. This
compare can raise exceptions and therefore needs to be a strict compare.
I've made it signaling (even though quiet would also be correct), as
signaling is the more usual default for an LT. This code exists both
in common code and in the X86 target; a short sketch of the common-code
change follows this list.
- The STRICT_UINT_TO_FP expansion algorithm was incorrect for strict mode:
it emitted two STRICT_SINT_TO_FP nodes and then used a select to choose one
of the results. This can cause spurious exceptions from the STRICT_SINT_TO_FP
whose result ends up not being chosen. I've fixed the algorithm to use only
a single STRICT_SINT_TO_FP instead; see the second sketch after this list.
- The !isStrictFPEnabled logic in DoInstructionSelection would sometimes do
the wrong thing because it calls getOperationAction using the result VT.
But for some opcodes, including [SU]INT_TO_FP, getOperationAction needs to
be called using the operand VT.
- Remove some (obsolete) code in X86DAGToDAGISel::Select that would mutate
STRICT_FP_TO_[SU]INT to non-strict versions unnecessarily.
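As a quick illustration of the first point, this is roughly the common-code
change in expandFP_TO_UINT (it mirrors the TargetLowering.cpp hunk below): in
the strict case the compare is chained on the incoming chain and marked
signaling, so it lowers to a STRICT_FSETCCS node rather than a quiet compare:

  SDValue Sel;
  if (Node->isStrictFPOpcode())
    // Chained, signaling compare: raises invalid on NaN, as the fix intends.
    Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
                       Node->getOperand(0), /*IsSignaling*/ true);
  else
    Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);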
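For the second point, here is a minimal C++ sketch of the algorithm the
expansion now follows (the helper name is made up for illustration). It is
the x86_64 __floatundidf idea from compiler-rt, with the select applied to
the integer input before the single signed conversion, so there is no unused
conversion left that could raise a spurious exception:

  #include <cstdint>

  double uint64_to_f64(uint64_t X) {
    bool SignBitSet = static_cast<int64_t>(X) < 0;   // i.e. X >= 2^63
    // Halve the value but keep the low bit so rounding stays correct.
    uint64_t Or = (X >> 1) | (X & 1);
    uint64_t InCvt = SignBitSet ? Or : X;            // select before converting
    double Fast = static_cast<double>(static_cast<int64_t>(InCvt));
    double Slow = Fast + Fast;                       // undo the halving
    return SignBitSet ? Slow : Fast;
  }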
Reviewed by: craig.topper
Differential Revision: https://reviews.llvm.org/D71840
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/lib/Target/X86/X86InstrAVX512.td
llvm/lib/Target/X86/X86InstrSSE.td
llvm/test/CodeGen/SystemZ/fp-strict-conv-10.ll
llvm/test/CodeGen/SystemZ/fp-strict-conv-12.ll
llvm/test/CodeGen/X86/fp-intrinsics-flags.ll
llvm/test/CodeGen/X86/fp-intrinsics.ll
llvm/test/CodeGen/X86/fp-strict-scalar-fptoint.ll
llvm/test/CodeGen/X86/fp80-strict-scalar.ll
llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll
llvm/test/CodeGen/X86/vec-strict-fptoint-256.ll
llvm/test/CodeGen/X86/vec-strict-fptoint-512.ll
llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 2b5ea545f02e..b25a9ab854a5 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -1162,10 +1162,30 @@ void SelectionDAGISel::DoInstructionSelection() {
// we convert them to normal FP opcodes instead at this point. This
// will allow them to be handled by existing target-specific instruction
// selectors.
- if (!TLI->isStrictFPEnabled() && Node->isStrictFPOpcode() &&
- (TLI->getOperationAction(Node->getOpcode(), Node->getValueType(0))
- == TargetLowering::Expand))
- Node = CurDAG->mutateStrictFPToFP(Node);
+ if (!TLI->isStrictFPEnabled() && Node->isStrictFPOpcode()) {
+ // For some opcodes, we need to call TLI->getOperationAction using
+ // the first operand type instead of the result type. Note that this
+ // must match what SelectionDAGLegalize::LegalizeOp is doing.
+ EVT ActionVT;
+ switch (Node->getOpcode()) {
+ case ISD::STRICT_SINT_TO_FP:
+ case ISD::STRICT_UINT_TO_FP:
+ case ISD::STRICT_LRINT:
+ case ISD::STRICT_LLRINT:
+ case ISD::STRICT_LROUND:
+ case ISD::STRICT_LLROUND:
+ case ISD::STRICT_FSETCC:
+ case ISD::STRICT_FSETCCS:
+ ActionVT = Node->getOperand(1).getValueType();
+ break;
+ default:
+ ActionVT = Node->getValueType(0);
+ break;
+ }
+ if (TLI->getOperationAction(Node->getOpcode(), ActionVT)
+ == TargetLowering::Expand)
+ Node = CurDAG->mutateStrictFPToFP(Node);
+ }
LLVM_DEBUG(dbgs() << "\nISEL: Starting selection on root node: ";
Node->dump(CurDAG));
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index c3d6d329d670..3fc215476540 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -6074,7 +6074,13 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
}
SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
- SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
+ SDValue Sel;
+
+ if (Node->isStrictFPOpcode())
+ Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
+ Node->getOperand(0), /*IsSignaling*/ true);
+ else
+ Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
bool Strict = Node->isStrictFPOpcode() ||
shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);
@@ -6149,13 +6155,16 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
// For unsigned conversions, convert them to signed conversions using the
// algorithm from the x86_64 __floatundidf in compiler_rt.
- SDValue Fast;
- if (Node->isStrictFPOpcode()) {
- Fast = DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {DstVT, MVT::Other},
- {Node->getOperand(0), Src});
- Chain = SDValue(Fast.getNode(), 1);
- } else
- Fast = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Src);
+
+ // TODO: This really should be implemented using a branch rather than a
+ // select. We happen to get lucky and machinesink does the right
+ // thing most of the time. This would be a good candidate for a
+ // pseudo-op, or, even better, for whole-function isel.
+ EVT SetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
+
+ SDValue SignBitTest = DAG.getSetCC(
+ dl, SetCCVT, Src, DAG.getConstant(0, dl, SrcVT), ISD::SETLT);
SDValue ShiftConst = DAG.getConstant(1, dl, ShiftVT);
SDValue Shr = DAG.getNode(ISD::SRL, dl, SrcVT, Src, ShiftConst);
@@ -6163,27 +6172,28 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
SDValue And = DAG.getNode(ISD::AND, dl, SrcVT, Src, AndConst);
SDValue Or = DAG.getNode(ISD::OR, dl, SrcVT, And, Shr);
- SDValue Slow;
+ SDValue Slow, Fast;
if (Node->isStrictFPOpcode()) {
- SDValue SignCvt = DAG.getNode(ISD::STRICT_SINT_TO_FP, dl,
- {DstVT, MVT::Other}, {Chain, Or});
+ // In strict mode, we must avoid spurious exceptions, and therefore
+ // must make sure to only emit a single STRICT_SINT_TO_FP.
+ SDValue InCvt = DAG.getSelect(dl, SrcVT, SignBitTest, Or, Src);
+ Fast = DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, { DstVT, MVT::Other },
+ { Node->getOperand(0), InCvt });
Slow = DAG.getNode(ISD::STRICT_FADD, dl, { DstVT, MVT::Other },
- { SignCvt.getValue(1), SignCvt, SignCvt });
+ { Fast.getValue(1), Fast, Fast });
Chain = Slow.getValue(1);
+ // The STRICT_SINT_TO_FP inherits the exception mode from the
+ // incoming STRICT_UINT_TO_FP node; the STRICT_FADD node can
+ // never raise any exception.
+ SDNodeFlags Flags;
+ Flags.setFPExcept(Node->getFlags().hasFPExcept());
+ Fast->setFlags(Flags);
} else {
SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Or);
Slow = DAG.getNode(ISD::FADD, dl, DstVT, SignCvt, SignCvt);
+ Fast = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Src);
}
- // TODO: This really should be implemented using a branch rather than a
- // select. We happen to get lucky and machinesink does the right
- // thing most of the time. This would be a good candidate for a
- // pseudo-op, or, even better, for whole-function isel.
- EVT SetCCVT =
- getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
-
- SDValue SignBitTest = DAG.getSetCC(
- dl, SetCCVT, Src, DAG.getConstant(0, dl, SrcVT), ISD::SETLT);
Result = DAG.getSelect(dl, DstVT, SignBitTest, Slow, Fast);
return true;
}
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 7df6ecdc5ef3..14e937f6b534 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -5246,13 +5246,6 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
SelectCode(Res.getNode());
return;
}
- case ISD::STRICT_FP_TO_SINT:
- case ISD::STRICT_FP_TO_UINT:
- // FIXME: Remove when we have isel patterns for strict versions of these
- // nodes.
- if (!TLI->isStrictFPEnabled())
- CurDAG->mutateStrictFPToFP(Node);
- break;
}
SelectCode(Node);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index cd6821c16e8b..8f510706b0ad 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -19199,10 +19199,15 @@ X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
SDValue ThreshVal = DAG.getConstantFP(Thresh, DL, TheVT);
- SDValue Cmp = DAG.getSetCC(DL,
- getSetCCResultType(DAG.getDataLayout(),
- *DAG.getContext(), TheVT),
- Value, ThreshVal, ISD::SETLT);
+ EVT ResVT = getSetCCResultType(DAG.getDataLayout(),
+ *DAG.getContext(), TheVT);
+ SDValue Cmp;
+ if (IsStrict)
+ Cmp = DAG.getSetCC(DL, ResVT, Value, ThreshVal, ISD::SETLT,
+ Chain, /*IsSignaling*/ true);
+ else
+ Cmp = DAG.getSetCC(DL, ResVT, Value, ThreshVal, ISD::SETLT);
+
Adjust = DAG.getSelect(DL, MVT::i64, Cmp,
DAG.getConstant(0, DL, MVT::i64),
DAG.getConstant(APInt::getSignMask(64),
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 83a346543c46..b191bf646c9b 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -7102,22 +7102,22 @@ def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
(VCVTSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
-def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
+def : Pat<(f32 (any_sint_to_fp (loadi32 addr:$src))),
(VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
-def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
+def : Pat<(f32 (any_sint_to_fp (loadi64 addr:$src))),
(VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
-def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
+def : Pat<(f64 (any_sint_to_fp (loadi32 addr:$src))),
(VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
-def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
+def : Pat<(f64 (any_sint_to_fp (loadi64 addr:$src))),
(VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
-def : Pat<(f32 (sint_to_fp GR32:$src)),
+def : Pat<(f32 (any_sint_to_fp GR32:$src)),
(VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
-def : Pat<(f32 (sint_to_fp GR64:$src)),
+def : Pat<(f32 (any_sint_to_fp GR64:$src)),
(VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
-def : Pat<(f64 (sint_to_fp GR32:$src)),
+def : Pat<(f64 (any_sint_to_fp GR32:$src)),
(VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
-def : Pat<(f64 (sint_to_fp GR64:$src)),
+def : Pat<(f64 (any_sint_to_fp GR64:$src)),
(VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
defm VCVTUSI2SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
@@ -7141,22 +7141,22 @@ def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
(VCVTUSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
-def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))),
+def : Pat<(f32 (any_uint_to_fp (loadi32 addr:$src))),
(VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
-def : Pat<(f32 (uint_to_fp (loadi64 addr:$src))),
+def : Pat<(f32 (any_uint_to_fp (loadi64 addr:$src))),
(VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
-def : Pat<(f64 (uint_to_fp (loadi32 addr:$src))),
+def : Pat<(f64 (any_uint_to_fp (loadi32 addr:$src))),
(VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
-def : Pat<(f64 (uint_to_fp (loadi64 addr:$src))),
+def : Pat<(f64 (any_uint_to_fp (loadi64 addr:$src))),
(VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
-def : Pat<(f32 (uint_to_fp GR32:$src)),
+def : Pat<(f32 (any_uint_to_fp GR32:$src)),
(VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
-def : Pat<(f32 (uint_to_fp GR64:$src)),
+def : Pat<(f32 (any_uint_to_fp GR64:$src)),
(VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
-def : Pat<(f64 (uint_to_fp GR32:$src)),
+def : Pat<(f64 (any_uint_to_fp GR32:$src)),
(VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
-def : Pat<(f64 (uint_to_fp GR64:$src)),
+def : Pat<(f64 (any_uint_to_fp GR64:$src)),
(VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
}
@@ -7227,82 +7227,82 @@ defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2u
let Predicates = [HasAVX512] in {
def : Pat<(v4f32 (X86Movss
(v4f32 VR128X:$dst),
- (v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))),
+ (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))),
(VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128X:$dst),
- (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi64 addr:$src))))))),
+ (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi64 addr:$src))))))),
(VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128X:$dst),
- (v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))),
+ (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR32:$src)))))),
(VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128X:$dst),
- (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi32 addr:$src))))))),
+ (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi32 addr:$src))))))),
(VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128X:$dst),
- (v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))),
+ (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))),
(VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128X:$dst),
- (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi64 addr:$src))))))),
+ (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi64 addr:$src))))))),
(VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128X:$dst),
- (v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))),
+ (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR32:$src)))))),
(VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128X:$dst),
- (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi32 addr:$src))))))),
+ (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi32 addr:$src))))))),
(VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128X:$dst),
- (v4f32 (scalar_to_vector (f32 (uint_to_fp GR64:$src)))))),
+ (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR64:$src)))))),
(VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128X:$dst),
- (v4f32 (scalar_to_vector (f32 (uint_to_fp (loadi64 addr:$src))))))),
+ (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi64 addr:$src))))))),
(VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128X:$dst),
- (v4f32 (scalar_to_vector (f32 (uint_to_fp GR32:$src)))))),
+ (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR32:$src)))))),
(VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128X:$dst),
- (v4f32 (scalar_to_vector (f32 (uint_to_fp (loadi32 addr:$src))))))),
+ (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi32 addr:$src))))))),
(VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128X:$dst),
- (v2f64 (scalar_to_vector (f64 (uint_to_fp GR64:$src)))))),
+ (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR64:$src)))))),
(VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128X:$dst),
- (v2f64 (scalar_to_vector (f64 (uint_to_fp (loadi64 addr:$src))))))),
+ (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi64 addr:$src))))))),
(VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128X:$dst),
- (v2f64 (scalar_to_vector (f64 (uint_to_fp GR32:$src)))))),
+ (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR32:$src)))))),
(VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128X:$dst),
- (v2f64 (scalar_to_vector (f64 (uint_to_fp (loadi32 addr:$src))))))),
+ (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi32 addr:$src))))))),
(VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>;
} // Predicates = [HasAVX512]
@@ -8108,10 +8108,10 @@ multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
VK4WM:$mask, i64mem:$src), 0, "att">;
}
-defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", sint_to_fp, X86VSintToFP,
+defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", any_sint_to_fp, X86VSintToFP,
SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;
-defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", sint_to_fp,
+defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", any_sint_to_fp,
X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
PS, EVEX_CD8<32, CD8VF>;
@@ -8131,11 +8131,11 @@ defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86cvttp2ui,
X86cvttp2uiSAE, SchedWriteCvtPD2DQ>,
PS, VEX_W, EVEX_CD8<64, CD8VF>;
-defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", uint_to_fp,
+defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", any_uint_to_fp,
X86VUintToFP, SchedWriteCvtDQ2PD>, XS,
EVEX_CD8<32, CD8VH>;
-defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", uint_to_fp,
+defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", any_uint_to_fp,
X86VUintToFpRnd, SchedWriteCvtDQ2PS>, XD,
EVEX_CD8<32, CD8VF>;
@@ -8187,19 +8187,19 @@ defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86cvttp2ui,
X86cvttp2uiSAE, SchedWriteCvtPS2DQ>, PD,
EVEX_CD8<32, CD8VH>;
-defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", sint_to_fp,
+defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", any_sint_to_fp,
X86VSintToFpRnd, SchedWriteCvtDQ2PD>, VEX_W, XS,
EVEX_CD8<64, CD8VF>;
-defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", uint_to_fp,
+defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", any_uint_to_fp,
X86VUintToFpRnd, SchedWriteCvtDQ2PD>, VEX_W, XS,
EVEX_CD8<64, CD8VF>;
-defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", sint_to_fp,
+defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", any_sint_to_fp,
X86VSintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, PS,
EVEX_CD8<64, CD8VF>;
-defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", uint_to_fp,
+defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", any_uint_to_fp,
X86VUintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, XD,
EVEX_CD8<64, CD8VF>;
@@ -8383,17 +8383,17 @@ def : Pat<(v4i32 (X86cvttp2ui (v4f64 VR256X:$src1))),
(v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
VR256X:$src1, sub_ymm)))), sub_xmm)>;
-def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))),
+def : Pat<(v8f32 (any_uint_to_fp (v8i32 VR256X:$src1))),
(EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
VR256X:$src1, sub_ymm)))), sub_ymm)>;
-def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))),
+def : Pat<(v4f32 (any_uint_to_fp (v4i32 VR128X:$src1))),
(EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
VR128X:$src1, sub_xmm)))), sub_xmm)>;
-def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))),
+def : Pat<(v4f64 (any_uint_to_fp (v4i32 VR128X:$src1))),
(EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
(v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
VR128X:$src1, sub_xmm)))), sub_ymm)>;
@@ -8519,32 +8519,32 @@ def : Pat<(v4i64 (X86cvttp2ui (v4f64 VR256X:$src1))),
(v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
VR256X:$src1, sub_ymm)))), sub_ymm)>;
-def : Pat<(v4f32 (sint_to_fp (v4i64 VR256X:$src1))),
+def : Pat<(v4f32 (any_sint_to_fp (v4i64 VR256X:$src1))),
(EXTRACT_SUBREG (v8f32 (VCVTQQ2PSZrr
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
VR256X:$src1, sub_ymm)))), sub_xmm)>;
-def : Pat<(v2f64 (sint_to_fp (v2i64 VR128X:$src1))),
+def : Pat<(v2f64 (any_sint_to_fp (v2i64 VR128X:$src1))),
(EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
VR128X:$src1, sub_xmm)))), sub_xmm)>;
-def : Pat<(v4f64 (sint_to_fp (v4i64 VR256X:$src1))),
+def : Pat<(v4f64 (any_sint_to_fp (v4i64 VR256X:$src1))),
(EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
VR256X:$src1, sub_ymm)))), sub_ymm)>;
-def : Pat<(v4f32 (uint_to_fp (v4i64 VR256X:$src1))),
+def : Pat<(v4f32 (any_uint_to_fp (v4i64 VR256X:$src1))),
(EXTRACT_SUBREG (v8f32 (VCVTUQQ2PSZrr
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
VR256X:$src1, sub_ymm)))), sub_xmm)>;
-def : Pat<(v2f64 (uint_to_fp (v2i64 VR128X:$src1))),
+def : Pat<(v2f64 (any_uint_to_fp (v2i64 VR128X:$src1))),
(EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
VR128X:$src1, sub_xmm)))), sub_xmm)>;
-def : Pat<(v4f64 (uint_to_fp (v4i64 VR256X:$src1))),
+def : Pat<(v4f64 (any_uint_to_fp (v4i64 VR256X:$src1))),
(EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
VR256X:$src1, sub_ymm)))), sub_ymm)>;
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index a2a5f1f1d435..78aa9f367be4 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -842,11 +842,11 @@ multiclass sse12_cvt_p<bits<8> opc, RegisterClass RC, X86MemOperand x86memop,
string asm, Domain d, X86FoldableSchedWrite sched> {
let hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1 in {
def rr : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src), asm,
- [(set RC:$dst, (DstTy (sint_to_fp (SrcTy RC:$src))))], d>,
+ [(set RC:$dst, (DstTy (any_sint_to_fp (SrcTy RC:$src))))], d>,
Sched<[sched]>;
let mayLoad = 1 in
def rm : I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), asm,
- [(set RC:$dst, (DstTy (sint_to_fp
+ [(set RC:$dst, (DstTy (any_sint_to_fp
(SrcTy (ld_frag addr:$src)))))], d>,
Sched<[sched.Folded]>;
}
@@ -906,22 +906,22 @@ defm VCVTSI642SD : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd", "q",
} // isCodeGenOnly = 1
let Predicates = [UseAVX] in {
- def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
+ def : Pat<(f32 (any_sint_to_fp (loadi32 addr:$src))),
(VCVTSI2SSrm (f32 (IMPLICIT_DEF)), addr:$src)>;
- def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
+ def : Pat<(f32 (any_sint_to_fp (loadi64 addr:$src))),
(VCVTSI642SSrm (f32 (IMPLICIT_DEF)), addr:$src)>;
- def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
+ def : Pat<(f64 (any_sint_to_fp (loadi32 addr:$src))),
(VCVTSI2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>;
- def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
+ def : Pat<(f64 (any_sint_to_fp (loadi64 addr:$src))),
(VCVTSI642SDrm (f64 (IMPLICIT_DEF)), addr:$src)>;
- def : Pat<(f32 (sint_to_fp GR32:$src)),
+ def : Pat<(f32 (any_sint_to_fp GR32:$src)),
(VCVTSI2SSrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
- def : Pat<(f32 (sint_to_fp GR64:$src)),
+ def : Pat<(f32 (any_sint_to_fp GR64:$src)),
(VCVTSI642SSrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
- def : Pat<(f64 (sint_to_fp GR32:$src)),
+ def : Pat<(f64 (any_sint_to_fp GR32:$src)),
(VCVTSI2SDrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
- def : Pat<(f64 (sint_to_fp GR64:$src)),
+ def : Pat<(f64 (any_sint_to_fp GR64:$src)),
(VCVTSI642SDrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
}
@@ -938,16 +938,16 @@ defm CVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, any_fp_to_sint, f64mem, loadf64,
defm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, any_fp_to_sint, f64mem, loadf64,
"cvttsd2si", "cvttsd2si",
WriteCvtSD2I, SSEPackedDouble>, XD, REX_W, SIMD_EXC;
-defm CVTSI2SS : sse12_cvt_s<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32,
+defm CVTSI2SS : sse12_cvt_s<0x2A, GR32, FR32, any_sint_to_fp, i32mem, loadi32,
"cvtsi2ss", "cvtsi2ss{l}",
WriteCvtI2SS, SSEPackedSingle, ReadInt2Fpu>, XS, SIMD_EXC;
-defm CVTSI642SS : sse12_cvt_s<0x2A, GR64, FR32, sint_to_fp, i64mem, loadi64,
+defm CVTSI642SS : sse12_cvt_s<0x2A, GR64, FR32, any_sint_to_fp, i64mem, loadi64,
"cvtsi2ss", "cvtsi2ss{q}",
WriteCvtI2SS, SSEPackedSingle, ReadInt2Fpu>, XS, REX_W, SIMD_EXC;
-defm CVTSI2SD : sse12_cvt_s<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32,
+defm CVTSI2SD : sse12_cvt_s<0x2A, GR32, FR64, any_sint_to_fp, i32mem, loadi32,
"cvtsi2sd", "cvtsi2sd{l}",
WriteCvtI2SD, SSEPackedDouble, ReadInt2Fpu>, XD;
-defm CVTSI642SD : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64,
+defm CVTSI642SD : sse12_cvt_s<0x2A, GR64, FR64, any_sint_to_fp, i64mem, loadi64,
"cvtsi2sd", "cvtsi2sd{q}",
WriteCvtI2SD, SSEPackedDouble, ReadInt2Fpu>, XD, REX_W, SIMD_EXC;
} // isCodeGenOnly = 1
@@ -1346,42 +1346,42 @@ def : Pat<(v2f64 (X86Movsd
def : Pat<(v4f32 (X86Movss
(v4f32 VR128:$dst),
- (v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))),
+ (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))),
(VCVTSI642SSrr_Int VR128:$dst, GR64:$src)>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128:$dst),
- (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi64 addr:$src))))))),
+ (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi64 addr:$src))))))),
(VCVTSI642SSrm_Int VR128:$dst, addr:$src)>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128:$dst),
- (v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))),
+ (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR32:$src)))))),
(VCVTSI2SSrr_Int VR128:$dst, GR32:$src)>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128:$dst),
- (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi32 addr:$src))))))),
+ (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi32 addr:$src))))))),
(VCVTSI2SSrm_Int VR128:$dst, addr:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128:$dst),
- (v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))),
+ (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))),
(VCVTSI642SDrr_Int VR128:$dst, GR64:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128:$dst),
- (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi64 addr:$src))))))),
+ (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi64 addr:$src))))))),
(VCVTSI642SDrm_Int VR128:$dst, addr:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128:$dst),
- (v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))),
+ (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR32:$src)))))),
(VCVTSI2SDrr_Int VR128:$dst, GR32:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128:$dst),
- (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi32 addr:$src))))))),
+ (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi32 addr:$src))))))),
(VCVTSI2SDrm_Int VR128:$dst, addr:$src)>;
} // Predicates = [UseAVX]
@@ -1400,44 +1400,44 @@ def : Pat<(v2f64 (X86Movsd
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128:$dst),
- (v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))),
+ (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))),
(CVTSI642SDrr_Int VR128:$dst, GR64:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128:$dst),
- (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi64 addr:$src))))))),
+ (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi64 addr:$src))))))),
(CVTSI642SDrm_Int VR128:$dst, addr:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128:$dst),
- (v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))),
+ (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR32:$src)))))),
(CVTSI2SDrr_Int VR128:$dst, GR32:$src)>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128:$dst),
- (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi32 addr:$src))))))),
+ (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi32 addr:$src))))))),
(CVTSI2SDrm_Int VR128:$dst, addr:$src)>;
} // Predicates = [UseSSE2]
let Predicates = [UseSSE1] in {
def : Pat<(v4f32 (X86Movss
(v4f32 VR128:$dst),
- (v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))),
+ (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))),
(CVTSI642SSrr_Int VR128:$dst, GR64:$src)>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128:$dst),
- (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi64 addr:$src))))))),
+ (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi64 addr:$src))))))),
(CVTSI642SSrm_Int VR128:$dst, addr:$src)>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128:$dst),
- (v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))),
+ (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR32:$src)))))),
(CVTSI2SSrr_Int VR128:$dst, GR32:$src)>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128:$dst),
- (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi32 addr:$src))))))),
+ (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi32 addr:$src))))))),
(CVTSI2SSrm_Int VR128:$dst, addr:$src)>;
} // Predicates = [UseSSE1]
@@ -1663,13 +1663,13 @@ def VCVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
def VCVTDQ2PDYrm : S2SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
- (v4f64 (sint_to_fp (loadv4i32 addr:$src))))]>,
+ (v4f64 (any_sint_to_fp (loadv4i32 addr:$src))))]>,
VEX, VEX_L, Sched<[WriteCvtI2PDYLd]>,
VEX_WIG;
def VCVTDQ2PDYrr : S2SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
- (v4f64 (sint_to_fp (v4i32 VR128:$src))))]>,
+ (v4f64 (any_sint_to_fp (v4i32 VR128:$src))))]>,
VEX, VEX_L, Sched<[WriteCvtI2PDY]>, VEX_WIG;
}
diff --git a/llvm/test/CodeGen/SystemZ/fp-strict-conv-10.ll b/llvm/test/CodeGen/SystemZ/fp-strict-conv-10.ll
index 9de37a78c10c..5327eccdcce6 100644
--- a/llvm/test/CodeGen/SystemZ/fp-strict-conv-10.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-strict-conv-10.ll
@@ -19,7 +19,7 @@ define i32 @f1(float %f) #0 {
; CHECK: # %bb.0:
; CHECK-NEXT: larl %r1, .LCPI0_0
; CHECK-NEXT: le %f1, 0(%r1)
-; CHECK-NEXT: cebr %f0, %f1
+; CHECK-NEXT: kebr %f0, %f1
; CHECK-NEXT: jnl .LBB0_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: lhi %r0, 0
@@ -43,7 +43,7 @@ define i32 @f2(double %f) #0 {
; CHECK: # %bb.0:
; CHECK-NEXT: larl %r1, .LCPI1_0
; CHECK-NEXT: ldeb %f1, 0(%r1)
-; CHECK-NEXT: cdbr %f0, %f1
+; CHECK-NEXT: kdbr %f0, %f1
; CHECK-NEXT: jnl .LBB1_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: lhi %r0, 0
@@ -69,7 +69,7 @@ define i32 @f3(fp128 *%src) #0 {
; CHECK-NEXT: ld %f2, 8(%r2)
; CHECK-NEXT: larl %r1, .LCPI2_0
; CHECK-NEXT: lxeb %f1, 0(%r1)
-; CHECK-NEXT: cxbr %f0, %f1
+; CHECK-NEXT: kxbr %f0, %f1
; CHECK-NEXT: jnl .LBB2_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: lhi %r0, 0
diff --git a/llvm/test/CodeGen/SystemZ/fp-strict-conv-12.ll b/llvm/test/CodeGen/SystemZ/fp-strict-conv-12.ll
index f45902d8b3c3..e7ed6af330cf 100644
--- a/llvm/test/CodeGen/SystemZ/fp-strict-conv-12.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-strict-conv-12.ll
@@ -18,7 +18,7 @@ define i64 @f1(float %f) #0 {
; CHECK: # %bb.0:
; CHECK-NEXT: larl %r1, .LCPI0_0
; CHECK-NEXT: le %f1, 0(%r1)
-; CHECK-NEXT: cebr %f0, %f1
+; CHECK-NEXT: kebr %f0, %f1
; CHECK-NEXT: jnl .LBB0_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: lghi %r0, 0
@@ -42,7 +42,7 @@ define i64 @f2(double %f) #0 {
; CHECK: # %bb.0:
; CHECK-NEXT: larl %r1, .LCPI1_0
; CHECK-NEXT: ldeb %f1, 0(%r1)
-; CHECK-NEXT: cdbr %f0, %f1
+; CHECK-NEXT: kdbr %f0, %f1
; CHECK-NEXT: jnl .LBB1_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: lghi %r0, 0
@@ -68,7 +68,7 @@ define i64 @f3(fp128 *%src) #0 {
; CHECK-NEXT: ld %f2, 8(%r2)
; CHECK-NEXT: larl %r1, .LCPI2_0
; CHECK-NEXT: lxeb %f1, 0(%r1)
-; CHECK-NEXT: cxbr %f0, %f1
+; CHECK-NEXT: kxbr %f0, %f1
; CHECK-NEXT: jnl .LBB2_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: lghi %r0, 0
diff --git a/llvm/test/CodeGen/X86/fp-intrinsics-flags.ll b/llvm/test/CodeGen/X86/fp-intrinsics-flags.ll
index 623e63a93e4f..5df3d8f7a4a8 100644
--- a/llvm/test/CodeGen/X86/fp-intrinsics-flags.ll
+++ b/llvm/test/CodeGen/X86/fp-intrinsics-flags.ll
@@ -29,13 +29,13 @@ entry:
; CHECK-LABEL: name: f20u64
; CHECK: [[MOVSDrm_alt:%[0-9]+]]:fr64 = MOVSDrm_alt %fixed-stack.0, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.0, align 16)
; CHECK: [[MOVSDrm_alt1:%[0-9]+]]:fr64 = MOVSDrm_alt $noreg, 1, $noreg, %const.0, $noreg :: (load 8 from constant-pool)
-; CHECK: [[CMPSDrr:%[0-9]+]]:fr64 = CMPSDrr [[MOVSDrm_alt]], [[MOVSDrm_alt1]], 1, implicit $mxcsr
-; CHECK: [[COPY:%[0-9]+]]:vr128 = COPY [[CMPSDrr]]
-; CHECK: [[COPY1:%[0-9]+]]:vr128 = COPY [[MOVSDrm_alt1]]
-; CHECK: [[PANDNrr:%[0-9]+]]:vr128 = PANDNrr [[COPY]], killed [[COPY1]]
-; CHECK: [[COPY2:%[0-9]+]]:fr64 = COPY [[PANDNrr]]
-; CHECK: [[SUBSDrr:%[0-9]+]]:fr64 = SUBSDrr [[MOVSDrm_alt]], killed [[COPY2]], implicit $mxcsr
+; CHECK: COMISDrr [[MOVSDrm_alt1]], [[MOVSDrm_alt]], implicit-def $eflags, implicit $mxcsr
+; CHECK: [[FsFLD0SD:%[0-9]+]]:fr64 = FsFLD0SD
+; CHECK: JCC_1
+; CHECK: [[PHI:%[0-9]+]]:fr64 = PHI [[MOVSDrm_alt1]], {{.*}}, [[FsFLD0SD]], {{.*}}
+; CHECK: [[SUBSDrr:%[0-9]+]]:fr64 = SUBSDrr [[MOVSDrm_alt]], killed [[PHI]], implicit $mxcsr
; CHECK: MOVSDmr %stack.0, 1, $noreg, 0, $noreg, killed [[SUBSDrr]] :: (store 8 into %stack.0)
+; CHECK: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 6, implicit $eflags
; CHECK: [[LD_Fp64m:%[0-9]+]]:rfp64 = LD_Fp64m %stack.0, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load 8 from %stack.0)
; CHECK: FNSTCW16m %stack.1, 1, $noreg, 0, $noreg, implicit-def $fpsw, implicit $fpcw :: (store 2 into %stack.1)
; CHECK: [[MOVZX32rm16_:%[0-9]+]]:gr32 = MOVZX32rm16 %stack.1, 1, $noreg, 0, $noreg :: (load 2 from %stack.1)
@@ -45,8 +45,6 @@ entry:
; CHECK: FLDCW16m %stack.2, 1, $noreg, 0, $noreg, implicit-def $fpsw, implicit-def $fpcw :: (load 2 from %stack.2)
; CHECK: IST_Fp64m64 %stack.0, 1, $noreg, 0, $noreg, [[LD_Fp64m]], implicit-def $fpsw, implicit $fpcw
; CHECK: FLDCW16m %stack.1, 1, $noreg, 0, $noreg, implicit-def $fpsw, implicit-def $fpcw :: (load 2 from %stack.1)
-; CHECK: UCOMISDrr [[MOVSDrm_alt1]], [[MOVSDrm_alt]], implicit-def $eflags, implicit $mxcsr
-; CHECK: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 6, implicit $eflags
; CHECK: [[MOVZX32rr8_:%[0-9]+]]:gr32 = MOVZX32rr8 killed [[SETCCr]]
; CHECK: [[SHL32ri:%[0-9]+]]:gr32 = SHL32ri [[MOVZX32rr8_]], 31, implicit-def dead $eflags
; CHECK: [[XOR32rm:%[0-9]+]]:gr32 = XOR32rm [[SHL32ri]], %stack.0, 1, $noreg, 4, $noreg, implicit-def dead $eflags :: (load 4 from %stack.0 + 4)
@@ -86,16 +84,14 @@ entry:
; CHECK-LABEL: name: f20u
; CHECK: [[MOVSDrm_alt:%[0-9]+]]:fr64 = MOVSDrm_alt %fixed-stack.0, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.0, align 16)
; CHECK: [[MOVSDrm_alt1:%[0-9]+]]:fr64 = MOVSDrm_alt $noreg, 1, $noreg, %const.0, $noreg :: (load 8 from constant-pool)
-; CHECK: UCOMISDrr [[MOVSDrm_alt1]], [[MOVSDrm_alt]], implicit-def $eflags, implicit $mxcsr
+; CHECK: COMISDrr [[MOVSDrm_alt1]], [[MOVSDrm_alt]], implicit-def $eflags, implicit $mxcsr
+; CHECK: [[FsFLD0SD:%[0-9]+]]:fr64 = FsFLD0SD
+; CHECK: JCC_1
+; CHECK: [[PHI:%[0-9]+]]:fr64 = PHI [[MOVSDrm_alt1]], {{.*}}, [[FsFLD0SD]], {{.*}}
; CHECK: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 6, implicit $eflags
; CHECK: [[MOVZX32rr8_:%[0-9]+]]:gr32 = MOVZX32rr8 killed [[SETCCr]]
; CHECK: [[SHL32ri:%[0-9]+]]:gr32 = SHL32ri [[MOVZX32rr8_]], 31, implicit-def dead $eflags
-; CHECK: [[CMPSDrr:%[0-9]+]]:fr64 = CMPSDrr [[MOVSDrm_alt]], [[MOVSDrm_alt1]], 1, implicit $mxcsr
-; CHECK: [[COPY:%[0-9]+]]:vr128 = COPY [[CMPSDrr]]
-; CHECK: [[COPY1:%[0-9]+]]:vr128 = COPY [[MOVSDrm_alt1]]
-; CHECK: [[PANDNrr:%[0-9]+]]:vr128 = PANDNrr [[COPY]], killed [[COPY1]]
-; CHECK: [[COPY2:%[0-9]+]]:fr64 = COPY [[PANDNrr]]
-; CHECK: [[SUBSDrr:%[0-9]+]]:fr64 = SUBSDrr [[MOVSDrm_alt]], killed [[COPY2]], implicit $mxcsr
+; CHECK: [[SUBSDrr:%[0-9]+]]:fr64 = SUBSDrr [[MOVSDrm_alt]], killed [[PHI]], implicit $mxcsr
; CHECK: [[CVTTSD2SIrr:%[0-9]+]]:gr32 = CVTTSD2SIrr killed [[SUBSDrr]], implicit $mxcsr
; CHECK: [[XOR32rr:%[0-9]+]]:gr32 = XOR32rr [[CVTTSD2SIrr]], killed [[SHL32ri]], implicit-def dead $eflags
; CHECK: $eax = COPY [[XOR32rr]]
diff --git a/llvm/test/CodeGen/X86/fp-intrinsics.ll b/llvm/test/CodeGen/X86/fp-intrinsics.ll
index 124a43f53ddf..2135cdb0404d 100644
--- a/llvm/test/CodeGen/X86/fp-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/fp-intrinsics.ll
@@ -1277,15 +1277,17 @@ define i32 @f20u(double %x) #0 {
; X86-SSE-LABEL: f20u:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; X86-SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; X86-SSE-NEXT: xorl %ecx, %ecx
-; X86-SSE-NEXT: ucomisd %xmm0, %xmm1
-; X86-SSE-NEXT: setbe %cl
+; X86-SSE-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
+; X86-SSE-NEXT: comisd %xmm0, %xmm2
+; X86-SSE-NEXT: xorpd %xmm1, %xmm1
+; X86-SSE-NEXT: ja .LBB24_2
+; X86-SSE-NEXT: # %bb.1: # %entry
+; X86-SSE-NEXT: movapd %xmm2, %xmm1
+; X86-SSE-NEXT: .LBB24_2: # %entry
+; X86-SSE-NEXT: setbe %al
+; X86-SSE-NEXT: movzbl %al, %ecx
; X86-SSE-NEXT: shll $31, %ecx
-; X86-SSE-NEXT: movapd %xmm0, %xmm2
-; X86-SSE-NEXT: cmpltsd %xmm1, %xmm2
-; X86-SSE-NEXT: andnpd %xmm1, %xmm2
-; X86-SSE-NEXT: subsd %xmm2, %xmm0
+; X86-SSE-NEXT: subsd %xmm1, %xmm0
; X86-SSE-NEXT: cvttsd2si %xmm0, %eax
; X86-SSE-NEXT: xorl %ecx, %eax
; X86-SSE-NEXT: retl
@@ -1324,7 +1326,7 @@ define i64 @f20u64(double %x) #0 {
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: flds {{\.LCPI.*}}
; X87-NEXT: xorl %edx, %edx
-; X87-NEXT: fucomi %st(1), %st
+; X87-NEXT: fcomi %st(1), %st
; X87-NEXT: setbe %dl
; X87-NEXT: fldz
; X87-NEXT: fxch %st(1)
@@ -1350,24 +1352,25 @@ define i64 @f20u64(double %x) #0 {
; X86-SSE-NEXT: subl $20, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 24
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; X86-SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; X86-SSE-NEXT: movapd %xmm0, %xmm2
-; X86-SSE-NEXT: cmpltsd %xmm1, %xmm2
-; X86-SSE-NEXT: andnpd %xmm1, %xmm2
-; X86-SSE-NEXT: movapd %xmm0, %xmm3
-; X86-SSE-NEXT: subsd %xmm2, %xmm3
-; X86-SSE-NEXT: movsd %xmm3, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
+; X86-SSE-NEXT: comisd %xmm0, %xmm2
+; X86-SSE-NEXT: xorpd %xmm1, %xmm1
+; X86-SSE-NEXT: ja .LBB25_2
+; X86-SSE-NEXT: # %bb.1: # %entry
+; X86-SSE-NEXT: movapd %xmm2, %xmm1
+; X86-SSE-NEXT: .LBB25_2: # %entry
+; X86-SSE-NEXT: subsd %xmm1, %xmm0
+; X86-SSE-NEXT: movsd %xmm0, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT: setbe %al
; X86-SSE-NEXT: fldl {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fnstcw {{[0-9]+}}(%esp)
-; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-SSE-NEXT: orl $3072, %eax # imm = 0xC00
-; X86-SSE-NEXT: movw %ax, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-SSE-NEXT: orl $3072, %ecx # imm = 0xC00
+; X86-SSE-NEXT: movw %cx, {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fldcw {{[0-9]+}}(%esp)
-; X86-SSE-NEXT: xorl %edx, %edx
-; X86-SSE-NEXT: ucomisd %xmm0, %xmm1
-; X86-SSE-NEXT: setbe %dl
+; X86-SSE-NEXT: movzbl %al, %edx
; X86-SSE-NEXT: shll $31, %edx
; X86-SSE-NEXT: xorl {{[0-9]+}}(%esp), %edx
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -1377,30 +1380,35 @@ define i64 @f20u64(double %x) #0 {
;
; SSE-LABEL: f20u64:
; SSE: # %bb.0: # %entry
-; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; SSE-NEXT: xorl %ecx, %ecx
-; SSE-NEXT: ucomisd %xmm1, %xmm0
-; SSE-NEXT: setae %cl
-; SSE-NEXT: shlq $63, %rcx
-; SSE-NEXT: movapd %xmm0, %xmm2
-; SSE-NEXT: cmpltsd %xmm1, %xmm2
-; SSE-NEXT: andnpd %xmm1, %xmm2
-; SSE-NEXT: subsd %xmm2, %xmm0
-; SSE-NEXT: cvttsd2si %xmm0, %rax
+; SSE-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
+; SSE-NEXT: comisd %xmm2, %xmm0
+; SSE-NEXT: xorpd %xmm1, %xmm1
+; SSE-NEXT: jb .LBB25_2
+; SSE-NEXT: # %bb.1: # %entry
+; SSE-NEXT: movapd %xmm2, %xmm1
+; SSE-NEXT: .LBB25_2: # %entry
+; SSE-NEXT: subsd %xmm1, %xmm0
+; SSE-NEXT: cvttsd2si %xmm0, %rcx
+; SSE-NEXT: setae %al
+; SSE-NEXT: movzbl %al, %eax
+; SSE-NEXT: shlq $63, %rax
; SSE-NEXT: xorq %rcx, %rax
; SSE-NEXT: retq
;
; AVX1-LABEL: f20u64:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX1-NEXT: xorl %ecx, %ecx
-; AVX1-NEXT: vucomisd %xmm1, %xmm0
-; AVX1-NEXT: setae %cl
-; AVX1-NEXT: shlq $63, %rcx
-; AVX1-NEXT: vcmpltsd %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vandnpd %xmm1, %xmm2, %xmm1
-; AVX1-NEXT: vsubsd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vcvttsd2si %xmm0, %rax
+; AVX1-NEXT: vcomisd %xmm1, %xmm0
+; AVX1-NEXT: vxorpd %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: jb .LBB25_2
+; AVX1-NEXT: # %bb.1: # %entry
+; AVX1-NEXT: vmovapd %xmm1, %xmm2
+; AVX1-NEXT: .LBB25_2: # %entry
+; AVX1-NEXT: vsubsd %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vcvttsd2si %xmm0, %rcx
+; AVX1-NEXT: setae %al
+; AVX1-NEXT: movzbl %al, %eax
+; AVX1-NEXT: shlq $63, %rax
; AVX1-NEXT: xorq %rcx, %rax
; AVX1-NEXT: retq
;
@@ -2656,34 +2664,34 @@ define float @uiffl(i64 %x) #0 {
;
; SSE-LABEL: uiffl:
; SSE: # %bb.0: # %entry
-; SSE-NEXT: testq %rdi, %rdi
-; SSE-NEXT: js .LBB52_1
-; SSE-NEXT: # %bb.2: # %entry
-; SSE-NEXT: cvtsi2ss %rdi, %xmm0
-; SSE-NEXT: retq
-; SSE-NEXT: .LBB52_1:
; SSE-NEXT: movq %rdi, %rax
; SSE-NEXT: shrq %rax
-; SSE-NEXT: andl $1, %edi
-; SSE-NEXT: orq %rax, %rdi
-; SSE-NEXT: cvtsi2ss %rdi, %xmm0
+; SSE-NEXT: movl %edi, %ecx
+; SSE-NEXT: andl $1, %ecx
+; SSE-NEXT: orq %rax, %rcx
+; SSE-NEXT: testq %rdi, %rdi
+; SSE-NEXT: cmovnsq %rdi, %rcx
+; SSE-NEXT: cvtsi2ss %rcx, %xmm0
+; SSE-NEXT: jns .LBB52_2
+; SSE-NEXT: # %bb.1:
; SSE-NEXT: addss %xmm0, %xmm0
+; SSE-NEXT: .LBB52_2: # %entry
; SSE-NEXT: retq
;
; AVX1-LABEL: uiffl:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: testq %rdi, %rdi
-; AVX1-NEXT: js .LBB52_1
-; AVX1-NEXT: # %bb.2: # %entry
-; AVX1-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0
-; AVX1-NEXT: retq
-; AVX1-NEXT: .LBB52_1:
; AVX1-NEXT: movq %rdi, %rax
; AVX1-NEXT: shrq %rax
-; AVX1-NEXT: andl $1, %edi
-; AVX1-NEXT: orq %rax, %rdi
-; AVX1-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0
+; AVX1-NEXT: movl %edi, %ecx
+; AVX1-NEXT: andl $1, %ecx
+; AVX1-NEXT: orq %rax, %rcx
+; AVX1-NEXT: testq %rdi, %rdi
+; AVX1-NEXT: cmovnsq %rdi, %rcx
+; AVX1-NEXT: vcvtsi2ss %rcx, %xmm0, %xmm0
+; AVX1-NEXT: jns .LBB52_2
+; AVX1-NEXT: # %bb.1:
; AVX1-NEXT: vaddss %xmm0, %xmm0, %xmm0
+; AVX1-NEXT: .LBB52_2: # %entry
; AVX1-NEXT: retq
;
; AVX512-LABEL: uiffl:
diff --git a/llvm/test/CodeGen/X86/fp-strict-scalar-fptoint.ll b/llvm/test/CodeGen/X86/fp-strict-scalar-fptoint.ll
index 680acf98d2a1..dc54f15a650d 100644
--- a/llvm/test/CodeGen/X86/fp-strict-scalar-fptoint.ll
+++ b/llvm/test/CodeGen/X86/fp-strict-scalar-fptoint.ll
@@ -437,15 +437,17 @@ define i32 @fptoui_f32toi32(float %x) #0 {
; SSE-X86-LABEL: fptoui_f32toi32:
; SSE-X86: # %bb.0:
; SSE-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE-X86-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE-X86-NEXT: xorl %ecx, %ecx
-; SSE-X86-NEXT: ucomiss %xmm0, %xmm1
-; SSE-X86-NEXT: setbe %cl
+; SSE-X86-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; SSE-X86-NEXT: comiss %xmm0, %xmm2
+; SSE-X86-NEXT: xorps %xmm1, %xmm1
+; SSE-X86-NEXT: ja .LBB8_2
+; SSE-X86-NEXT: # %bb.1:
+; SSE-X86-NEXT: movaps %xmm2, %xmm1
+; SSE-X86-NEXT: .LBB8_2:
+; SSE-X86-NEXT: setbe %al
+; SSE-X86-NEXT: movzbl %al, %ecx
; SSE-X86-NEXT: shll $31, %ecx
-; SSE-X86-NEXT: movaps %xmm0, %xmm2
-; SSE-X86-NEXT: cmpltss %xmm1, %xmm2
-; SSE-X86-NEXT: andnps %xmm1, %xmm2
-; SSE-X86-NEXT: subss %xmm2, %xmm0
+; SSE-X86-NEXT: subss %xmm1, %xmm0
; SSE-X86-NEXT: cvttss2si %xmm0, %eax
; SSE-X86-NEXT: xorl %ecx, %eax
; SSE-X86-NEXT: retl
@@ -529,24 +531,25 @@ define i64 @fptoui_f32toi64(float %x) #0 {
; SSE-X86-NEXT: andl $-8, %esp
; SSE-X86-NEXT: subl $16, %esp
; SSE-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE-X86-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE-X86-NEXT: movaps %xmm0, %xmm2
-; SSE-X86-NEXT: cmpltss %xmm1, %xmm2
-; SSE-X86-NEXT: andnps %xmm1, %xmm2
-; SSE-X86-NEXT: movaps %xmm0, %xmm3
-; SSE-X86-NEXT: subss %xmm2, %xmm3
-; SSE-X86-NEXT: movss %xmm3, {{[0-9]+}}(%esp)
+; SSE-X86-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; SSE-X86-NEXT: comiss %xmm0, %xmm2
+; SSE-X86-NEXT: xorps %xmm1, %xmm1
+; SSE-X86-NEXT: ja .LBB9_2
+; SSE-X86-NEXT: # %bb.1:
+; SSE-X86-NEXT: movaps %xmm2, %xmm1
+; SSE-X86-NEXT: .LBB9_2:
+; SSE-X86-NEXT: subss %xmm1, %xmm0
+; SSE-X86-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
+; SSE-X86-NEXT: setbe %al
; SSE-X86-NEXT: flds {{[0-9]+}}(%esp)
; SSE-X86-NEXT: fnstcw {{[0-9]+}}(%esp)
-; SSE-X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; SSE-X86-NEXT: orl $3072, %eax # imm = 0xC00
-; SSE-X86-NEXT: movw %ax, {{[0-9]+}}(%esp)
+; SSE-X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; SSE-X86-NEXT: orl $3072, %ecx # imm = 0xC00
+; SSE-X86-NEXT: movw %cx, {{[0-9]+}}(%esp)
; SSE-X86-NEXT: fldcw {{[0-9]+}}(%esp)
; SSE-X86-NEXT: fistpll {{[0-9]+}}(%esp)
; SSE-X86-NEXT: fldcw {{[0-9]+}}(%esp)
-; SSE-X86-NEXT: xorl %edx, %edx
-; SSE-X86-NEXT: ucomiss %xmm0, %xmm1
-; SSE-X86-NEXT: setbe %dl
+; SSE-X86-NEXT: movzbl %al, %edx
; SSE-X86-NEXT: shll $31, %edx
; SSE-X86-NEXT: xorl {{[0-9]+}}(%esp), %edx
; SSE-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -557,16 +560,18 @@ define i64 @fptoui_f32toi64(float %x) #0 {
;
; SSE-X64-LABEL: fptoui_f32toi64:
; SSE-X64: # %bb.0:
-; SSE-X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE-X64-NEXT: xorl %ecx, %ecx
-; SSE-X64-NEXT: ucomiss %xmm1, %xmm0
-; SSE-X64-NEXT: setae %cl
-; SSE-X64-NEXT: shlq $63, %rcx
-; SSE-X64-NEXT: movaps %xmm0, %xmm2
-; SSE-X64-NEXT: cmpltss %xmm1, %xmm2
-; SSE-X64-NEXT: andnps %xmm1, %xmm2
-; SSE-X64-NEXT: subss %xmm2, %xmm0
-; SSE-X64-NEXT: cvttss2si %xmm0, %rax
+; SSE-X64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; SSE-X64-NEXT: comiss %xmm2, %xmm0
+; SSE-X64-NEXT: xorps %xmm1, %xmm1
+; SSE-X64-NEXT: jb .LBB9_2
+; SSE-X64-NEXT: # %bb.1:
+; SSE-X64-NEXT: movaps %xmm2, %xmm1
+; SSE-X64-NEXT: .LBB9_2:
+; SSE-X64-NEXT: subss %xmm1, %xmm0
+; SSE-X64-NEXT: cvttss2si %xmm0, %rcx
+; SSE-X64-NEXT: setae %al
+; SSE-X64-NEXT: movzbl %al, %eax
+; SSE-X64-NEXT: shlq $63, %rax
; SSE-X64-NEXT: xorq %rcx, %rax
; SSE-X64-NEXT: retq
;
@@ -581,15 +586,18 @@ define i64 @fptoui_f32toi64(float %x) #0 {
; AVX1-X86-NEXT: subl $8, %esp
; AVX1-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX1-X86-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX1-X86-NEXT: vcmpltss %xmm1, %xmm0, %xmm2
-; AVX1-X86-NEXT: vandnps %xmm1, %xmm2, %xmm2
-; AVX1-X86-NEXT: vsubss %xmm2, %xmm0, %xmm2
-; AVX1-X86-NEXT: vmovss %xmm2, (%esp)
+; AVX1-X86-NEXT: vcomiss %xmm0, %xmm1
+; AVX1-X86-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; AVX1-X86-NEXT: ja .LBB9_2
+; AVX1-X86-NEXT: # %bb.1:
+; AVX1-X86-NEXT: vmovaps %xmm1, %xmm2
+; AVX1-X86-NEXT: .LBB9_2:
+; AVX1-X86-NEXT: vsubss %xmm2, %xmm0, %xmm0
+; AVX1-X86-NEXT: vmovss %xmm0, (%esp)
; AVX1-X86-NEXT: flds (%esp)
; AVX1-X86-NEXT: fisttpll (%esp)
-; AVX1-X86-NEXT: xorl %edx, %edx
-; AVX1-X86-NEXT: vucomiss %xmm0, %xmm1
-; AVX1-X86-NEXT: setbe %dl
+; AVX1-X86-NEXT: setbe %al
+; AVX1-X86-NEXT: movzbl %al, %edx
; AVX1-X86-NEXT: shll $31, %edx
; AVX1-X86-NEXT: xorl {{[0-9]+}}(%esp), %edx
; AVX1-X86-NEXT: movl (%esp), %eax
@@ -601,14 +609,17 @@ define i64 @fptoui_f32toi64(float %x) #0 {
; AVX1-X64-LABEL: fptoui_f32toi64:
; AVX1-X64: # %bb.0:
; AVX1-X64-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX1-X64-NEXT: xorl %ecx, %ecx
-; AVX1-X64-NEXT: vucomiss %xmm1, %xmm0
-; AVX1-X64-NEXT: setae %cl
-; AVX1-X64-NEXT: shlq $63, %rcx
-; AVX1-X64-NEXT: vcmpltss %xmm1, %xmm0, %xmm2
-; AVX1-X64-NEXT: vandnps %xmm1, %xmm2, %xmm1
-; AVX1-X64-NEXT: vsubss %xmm1, %xmm0, %xmm0
-; AVX1-X64-NEXT: vcvttss2si %xmm0, %rax
+; AVX1-X64-NEXT: vcomiss %xmm1, %xmm0
+; AVX1-X64-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; AVX1-X64-NEXT: jb .LBB9_2
+; AVX1-X64-NEXT: # %bb.1:
+; AVX1-X64-NEXT: vmovaps %xmm1, %xmm2
+; AVX1-X64-NEXT: .LBB9_2:
+; AVX1-X64-NEXT: vsubss %xmm2, %xmm0, %xmm0
+; AVX1-X64-NEXT: vcvttss2si %xmm0, %rcx
+; AVX1-X64-NEXT: setae %al
+; AVX1-X64-NEXT: movzbl %al, %eax
+; AVX1-X64-NEXT: shlq $63, %rax
; AVX1-X64-NEXT: xorq %rcx, %rax
; AVX1-X64-NEXT: retq
;
@@ -623,10 +634,11 @@ define i64 @fptoui_f32toi64(float %x) #0 {
; AVX512-X86-NEXT: subl $8, %esp
; AVX512-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512-X86-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX512-X86-NEXT: vcmpltss %xmm1, %xmm0, %k1
-; AVX512-X86-NEXT: vxorps %xmm2, %xmm2, %xmm2
; AVX512-X86-NEXT: xorl %edx, %edx
-; AVX512-X86-NEXT: vucomiss %xmm0, %xmm1
+; AVX512-X86-NEXT: vcomiss %xmm0, %xmm1
+; AVX512-X86-NEXT: seta %al
+; AVX512-X86-NEXT: kmovw %eax, %k1
+; AVX512-X86-NEXT: vxorps %xmm2, %xmm2, %xmm2
; AVX512-X86-NEXT: vmovss %xmm2, %xmm1, %xmm1 {%k1}
; AVX512-X86-NEXT: vsubss %xmm1, %xmm0, %xmm0
; AVX512-X86-NEXT: vmovss %xmm0, (%esp)
@@ -657,7 +669,7 @@ define i64 @fptoui_f32toi64(float %x) #0 {
; CHECK-NEXT: subl $16, %esp
; CHECK-NEXT: flds 8(%ebp)
; CHECK-NEXT: flds {{\.LCPI.*}}
-; CHECK-NEXT: fucom %st(1)
+; CHECK-NEXT: fcom %st(1)
; CHECK-NEXT: fnstsw %ax
; CHECK-NEXT: xorl %edx, %edx
; CHECK-NEXT: # kill: def $ah killed $ah killed $ax
@@ -1054,15 +1066,17 @@ define i32 @fptoui_f64toi32(double %x) #0 {
; SSE-X86-LABEL: fptoui_f64toi32:
; SSE-X86: # %bb.0:
; SSE-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE-X86-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; SSE-X86-NEXT: xorl %ecx, %ecx
-; SSE-X86-NEXT: ucomisd %xmm0, %xmm1
-; SSE-X86-NEXT: setbe %cl
+; SSE-X86-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
+; SSE-X86-NEXT: comisd %xmm0, %xmm2
+; SSE-X86-NEXT: xorpd %xmm1, %xmm1
+; SSE-X86-NEXT: ja .LBB17_2
+; SSE-X86-NEXT: # %bb.1:
+; SSE-X86-NEXT: movapd %xmm2, %xmm1
+; SSE-X86-NEXT: .LBB17_2:
+; SSE-X86-NEXT: setbe %al
+; SSE-X86-NEXT: movzbl %al, %ecx
; SSE-X86-NEXT: shll $31, %ecx
-; SSE-X86-NEXT: movapd %xmm0, %xmm2
-; SSE-X86-NEXT: cmpltsd %xmm1, %xmm2
-; SSE-X86-NEXT: andnpd %xmm1, %xmm2
-; SSE-X86-NEXT: subsd %xmm2, %xmm0
+; SSE-X86-NEXT: subsd %xmm1, %xmm0
; SSE-X86-NEXT: cvttsd2si %xmm0, %eax
; SSE-X86-NEXT: xorl %ecx, %eax
; SSE-X86-NEXT: retl
@@ -1146,24 +1160,25 @@ define i64 @fptoui_f64toi64(double %x) #0 {
; SSE-X86-NEXT: andl $-8, %esp
; SSE-X86-NEXT: subl $16, %esp
; SSE-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE-X86-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; SSE-X86-NEXT: movapd %xmm0, %xmm2
-; SSE-X86-NEXT: cmpltsd %xmm1, %xmm2
-; SSE-X86-NEXT: andnpd %xmm1, %xmm2
-; SSE-X86-NEXT: movapd %xmm0, %xmm3
-; SSE-X86-NEXT: subsd %xmm2, %xmm3
-; SSE-X86-NEXT: movsd %xmm3, {{[0-9]+}}(%esp)
+; SSE-X86-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
+; SSE-X86-NEXT: comisd %xmm0, %xmm2
+; SSE-X86-NEXT: xorpd %xmm1, %xmm1
+; SSE-X86-NEXT: ja .LBB18_2
+; SSE-X86-NEXT: # %bb.1:
+; SSE-X86-NEXT: movapd %xmm2, %xmm1
+; SSE-X86-NEXT: .LBB18_2:
+; SSE-X86-NEXT: subsd %xmm1, %xmm0
+; SSE-X86-NEXT: movsd %xmm0, {{[0-9]+}}(%esp)
+; SSE-X86-NEXT: setbe %al
; SSE-X86-NEXT: fldl {{[0-9]+}}(%esp)
; SSE-X86-NEXT: fnstcw {{[0-9]+}}(%esp)
-; SSE-X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; SSE-X86-NEXT: orl $3072, %eax # imm = 0xC00
-; SSE-X86-NEXT: movw %ax, {{[0-9]+}}(%esp)
+; SSE-X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; SSE-X86-NEXT: orl $3072, %ecx # imm = 0xC00
+; SSE-X86-NEXT: movw %cx, {{[0-9]+}}(%esp)
; SSE-X86-NEXT: fldcw {{[0-9]+}}(%esp)
; SSE-X86-NEXT: fistpll {{[0-9]+}}(%esp)
; SSE-X86-NEXT: fldcw {{[0-9]+}}(%esp)
-; SSE-X86-NEXT: xorl %edx, %edx
-; SSE-X86-NEXT: ucomisd %xmm0, %xmm1
-; SSE-X86-NEXT: setbe %dl
+; SSE-X86-NEXT: movzbl %al, %edx
; SSE-X86-NEXT: shll $31, %edx
; SSE-X86-NEXT: xorl {{[0-9]+}}(%esp), %edx
; SSE-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -1174,16 +1189,18 @@ define i64 @fptoui_f64toi64(double %x) #0 {
;
; SSE-X64-LABEL: fptoui_f64toi64:
; SSE-X64: # %bb.0:
-; SSE-X64-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; SSE-X64-NEXT: xorl %ecx, %ecx
-; SSE-X64-NEXT: ucomisd %xmm1, %xmm0
-; SSE-X64-NEXT: setae %cl
-; SSE-X64-NEXT: shlq $63, %rcx
-; SSE-X64-NEXT: movapd %xmm0, %xmm2
-; SSE-X64-NEXT: cmpltsd %xmm1, %xmm2
-; SSE-X64-NEXT: andnpd %xmm1, %xmm2
-; SSE-X64-NEXT: subsd %xmm2, %xmm0
-; SSE-X64-NEXT: cvttsd2si %xmm0, %rax
+; SSE-X64-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
+; SSE-X64-NEXT: comisd %xmm2, %xmm0
+; SSE-X64-NEXT: xorpd %xmm1, %xmm1
+; SSE-X64-NEXT: jb .LBB18_2
+; SSE-X64-NEXT: # %bb.1:
+; SSE-X64-NEXT: movapd %xmm2, %xmm1
+; SSE-X64-NEXT: .LBB18_2:
+; SSE-X64-NEXT: subsd %xmm1, %xmm0
+; SSE-X64-NEXT: cvttsd2si %xmm0, %rcx
+; SSE-X64-NEXT: setae %al
+; SSE-X64-NEXT: movzbl %al, %eax
+; SSE-X64-NEXT: shlq $63, %rax
; SSE-X64-NEXT: xorq %rcx, %rax
; SSE-X64-NEXT: retq
;
@@ -1198,15 +1215,18 @@ define i64 @fptoui_f64toi64(double %x) #0 {
; AVX1-X86-NEXT: subl $8, %esp
; AVX1-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX1-X86-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX1-X86-NEXT: vcmpltsd %xmm1, %xmm0, %xmm2
-; AVX1-X86-NEXT: vandnpd %xmm1, %xmm2, %xmm2
-; AVX1-X86-NEXT: vsubsd %xmm2, %xmm0, %xmm2
-; AVX1-X86-NEXT: vmovsd %xmm2, (%esp)
+; AVX1-X86-NEXT: vcomisd %xmm0, %xmm1
+; AVX1-X86-NEXT: vxorpd %xmm2, %xmm2, %xmm2
+; AVX1-X86-NEXT: ja .LBB18_2
+; AVX1-X86-NEXT: # %bb.1:
+; AVX1-X86-NEXT: vmovapd %xmm1, %xmm2
+; AVX1-X86-NEXT: .LBB18_2:
+; AVX1-X86-NEXT: vsubsd %xmm2, %xmm0, %xmm0
+; AVX1-X86-NEXT: vmovsd %xmm0, (%esp)
; AVX1-X86-NEXT: fldl (%esp)
; AVX1-X86-NEXT: fisttpll (%esp)
-; AVX1-X86-NEXT: xorl %edx, %edx
-; AVX1-X86-NEXT: vucomisd %xmm0, %xmm1
-; AVX1-X86-NEXT: setbe %dl
+; AVX1-X86-NEXT: setbe %al
+; AVX1-X86-NEXT: movzbl %al, %edx
; AVX1-X86-NEXT: shll $31, %edx
; AVX1-X86-NEXT: xorl {{[0-9]+}}(%esp), %edx
; AVX1-X86-NEXT: movl (%esp), %eax
@@ -1218,14 +1238,17 @@ define i64 @fptoui_f64toi64(double %x) #0 {
; AVX1-X64-LABEL: fptoui_f64toi64:
; AVX1-X64: # %bb.0:
; AVX1-X64-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX1-X64-NEXT: xorl %ecx, %ecx
-; AVX1-X64-NEXT: vucomisd %xmm1, %xmm0
-; AVX1-X64-NEXT: setae %cl
-; AVX1-X64-NEXT: shlq $63, %rcx
-; AVX1-X64-NEXT: vcmpltsd %xmm1, %xmm0, %xmm2
-; AVX1-X64-NEXT: vandnpd %xmm1, %xmm2, %xmm1
-; AVX1-X64-NEXT: vsubsd %xmm1, %xmm0, %xmm0
-; AVX1-X64-NEXT: vcvttsd2si %xmm0, %rax
+; AVX1-X64-NEXT: vcomisd %xmm1, %xmm0
+; AVX1-X64-NEXT: vxorpd %xmm2, %xmm2, %xmm2
+; AVX1-X64-NEXT: jb .LBB18_2
+; AVX1-X64-NEXT: # %bb.1:
+; AVX1-X64-NEXT: vmovapd %xmm1, %xmm2
+; AVX1-X64-NEXT: .LBB18_2:
+; AVX1-X64-NEXT: vsubsd %xmm2, %xmm0, %xmm0
+; AVX1-X64-NEXT: vcvttsd2si %xmm0, %rcx
+; AVX1-X64-NEXT: setae %al
+; AVX1-X64-NEXT: movzbl %al, %eax
+; AVX1-X64-NEXT: shlq $63, %rax
; AVX1-X64-NEXT: xorq %rcx, %rax
; AVX1-X64-NEXT: retq
;
@@ -1240,10 +1263,11 @@ define i64 @fptoui_f64toi64(double %x) #0 {
; AVX512-X86-NEXT: subl $8, %esp
; AVX512-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX512-X86-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX512-X86-NEXT: vcmpltsd %xmm1, %xmm0, %k1
-; AVX512-X86-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; AVX512-X86-NEXT: xorl %edx, %edx
-; AVX512-X86-NEXT: vucomisd %xmm0, %xmm1
+; AVX512-X86-NEXT: vcomisd %xmm0, %xmm1
+; AVX512-X86-NEXT: seta %al
+; AVX512-X86-NEXT: kmovw %eax, %k1
+; AVX512-X86-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; AVX512-X86-NEXT: vmovsd %xmm2, %xmm1, %xmm1 {%k1}
; AVX512-X86-NEXT: vsubsd %xmm1, %xmm0, %xmm0
; AVX512-X86-NEXT: vmovsd %xmm0, (%esp)
@@ -1274,7 +1298,7 @@ define i64 @fptoui_f64toi64(double %x) #0 {
; CHECK-NEXT: subl $16, %esp
; CHECK-NEXT: fldl 8(%ebp)
; CHECK-NEXT: flds {{\.LCPI.*}}
-; CHECK-NEXT: fucom %st(1)
+; CHECK-NEXT: fcom %st(1)
; CHECK-NEXT: fnstsw %ax
; CHECK-NEXT: xorl %edx, %edx
; CHECK-NEXT: # kill: def $ah killed $ah killed $ax
diff --git a/llvm/test/CodeGen/X86/fp80-strict-scalar.ll b/llvm/test/CodeGen/X86/fp80-strict-scalar.ll
index 51ffc1c48eee..0df9f33fb074 100644
--- a/llvm/test/CodeGen/X86/fp80-strict-scalar.ll
+++ b/llvm/test/CodeGen/X86/fp80-strict-scalar.ll
@@ -543,7 +543,7 @@ define i64 @fp80_to_uint64(x86_fp80 %x) #0 {
; X86-NEXT: subl $16, %esp
; X86-NEXT: fldt 8(%ebp)
; X86-NEXT: flds {{\.LCPI.*}}
-; X86-NEXT: fucom %st(1)
+; X86-NEXT: fcom %st(1)
; X86-NEXT: fnstsw %ax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: # kill: def $ah killed $ah killed $ax
@@ -579,7 +579,7 @@ define i64 @fp80_to_uint64(x86_fp80 %x) #0 {
; X64-NEXT: fldt {{[0-9]+}}(%rsp)
; X64-NEXT: flds {{.*}}(%rip)
; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: fucomi %st(1), %st
+; X64-NEXT: fcomi %st(1), %st
; X64-NEXT: setbe %al
; X64-NEXT: fldz
; X64-NEXT: fxch %st(1)
diff --git a/llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll b/llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll
index fdefd937e7ed..ab5c6b7f9989 100644
--- a/llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll
@@ -176,7 +176,7 @@ define <2 x i64> @strict_vector_fptoui_v2f64_to_v2i64(<2 x double> %a) #0 {
; SSE-32-NEXT: andl $-8, %esp
; SSE-32-NEXT: subl $24, %esp
; SSE-32-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
-; SSE-32-NEXT: ucomisd %xmm2, %xmm0
+; SSE-32-NEXT: comisd %xmm2, %xmm0
; SSE-32-NEXT: xorpd %xmm1, %xmm1
; SSE-32-NEXT: xorpd %xmm3, %xmm3
; SSE-32-NEXT: jb .LBB1_2
@@ -196,7 +196,7 @@ define <2 x i64> @strict_vector_fptoui_v2f64_to_v2i64(<2 x double> %a) #0 {
; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp)
; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
-; SSE-32-NEXT: ucomisd %xmm2, %xmm0
+; SSE-32-NEXT: comisd %xmm2, %xmm0
; SSE-32-NEXT: jb .LBB1_4
; SSE-32-NEXT: # %bb.3:
; SSE-32-NEXT: movapd %xmm2, %xmm1
@@ -232,29 +232,33 @@ define <2 x i64> @strict_vector_fptoui_v2f64_to_v2i64(<2 x double> %a) #0 {
;
; SSE-64-LABEL: strict_vector_fptoui_v2f64_to_v2i64:
; SSE-64: # %bb.0:
-; SSE-64-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
-; SSE-64-NEXT: xorl %eax, %eax
-; SSE-64-NEXT: ucomisd %xmm2, %xmm0
-; SSE-64-NEXT: setae %al
-; SSE-64-NEXT: shlq $63, %rax
-; SSE-64-NEXT: movapd %xmm0, %xmm1
-; SSE-64-NEXT: cmpltsd %xmm2, %xmm1
-; SSE-64-NEXT: andnpd %xmm2, %xmm1
-; SSE-64-NEXT: movapd %xmm0, %xmm3
-; SSE-64-NEXT: subsd %xmm1, %xmm3
-; SSE-64-NEXT: cvttsd2si %xmm3, %rcx
+; SSE-64-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero
+; SSE-64-NEXT: comisd %xmm3, %xmm0
+; SSE-64-NEXT: xorpd %xmm2, %xmm2
+; SSE-64-NEXT: xorpd %xmm1, %xmm1
+; SSE-64-NEXT: jb .LBB1_2
+; SSE-64-NEXT: # %bb.1:
+; SSE-64-NEXT: movapd %xmm3, %xmm1
+; SSE-64-NEXT: .LBB1_2:
+; SSE-64-NEXT: movapd %xmm0, %xmm4
+; SSE-64-NEXT: subsd %xmm1, %xmm4
+; SSE-64-NEXT: cvttsd2si %xmm4, %rax
+; SSE-64-NEXT: setae %cl
+; SSE-64-NEXT: movzbl %cl, %ecx
+; SSE-64-NEXT: shlq $63, %rcx
; SSE-64-NEXT: xorq %rax, %rcx
; SSE-64-NEXT: movq %rcx, %xmm1
; SSE-64-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
-; SSE-64-NEXT: xorl %eax, %eax
-; SSE-64-NEXT: ucomisd %xmm2, %xmm0
-; SSE-64-NEXT: setae %al
-; SSE-64-NEXT: shlq $63, %rax
-; SSE-64-NEXT: movapd %xmm0, %xmm3
-; SSE-64-NEXT: cmpltsd %xmm2, %xmm3
-; SSE-64-NEXT: andnpd %xmm2, %xmm3
-; SSE-64-NEXT: subsd %xmm3, %xmm0
-; SSE-64-NEXT: cvttsd2si %xmm0, %rcx
+; SSE-64-NEXT: comisd %xmm3, %xmm0
+; SSE-64-NEXT: jb .LBB1_4
+; SSE-64-NEXT: # %bb.3:
+; SSE-64-NEXT: movapd %xmm3, %xmm2
+; SSE-64-NEXT: .LBB1_4:
+; SSE-64-NEXT: subsd %xmm2, %xmm0
+; SSE-64-NEXT: cvttsd2si %xmm0, %rax
+; SSE-64-NEXT: setae %cl
+; SSE-64-NEXT: movzbl %cl, %ecx
+; SSE-64-NEXT: shlq $63, %rcx
; SSE-64-NEXT: xorq %rax, %rcx
; SSE-64-NEXT: movq %rcx, %xmm0
; SSE-64-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
@@ -272,7 +276,7 @@ define <2 x i64> @strict_vector_fptoui_v2f64_to_v2i64(<2 x double> %a) #0 {
; AVX-32-NEXT: subl $16, %esp
; AVX-32-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX-32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-32-NEXT: vucomisd %xmm1, %xmm3
+; AVX-32-NEXT: vcomisd %xmm1, %xmm3
; AVX-32-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; AVX-32-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; AVX-32-NEXT: jb .LBB1_2
@@ -287,7 +291,7 @@ define <2 x i64> @strict_vector_fptoui_v2f64_to_v2i64(<2 x double> %a) #0 {
; AVX-32-NEXT: movzbl %al, %eax
; AVX-32-NEXT: shll $31, %eax
; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
-; AVX-32-NEXT: vucomisd %xmm1, %xmm0
+; AVX-32-NEXT: vcomisd %xmm1, %xmm0
; AVX-32-NEXT: jb .LBB1_4
; AVX-32-NEXT: # %bb.3:
; AVX-32-NEXT: vmovapd %xmm1, %xmm2
@@ -312,28 +316,34 @@ define <2 x i64> @strict_vector_fptoui_v2f64_to_v2i64(<2 x double> %a) #0 {
; AVX-64-LABEL: strict_vector_fptoui_v2f64_to_v2i64:
; AVX-64: # %bb.0:
; AVX-64-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-64-NEXT: xorl %eax, %eax
-; AVX-64-NEXT: vucomisd %xmm1, %xmm0
-; AVX-64-NEXT: setae %al
-; AVX-64-NEXT: shlq $63, %rax
-; AVX-64-NEXT: vcmpltsd %xmm1, %xmm0, %xmm2
-; AVX-64-NEXT: vandnpd %xmm1, %xmm2, %xmm2
-; AVX-64-NEXT: vsubsd %xmm2, %xmm0, %xmm2
-; AVX-64-NEXT: vcvttsd2si %xmm2, %rcx
+; AVX-64-NEXT: vcomisd %xmm1, %xmm0
+; AVX-64-NEXT: vxorpd %xmm2, %xmm2, %xmm2
+; AVX-64-NEXT: vxorpd %xmm3, %xmm3, %xmm3
+; AVX-64-NEXT: jb .LBB1_2
+; AVX-64-NEXT: # %bb.1:
+; AVX-64-NEXT: vmovapd %xmm1, %xmm3
+; AVX-64-NEXT: .LBB1_2:
+; AVX-64-NEXT: vsubsd %xmm3, %xmm0, %xmm3
+; AVX-64-NEXT: vcvttsd2si %xmm3, %rax
+; AVX-64-NEXT: setae %cl
+; AVX-64-NEXT: movzbl %cl, %ecx
+; AVX-64-NEXT: shlq $63, %rcx
; AVX-64-NEXT: xorq %rax, %rcx
-; AVX-64-NEXT: vmovq %rcx, %xmm2
+; AVX-64-NEXT: vmovq %rcx, %xmm3
; AVX-64-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX-64-NEXT: xorl %eax, %eax
-; AVX-64-NEXT: vucomisd %xmm1, %xmm0
-; AVX-64-NEXT: setae %al
-; AVX-64-NEXT: shlq $63, %rax
-; AVX-64-NEXT: vcmpltsd %xmm1, %xmm0, %xmm3
-; AVX-64-NEXT: vandnpd %xmm1, %xmm3, %xmm1
-; AVX-64-NEXT: vsubsd %xmm1, %xmm0, %xmm0
-; AVX-64-NEXT: vcvttsd2si %xmm0, %rcx
+; AVX-64-NEXT: vcomisd %xmm1, %xmm0
+; AVX-64-NEXT: jb .LBB1_4
+; AVX-64-NEXT: # %bb.3:
+; AVX-64-NEXT: vmovapd %xmm1, %xmm2
+; AVX-64-NEXT: .LBB1_4:
+; AVX-64-NEXT: vsubsd %xmm2, %xmm0, %xmm0
+; AVX-64-NEXT: vcvttsd2si %xmm0, %rax
+; AVX-64-NEXT: setae %cl
+; AVX-64-NEXT: movzbl %cl, %ecx
+; AVX-64-NEXT: shlq $63, %rcx
; AVX-64-NEXT: xorq %rax, %rcx
; AVX-64-NEXT: vmovq %rcx, %xmm0
-; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
+; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX-64-NEXT: retq
;
; AVX512VL-32-LABEL: strict_vector_fptoui_v2f64_to_v2i64:
@@ -348,7 +358,7 @@ define <2 x i64> @strict_vector_fptoui_v2f64_to_v2i64(<2 x double> %a) #0 {
; AVX512VL-32-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512VL-32-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
; AVX512VL-32-NEXT: xorl %eax, %eax
-; AVX512VL-32-NEXT: vucomisd %xmm2, %xmm1
+; AVX512VL-32-NEXT: vcomisd %xmm2, %xmm1
; AVX512VL-32-NEXT: setb %cl
; AVX512VL-32-NEXT: kmovw %ecx, %k1
; AVX512VL-32-NEXT: vxorpd %xmm3, %xmm3, %xmm3
@@ -362,7 +372,7 @@ define <2 x i64> @strict_vector_fptoui_v2f64_to_v2i64(<2 x double> %a) #0 {
; AVX512VL-32-NEXT: shll $31, %eax
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
; AVX512VL-32-NEXT: xorl %ecx, %ecx
-; AVX512VL-32-NEXT: vucomisd %xmm2, %xmm0
+; AVX512VL-32-NEXT: vcomisd %xmm2, %xmm0
; AVX512VL-32-NEXT: setb %dl
; AVX512VL-32-NEXT: kmovw %edx, %k1
; AVX512VL-32-NEXT: vmovsd %xmm3, %xmm2, %xmm2 {%k1}
@@ -559,48 +569,50 @@ define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64(<2 x float> %a) #0 {
; SSE-32-NEXT: .cfi_def_cfa_register %ebp
; SSE-32-NEXT: andl $-8, %esp
; SSE-32-NEXT: subl $24, %esp
-; SSE-32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE-32-NEXT: movaps %xmm0, %xmm2
-; SSE-32-NEXT: cmpltss %xmm1, %xmm2
-; SSE-32-NEXT: andnps %xmm1, %xmm2
-; SSE-32-NEXT: movaps %xmm0, %xmm3
-; SSE-32-NEXT: subss %xmm2, %xmm3
-; SSE-32-NEXT: movss %xmm3, {{[0-9]+}}(%esp)
-; SSE-32-NEXT: movaps %xmm0, %xmm2
-; SSE-32-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[2,3]
+; SSE-32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; SSE-32-NEXT: comiss %xmm2, %xmm0
+; SSE-32-NEXT: xorps %xmm1, %xmm1
+; SSE-32-NEXT: xorps %xmm3, %xmm3
+; SSE-32-NEXT: jb .LBB3_2
+; SSE-32-NEXT: # %bb.1:
; SSE-32-NEXT: movaps %xmm2, %xmm3
-; SSE-32-NEXT: cmpltss %xmm1, %xmm3
-; SSE-32-NEXT: andnps %xmm1, %xmm3
-; SSE-32-NEXT: movaps %xmm2, %xmm4
+; SSE-32-NEXT: .LBB3_2:
+; SSE-32-NEXT: movaps %xmm0, %xmm4
; SSE-32-NEXT: subss %xmm3, %xmm4
; SSE-32-NEXT: movss %xmm4, {{[0-9]+}}(%esp)
+; SSE-32-NEXT: setae %al
; SSE-32-NEXT: flds {{[0-9]+}}(%esp)
; SSE-32-NEXT: fnstcw {{[0-9]+}}(%esp)
-; SSE-32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; SSE-32-NEXT: orl $3072, %eax # imm = 0xC00
-; SSE-32-NEXT: movw %ax, {{[0-9]+}}(%esp)
+; SSE-32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; SSE-32-NEXT: orl $3072, %ecx # imm = 0xC00
+; SSE-32-NEXT: movw %cx, {{[0-9]+}}(%esp)
; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp)
; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
+; SSE-32-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; SSE-32-NEXT: comiss %xmm2, %xmm0
+; SSE-32-NEXT: jb .LBB3_4
+; SSE-32-NEXT: # %bb.3:
+; SSE-32-NEXT: movaps %xmm2, %xmm1
+; SSE-32-NEXT: .LBB3_4:
+; SSE-32-NEXT: subss %xmm1, %xmm0
+; SSE-32-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
+; SSE-32-NEXT: setae %cl
; SSE-32-NEXT: flds {{[0-9]+}}(%esp)
; SSE-32-NEXT: fnstcw (%esp)
-; SSE-32-NEXT: movzwl (%esp), %eax
-; SSE-32-NEXT: orl $3072, %eax # imm = 0xC00
-; SSE-32-NEXT: movw %ax, {{[0-9]+}}(%esp)
+; SSE-32-NEXT: movzwl (%esp), %edx
+; SSE-32-NEXT: orl $3072, %edx # imm = 0xC00
+; SSE-32-NEXT: movw %dx, {{[0-9]+}}(%esp)
; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp)
; SSE-32-NEXT: fldcw (%esp)
-; SSE-32-NEXT: xorl %eax, %eax
-; SSE-32-NEXT: ucomiss %xmm1, %xmm0
-; SSE-32-NEXT: setae %al
+; SSE-32-NEXT: movzbl %al, %eax
; SSE-32-NEXT: shll $31, %eax
; SSE-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
-; SSE-32-NEXT: movd %eax, %xmm3
+; SSE-32-NEXT: movd %eax, %xmm1
; SSE-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE-32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
-; SSE-32-NEXT: xorl %eax, %eax
-; SSE-32-NEXT: ucomiss %xmm1, %xmm2
-; SSE-32-NEXT: setae %al
+; SSE-32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-32-NEXT: movzbl %cl, %eax
; SSE-32-NEXT: shll $31, %eax
; SSE-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
; SSE-32-NEXT: movd %eax, %xmm1
@@ -614,29 +626,33 @@ define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64(<2 x float> %a) #0 {
;
; SSE-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64:
; SSE-64: # %bb.0:
-; SSE-64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SSE-64-NEXT: xorl %eax, %eax
-; SSE-64-NEXT: ucomiss %xmm2, %xmm0
-; SSE-64-NEXT: setae %al
-; SSE-64-NEXT: shlq $63, %rax
-; SSE-64-NEXT: movaps %xmm0, %xmm1
-; SSE-64-NEXT: cmpltss %xmm2, %xmm1
-; SSE-64-NEXT: andnps %xmm2, %xmm1
-; SSE-64-NEXT: movaps %xmm0, %xmm3
-; SSE-64-NEXT: subss %xmm1, %xmm3
-; SSE-64-NEXT: cvttss2si %xmm3, %rcx
+; SSE-64-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; SSE-64-NEXT: comiss %xmm3, %xmm0
+; SSE-64-NEXT: xorps %xmm2, %xmm2
+; SSE-64-NEXT: xorps %xmm1, %xmm1
+; SSE-64-NEXT: jb .LBB3_2
+; SSE-64-NEXT: # %bb.1:
+; SSE-64-NEXT: movaps %xmm3, %xmm1
+; SSE-64-NEXT: .LBB3_2:
+; SSE-64-NEXT: movaps %xmm0, %xmm4
+; SSE-64-NEXT: subss %xmm1, %xmm4
+; SSE-64-NEXT: cvttss2si %xmm4, %rax
+; SSE-64-NEXT: setae %cl
+; SSE-64-NEXT: movzbl %cl, %ecx
+; SSE-64-NEXT: shlq $63, %rcx
; SSE-64-NEXT: xorq %rax, %rcx
; SSE-64-NEXT: movq %rcx, %xmm1
; SSE-64-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; SSE-64-NEXT: xorl %eax, %eax
-; SSE-64-NEXT: ucomiss %xmm2, %xmm0
-; SSE-64-NEXT: setae %al
-; SSE-64-NEXT: shlq $63, %rax
-; SSE-64-NEXT: movaps %xmm0, %xmm3
-; SSE-64-NEXT: cmpltss %xmm2, %xmm3
-; SSE-64-NEXT: andnps %xmm2, %xmm3
-; SSE-64-NEXT: subss %xmm3, %xmm0
-; SSE-64-NEXT: cvttss2si %xmm0, %rcx
+; SSE-64-NEXT: comiss %xmm3, %xmm0
+; SSE-64-NEXT: jb .LBB3_4
+; SSE-64-NEXT: # %bb.3:
+; SSE-64-NEXT: movaps %xmm3, %xmm2
+; SSE-64-NEXT: .LBB3_4:
+; SSE-64-NEXT: subss %xmm2, %xmm0
+; SSE-64-NEXT: cvttss2si %xmm0, %rax
+; SSE-64-NEXT: setae %cl
+; SSE-64-NEXT: movzbl %cl, %ecx
+; SSE-64-NEXT: shlq $63, %rcx
; SSE-64-NEXT: xorq %rax, %rcx
; SSE-64-NEXT: movq %rcx, %xmm0
; SSE-64-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
@@ -652,28 +668,34 @@ define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64(<2 x float> %a) #0 {
; AVX-32-NEXT: .cfi_def_cfa_register %ebp
; AVX-32-NEXT: andl $-8, %esp
; AVX-32-NEXT: subl $16, %esp
-; AVX-32-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX-32-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; AVX-32-NEXT: vcmpltss %xmm2, %xmm1, %xmm3
-; AVX-32-NEXT: vandnps %xmm2, %xmm3, %xmm3
-; AVX-32-NEXT: vsubss %xmm3, %xmm1, %xmm3
+; AVX-32-NEXT: vmovshdup {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; AVX-32-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX-32-NEXT: vcomiss %xmm1, %xmm3
+; AVX-32-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; AVX-32-NEXT: vxorps %xmm4, %xmm4, %xmm4
+; AVX-32-NEXT: jb .LBB3_2
+; AVX-32-NEXT: # %bb.1:
+; AVX-32-NEXT: vmovaps %xmm1, %xmm4
+; AVX-32-NEXT: .LBB3_2:
+; AVX-32-NEXT: vsubss %xmm4, %xmm3, %xmm3
; AVX-32-NEXT: vmovss %xmm3, {{[0-9]+}}(%esp)
-; AVX-32-NEXT: vcmpltss %xmm2, %xmm0, %xmm3
-; AVX-32-NEXT: vandnps %xmm2, %xmm3, %xmm3
-; AVX-32-NEXT: vsubss %xmm3, %xmm0, %xmm3
-; AVX-32-NEXT: vmovss %xmm3, (%esp)
; AVX-32-NEXT: flds {{[0-9]+}}(%esp)
; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp)
-; AVX-32-NEXT: flds (%esp)
-; AVX-32-NEXT: fisttpll (%esp)
-; AVX-32-NEXT: xorl %eax, %eax
-; AVX-32-NEXT: vucomiss %xmm2, %xmm1
; AVX-32-NEXT: setae %al
+; AVX-32-NEXT: movzbl %al, %eax
; AVX-32-NEXT: shll $31, %eax
; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
-; AVX-32-NEXT: xorl %ecx, %ecx
-; AVX-32-NEXT: vucomiss %xmm2, %xmm0
+; AVX-32-NEXT: vcomiss %xmm1, %xmm0
+; AVX-32-NEXT: jb .LBB3_4
+; AVX-32-NEXT: # %bb.3:
+; AVX-32-NEXT: vmovaps %xmm1, %xmm2
+; AVX-32-NEXT: .LBB3_4:
+; AVX-32-NEXT: vsubss %xmm2, %xmm0, %xmm0
+; AVX-32-NEXT: vmovss %xmm0, (%esp)
+; AVX-32-NEXT: flds (%esp)
+; AVX-32-NEXT: fisttpll (%esp)
; AVX-32-NEXT: setae %cl
+; AVX-32-NEXT: movzbl %cl, %ecx
; AVX-32-NEXT: shll $31, %ecx
; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
@@ -688,28 +710,34 @@ define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64(<2 x float> %a) #0 {
; AVX-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64:
; AVX-64: # %bb.0:
; AVX-64-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX-64-NEXT: xorl %eax, %eax
-; AVX-64-NEXT: vucomiss %xmm1, %xmm0
-; AVX-64-NEXT: setae %al
-; AVX-64-NEXT: shlq $63, %rax
-; AVX-64-NEXT: vcmpltss %xmm1, %xmm0, %xmm2
-; AVX-64-NEXT: vandnps %xmm1, %xmm2, %xmm2
-; AVX-64-NEXT: vsubss %xmm2, %xmm0, %xmm2
-; AVX-64-NEXT: vcvttss2si %xmm2, %rcx
+; AVX-64-NEXT: vcomiss %xmm1, %xmm0
+; AVX-64-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; AVX-64-NEXT: vxorps %xmm3, %xmm3, %xmm3
+; AVX-64-NEXT: jb .LBB3_2
+; AVX-64-NEXT: # %bb.1:
+; AVX-64-NEXT: vmovaps %xmm1, %xmm3
+; AVX-64-NEXT: .LBB3_2:
+; AVX-64-NEXT: vsubss %xmm3, %xmm0, %xmm3
+; AVX-64-NEXT: vcvttss2si %xmm3, %rax
+; AVX-64-NEXT: setae %cl
+; AVX-64-NEXT: movzbl %cl, %ecx
+; AVX-64-NEXT: shlq $63, %rcx
; AVX-64-NEXT: xorq %rax, %rcx
-; AVX-64-NEXT: vmovq %rcx, %xmm2
+; AVX-64-NEXT: vmovq %rcx, %xmm3
; AVX-64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; AVX-64-NEXT: xorl %eax, %eax
-; AVX-64-NEXT: vucomiss %xmm1, %xmm0
-; AVX-64-NEXT: setae %al
-; AVX-64-NEXT: shlq $63, %rax
-; AVX-64-NEXT: vcmpltss %xmm1, %xmm0, %xmm3
-; AVX-64-NEXT: vandnps %xmm1, %xmm3, %xmm1
-; AVX-64-NEXT: vsubss %xmm1, %xmm0, %xmm0
-; AVX-64-NEXT: vcvttss2si %xmm0, %rcx
+; AVX-64-NEXT: vcomiss %xmm1, %xmm0
+; AVX-64-NEXT: jb .LBB3_4
+; AVX-64-NEXT: # %bb.3:
+; AVX-64-NEXT: vmovaps %xmm1, %xmm2
+; AVX-64-NEXT: .LBB3_4:
+; AVX-64-NEXT: vsubss %xmm2, %xmm0, %xmm0
+; AVX-64-NEXT: vcvttss2si %xmm0, %rax
+; AVX-64-NEXT: setae %cl
+; AVX-64-NEXT: movzbl %cl, %ecx
+; AVX-64-NEXT: shlq $63, %rcx
; AVX-64-NEXT: xorq %rax, %rcx
; AVX-64-NEXT: vmovq %rcx, %xmm0
-; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
+; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX-64-NEXT: retq
;
; AVX512VL-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64:
@@ -723,28 +751,29 @@ define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64(<2 x float> %a) #0 {
; AVX512VL-32-NEXT: subl $16, %esp
; AVX512VL-32-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512VL-32-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; AVX512VL-32-NEXT: vcmpltss %xmm2, %xmm1, %k1
+; AVX512VL-32-NEXT: xorl %eax, %eax
+; AVX512VL-32-NEXT: vcomiss %xmm2, %xmm1
+; AVX512VL-32-NEXT: setb %cl
+; AVX512VL-32-NEXT: kmovw %ecx, %k1
; AVX512VL-32-NEXT: vxorps %xmm3, %xmm3, %xmm3
; AVX512VL-32-NEXT: vmovaps %xmm2, %xmm4
; AVX512VL-32-NEXT: vmovss %xmm3, %xmm4, %xmm4 {%k1}
-; AVX512VL-32-NEXT: vsubss %xmm4, %xmm1, %xmm4
-; AVX512VL-32-NEXT: vmovss %xmm4, {{[0-9]+}}(%esp)
-; AVX512VL-32-NEXT: vcmpltss %xmm2, %xmm0, %k1
-; AVX512VL-32-NEXT: vmovaps %xmm2, %xmm4
-; AVX512VL-32-NEXT: vmovss %xmm3, %xmm4, %xmm4 {%k1}
-; AVX512VL-32-NEXT: vsubss %xmm4, %xmm0, %xmm3
-; AVX512VL-32-NEXT: vmovss %xmm3, (%esp)
+; AVX512VL-32-NEXT: vsubss %xmm4, %xmm1, %xmm1
+; AVX512VL-32-NEXT: vmovss %xmm1, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
-; AVX512VL-32-NEXT: flds (%esp)
-; AVX512VL-32-NEXT: fisttpll (%esp)
-; AVX512VL-32-NEXT: xorl %eax, %eax
-; AVX512VL-32-NEXT: vucomiss %xmm2, %xmm1
; AVX512VL-32-NEXT: setae %al
; AVX512VL-32-NEXT: shll $31, %eax
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
; AVX512VL-32-NEXT: xorl %ecx, %ecx
-; AVX512VL-32-NEXT: vucomiss %xmm2, %xmm0
+; AVX512VL-32-NEXT: vcomiss %xmm2, %xmm0
+; AVX512VL-32-NEXT: setb %dl
+; AVX512VL-32-NEXT: kmovw %edx, %k1
+; AVX512VL-32-NEXT: vmovss %xmm3, %xmm2, %xmm2 {%k1}
+; AVX512VL-32-NEXT: vsubss %xmm2, %xmm0, %xmm0
+; AVX512VL-32-NEXT: vmovss %xmm0, (%esp)
+; AVX512VL-32-NEXT: flds (%esp)
+; AVX512VL-32-NEXT: fisttpll (%esp)
; AVX512VL-32-NEXT: setae %cl
; AVX512VL-32-NEXT: shll $31, %ecx
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
@@ -828,28 +857,32 @@ define <2 x i32> @strict_vector_fptosi_v2f64_to_v2i32(<2 x double> %a) #0 {
define <2 x i32> @strict_vector_fptoui_v2f64_to_v2i32(<2 x double> %a) #0 {
; SSE-32-LABEL: strict_vector_fptoui_v2f64_to_v2i32:
; SSE-32: # %bb.0:
-; SSE-32-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
-; SSE-32-NEXT: xorl %eax, %eax
-; SSE-32-NEXT: ucomisd %xmm2, %xmm0
+; SSE-32-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero
+; SSE-32-NEXT: comisd %xmm3, %xmm0
+; SSE-32-NEXT: xorpd %xmm2, %xmm2
+; SSE-32-NEXT: xorpd %xmm1, %xmm1
+; SSE-32-NEXT: jb .LBB5_2
+; SSE-32-NEXT: # %bb.1:
+; SSE-32-NEXT: movapd %xmm3, %xmm1
+; SSE-32-NEXT: .LBB5_2:
; SSE-32-NEXT: setae %al
+; SSE-32-NEXT: movzbl %al, %eax
; SSE-32-NEXT: shll $31, %eax
-; SSE-32-NEXT: movapd %xmm0, %xmm1
-; SSE-32-NEXT: cmpltsd %xmm2, %xmm1
-; SSE-32-NEXT: andnpd %xmm2, %xmm1
-; SSE-32-NEXT: movapd %xmm0, %xmm3
-; SSE-32-NEXT: subsd %xmm1, %xmm3
-; SSE-32-NEXT: cvttsd2si %xmm3, %ecx
+; SSE-32-NEXT: movapd %xmm0, %xmm4
+; SSE-32-NEXT: subsd %xmm1, %xmm4
+; SSE-32-NEXT: cvttsd2si %xmm4, %ecx
; SSE-32-NEXT: xorl %eax, %ecx
; SSE-32-NEXT: movd %ecx, %xmm1
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
-; SSE-32-NEXT: xorl %eax, %eax
-; SSE-32-NEXT: ucomisd %xmm2, %xmm0
+; SSE-32-NEXT: comisd %xmm3, %xmm0
+; SSE-32-NEXT: jb .LBB5_4
+; SSE-32-NEXT: # %bb.3:
+; SSE-32-NEXT: movapd %xmm3, %xmm2
+; SSE-32-NEXT: .LBB5_4:
; SSE-32-NEXT: setae %al
+; SSE-32-NEXT: movzbl %al, %eax
; SSE-32-NEXT: shll $31, %eax
-; SSE-32-NEXT: movapd %xmm0, %xmm3
-; SSE-32-NEXT: cmpltsd %xmm2, %xmm3
-; SSE-32-NEXT: andnpd %xmm2, %xmm3
-; SSE-32-NEXT: subsd %xmm3, %xmm0
+; SSE-32-NEXT: subsd %xmm2, %xmm0
; SSE-32-NEXT: cvttsd2si %xmm0, %ecx
; SSE-32-NEXT: xorl %eax, %ecx
; SSE-32-NEXT: movd %ecx, %xmm0
@@ -978,28 +1011,32 @@ define <2 x i32> @strict_vector_fptosi_v2f32_to_v2i32(<2 x float> %a) #0 {
define <2 x i32> @strict_vector_fptoui_v2f32_to_v2i32(<2 x float> %a) #0 {
; SSE-32-LABEL: strict_vector_fptoui_v2f32_to_v2i32:
; SSE-32: # %bb.0:
-; SSE-32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SSE-32-NEXT: xorl %eax, %eax
-; SSE-32-NEXT: ucomiss %xmm2, %xmm0
+; SSE-32-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; SSE-32-NEXT: comiss %xmm3, %xmm0
+; SSE-32-NEXT: xorps %xmm2, %xmm2
+; SSE-32-NEXT: xorps %xmm1, %xmm1
+; SSE-32-NEXT: jb .LBB7_2
+; SSE-32-NEXT: # %bb.1:
+; SSE-32-NEXT: movaps %xmm3, %xmm1
+; SSE-32-NEXT: .LBB7_2:
; SSE-32-NEXT: setae %al
+; SSE-32-NEXT: movzbl %al, %eax
; SSE-32-NEXT: shll $31, %eax
-; SSE-32-NEXT: movaps %xmm0, %xmm1
-; SSE-32-NEXT: cmpltss %xmm2, %xmm1
-; SSE-32-NEXT: andnps %xmm2, %xmm1
-; SSE-32-NEXT: movaps %xmm0, %xmm3
-; SSE-32-NEXT: subss %xmm1, %xmm3
-; SSE-32-NEXT: cvttss2si %xmm3, %ecx
+; SSE-32-NEXT: movaps %xmm0, %xmm4
+; SSE-32-NEXT: subss %xmm1, %xmm4
+; SSE-32-NEXT: cvttss2si %xmm4, %ecx
; SSE-32-NEXT: xorl %eax, %ecx
; SSE-32-NEXT: movd %ecx, %xmm1
; SSE-32-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; SSE-32-NEXT: xorl %eax, %eax
-; SSE-32-NEXT: ucomiss %xmm2, %xmm0
+; SSE-32-NEXT: comiss %xmm3, %xmm0
+; SSE-32-NEXT: jb .LBB7_4
+; SSE-32-NEXT: # %bb.3:
+; SSE-32-NEXT: movaps %xmm3, %xmm2
+; SSE-32-NEXT: .LBB7_4:
; SSE-32-NEXT: setae %al
+; SSE-32-NEXT: movzbl %al, %eax
; SSE-32-NEXT: shll $31, %eax
-; SSE-32-NEXT: movaps %xmm0, %xmm3
-; SSE-32-NEXT: cmpltss %xmm2, %xmm3
-; SSE-32-NEXT: andnps %xmm2, %xmm3
-; SSE-32-NEXT: subss %xmm3, %xmm0
+; SSE-32-NEXT: subss %xmm2, %xmm0
; SSE-32-NEXT: cvttss2si %xmm0, %ecx
; SSE-32-NEXT: xorl %eax, %ecx
; SSE-32-NEXT: movd %ecx, %xmm0
@@ -1542,7 +1579,7 @@ define <2 x i1> @strict_vector_fptoui_v2f64_to_v2i1(<2 x double> %a) #0 {
; SSE-32-NEXT: andl $-8, %esp
; SSE-32-NEXT: subl $24, %esp
; SSE-32-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
-; SSE-32-NEXT: ucomisd %xmm2, %xmm0
+; SSE-32-NEXT: comisd %xmm2, %xmm0
; SSE-32-NEXT: xorpd %xmm1, %xmm1
; SSE-32-NEXT: xorpd %xmm3, %xmm3
; SSE-32-NEXT: jb .LBB17_2
@@ -1562,7 +1599,7 @@ define <2 x i1> @strict_vector_fptoui_v2f64_to_v2i1(<2 x double> %a) #0 {
; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp)
; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
-; SSE-32-NEXT: ucomisd %xmm2, %xmm0
+; SSE-32-NEXT: comisd %xmm2, %xmm0
; SSE-32-NEXT: jb .LBB17_4
; SSE-32-NEXT: # %bb.3:
; SSE-32-NEXT: movapd %xmm2, %xmm1
@@ -1598,29 +1635,33 @@ define <2 x i1> @strict_vector_fptoui_v2f64_to_v2i1(<2 x double> %a) #0 {
;
; SSE-64-LABEL: strict_vector_fptoui_v2f64_to_v2i1:
; SSE-64: # %bb.0:
-; SSE-64-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
-; SSE-64-NEXT: xorl %eax, %eax
-; SSE-64-NEXT: ucomisd %xmm2, %xmm0
-; SSE-64-NEXT: setae %al
-; SSE-64-NEXT: shlq $63, %rax
-; SSE-64-NEXT: movapd %xmm0, %xmm1
-; SSE-64-NEXT: cmpltsd %xmm2, %xmm1
-; SSE-64-NEXT: andnpd %xmm2, %xmm1
-; SSE-64-NEXT: movapd %xmm0, %xmm3
-; SSE-64-NEXT: subsd %xmm1, %xmm3
-; SSE-64-NEXT: cvttsd2si %xmm3, %rcx
+; SSE-64-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero
+; SSE-64-NEXT: comisd %xmm3, %xmm0
+; SSE-64-NEXT: xorpd %xmm2, %xmm2
+; SSE-64-NEXT: xorpd %xmm1, %xmm1
+; SSE-64-NEXT: jb .LBB17_2
+; SSE-64-NEXT: # %bb.1:
+; SSE-64-NEXT: movapd %xmm3, %xmm1
+; SSE-64-NEXT: .LBB17_2:
+; SSE-64-NEXT: movapd %xmm0, %xmm4
+; SSE-64-NEXT: subsd %xmm1, %xmm4
+; SSE-64-NEXT: cvttsd2si %xmm4, %rax
+; SSE-64-NEXT: setae %cl
+; SSE-64-NEXT: movzbl %cl, %ecx
+; SSE-64-NEXT: shlq $63, %rcx
; SSE-64-NEXT: xorq %rax, %rcx
; SSE-64-NEXT: movq %rcx, %xmm1
; SSE-64-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
-; SSE-64-NEXT: xorl %eax, %eax
-; SSE-64-NEXT: ucomisd %xmm2, %xmm0
-; SSE-64-NEXT: setae %al
-; SSE-64-NEXT: shlq $63, %rax
-; SSE-64-NEXT: movapd %xmm0, %xmm3
-; SSE-64-NEXT: cmpltsd %xmm2, %xmm3
-; SSE-64-NEXT: andnpd %xmm2, %xmm3
-; SSE-64-NEXT: subsd %xmm3, %xmm0
-; SSE-64-NEXT: cvttsd2si %xmm0, %rcx
+; SSE-64-NEXT: comisd %xmm3, %xmm0
+; SSE-64-NEXT: jb .LBB17_4
+; SSE-64-NEXT: # %bb.3:
+; SSE-64-NEXT: movapd %xmm3, %xmm2
+; SSE-64-NEXT: .LBB17_4:
+; SSE-64-NEXT: subsd %xmm2, %xmm0
+; SSE-64-NEXT: cvttsd2si %xmm0, %rax
+; SSE-64-NEXT: setae %cl
+; SSE-64-NEXT: movzbl %cl, %ecx
+; SSE-64-NEXT: shlq $63, %rcx
; SSE-64-NEXT: xorq %rax, %rcx
; SSE-64-NEXT: movq %rcx, %xmm0
; SSE-64-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
@@ -1638,7 +1679,7 @@ define <2 x i1> @strict_vector_fptoui_v2f64_to_v2i1(<2 x double> %a) #0 {
; AVX-32-NEXT: subl $16, %esp
; AVX-32-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX-32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-32-NEXT: vucomisd %xmm1, %xmm3
+; AVX-32-NEXT: vcomisd %xmm1, %xmm3
; AVX-32-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; AVX-32-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; AVX-32-NEXT: jb .LBB17_2
@@ -1653,7 +1694,7 @@ define <2 x i1> @strict_vector_fptoui_v2f64_to_v2i1(<2 x double> %a) #0 {
; AVX-32-NEXT: movzbl %al, %eax
; AVX-32-NEXT: shll $31, %eax
; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
-; AVX-32-NEXT: vucomisd %xmm1, %xmm0
+; AVX-32-NEXT: vcomisd %xmm1, %xmm0
; AVX-32-NEXT: jb .LBB17_4
; AVX-32-NEXT: # %bb.3:
; AVX-32-NEXT: vmovapd %xmm1, %xmm2
@@ -1678,28 +1719,34 @@ define <2 x i1> @strict_vector_fptoui_v2f64_to_v2i1(<2 x double> %a) #0 {
; AVX-64-LABEL: strict_vector_fptoui_v2f64_to_v2i1:
; AVX-64: # %bb.0:
; AVX-64-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-64-NEXT: xorl %eax, %eax
-; AVX-64-NEXT: vucomisd %xmm1, %xmm0
-; AVX-64-NEXT: setae %al
-; AVX-64-NEXT: shlq $63, %rax
-; AVX-64-NEXT: vcmpltsd %xmm1, %xmm0, %xmm2
-; AVX-64-NEXT: vandnpd %xmm1, %xmm2, %xmm2
-; AVX-64-NEXT: vsubsd %xmm2, %xmm0, %xmm2
-; AVX-64-NEXT: vcvttsd2si %xmm2, %rcx
+; AVX-64-NEXT: vcomisd %xmm1, %xmm0
+; AVX-64-NEXT: vxorpd %xmm2, %xmm2, %xmm2
+; AVX-64-NEXT: vxorpd %xmm3, %xmm3, %xmm3
+; AVX-64-NEXT: jb .LBB17_2
+; AVX-64-NEXT: # %bb.1:
+; AVX-64-NEXT: vmovapd %xmm1, %xmm3
+; AVX-64-NEXT: .LBB17_2:
+; AVX-64-NEXT: vsubsd %xmm3, %xmm0, %xmm3
+; AVX-64-NEXT: vcvttsd2si %xmm3, %rax
+; AVX-64-NEXT: setae %cl
+; AVX-64-NEXT: movzbl %cl, %ecx
+; AVX-64-NEXT: shlq $63, %rcx
; AVX-64-NEXT: xorq %rax, %rcx
-; AVX-64-NEXT: vmovq %rcx, %xmm2
+; AVX-64-NEXT: vmovq %rcx, %xmm3
; AVX-64-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX-64-NEXT: xorl %eax, %eax
-; AVX-64-NEXT: vucomisd %xmm1, %xmm0
-; AVX-64-NEXT: setae %al
-; AVX-64-NEXT: shlq $63, %rax
-; AVX-64-NEXT: vcmpltsd %xmm1, %xmm0, %xmm3
-; AVX-64-NEXT: vandnpd %xmm1, %xmm3, %xmm1
-; AVX-64-NEXT: vsubsd %xmm1, %xmm0, %xmm0
-; AVX-64-NEXT: vcvttsd2si %xmm0, %rcx
+; AVX-64-NEXT: vcomisd %xmm1, %xmm0
+; AVX-64-NEXT: jb .LBB17_4
+; AVX-64-NEXT: # %bb.3:
+; AVX-64-NEXT: vmovapd %xmm1, %xmm2
+; AVX-64-NEXT: .LBB17_4:
+; AVX-64-NEXT: vsubsd %xmm2, %xmm0, %xmm0
+; AVX-64-NEXT: vcvttsd2si %xmm0, %rax
+; AVX-64-NEXT: setae %cl
+; AVX-64-NEXT: movzbl %cl, %ecx
+; AVX-64-NEXT: shlq $63, %rcx
; AVX-64-NEXT: xorq %rax, %rcx
; AVX-64-NEXT: vmovq %rcx, %xmm0
-; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
+; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX-64-NEXT: retq
;
; AVX512VL-LABEL: strict_vector_fptoui_v2f64_to_v2i1:
@@ -1863,48 +1910,50 @@ define <2 x i1> @strict_vector_fptoui_v2f32_to_v2i1(<2 x float> %a) #0 {
; SSE-32-NEXT: .cfi_def_cfa_register %ebp
; SSE-32-NEXT: andl $-8, %esp
; SSE-32-NEXT: subl $24, %esp
-; SSE-32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE-32-NEXT: movaps %xmm0, %xmm2
-; SSE-32-NEXT: cmpltss %xmm1, %xmm2
-; SSE-32-NEXT: andnps %xmm1, %xmm2
-; SSE-32-NEXT: movaps %xmm0, %xmm3
-; SSE-32-NEXT: subss %xmm2, %xmm3
-; SSE-32-NEXT: movss %xmm3, {{[0-9]+}}(%esp)
-; SSE-32-NEXT: movaps %xmm0, %xmm2
-; SSE-32-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[2,3]
+; SSE-32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; SSE-32-NEXT: comiss %xmm2, %xmm0
+; SSE-32-NEXT: xorps %xmm1, %xmm1
+; SSE-32-NEXT: xorps %xmm3, %xmm3
+; SSE-32-NEXT: jb .LBB19_2
+; SSE-32-NEXT: # %bb.1:
; SSE-32-NEXT: movaps %xmm2, %xmm3
-; SSE-32-NEXT: cmpltss %xmm1, %xmm3
-; SSE-32-NEXT: andnps %xmm1, %xmm3
-; SSE-32-NEXT: movaps %xmm2, %xmm4
+; SSE-32-NEXT: .LBB19_2:
+; SSE-32-NEXT: movaps %xmm0, %xmm4
; SSE-32-NEXT: subss %xmm3, %xmm4
; SSE-32-NEXT: movss %xmm4, {{[0-9]+}}(%esp)
+; SSE-32-NEXT: setae %al
; SSE-32-NEXT: flds {{[0-9]+}}(%esp)
; SSE-32-NEXT: fnstcw {{[0-9]+}}(%esp)
-; SSE-32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; SSE-32-NEXT: orl $3072, %eax # imm = 0xC00
-; SSE-32-NEXT: movw %ax, {{[0-9]+}}(%esp)
+; SSE-32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; SSE-32-NEXT: orl $3072, %ecx # imm = 0xC00
+; SSE-32-NEXT: movw %cx, {{[0-9]+}}(%esp)
; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp)
; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
+; SSE-32-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; SSE-32-NEXT: comiss %xmm2, %xmm0
+; SSE-32-NEXT: jb .LBB19_4
+; SSE-32-NEXT: # %bb.3:
+; SSE-32-NEXT: movaps %xmm2, %xmm1
+; SSE-32-NEXT: .LBB19_4:
+; SSE-32-NEXT: subss %xmm1, %xmm0
+; SSE-32-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
+; SSE-32-NEXT: setae %cl
; SSE-32-NEXT: flds {{[0-9]+}}(%esp)
; SSE-32-NEXT: fnstcw (%esp)
-; SSE-32-NEXT: movzwl (%esp), %eax
-; SSE-32-NEXT: orl $3072, %eax # imm = 0xC00
-; SSE-32-NEXT: movw %ax, {{[0-9]+}}(%esp)
+; SSE-32-NEXT: movzwl (%esp), %edx
+; SSE-32-NEXT: orl $3072, %edx # imm = 0xC00
+; SSE-32-NEXT: movw %dx, {{[0-9]+}}(%esp)
; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp)
; SSE-32-NEXT: fldcw (%esp)
-; SSE-32-NEXT: xorl %eax, %eax
-; SSE-32-NEXT: ucomiss %xmm1, %xmm0
-; SSE-32-NEXT: setae %al
+; SSE-32-NEXT: movzbl %al, %eax
; SSE-32-NEXT: shll $31, %eax
; SSE-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
-; SSE-32-NEXT: movd %eax, %xmm3
+; SSE-32-NEXT: movd %eax, %xmm1
; SSE-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE-32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
-; SSE-32-NEXT: xorl %eax, %eax
-; SSE-32-NEXT: ucomiss %xmm1, %xmm2
-; SSE-32-NEXT: setae %al
+; SSE-32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-32-NEXT: movzbl %cl, %eax
; SSE-32-NEXT: shll $31, %eax
; SSE-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
; SSE-32-NEXT: movd %eax, %xmm1
@@ -1918,29 +1967,33 @@ define <2 x i1> @strict_vector_fptoui_v2f32_to_v2i1(<2 x float> %a) #0 {
;
; SSE-64-LABEL: strict_vector_fptoui_v2f32_to_v2i1:
; SSE-64: # %bb.0:
-; SSE-64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SSE-64-NEXT: xorl %eax, %eax
-; SSE-64-NEXT: ucomiss %xmm2, %xmm0
-; SSE-64-NEXT: setae %al
-; SSE-64-NEXT: shlq $63, %rax
-; SSE-64-NEXT: movaps %xmm0, %xmm1
-; SSE-64-NEXT: cmpltss %xmm2, %xmm1
-; SSE-64-NEXT: andnps %xmm2, %xmm1
-; SSE-64-NEXT: movaps %xmm0, %xmm3
-; SSE-64-NEXT: subss %xmm1, %xmm3
-; SSE-64-NEXT: cvttss2si %xmm3, %rcx
+; SSE-64-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; SSE-64-NEXT: comiss %xmm3, %xmm0
+; SSE-64-NEXT: xorps %xmm2, %xmm2
+; SSE-64-NEXT: xorps %xmm1, %xmm1
+; SSE-64-NEXT: jb .LBB19_2
+; SSE-64-NEXT: # %bb.1:
+; SSE-64-NEXT: movaps %xmm3, %xmm1
+; SSE-64-NEXT: .LBB19_2:
+; SSE-64-NEXT: movaps %xmm0, %xmm4
+; SSE-64-NEXT: subss %xmm1, %xmm4
+; SSE-64-NEXT: cvttss2si %xmm4, %rax
+; SSE-64-NEXT: setae %cl
+; SSE-64-NEXT: movzbl %cl, %ecx
+; SSE-64-NEXT: shlq $63, %rcx
; SSE-64-NEXT: xorq %rax, %rcx
; SSE-64-NEXT: movq %rcx, %xmm1
; SSE-64-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; SSE-64-NEXT: xorl %eax, %eax
-; SSE-64-NEXT: ucomiss %xmm2, %xmm0
-; SSE-64-NEXT: setae %al
-; SSE-64-NEXT: shlq $63, %rax
-; SSE-64-NEXT: movaps %xmm0, %xmm3
-; SSE-64-NEXT: cmpltss %xmm2, %xmm3
-; SSE-64-NEXT: andnps %xmm2, %xmm3
-; SSE-64-NEXT: subss %xmm3, %xmm0
-; SSE-64-NEXT: cvttss2si %xmm0, %rcx
+; SSE-64-NEXT: comiss %xmm3, %xmm0
+; SSE-64-NEXT: jb .LBB19_4
+; SSE-64-NEXT: # %bb.3:
+; SSE-64-NEXT: movaps %xmm3, %xmm2
+; SSE-64-NEXT: .LBB19_4:
+; SSE-64-NEXT: subss %xmm2, %xmm0
+; SSE-64-NEXT: cvttss2si %xmm0, %rax
+; SSE-64-NEXT: setae %cl
+; SSE-64-NEXT: movzbl %cl, %ecx
+; SSE-64-NEXT: shlq $63, %rcx
; SSE-64-NEXT: xorq %rax, %rcx
; SSE-64-NEXT: movq %rcx, %xmm0
; SSE-64-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
@@ -1956,28 +2009,34 @@ define <2 x i1> @strict_vector_fptoui_v2f32_to_v2i1(<2 x float> %a) #0 {
; AVX-32-NEXT: .cfi_def_cfa_register %ebp
; AVX-32-NEXT: andl $-8, %esp
; AVX-32-NEXT: subl $16, %esp
-; AVX-32-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX-32-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; AVX-32-NEXT: vcmpltss %xmm2, %xmm1, %xmm3
-; AVX-32-NEXT: vandnps %xmm2, %xmm3, %xmm3
-; AVX-32-NEXT: vsubss %xmm3, %xmm1, %xmm3
+; AVX-32-NEXT: vmovshdup {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; AVX-32-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX-32-NEXT: vcomiss %xmm1, %xmm3
+; AVX-32-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; AVX-32-NEXT: vxorps %xmm4, %xmm4, %xmm4
+; AVX-32-NEXT: jb .LBB19_2
+; AVX-32-NEXT: # %bb.1:
+; AVX-32-NEXT: vmovaps %xmm1, %xmm4
+; AVX-32-NEXT: .LBB19_2:
+; AVX-32-NEXT: vsubss %xmm4, %xmm3, %xmm3
; AVX-32-NEXT: vmovss %xmm3, {{[0-9]+}}(%esp)
-; AVX-32-NEXT: vcmpltss %xmm2, %xmm0, %xmm3
-; AVX-32-NEXT: vandnps %xmm2, %xmm3, %xmm3
-; AVX-32-NEXT: vsubss %xmm3, %xmm0, %xmm3
-; AVX-32-NEXT: vmovss %xmm3, (%esp)
; AVX-32-NEXT: flds {{[0-9]+}}(%esp)
; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp)
-; AVX-32-NEXT: flds (%esp)
-; AVX-32-NEXT: fisttpll (%esp)
-; AVX-32-NEXT: xorl %eax, %eax
-; AVX-32-NEXT: vucomiss %xmm2, %xmm1
; AVX-32-NEXT: setae %al
+; AVX-32-NEXT: movzbl %al, %eax
; AVX-32-NEXT: shll $31, %eax
; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
-; AVX-32-NEXT: xorl %ecx, %ecx
-; AVX-32-NEXT: vucomiss %xmm2, %xmm0
+; AVX-32-NEXT: vcomiss %xmm1, %xmm0
+; AVX-32-NEXT: jb .LBB19_4
+; AVX-32-NEXT: # %bb.3:
+; AVX-32-NEXT: vmovaps %xmm1, %xmm2
+; AVX-32-NEXT: .LBB19_4:
+; AVX-32-NEXT: vsubss %xmm2, %xmm0, %xmm0
+; AVX-32-NEXT: vmovss %xmm0, (%esp)
+; AVX-32-NEXT: flds (%esp)
+; AVX-32-NEXT: fisttpll (%esp)
; AVX-32-NEXT: setae %cl
+; AVX-32-NEXT: movzbl %cl, %ecx
; AVX-32-NEXT: shll $31, %ecx
; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
@@ -1992,28 +2051,34 @@ define <2 x i1> @strict_vector_fptoui_v2f32_to_v2i1(<2 x float> %a) #0 {
; AVX-64-LABEL: strict_vector_fptoui_v2f32_to_v2i1:
; AVX-64: # %bb.0:
; AVX-64-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX-64-NEXT: xorl %eax, %eax
-; AVX-64-NEXT: vucomiss %xmm1, %xmm0
-; AVX-64-NEXT: setae %al
-; AVX-64-NEXT: shlq $63, %rax
-; AVX-64-NEXT: vcmpltss %xmm1, %xmm0, %xmm2
-; AVX-64-NEXT: vandnps %xmm1, %xmm2, %xmm2
-; AVX-64-NEXT: vsubss %xmm2, %xmm0, %xmm2
-; AVX-64-NEXT: vcvttss2si %xmm2, %rcx
+; AVX-64-NEXT: vcomiss %xmm1, %xmm0
+; AVX-64-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; AVX-64-NEXT: vxorps %xmm3, %xmm3, %xmm3
+; AVX-64-NEXT: jb .LBB19_2
+; AVX-64-NEXT: # %bb.1:
+; AVX-64-NEXT: vmovaps %xmm1, %xmm3
+; AVX-64-NEXT: .LBB19_2:
+; AVX-64-NEXT: vsubss %xmm3, %xmm0, %xmm3
+; AVX-64-NEXT: vcvttss2si %xmm3, %rax
+; AVX-64-NEXT: setae %cl
+; AVX-64-NEXT: movzbl %cl, %ecx
+; AVX-64-NEXT: shlq $63, %rcx
; AVX-64-NEXT: xorq %rax, %rcx
-; AVX-64-NEXT: vmovq %rcx, %xmm2
+; AVX-64-NEXT: vmovq %rcx, %xmm3
; AVX-64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; AVX-64-NEXT: xorl %eax, %eax
-; AVX-64-NEXT: vucomiss %xmm1, %xmm0
-; AVX-64-NEXT: setae %al
-; AVX-64-NEXT: shlq $63, %rax
-; AVX-64-NEXT: vcmpltss %xmm1, %xmm0, %xmm3
-; AVX-64-NEXT: vandnps %xmm1, %xmm3, %xmm1
-; AVX-64-NEXT: vsubss %xmm1, %xmm0, %xmm0
-; AVX-64-NEXT: vcvttss2si %xmm0, %rcx
+; AVX-64-NEXT: vcomiss %xmm1, %xmm0
+; AVX-64-NEXT: jb .LBB19_4
+; AVX-64-NEXT: # %bb.3:
+; AVX-64-NEXT: vmovaps %xmm1, %xmm2
+; AVX-64-NEXT: .LBB19_4:
+; AVX-64-NEXT: vsubss %xmm2, %xmm0, %xmm0
+; AVX-64-NEXT: vcvttss2si %xmm0, %rax
+; AVX-64-NEXT: setae %cl
+; AVX-64-NEXT: movzbl %cl, %ecx
+; AVX-64-NEXT: shlq $63, %rcx
; AVX-64-NEXT: xorq %rax, %rcx
; AVX-64-NEXT: vmovq %rcx, %xmm0
-; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
+; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX-64-NEXT: retq
;
; AVX512VL-LABEL: strict_vector_fptoui_v2f32_to_v2i1:
@@ -2085,58 +2150,66 @@ define <4 x i32> @strict_vector_fptoui_v4f32_to_v4i32(<4 x float> %a) #0 {
; SSE-32: # %bb.0:
; SSE-32-NEXT: movaps %xmm0, %xmm1
; SSE-32-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1],xmm0[2,3]
-; SSE-32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SSE-32-NEXT: xorl %eax, %eax
-; SSE-32-NEXT: ucomiss %xmm2, %xmm1
+; SSE-32-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; SSE-32-NEXT: comiss %xmm3, %xmm1
+; SSE-32-NEXT: xorps %xmm2, %xmm2
+; SSE-32-NEXT: xorps %xmm4, %xmm4
+; SSE-32-NEXT: jb .LBB21_2
+; SSE-32-NEXT: # %bb.1:
+; SSE-32-NEXT: movaps %xmm3, %xmm4
+; SSE-32-NEXT: .LBB21_2:
; SSE-32-NEXT: setae %al
-; SSE-32-NEXT: shll $31, %eax
-; SSE-32-NEXT: movaps %xmm1, %xmm3
-; SSE-32-NEXT: cmpltss %xmm2, %xmm3
-; SSE-32-NEXT: andnps %xmm2, %xmm3
-; SSE-32-NEXT: subss %xmm3, %xmm1
-; SSE-32-NEXT: cvttss2si %xmm1, %ecx
-; SSE-32-NEXT: xorl %eax, %ecx
-; SSE-32-NEXT: movd %ecx, %xmm1
-; SSE-32-NEXT: movaps %xmm0, %xmm3
-; SSE-32-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm0[1]
-; SSE-32-NEXT: xorl %eax, %eax
-; SSE-32-NEXT: ucomiss %xmm2, %xmm3
+; SSE-32-NEXT: movzbl %al, %ecx
+; SSE-32-NEXT: shll $31, %ecx
+; SSE-32-NEXT: subss %xmm4, %xmm1
+; SSE-32-NEXT: cvttss2si %xmm1, %eax
+; SSE-32-NEXT: xorl %ecx, %eax
+; SSE-32-NEXT: movaps %xmm0, %xmm4
+; SSE-32-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm0[1]
+; SSE-32-NEXT: comiss %xmm3, %xmm4
+; SSE-32-NEXT: xorps %xmm5, %xmm5
+; SSE-32-NEXT: jb .LBB21_4
+; SSE-32-NEXT: # %bb.3:
+; SSE-32-NEXT: movaps %xmm3, %xmm5
+; SSE-32-NEXT: .LBB21_4:
+; SSE-32-NEXT: movd %eax, %xmm1
; SSE-32-NEXT: setae %al
+; SSE-32-NEXT: movzbl %al, %eax
; SSE-32-NEXT: shll $31, %eax
-; SSE-32-NEXT: movaps %xmm3, %xmm4
-; SSE-32-NEXT: cmpltss %xmm2, %xmm4
-; SSE-32-NEXT: andnps %xmm2, %xmm4
-; SSE-32-NEXT: subss %xmm4, %xmm3
-; SSE-32-NEXT: cvttss2si %xmm3, %ecx
+; SSE-32-NEXT: subss %xmm5, %xmm4
+; SSE-32-NEXT: cvttss2si %xmm4, %ecx
; SSE-32-NEXT: xorl %eax, %ecx
-; SSE-32-NEXT: movd %ecx, %xmm3
-; SSE-32-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
-; SSE-32-NEXT: xorl %eax, %eax
-; SSE-32-NEXT: ucomiss %xmm2, %xmm0
+; SSE-32-NEXT: movd %ecx, %xmm4
+; SSE-32-NEXT: comiss %xmm3, %xmm0
+; SSE-32-NEXT: xorps %xmm5, %xmm5
+; SSE-32-NEXT: jb .LBB21_6
+; SSE-32-NEXT: # %bb.5:
+; SSE-32-NEXT: movaps %xmm3, %xmm5
+; SSE-32-NEXT: .LBB21_6:
+; SSE-32-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
; SSE-32-NEXT: setae %al
+; SSE-32-NEXT: movzbl %al, %eax
; SSE-32-NEXT: shll $31, %eax
; SSE-32-NEXT: movaps %xmm0, %xmm1
-; SSE-32-NEXT: cmpltss %xmm2, %xmm1
-; SSE-32-NEXT: andnps %xmm2, %xmm1
-; SSE-32-NEXT: movaps %xmm0, %xmm4
-; SSE-32-NEXT: subss %xmm1, %xmm4
-; SSE-32-NEXT: cvttss2si %xmm4, %ecx
+; SSE-32-NEXT: subss %xmm5, %xmm1
+; SSE-32-NEXT: cvttss2si %xmm1, %ecx
; SSE-32-NEXT: xorl %eax, %ecx
; SSE-32-NEXT: movd %ecx, %xmm1
; SSE-32-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; SSE-32-NEXT: xorl %eax, %eax
-; SSE-32-NEXT: ucomiss %xmm2, %xmm0
+; SSE-32-NEXT: comiss %xmm3, %xmm0
+; SSE-32-NEXT: jb .LBB21_8
+; SSE-32-NEXT: # %bb.7:
+; SSE-32-NEXT: movaps %xmm3, %xmm2
+; SSE-32-NEXT: .LBB21_8:
; SSE-32-NEXT: setae %al
+; SSE-32-NEXT: movzbl %al, %eax
; SSE-32-NEXT: shll $31, %eax
-; SSE-32-NEXT: movaps %xmm0, %xmm4
-; SSE-32-NEXT: cmpltss %xmm2, %xmm4
-; SSE-32-NEXT: andnps %xmm2, %xmm4
-; SSE-32-NEXT: subss %xmm4, %xmm0
+; SSE-32-NEXT: subss %xmm2, %xmm0
; SSE-32-NEXT: cvttss2si %xmm0, %ecx
; SSE-32-NEXT: xorl %eax, %ecx
; SSE-32-NEXT: movd %ecx, %xmm0
; SSE-32-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
+; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm4[0]
; SSE-32-NEXT: movdqa %xmm1, %xmm0
; SSE-32-NEXT: retl
;
diff --git a/llvm/test/CodeGen/X86/vec-strict-fptoint-256.ll b/llvm/test/CodeGen/X86/vec-strict-fptoint-256.ll
index bcb002823d9d..053d708c4afc 100644
--- a/llvm/test/CodeGen/X86/vec-strict-fptoint-256.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-fptoint-256.ll
@@ -163,7 +163,7 @@ define <4 x i64> @strict_vector_fptoui_v4f64_to_v4i64(<4 x double> %a) #0 {
; AVX-32-NEXT: subl $32, %esp
; AVX-32-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX-32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-32-NEXT: vucomisd %xmm1, %xmm3
+; AVX-32-NEXT: vcomisd %xmm1, %xmm3
; AVX-32-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; AVX-32-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; AVX-32-NEXT: jb .LBB1_2
@@ -180,7 +180,7 @@ define <4 x i64> @strict_vector_fptoui_v4f64_to_v4i64(<4 x double> %a) #0 {
; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
; AVX-32-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX-32-NEXT: vpermilpd {{.*#+}} xmm4 = xmm3[1,0]
-; AVX-32-NEXT: vucomisd %xmm1, %xmm4
+; AVX-32-NEXT: vcomisd %xmm1, %xmm4
; AVX-32-NEXT: vxorpd %xmm5, %xmm5, %xmm5
; AVX-32-NEXT: jb .LBB1_4
; AVX-32-NEXT: # %bb.3:
@@ -194,7 +194,7 @@ define <4 x i64> @strict_vector_fptoui_v4f64_to_v4i64(<4 x double> %a) #0 {
; AVX-32-NEXT: movzbl %cl, %ecx
; AVX-32-NEXT: shll $31, %ecx
; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
-; AVX-32-NEXT: vucomisd %xmm1, %xmm3
+; AVX-32-NEXT: vcomisd %xmm1, %xmm3
; AVX-32-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; AVX-32-NEXT: jb .LBB1_6
; AVX-32-NEXT: # %bb.5:
@@ -208,7 +208,7 @@ define <4 x i64> @strict_vector_fptoui_v4f64_to_v4i64(<4 x double> %a) #0 {
; AVX-32-NEXT: movzbl %dl, %edx
; AVX-32-NEXT: shll $31, %edx
; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %edx
-; AVX-32-NEXT: vucomisd %xmm1, %xmm0
+; AVX-32-NEXT: vcomisd %xmm1, %xmm0
; AVX-32-NEXT: jb .LBB1_8
; AVX-32-NEXT: # %bb.7:
; AVX-32-NEXT: vmovapd %xmm1, %xmm2
@@ -237,53 +237,65 @@ define <4 x i64> @strict_vector_fptoui_v4f64_to_v4i64(<4 x double> %a) #0 {
;
; AVX-64-LABEL: strict_vector_fptoui_v4f64_to_v4i64:
; AVX-64: # %bb.0:
-; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX-64-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-64-NEXT: xorl %eax, %eax
-; AVX-64-NEXT: vucomisd %xmm1, %xmm2
+; AVX-64-NEXT: vcomisd %xmm1, %xmm3
+; AVX-64-NEXT: vxorpd %xmm2, %xmm2, %xmm2
+; AVX-64-NEXT: vxorpd %xmm4, %xmm4, %xmm4
+; AVX-64-NEXT: jb .LBB1_2
+; AVX-64-NEXT: # %bb.1:
+; AVX-64-NEXT: vmovapd %xmm1, %xmm4
+; AVX-64-NEXT: .LBB1_2:
+; AVX-64-NEXT: vsubsd %xmm4, %xmm3, %xmm4
+; AVX-64-NEXT: vcvttsd2si %xmm4, %rcx
; AVX-64-NEXT: setae %al
+; AVX-64-NEXT: movzbl %al, %eax
; AVX-64-NEXT: shlq $63, %rax
-; AVX-64-NEXT: vcmpltsd %xmm1, %xmm2, %xmm3
-; AVX-64-NEXT: vandnpd %xmm1, %xmm3, %xmm3
-; AVX-64-NEXT: vsubsd %xmm3, %xmm2, %xmm3
-; AVX-64-NEXT: vcvttsd2si %xmm3, %rcx
+; AVX-64-NEXT: xorq %rcx, %rax
+; AVX-64-NEXT: vpermilpd {{.*#+}} xmm4 = xmm3[1,0]
+; AVX-64-NEXT: vcomisd %xmm1, %xmm4
+; AVX-64-NEXT: vxorpd %xmm5, %xmm5, %xmm5
+; AVX-64-NEXT: jb .LBB1_4
+; AVX-64-NEXT: # %bb.3:
+; AVX-64-NEXT: vmovapd %xmm1, %xmm5
+; AVX-64-NEXT: .LBB1_4:
+; AVX-64-NEXT: vmovq %rax, %xmm3
+; AVX-64-NEXT: vsubsd %xmm5, %xmm4, %xmm4
+; AVX-64-NEXT: vcvttsd2si %xmm4, %rax
+; AVX-64-NEXT: setae %cl
+; AVX-64-NEXT: movzbl %cl, %ecx
+; AVX-64-NEXT: shlq $63, %rcx
; AVX-64-NEXT: xorq %rax, %rcx
-; AVX-64-NEXT: vmovq %rcx, %xmm3
-; AVX-64-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
-; AVX-64-NEXT: xorl %eax, %eax
-; AVX-64-NEXT: vucomisd %xmm1, %xmm2
-; AVX-64-NEXT: setae %al
-; AVX-64-NEXT: shlq $63, %rax
-; AVX-64-NEXT: vcmpltsd %xmm1, %xmm2, %xmm4
-; AVX-64-NEXT: vandnpd %xmm1, %xmm4, %xmm4
-; AVX-64-NEXT: vsubsd %xmm4, %xmm2, %xmm2
-; AVX-64-NEXT: vcvttsd2si %xmm2, %rcx
-; AVX-64-NEXT: xorq %rax, %rcx
-; AVX-64-NEXT: vmovq %rcx, %xmm2
-; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
-; AVX-64-NEXT: xorl %eax, %eax
-; AVX-64-NEXT: vucomisd %xmm1, %xmm0
-; AVX-64-NEXT: setae %al
-; AVX-64-NEXT: shlq $63, %rax
-; AVX-64-NEXT: vcmpltsd %xmm1, %xmm0, %xmm3
-; AVX-64-NEXT: vandnpd %xmm1, %xmm3, %xmm3
-; AVX-64-NEXT: vsubsd %xmm3, %xmm0, %xmm3
-; AVX-64-NEXT: vcvttsd2si %xmm3, %rcx
+; AVX-64-NEXT: vmovq %rcx, %xmm4
+; AVX-64-NEXT: vcomisd %xmm1, %xmm0
+; AVX-64-NEXT: vxorpd %xmm5, %xmm5, %xmm5
+; AVX-64-NEXT: jb .LBB1_6
+; AVX-64-NEXT: # %bb.5:
+; AVX-64-NEXT: vmovapd %xmm1, %xmm5
+; AVX-64-NEXT: .LBB1_6:
+; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; AVX-64-NEXT: vsubsd %xmm5, %xmm0, %xmm4
+; AVX-64-NEXT: vcvttsd2si %xmm4, %rax
+; AVX-64-NEXT: setae %cl
+; AVX-64-NEXT: movzbl %cl, %ecx
+; AVX-64-NEXT: shlq $63, %rcx
; AVX-64-NEXT: xorq %rax, %rcx
-; AVX-64-NEXT: vmovq %rcx, %xmm3
+; AVX-64-NEXT: vmovq %rcx, %xmm4
; AVX-64-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX-64-NEXT: xorl %eax, %eax
-; AVX-64-NEXT: vucomisd %xmm1, %xmm0
-; AVX-64-NEXT: setae %al
-; AVX-64-NEXT: shlq $63, %rax
-; AVX-64-NEXT: vcmpltsd %xmm1, %xmm0, %xmm4
-; AVX-64-NEXT: vandnpd %xmm1, %xmm4, %xmm1
-; AVX-64-NEXT: vsubsd %xmm1, %xmm0, %xmm0
-; AVX-64-NEXT: vcvttsd2si %xmm0, %rcx
+; AVX-64-NEXT: vcomisd %xmm1, %xmm0
+; AVX-64-NEXT: jb .LBB1_8
+; AVX-64-NEXT: # %bb.7:
+; AVX-64-NEXT: vmovapd %xmm1, %xmm2
+; AVX-64-NEXT: .LBB1_8:
+; AVX-64-NEXT: vsubsd %xmm2, %xmm0, %xmm0
+; AVX-64-NEXT: vcvttsd2si %xmm0, %rax
+; AVX-64-NEXT: setae %cl
+; AVX-64-NEXT: movzbl %cl, %ecx
+; AVX-64-NEXT: shlq $63, %rcx
; AVX-64-NEXT: xorq %rax, %rcx
; AVX-64-NEXT: vmovq %rcx, %xmm0
-; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
-; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm4[0],xmm0[0]
+; AVX-64-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX-64-NEXT: retq
;
; AVX512VL-32-LABEL: strict_vector_fptoui_v4f64_to_v4i64:
@@ -301,7 +313,7 @@ define <4 x i64> @strict_vector_fptoui_v4f64_to_v4i64(<4 x double> %a) #0 {
; AVX512VL-32-NEXT: .cfi_offset %ebx, -12
; AVX512VL-32-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512VL-32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX512VL-32-NEXT: vucomisd %xmm1, %xmm2
+; AVX512VL-32-NEXT: vcomisd %xmm1, %xmm2
; AVX512VL-32-NEXT: setb %cl
; AVX512VL-32-NEXT: kmovw %ecx, %k1
; AVX512VL-32-NEXT: vxorpd %xmm3, %xmm3, %xmm3
@@ -319,7 +331,7 @@ define <4 x i64> @strict_vector_fptoui_v4f64_to_v4i64(<4 x double> %a) #0 {
; AVX512VL-32-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX512VL-32-NEXT: vpermilpd {{.*#+}} xmm4 = xmm2[1,0]
; AVX512VL-32-NEXT: xorl %ecx, %ecx
-; AVX512VL-32-NEXT: vucomisd %xmm1, %xmm4
+; AVX512VL-32-NEXT: vcomisd %xmm1, %xmm4
; AVX512VL-32-NEXT: setb %dl
; AVX512VL-32-NEXT: kmovw %edx, %k1
; AVX512VL-32-NEXT: vmovapd %xmm1, %xmm5
@@ -332,7 +344,7 @@ define <4 x i64> @strict_vector_fptoui_v4f64_to_v4i64(<4 x double> %a) #0 {
; AVX512VL-32-NEXT: shll $31, %ecx
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
; AVX512VL-32-NEXT: xorl %edx, %edx
-; AVX512VL-32-NEXT: vucomisd %xmm1, %xmm2
+; AVX512VL-32-NEXT: vcomisd %xmm1, %xmm2
; AVX512VL-32-NEXT: setb %bl
; AVX512VL-32-NEXT: kmovw %ebx, %k1
; AVX512VL-32-NEXT: vmovapd %xmm1, %xmm4
@@ -345,7 +357,7 @@ define <4 x i64> @strict_vector_fptoui_v4f64_to_v4i64(<4 x double> %a) #0 {
; AVX512VL-32-NEXT: shll $31, %edx
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %edx
; AVX512VL-32-NEXT: xorl %ebx, %ebx
-; AVX512VL-32-NEXT: vucomisd %xmm1, %xmm0
+; AVX512VL-32-NEXT: vcomisd %xmm1, %xmm0
; AVX512VL-32-NEXT: setb %al
; AVX512VL-32-NEXT: kmovw %eax, %k1
; AVX512VL-32-NEXT: vmovsd %xmm3, %xmm1, %xmm1 {%k1}
@@ -532,7 +544,7 @@ define <4 x i64> @strict_vector_fptoui_v4f32_to_v4i64(<4 x float> %a) #0 {
; AVX-32-NEXT: subl $32, %esp
; AVX-32-NEXT: vmovshdup {{.*#+}} xmm3 = xmm0[1,1,3,3]
; AVX-32-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX-32-NEXT: vucomiss %xmm1, %xmm3
+; AVX-32-NEXT: vcomiss %xmm1, %xmm3
; AVX-32-NEXT: vxorps %xmm2, %xmm2, %xmm2
; AVX-32-NEXT: vxorps %xmm4, %xmm4, %xmm4
; AVX-32-NEXT: jb .LBB3_2
@@ -548,7 +560,7 @@ define <4 x i64> @strict_vector_fptoui_v4f32_to_v4i64(<4 x float> %a) #0 {
; AVX-32-NEXT: shll $31, %eax
; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
; AVX-32-NEXT: vpermilps {{.*#+}} xmm3 = xmm0[3,1,2,3]
-; AVX-32-NEXT: vucomiss %xmm1, %xmm3
+; AVX-32-NEXT: vcomiss %xmm1, %xmm3
; AVX-32-NEXT: vxorps %xmm4, %xmm4, %xmm4
; AVX-32-NEXT: jb .LBB3_4
; AVX-32-NEXT: # %bb.3:
@@ -563,7 +575,7 @@ define <4 x i64> @strict_vector_fptoui_v4f32_to_v4i64(<4 x float> %a) #0 {
; AVX-32-NEXT: shll $31, %ecx
; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
; AVX-32-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
-; AVX-32-NEXT: vucomiss %xmm1, %xmm3
+; AVX-32-NEXT: vcomiss %xmm1, %xmm3
; AVX-32-NEXT: vxorps %xmm4, %xmm4, %xmm4
; AVX-32-NEXT: jb .LBB3_6
; AVX-32-NEXT: # %bb.5:
@@ -577,7 +589,7 @@ define <4 x i64> @strict_vector_fptoui_v4f32_to_v4i64(<4 x float> %a) #0 {
; AVX-32-NEXT: movzbl %dl, %edx
; AVX-32-NEXT: shll $31, %edx
; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %edx
-; AVX-32-NEXT: vucomiss %xmm1, %xmm0
+; AVX-32-NEXT: vcomiss %xmm1, %xmm0
; AVX-32-NEXT: jb .LBB3_8
; AVX-32-NEXT: # %bb.7:
; AVX-32-NEXT: vmovaps %xmm1, %xmm2
@@ -606,53 +618,65 @@ define <4 x i64> @strict_vector_fptoui_v4f32_to_v4i64(<4 x float> %a) #0 {
;
; AVX-64-LABEL: strict_vector_fptoui_v4f32_to_v4i64:
; AVX-64: # %bb.0:
-; AVX-64-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
+; AVX-64-NEXT: vpermilps {{.*#+}} xmm3 = xmm0[3,1,2,3]
; AVX-64-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX-64-NEXT: xorl %eax, %eax
-; AVX-64-NEXT: vucomiss %xmm1, %xmm2
-; AVX-64-NEXT: setae %al
-; AVX-64-NEXT: shlq $63, %rax
-; AVX-64-NEXT: vcmpltss %xmm1, %xmm2, %xmm3
-; AVX-64-NEXT: vandnps %xmm1, %xmm3, %xmm3
-; AVX-64-NEXT: vsubss %xmm3, %xmm2, %xmm2
-; AVX-64-NEXT: vcvttss2si %xmm2, %rcx
-; AVX-64-NEXT: xorq %rax, %rcx
-; AVX-64-NEXT: vmovq %rcx, %xmm2
-; AVX-64-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
-; AVX-64-NEXT: xorl %eax, %eax
-; AVX-64-NEXT: vucomiss %xmm1, %xmm3
-; AVX-64-NEXT: setae %al
-; AVX-64-NEXT: shlq $63, %rax
-; AVX-64-NEXT: vcmpltss %xmm1, %xmm3, %xmm4
-; AVX-64-NEXT: vandnps %xmm1, %xmm4, %xmm4
+; AVX-64-NEXT: vcomiss %xmm1, %xmm3
+; AVX-64-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; AVX-64-NEXT: vxorps %xmm4, %xmm4, %xmm4
+; AVX-64-NEXT: jb .LBB3_2
+; AVX-64-NEXT: # %bb.1:
+; AVX-64-NEXT: vmovaps %xmm1, %xmm4
+; AVX-64-NEXT: .LBB3_2:
; AVX-64-NEXT: vsubss %xmm4, %xmm3, %xmm3
; AVX-64-NEXT: vcvttss2si %xmm3, %rcx
-; AVX-64-NEXT: xorq %rax, %rcx
-; AVX-64-NEXT: vmovq %rcx, %xmm3
-; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
-; AVX-64-NEXT: xorl %eax, %eax
-; AVX-64-NEXT: vucomiss %xmm1, %xmm0
; AVX-64-NEXT: setae %al
+; AVX-64-NEXT: movzbl %al, %eax
; AVX-64-NEXT: shlq $63, %rax
-; AVX-64-NEXT: vcmpltss %xmm1, %xmm0, %xmm3
-; AVX-64-NEXT: vandnps %xmm1, %xmm3, %xmm3
-; AVX-64-NEXT: vsubss %xmm3, %xmm0, %xmm3
-; AVX-64-NEXT: vcvttss2si %xmm3, %rcx
+; AVX-64-NEXT: xorq %rcx, %rax
+; AVX-64-NEXT: vpermilpd {{.*#+}} xmm4 = xmm0[1,0]
+; AVX-64-NEXT: vcomiss %xmm1, %xmm4
+; AVX-64-NEXT: vxorps %xmm5, %xmm5, %xmm5
+; AVX-64-NEXT: jb .LBB3_4
+; AVX-64-NEXT: # %bb.3:
+; AVX-64-NEXT: vmovaps %xmm1, %xmm5
+; AVX-64-NEXT: .LBB3_4:
+; AVX-64-NEXT: vmovq %rax, %xmm3
+; AVX-64-NEXT: vsubss %xmm5, %xmm4, %xmm4
+; AVX-64-NEXT: vcvttss2si %xmm4, %rax
+; AVX-64-NEXT: setae %cl
+; AVX-64-NEXT: movzbl %cl, %ecx
+; AVX-64-NEXT: shlq $63, %rcx
; AVX-64-NEXT: xorq %rax, %rcx
-; AVX-64-NEXT: vmovq %rcx, %xmm3
+; AVX-64-NEXT: vmovq %rcx, %xmm4
+; AVX-64-NEXT: vcomiss %xmm1, %xmm0
+; AVX-64-NEXT: vxorps %xmm5, %xmm5, %xmm5
+; AVX-64-NEXT: jb .LBB3_6
+; AVX-64-NEXT: # %bb.5:
+; AVX-64-NEXT: vmovaps %xmm1, %xmm5
+; AVX-64-NEXT: .LBB3_6:
+; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm3 = xmm4[0],xmm3[0]
+; AVX-64-NEXT: vsubss %xmm5, %xmm0, %xmm4
+; AVX-64-NEXT: vcvttss2si %xmm4, %rax
+; AVX-64-NEXT: setae %cl
+; AVX-64-NEXT: movzbl %cl, %ecx
+; AVX-64-NEXT: shlq $63, %rcx
+; AVX-64-NEXT: xorq %rax, %rcx
+; AVX-64-NEXT: vmovq %rcx, %xmm4
; AVX-64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; AVX-64-NEXT: xorl %eax, %eax
-; AVX-64-NEXT: vucomiss %xmm1, %xmm0
-; AVX-64-NEXT: setae %al
-; AVX-64-NEXT: shlq $63, %rax
-; AVX-64-NEXT: vcmpltss %xmm1, %xmm0, %xmm4
-; AVX-64-NEXT: vandnps %xmm1, %xmm4, %xmm1
-; AVX-64-NEXT: vsubss %xmm1, %xmm0, %xmm0
-; AVX-64-NEXT: vcvttss2si %xmm0, %rcx
+; AVX-64-NEXT: vcomiss %xmm1, %xmm0
+; AVX-64-NEXT: jb .LBB3_8
+; AVX-64-NEXT: # %bb.7:
+; AVX-64-NEXT: vmovaps %xmm1, %xmm2
+; AVX-64-NEXT: .LBB3_8:
+; AVX-64-NEXT: vsubss %xmm2, %xmm0, %xmm0
+; AVX-64-NEXT: vcvttss2si %xmm0, %rax
+; AVX-64-NEXT: setae %cl
+; AVX-64-NEXT: movzbl %cl, %ecx
+; AVX-64-NEXT: shlq $63, %rcx
; AVX-64-NEXT: xorq %rax, %rcx
; AVX-64-NEXT: vmovq %rcx, %xmm0
-; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
-; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm4[0],xmm0[0]
+; AVX-64-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX-64-NEXT: retq
;
; AVX512VL-32-LABEL: strict_vector_fptoui_v4f32_to_v4i64:
@@ -670,7 +694,7 @@ define <4 x i64> @strict_vector_fptoui_v4f32_to_v4i64(<4 x float> %a) #0 {
; AVX512VL-32-NEXT: .cfi_offset %ebx, -12
; AVX512VL-32-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX512VL-32-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX512VL-32-NEXT: vucomiss %xmm1, %xmm2
+; AVX512VL-32-NEXT: vcomiss %xmm1, %xmm2
; AVX512VL-32-NEXT: setb %cl
; AVX512VL-32-NEXT: kmovw %ecx, %k1
; AVX512VL-32-NEXT: vxorps %xmm3, %xmm3, %xmm3
@@ -687,7 +711,7 @@ define <4 x i64> @strict_vector_fptoui_v4f32_to_v4i64(<4 x float> %a) #0 {
; AVX512VL-32-NEXT: movl %eax, %esi
; AVX512VL-32-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
; AVX512VL-32-NEXT: xorl %ecx, %ecx
-; AVX512VL-32-NEXT: vucomiss %xmm1, %xmm2
+; AVX512VL-32-NEXT: vcomiss %xmm1, %xmm2
; AVX512VL-32-NEXT: setb %dl
; AVX512VL-32-NEXT: kmovw %edx, %k1
; AVX512VL-32-NEXT: vmovaps %xmm1, %xmm4
@@ -701,7 +725,7 @@ define <4 x i64> @strict_vector_fptoui_v4f32_to_v4i64(<4 x float> %a) #0 {
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
; AVX512VL-32-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512VL-32-NEXT: xorl %edx, %edx
-; AVX512VL-32-NEXT: vucomiss %xmm1, %xmm2
+; AVX512VL-32-NEXT: vcomiss %xmm1, %xmm2
; AVX512VL-32-NEXT: setb %bl
; AVX512VL-32-NEXT: kmovw %ebx, %k1
; AVX512VL-32-NEXT: vmovaps %xmm1, %xmm4
@@ -714,7 +738,7 @@ define <4 x i64> @strict_vector_fptoui_v4f32_to_v4i64(<4 x float> %a) #0 {
; AVX512VL-32-NEXT: shll $31, %edx
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %edx
; AVX512VL-32-NEXT: xorl %ebx, %ebx
-; AVX512VL-32-NEXT: vucomiss %xmm1, %xmm0
+; AVX512VL-32-NEXT: vcomiss %xmm1, %xmm0
; AVX512VL-32-NEXT: setb %al
; AVX512VL-32-NEXT: kmovw %eax, %k1
; AVX512VL-32-NEXT: vmovss %xmm3, %xmm1, %xmm1 {%k1}
diff --git a/llvm/test/CodeGen/X86/vec-strict-fptoint-512.ll b/llvm/test/CodeGen/X86/vec-strict-fptoint-512.ll
index 26806db74a1e..c5bf545ce776 100644
--- a/llvm/test/CodeGen/X86/vec-strict-fptoint-512.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-fptoint-512.ll
@@ -150,7 +150,7 @@ define <8 x i64> @strict_vector_fptoui_v8f64_to_v8i64(<8 x double> %a) #0 {
; AVX512VL-32-NEXT: .cfi_offset %ebx, -12
; AVX512VL-32-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX512VL-32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX512VL-32-NEXT: vucomisd %xmm1, %xmm3
+; AVX512VL-32-NEXT: vcomisd %xmm1, %xmm3
; AVX512VL-32-NEXT: setb %al
; AVX512VL-32-NEXT: kmovw %eax, %k1
; AVX512VL-32-NEXT: vxorpd %xmm2, %xmm2, %xmm2
@@ -167,7 +167,7 @@ define <8 x i64> @strict_vector_fptoui_v8f64_to_v8i64(<8 x double> %a) #0 {
; AVX512VL-32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; AVX512VL-32-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX512VL-32-NEXT: vpermilpd {{.*#+}} xmm4 = xmm3[1,0]
-; AVX512VL-32-NEXT: vucomisd %xmm1, %xmm4
+; AVX512VL-32-NEXT: vcomisd %xmm1, %xmm4
; AVX512VL-32-NEXT: setb %al
; AVX512VL-32-NEXT: kmovw %eax, %k1
; AVX512VL-32-NEXT: vmovapd %xmm1, %xmm5
@@ -181,7 +181,7 @@ define <8 x i64> @strict_vector_fptoui_v8f64_to_v8i64(<8 x double> %a) #0 {
; AVX512VL-32-NEXT: shll $31, %eax
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
; AVX512VL-32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; AVX512VL-32-NEXT: vucomisd %xmm1, %xmm3
+; AVX512VL-32-NEXT: vcomisd %xmm1, %xmm3
; AVX512VL-32-NEXT: setb %al
; AVX512VL-32-NEXT: kmovw %eax, %k1
; AVX512VL-32-NEXT: vmovapd %xmm1, %xmm4
@@ -197,7 +197,7 @@ define <8 x i64> @strict_vector_fptoui_v8f64_to_v8i64(<8 x double> %a) #0 {
; AVX512VL-32-NEXT: movl %eax, %edi
; AVX512VL-32-NEXT: vextractf32x4 $2, %zmm0, %xmm3
; AVX512VL-32-NEXT: vpermilpd {{.*#+}} xmm4 = xmm3[1,0]
-; AVX512VL-32-NEXT: vucomisd %xmm1, %xmm4
+; AVX512VL-32-NEXT: vcomisd %xmm1, %xmm4
; AVX512VL-32-NEXT: setb %al
; AVX512VL-32-NEXT: kmovw %eax, %k1
; AVX512VL-32-NEXT: vmovapd %xmm1, %xmm5
@@ -212,7 +212,7 @@ define <8 x i64> @strict_vector_fptoui_v8f64_to_v8i64(<8 x double> %a) #0 {
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
; AVX512VL-32-NEXT: movl %eax, %esi
; AVX512VL-32-NEXT: xorl %edx, %edx
-; AVX512VL-32-NEXT: vucomisd %xmm1, %xmm3
+; AVX512VL-32-NEXT: vcomisd %xmm1, %xmm3
; AVX512VL-32-NEXT: setb %al
; AVX512VL-32-NEXT: kmovw %eax, %k1
; AVX512VL-32-NEXT: vmovapd %xmm1, %xmm4
@@ -227,7 +227,7 @@ define <8 x i64> @strict_vector_fptoui_v8f64_to_v8i64(<8 x double> %a) #0 {
; AVX512VL-32-NEXT: vextractf32x4 $3, %zmm0, %xmm3
; AVX512VL-32-NEXT: vpermilpd {{.*#+}} xmm4 = xmm3[1,0]
; AVX512VL-32-NEXT: xorl %ecx, %ecx
-; AVX512VL-32-NEXT: vucomisd %xmm1, %xmm4
+; AVX512VL-32-NEXT: vcomisd %xmm1, %xmm4
; AVX512VL-32-NEXT: setb %al
; AVX512VL-32-NEXT: kmovw %eax, %k1
; AVX512VL-32-NEXT: vmovapd %xmm1, %xmm5
@@ -240,7 +240,7 @@ define <8 x i64> @strict_vector_fptoui_v8f64_to_v8i64(<8 x double> %a) #0 {
; AVX512VL-32-NEXT: shll $31, %ecx
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
; AVX512VL-32-NEXT: xorl %eax, %eax
-; AVX512VL-32-NEXT: vucomisd %xmm1, %xmm3
+; AVX512VL-32-NEXT: vcomisd %xmm1, %xmm3
; AVX512VL-32-NEXT: setb %bl
; AVX512VL-32-NEXT: kmovw %ebx, %k1
; AVX512VL-32-NEXT: vmovapd %xmm1, %xmm4
@@ -252,7 +252,7 @@ define <8 x i64> @strict_vector_fptoui_v8f64_to_v8i64(<8 x double> %a) #0 {
; AVX512VL-32-NEXT: setae %al
; AVX512VL-32-NEXT: shll $31, %eax
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
-; AVX512VL-32-NEXT: vucomisd %xmm1, %xmm0
+; AVX512VL-32-NEXT: vcomisd %xmm1, %xmm0
; AVX512VL-32-NEXT: setb %bl
; AVX512VL-32-NEXT: kmovw %ebx, %k1
; AVX512VL-32-NEXT: vmovsd %xmm2, %xmm1, %xmm1 {%k1}
@@ -454,7 +454,7 @@ define <8 x i64> @strict_vector_fptoui_v8f32_to_v8i64(<8 x float> %a) #0 {
; AVX512VL-32-NEXT: .cfi_offset %ebx, -12
; AVX512VL-32-NEXT: vmovshdup {{.*#+}} xmm3 = xmm0[1,1,3,3]
; AVX512VL-32-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX512VL-32-NEXT: vucomiss %xmm1, %xmm3
+; AVX512VL-32-NEXT: vcomiss %xmm1, %xmm3
; AVX512VL-32-NEXT: setb %al
; AVX512VL-32-NEXT: kmovw %eax, %k1
; AVX512VL-32-NEXT: vxorps %xmm2, %xmm2, %xmm2
@@ -470,7 +470,7 @@ define <8 x i64> @strict_vector_fptoui_v8f32_to_v8i64(<8 x float> %a) #0 {
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
; AVX512VL-32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; AVX512VL-32-NEXT: vpermilps {{.*#+}} xmm3 = xmm0[3,1,2,3]
-; AVX512VL-32-NEXT: vucomiss %xmm1, %xmm3
+; AVX512VL-32-NEXT: vcomiss %xmm1, %xmm3
; AVX512VL-32-NEXT: setb %al
; AVX512VL-32-NEXT: kmovw %eax, %k1
; AVX512VL-32-NEXT: vmovaps %xmm1, %xmm4
@@ -485,7 +485,7 @@ define <8 x i64> @strict_vector_fptoui_v8f32_to_v8i64(<8 x float> %a) #0 {
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
; AVX512VL-32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; AVX512VL-32-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
-; AVX512VL-32-NEXT: vucomiss %xmm1, %xmm3
+; AVX512VL-32-NEXT: vcomiss %xmm1, %xmm3
; AVX512VL-32-NEXT: setb %al
; AVX512VL-32-NEXT: kmovw %eax, %k1
; AVX512VL-32-NEXT: vmovaps %xmm1, %xmm4
@@ -501,7 +501,7 @@ define <8 x i64> @strict_vector_fptoui_v8f32_to_v8i64(<8 x float> %a) #0 {
; AVX512VL-32-NEXT: movl %eax, %edi
; AVX512VL-32-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX512VL-32-NEXT: vmovshdup {{.*#+}} xmm4 = xmm3[1,1,3,3]
-; AVX512VL-32-NEXT: vucomiss %xmm1, %xmm4
+; AVX512VL-32-NEXT: vcomiss %xmm1, %xmm4
; AVX512VL-32-NEXT: setb %al
; AVX512VL-32-NEXT: kmovw %eax, %k1
; AVX512VL-32-NEXT: vmovaps %xmm1, %xmm5
@@ -516,7 +516,7 @@ define <8 x i64> @strict_vector_fptoui_v8f32_to_v8i64(<8 x float> %a) #0 {
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
; AVX512VL-32-NEXT: movl %eax, %esi
; AVX512VL-32-NEXT: xorl %edx, %edx
-; AVX512VL-32-NEXT: vucomiss %xmm1, %xmm3
+; AVX512VL-32-NEXT: vcomiss %xmm1, %xmm3
; AVX512VL-32-NEXT: setb %al
; AVX512VL-32-NEXT: kmovw %eax, %k1
; AVX512VL-32-NEXT: vmovaps %xmm1, %xmm4
@@ -530,7 +530,7 @@ define <8 x i64> @strict_vector_fptoui_v8f32_to_v8i64(<8 x float> %a) #0 {
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %edx
; AVX512VL-32-NEXT: vpermilps {{.*#+}} xmm4 = xmm3[3,1,2,3]
; AVX512VL-32-NEXT: xorl %ecx, %ecx
-; AVX512VL-32-NEXT: vucomiss %xmm1, %xmm4
+; AVX512VL-32-NEXT: vcomiss %xmm1, %xmm4
; AVX512VL-32-NEXT: setb %al
; AVX512VL-32-NEXT: kmovw %eax, %k1
; AVX512VL-32-NEXT: vmovaps %xmm1, %xmm5
@@ -544,7 +544,7 @@ define <8 x i64> @strict_vector_fptoui_v8f32_to_v8i64(<8 x float> %a) #0 {
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
; AVX512VL-32-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0]
; AVX512VL-32-NEXT: xorl %eax, %eax
-; AVX512VL-32-NEXT: vucomiss %xmm1, %xmm3
+; AVX512VL-32-NEXT: vcomiss %xmm1, %xmm3
; AVX512VL-32-NEXT: setb %bl
; AVX512VL-32-NEXT: kmovw %ebx, %k1
; AVX512VL-32-NEXT: vmovaps %xmm1, %xmm4
@@ -556,7 +556,7 @@ define <8 x i64> @strict_vector_fptoui_v8f32_to_v8i64(<8 x float> %a) #0 {
; AVX512VL-32-NEXT: setae %al
; AVX512VL-32-NEXT: shll $31, %eax
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
-; AVX512VL-32-NEXT: vucomiss %xmm1, %xmm0
+; AVX512VL-32-NEXT: vcomiss %xmm1, %xmm0
; AVX512VL-32-NEXT: setb %bl
; AVX512VL-32-NEXT: kmovw %ebx, %k1
; AVX512VL-32-NEXT: vmovss %xmm2, %xmm1, %xmm1 {%k1}
diff --git a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
index 40ff465a9ddf..cca16cc0d704 100644
--- a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
@@ -4465,18 +4465,38 @@ entry:
define <1 x i64> @constrained_vector_fptoui_v1i64_v1f32() #0 {
; CHECK-LABEL: constrained_vector_fptoui_v1i64_v1f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xorps %xmm0, %xmm0
-; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT: subss %xmm0, %xmm1
-; CHECK-NEXT: cvttss2si %xmm1, %rax
+; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; CHECK-NEXT: comiss %xmm0, %xmm2
+; CHECK-NEXT: xorps %xmm1, %xmm1
+; CHECK-NEXT: ja .LBB115_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: movaps %xmm2, %xmm1
+; CHECK-NEXT: .LBB115_2: # %entry
+; CHECK-NEXT: subss %xmm1, %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %rcx
+; CHECK-NEXT: setbe %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: shlq $63, %rax
+; CHECK-NEXT: xorq %rcx, %rax
; CHECK-NEXT: retq
;
; AVX1-LABEL: constrained_vector_fptoui_v1i64_v1f32:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX1-NEXT: vsubss %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vcvttss2si %xmm0, %rax
+; AVX1-NEXT: vcomiss %xmm0, %xmm1
+; AVX1-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: ja .LBB115_2
+; AVX1-NEXT: # %bb.1: # %entry
+; AVX1-NEXT: vmovaps %xmm1, %xmm2
+; AVX1-NEXT: .LBB115_2: # %entry
+; AVX1-NEXT: vsubss %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vcvttss2si %xmm0, %rcx
+; AVX1-NEXT: setbe %al
+; AVX1-NEXT: movzbl %al, %eax
+; AVX1-NEXT: shlq $63, %rax
+; AVX1-NEXT: xorq %rcx, %rax
; AVX1-NEXT: retq
;
; AVX512-LABEL: constrained_vector_fptoui_v1i64_v1f32:
@@ -4493,30 +4513,70 @@ entry:
define <2 x i64> @constrained_vector_fptoui_v2i64_v2f32() #0 {
; CHECK-LABEL: constrained_vector_fptoui_v2i64_v2f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xorps %xmm0, %xmm0
-; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT: subss %xmm0, %xmm1
-; CHECK-NEXT: cvttss2si %xmm1, %rax
-; CHECK-NEXT: movq %rax, %xmm1
; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; CHECK-NEXT: subss %xmm0, %xmm2
+; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT: comiss %xmm2, %xmm1
+; CHECK-NEXT: xorps %xmm0, %xmm0
+; CHECK-NEXT: xorps %xmm3, %xmm3
+; CHECK-NEXT: ja .LBB116_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: movaps %xmm1, %xmm3
+; CHECK-NEXT: .LBB116_2: # %entry
+; CHECK-NEXT: subss %xmm3, %xmm2
; CHECK-NEXT: cvttss2si %xmm2, %rax
-; CHECK-NEXT: movq %rax, %xmm0
-; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT: setbe %cl
+; CHECK-NEXT: movzbl %cl, %ecx
+; CHECK-NEXT: shlq $63, %rcx
+; CHECK-NEXT: xorq %rax, %rcx
+; CHECK-NEXT: movq %rcx, %xmm2
+; CHECK-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; CHECK-NEXT: comiss %xmm3, %xmm1
+; CHECK-NEXT: ja .LBB116_4
+; CHECK-NEXT: # %bb.3: # %entry
+; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: .LBB116_4: # %entry
+; CHECK-NEXT: subss %xmm0, %xmm3
+; CHECK-NEXT: cvttss2si %xmm3, %rax
+; CHECK-NEXT: setbe %cl
+; CHECK-NEXT: movzbl %cl, %ecx
+; CHECK-NEXT: shlq $63, %rcx
+; CHECK-NEXT: xorq %rax, %rcx
+; CHECK-NEXT: movq %rcx, %xmm0
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; CHECK-NEXT: retq
;
; AVX1-LABEL: constrained_vector_fptoui_v2i64_v2f32:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX1-NEXT: vsubss %xmm0, %xmm1, %xmm1
-; AVX1-NEXT: vcvttss2si %xmm1, %rax
-; AVX1-NEXT: vmovq %rax, %xmm1
; AVX1-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; AVX1-NEXT: vsubss %xmm0, %xmm2, %xmm0
+; AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX1-NEXT: vcomiss %xmm2, %xmm0
+; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vxorps %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: ja .LBB116_2
+; AVX1-NEXT: # %bb.1: # %entry
+; AVX1-NEXT: vmovaps %xmm0, %xmm3
+; AVX1-NEXT: .LBB116_2: # %entry
+; AVX1-NEXT: vsubss %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vcvttss2si %xmm2, %rax
+; AVX1-NEXT: setbe %cl
+; AVX1-NEXT: movzbl %cl, %ecx
+; AVX1-NEXT: shlq $63, %rcx
+; AVX1-NEXT: xorq %rax, %rcx
+; AVX1-NEXT: vmovq %rcx, %xmm2
+; AVX1-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; AVX1-NEXT: vcomiss %xmm3, %xmm0
+; AVX1-NEXT: ja .LBB116_4
+; AVX1-NEXT: # %bb.3: # %entry
+; AVX1-NEXT: vmovaps %xmm0, %xmm1
+; AVX1-NEXT: .LBB116_4: # %entry
+; AVX1-NEXT: vsubss %xmm1, %xmm3, %xmm0
; AVX1-NEXT: vcvttss2si %xmm0, %rax
-; AVX1-NEXT: vmovq %rax, %xmm0
-; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX1-NEXT: setbe %cl
+; AVX1-NEXT: movzbl %cl, %ecx
+; AVX1-NEXT: shlq $63, %rcx
+; AVX1-NEXT: xorq %rax, %rcx
+; AVX1-NEXT: vmovq %rcx, %xmm0
+; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX1-NEXT: retq
;
; AVX512-LABEL: constrained_vector_fptoui_v2i64_v2f32:
@@ -4537,35 +4597,95 @@ entry:
define <3 x i64> @constrained_vector_fptoui_v3i64_v3f32() #0 {
; CHECK-LABEL: constrained_vector_fptoui_v3i64_v3f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xorps %xmm0, %xmm0
-; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT: subss %xmm0, %xmm1
-; CHECK-NEXT: cvttss2si %xmm1, %rax
-; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT: subss %xmm0, %xmm1
-; CHECK-NEXT: cvttss2si %xmm1, %rdx
+; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT: subss %xmm0, %xmm1
-; CHECK-NEXT: cvttss2si %xmm1, %rcx
+; CHECK-NEXT: comiss %xmm2, %xmm1
+; CHECK-NEXT: xorps %xmm0, %xmm0
+; CHECK-NEXT: xorps %xmm3, %xmm3
+; CHECK-NEXT: ja .LBB117_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: movaps %xmm1, %xmm3
+; CHECK-NEXT: .LBB117_2: # %entry
+; CHECK-NEXT: subss %xmm3, %xmm2
+; CHECK-NEXT: cvttss2si %xmm2, %rcx
+; CHECK-NEXT: setbe %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: shlq $63, %rax
+; CHECK-NEXT: xorq %rcx, %rax
+; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; CHECK-NEXT: comiss %xmm2, %xmm1
+; CHECK-NEXT: xorps %xmm3, %xmm3
+; CHECK-NEXT: ja .LBB117_4
+; CHECK-NEXT: # %bb.3: # %entry
+; CHECK-NEXT: movaps %xmm1, %xmm3
+; CHECK-NEXT: .LBB117_4: # %entry
+; CHECK-NEXT: subss %xmm3, %xmm2
+; CHECK-NEXT: cvttss2si %xmm2, %rcx
+; CHECK-NEXT: setbe %dl
+; CHECK-NEXT: movzbl %dl, %edx
+; CHECK-NEXT: shlq $63, %rdx
+; CHECK-NEXT: xorq %rcx, %rdx
+; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; CHECK-NEXT: comiss %xmm2, %xmm1
+; CHECK-NEXT: ja .LBB117_6
+; CHECK-NEXT: # %bb.5: # %entry
+; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: .LBB117_6: # %entry
+; CHECK-NEXT: subss %xmm0, %xmm2
+; CHECK-NEXT: cvttss2si %xmm2, %rsi
+; CHECK-NEXT: setbe %cl
+; CHECK-NEXT: movzbl %cl, %ecx
+; CHECK-NEXT: shlq $63, %rcx
+; CHECK-NEXT: xorq %rsi, %rcx
; CHECK-NEXT: retq
;
; AVX1-LABEL: constrained_vector_fptoui_v3i64_v3f32:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX1-NEXT: vsubss %xmm0, %xmm1, %xmm1
-; AVX1-NEXT: vcvttss2si %xmm1, %rax
-; AVX1-NEXT: vmovq %rax, %xmm1
; AVX1-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; AVX1-NEXT: vsubss %xmm0, %xmm2, %xmm2
+; AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX1-NEXT: vcomiss %xmm2, %xmm0
+; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vxorps %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: ja .LBB117_2
+; AVX1-NEXT: # %bb.1: # %entry
+; AVX1-NEXT: vmovaps %xmm0, %xmm3
+; AVX1-NEXT: .LBB117_2: # %entry
+; AVX1-NEXT: vsubss %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vcvttss2si %xmm2, %rax
-; AVX1-NEXT: vmovq %rax, %xmm2
-; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
-; AVX1-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; AVX1-NEXT: vsubss %xmm0, %xmm2, %xmm0
+; AVX1-NEXT: setbe %cl
+; AVX1-NEXT: movzbl %cl, %ecx
+; AVX1-NEXT: shlq $63, %rcx
+; AVX1-NEXT: xorq %rax, %rcx
+; AVX1-NEXT: vmovq %rcx, %xmm2
+; AVX1-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; AVX1-NEXT: vcomiss %xmm3, %xmm0
+; AVX1-NEXT: vxorps %xmm4, %xmm4, %xmm4
+; AVX1-NEXT: ja .LBB117_4
+; AVX1-NEXT: # %bb.3: # %entry
+; AVX1-NEXT: vmovaps %xmm0, %xmm4
+; AVX1-NEXT: .LBB117_4: # %entry
+; AVX1-NEXT: vsubss %xmm4, %xmm3, %xmm3
+; AVX1-NEXT: vcvttss2si %xmm3, %rax
+; AVX1-NEXT: setbe %cl
+; AVX1-NEXT: movzbl %cl, %ecx
+; AVX1-NEXT: shlq $63, %rcx
+; AVX1-NEXT: xorq %rax, %rcx
+; AVX1-NEXT: vmovq %rcx, %xmm3
+; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
+; AVX1-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; AVX1-NEXT: vcomiss %xmm3, %xmm0
+; AVX1-NEXT: ja .LBB117_6
+; AVX1-NEXT: # %bb.5: # %entry
+; AVX1-NEXT: vmovaps %xmm0, %xmm1
+; AVX1-NEXT: .LBB117_6: # %entry
+; AVX1-NEXT: vsubss %xmm1, %xmm3, %xmm0
; AVX1-NEXT: vcvttss2si %xmm0, %rax
-; AVX1-NEXT: vmovq %rax, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: setbe %cl
+; AVX1-NEXT: movzbl %cl, %ecx
+; AVX1-NEXT: shlq $63, %rcx
+; AVX1-NEXT: xorq %rax, %rcx
+; AVX1-NEXT: vmovq %rcx, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: retq
;
; AVX512-LABEL: constrained_vector_fptoui_v3i64_v3f32:
@@ -4590,49 +4710,129 @@ entry:
define <4 x i64> @constrained_vector_fptoui_v4i64_v4f32() #0 {
; CHECK-LABEL: constrained_vector_fptoui_v4i64_v4f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xorps %xmm1, %xmm1
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT: subss %xmm1, %xmm0
-; CHECK-NEXT: cvttss2si %xmm0, %rax
-; CHECK-NEXT: movq %rax, %xmm2
+; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; CHECK-NEXT: comiss %xmm0, %xmm2
+; CHECK-NEXT: xorps %xmm1, %xmm1
+; CHECK-NEXT: xorps %xmm3, %xmm3
+; CHECK-NEXT: ja .LBB118_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: movaps %xmm2, %xmm3
+; CHECK-NEXT: .LBB118_2: # %entry
+; CHECK-NEXT: subss %xmm3, %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %rcx
+; CHECK-NEXT: setbe %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: shlq $63, %rax
+; CHECK-NEXT: xorq %rcx, %rax
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT: subss %xmm1, %xmm0
+; CHECK-NEXT: comiss %xmm0, %xmm2
+; CHECK-NEXT: xorps %xmm4, %xmm4
+; CHECK-NEXT: ja .LBB118_4
+; CHECK-NEXT: # %bb.3: # %entry
+; CHECK-NEXT: movaps %xmm2, %xmm4
+; CHECK-NEXT: .LBB118_4: # %entry
+; CHECK-NEXT: movq %rax, %xmm3
+; CHECK-NEXT: subss %xmm4, %xmm0
; CHECK-NEXT: cvttss2si %xmm0, %rax
-; CHECK-NEXT: movq %rax, %xmm0
-; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
-; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; CHECK-NEXT: subss %xmm1, %xmm2
-; CHECK-NEXT: cvttss2si %xmm2, %rax
-; CHECK-NEXT: movq %rax, %xmm2
-; CHECK-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; CHECK-NEXT: subss %xmm1, %xmm3
-; CHECK-NEXT: cvttss2si %xmm3, %rax
-; CHECK-NEXT: movq %rax, %xmm1
-; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; CHECK-NEXT: setbe %cl
+; CHECK-NEXT: movzbl %cl, %ecx
+; CHECK-NEXT: shlq $63, %rcx
+; CHECK-NEXT: xorq %rax, %rcx
+; CHECK-NEXT: movq %rcx, %xmm0
+; CHECK-NEXT: movss {{.*#+}} xmm4 = mem[0],zero,zero,zero
+; CHECK-NEXT: comiss %xmm4, %xmm2
+; CHECK-NEXT: xorps %xmm5, %xmm5
+; CHECK-NEXT: ja .LBB118_6
+; CHECK-NEXT: # %bb.5: # %entry
+; CHECK-NEXT: movaps %xmm2, %xmm5
+; CHECK-NEXT: .LBB118_6: # %entry
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
+; CHECK-NEXT: subss %xmm5, %xmm4
+; CHECK-NEXT: cvttss2si %xmm4, %rax
+; CHECK-NEXT: setbe %cl
+; CHECK-NEXT: movzbl %cl, %ecx
+; CHECK-NEXT: shlq $63, %rcx
+; CHECK-NEXT: xorq %rax, %rcx
+; CHECK-NEXT: movq %rcx, %xmm3
+; CHECK-NEXT: movss {{.*#+}} xmm4 = mem[0],zero,zero,zero
+; CHECK-NEXT: comiss %xmm4, %xmm2
+; CHECK-NEXT: ja .LBB118_8
+; CHECK-NEXT: # %bb.7: # %entry
+; CHECK-NEXT: movaps %xmm2, %xmm1
+; CHECK-NEXT: .LBB118_8: # %entry
+; CHECK-NEXT: subss %xmm1, %xmm4
+; CHECK-NEXT: cvttss2si %xmm4, %rax
+; CHECK-NEXT: setbe %cl
+; CHECK-NEXT: movzbl %cl, %ecx
+; CHECK-NEXT: shlq $63, %rcx
+; CHECK-NEXT: xorq %rax, %rcx
+; CHECK-NEXT: movq %rcx, %xmm1
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; CHECK-NEXT: retq
;
; AVX1-LABEL: constrained_vector_fptoui_v4i64_v4f32:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX1-NEXT: vsubss %xmm0, %xmm1, %xmm1
-; AVX1-NEXT: vcvttss2si %xmm1, %rax
-; AVX1-NEXT: vmovq %rax, %xmm1
-; AVX1-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; AVX1-NEXT: vsubss %xmm0, %xmm2, %xmm2
-; AVX1-NEXT: vcvttss2si %xmm2, %rax
-; AVX1-NEXT: vmovq %rax, %xmm2
-; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX1-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; AVX1-NEXT: vsubss %xmm0, %xmm2, %xmm2
-; AVX1-NEXT: vcvttss2si %xmm2, %rax
-; AVX1-NEXT: vmovq %rax, %xmm2
+; AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX1-NEXT: vcomiss %xmm2, %xmm0
+; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vxorps %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: ja .LBB118_2
+; AVX1-NEXT: # %bb.1: # %entry
+; AVX1-NEXT: vmovaps %xmm0, %xmm3
+; AVX1-NEXT: .LBB118_2: # %entry
+; AVX1-NEXT: vsubss %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vcvttss2si %xmm2, %rcx
+; AVX1-NEXT: setbe %al
+; AVX1-NEXT: movzbl %al, %eax
+; AVX1-NEXT: shlq $63, %rax
+; AVX1-NEXT: xorq %rcx, %rax
; AVX1-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; AVX1-NEXT: vsubss %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vcomiss %xmm3, %xmm0
+; AVX1-NEXT: vxorps %xmm4, %xmm4, %xmm4
+; AVX1-NEXT: ja .LBB118_4
+; AVX1-NEXT: # %bb.3: # %entry
+; AVX1-NEXT: vmovaps %xmm0, %xmm4
+; AVX1-NEXT: .LBB118_4: # %entry
+; AVX1-NEXT: vmovq %rax, %xmm2
+; AVX1-NEXT: vsubss %xmm4, %xmm3, %xmm3
+; AVX1-NEXT: vcvttss2si %xmm3, %rax
+; AVX1-NEXT: setbe %cl
+; AVX1-NEXT: movzbl %cl, %ecx
+; AVX1-NEXT: shlq $63, %rcx
+; AVX1-NEXT: xorq %rax, %rcx
+; AVX1-NEXT: vmovq %rcx, %xmm3
+; AVX1-NEXT: vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero
+; AVX1-NEXT: vcomiss %xmm4, %xmm0
+; AVX1-NEXT: vxorps %xmm5, %xmm5, %xmm5
+; AVX1-NEXT: ja .LBB118_6
+; AVX1-NEXT: # %bb.5: # %entry
+; AVX1-NEXT: vmovaps %xmm0, %xmm5
+; AVX1-NEXT: .LBB118_6: # %entry
+; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
+; AVX1-NEXT: vsubss %xmm5, %xmm4, %xmm3
+; AVX1-NEXT: vcvttss2si %xmm3, %rax
+; AVX1-NEXT: setbe %cl
+; AVX1-NEXT: movzbl %cl, %ecx
+; AVX1-NEXT: shlq $63, %rcx
+; AVX1-NEXT: xorq %rax, %rcx
+; AVX1-NEXT: vmovq %rcx, %xmm3
+; AVX1-NEXT: vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero
+; AVX1-NEXT: vcomiss %xmm4, %xmm0
+; AVX1-NEXT: ja .LBB118_8
+; AVX1-NEXT: # %bb.7: # %entry
+; AVX1-NEXT: vmovaps %xmm0, %xmm1
+; AVX1-NEXT: .LBB118_8: # %entry
+; AVX1-NEXT: vsubss %xmm1, %xmm4, %xmm0
; AVX1-NEXT: vcvttss2si %xmm0, %rax
-; AVX1-NEXT: vmovq %rax, %xmm0
-; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: setbe %cl
+; AVX1-NEXT: movzbl %cl, %ecx
+; AVX1-NEXT: shlq $63, %rcx
+; AVX1-NEXT: xorq %rax, %rcx
+; AVX1-NEXT: vmovq %rcx, %xmm0
+; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX512F-LABEL: constrained_vector_fptoui_v4i64_v4f32:
@@ -4810,18 +5010,38 @@ entry:
define <1 x i64> @constrained_vector_fptoui_v1i64_v1f64() #0 {
; CHECK-LABEL: constrained_vector_fptoui_v1i64_v1f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xorpd %xmm0, %xmm0
-; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT: subsd %xmm0, %xmm1
-; CHECK-NEXT: cvttsd2si %xmm1, %rax
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
+; CHECK-NEXT: comisd %xmm0, %xmm2
+; CHECK-NEXT: xorpd %xmm1, %xmm1
+; CHECK-NEXT: ja .LBB123_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: movapd %xmm2, %xmm1
+; CHECK-NEXT: .LBB123_2: # %entry
+; CHECK-NEXT: subsd %xmm1, %xmm0
+; CHECK-NEXT: cvttsd2si %xmm0, %rcx
+; CHECK-NEXT: setbe %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: shlq $63, %rax
+; CHECK-NEXT: xorq %rcx, %rax
; CHECK-NEXT: retq
;
; AVX1-LABEL: constrained_vector_fptoui_v1i64_v1f64:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vxorpd %xmm0, %xmm0, %xmm0
+; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX1-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX1-NEXT: vsubsd %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vcvttsd2si %xmm0, %rax
+; AVX1-NEXT: vcomisd %xmm0, %xmm1
+; AVX1-NEXT: vxorpd %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: ja .LBB123_2
+; AVX1-NEXT: # %bb.1: # %entry
+; AVX1-NEXT: vmovapd %xmm1, %xmm2
+; AVX1-NEXT: .LBB123_2: # %entry
+; AVX1-NEXT: vsubsd %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vcvttsd2si %xmm0, %rcx
+; AVX1-NEXT: setbe %al
+; AVX1-NEXT: movzbl %al, %eax
+; AVX1-NEXT: shlq $63, %rax
+; AVX1-NEXT: xorq %rcx, %rax
; AVX1-NEXT: retq
;
; AVX512-LABEL: constrained_vector_fptoui_v1i64_v1f64:
@@ -4838,30 +5058,70 @@ entry:
define <2 x i64> @constrained_vector_fptoui_v2i64_v2f64() #0 {
; CHECK-LABEL: constrained_vector_fptoui_v2i64_v2f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xorpd %xmm0, %xmm0
-; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT: subsd %xmm0, %xmm1
-; CHECK-NEXT: cvttsd2si %xmm1, %rax
-; CHECK-NEXT: movq %rax, %xmm1
; CHECK-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
-; CHECK-NEXT: subsd %xmm0, %xmm2
+; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; CHECK-NEXT: comisd %xmm2, %xmm1
+; CHECK-NEXT: xorpd %xmm0, %xmm0
+; CHECK-NEXT: xorpd %xmm3, %xmm3
+; CHECK-NEXT: ja .LBB124_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: movapd %xmm1, %xmm3
+; CHECK-NEXT: .LBB124_2: # %entry
+; CHECK-NEXT: subsd %xmm3, %xmm2
; CHECK-NEXT: cvttsd2si %xmm2, %rax
-; CHECK-NEXT: movq %rax, %xmm0
-; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT: setbe %cl
+; CHECK-NEXT: movzbl %cl, %ecx
+; CHECK-NEXT: shlq $63, %rcx
+; CHECK-NEXT: xorq %rax, %rcx
+; CHECK-NEXT: movq %rcx, %xmm2
+; CHECK-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero
+; CHECK-NEXT: comisd %xmm3, %xmm1
+; CHECK-NEXT: ja .LBB124_4
+; CHECK-NEXT: # %bb.3: # %entry
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: .LBB124_4: # %entry
+; CHECK-NEXT: subsd %xmm0, %xmm3
+; CHECK-NEXT: cvttsd2si %xmm3, %rax
+; CHECK-NEXT: setbe %cl
+; CHECK-NEXT: movzbl %cl, %ecx
+; CHECK-NEXT: shlq $63, %rcx
+; CHECK-NEXT: xorq %rax, %rcx
+; CHECK-NEXT: movq %rcx, %xmm0
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; CHECK-NEXT: retq
;
; AVX1-LABEL: constrained_vector_fptoui_v2i64_v2f64:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; AVX1-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX1-NEXT: vsubsd %xmm0, %xmm1, %xmm1
-; AVX1-NEXT: vcvttsd2si %xmm1, %rax
-; AVX1-NEXT: vmovq %rax, %xmm1
; AVX1-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
-; AVX1-NEXT: vsubsd %xmm0, %xmm2, %xmm0
+; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX1-NEXT: vcomisd %xmm2, %xmm0
+; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vxorpd %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: ja .LBB124_2
+; AVX1-NEXT: # %bb.1: # %entry
+; AVX1-NEXT: vmovapd %xmm0, %xmm3
+; AVX1-NEXT: .LBB124_2: # %entry
+; AVX1-NEXT: vsubsd %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vcvttsd2si %xmm2, %rax
+; AVX1-NEXT: setbe %cl
+; AVX1-NEXT: movzbl %cl, %ecx
+; AVX1-NEXT: shlq $63, %rcx
+; AVX1-NEXT: xorq %rax, %rcx
+; AVX1-NEXT: vmovq %rcx, %xmm2
+; AVX1-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero
+; AVX1-NEXT: vcomisd %xmm3, %xmm0
+; AVX1-NEXT: ja .LBB124_4
+; AVX1-NEXT: # %bb.3: # %entry
+; AVX1-NEXT: vmovapd %xmm0, %xmm1
+; AVX1-NEXT: .LBB124_4: # %entry
+; AVX1-NEXT: vsubsd %xmm1, %xmm3, %xmm0
; AVX1-NEXT: vcvttsd2si %xmm0, %rax
-; AVX1-NEXT: vmovq %rax, %xmm0
-; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX1-NEXT: setbe %cl
+; AVX1-NEXT: movzbl %cl, %ecx
+; AVX1-NEXT: shlq $63, %rcx
+; AVX1-NEXT: xorq %rax, %rcx
+; AVX1-NEXT: vmovq %rcx, %xmm0
+; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX1-NEXT: retq
;
; AVX512F-LABEL: constrained_vector_fptoui_v2i64_v2f64:
@@ -4890,35 +5150,95 @@ entry:
define <3 x i64> @constrained_vector_fptoui_v3i64_v3f64() #0 {
; CHECK-LABEL: constrained_vector_fptoui_v3i64_v3f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xorpd %xmm0, %xmm0
-; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT: subsd %xmm0, %xmm1
-; CHECK-NEXT: cvttsd2si %xmm1, %rax
-; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT: subsd %xmm0, %xmm1
-; CHECK-NEXT: cvttsd2si %xmm1, %rdx
+; CHECK-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT: subsd %xmm0, %xmm1
-; CHECK-NEXT: cvttsd2si %xmm1, %rcx
+; CHECK-NEXT: comisd %xmm2, %xmm1
+; CHECK-NEXT: xorpd %xmm0, %xmm0
+; CHECK-NEXT: xorpd %xmm3, %xmm3
+; CHECK-NEXT: ja .LBB125_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: movapd %xmm1, %xmm3
+; CHECK-NEXT: .LBB125_2: # %entry
+; CHECK-NEXT: subsd %xmm3, %xmm2
+; CHECK-NEXT: cvttsd2si %xmm2, %rcx
+; CHECK-NEXT: setbe %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: shlq $63, %rax
+; CHECK-NEXT: xorq %rcx, %rax
+; CHECK-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
+; CHECK-NEXT: comisd %xmm2, %xmm1
+; CHECK-NEXT: xorpd %xmm3, %xmm3
+; CHECK-NEXT: ja .LBB125_4
+; CHECK-NEXT: # %bb.3: # %entry
+; CHECK-NEXT: movapd %xmm1, %xmm3
+; CHECK-NEXT: .LBB125_4: # %entry
+; CHECK-NEXT: subsd %xmm3, %xmm2
+; CHECK-NEXT: cvttsd2si %xmm2, %rcx
+; CHECK-NEXT: setbe %dl
+; CHECK-NEXT: movzbl %dl, %edx
+; CHECK-NEXT: shlq $63, %rdx
+; CHECK-NEXT: xorq %rcx, %rdx
+; CHECK-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
+; CHECK-NEXT: comisd %xmm2, %xmm1
+; CHECK-NEXT: ja .LBB125_6
+; CHECK-NEXT: # %bb.5: # %entry
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: .LBB125_6: # %entry
+; CHECK-NEXT: subsd %xmm0, %xmm2
+; CHECK-NEXT: cvttsd2si %xmm2, %rsi
+; CHECK-NEXT: setbe %cl
+; CHECK-NEXT: movzbl %cl, %ecx
+; CHECK-NEXT: shlq $63, %rcx
+; CHECK-NEXT: xorq %rsi, %rcx
; CHECK-NEXT: retq
;
; AVX1-LABEL: constrained_vector_fptoui_v3i64_v3f64:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; AVX1-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX1-NEXT: vsubsd %xmm0, %xmm1, %xmm1
-; AVX1-NEXT: vcvttsd2si %xmm1, %rax
-; AVX1-NEXT: vmovq %rax, %xmm1
; AVX1-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
-; AVX1-NEXT: vsubsd %xmm0, %xmm2, %xmm2
+; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX1-NEXT: vcomisd %xmm2, %xmm0
+; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vxorpd %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: ja .LBB125_2
+; AVX1-NEXT: # %bb.1: # %entry
+; AVX1-NEXT: vmovapd %xmm0, %xmm3
+; AVX1-NEXT: .LBB125_2: # %entry
+; AVX1-NEXT: vsubsd %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vcvttsd2si %xmm2, %rax
-; AVX1-NEXT: vmovq %rax, %xmm2
-; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
-; AVX1-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
-; AVX1-NEXT: vsubsd %xmm0, %xmm2, %xmm0
+; AVX1-NEXT: setbe %cl
+; AVX1-NEXT: movzbl %cl, %ecx
+; AVX1-NEXT: shlq $63, %rcx
+; AVX1-NEXT: xorq %rax, %rcx
+; AVX1-NEXT: vmovq %rcx, %xmm2
+; AVX1-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero
+; AVX1-NEXT: vcomisd %xmm3, %xmm0
+; AVX1-NEXT: vxorpd %xmm4, %xmm4, %xmm4
+; AVX1-NEXT: ja .LBB125_4
+; AVX1-NEXT: # %bb.3: # %entry
+; AVX1-NEXT: vmovapd %xmm0, %xmm4
+; AVX1-NEXT: .LBB125_4: # %entry
+; AVX1-NEXT: vsubsd %xmm4, %xmm3, %xmm3
+; AVX1-NEXT: vcvttsd2si %xmm3, %rax
+; AVX1-NEXT: setbe %cl
+; AVX1-NEXT: movzbl %cl, %ecx
+; AVX1-NEXT: shlq $63, %rcx
+; AVX1-NEXT: xorq %rax, %rcx
+; AVX1-NEXT: vmovq %rcx, %xmm3
+; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
+; AVX1-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero
+; AVX1-NEXT: vcomisd %xmm3, %xmm0
+; AVX1-NEXT: ja .LBB125_6
+; AVX1-NEXT: # %bb.5: # %entry
+; AVX1-NEXT: vmovapd %xmm0, %xmm1
+; AVX1-NEXT: .LBB125_6: # %entry
+; AVX1-NEXT: vsubsd %xmm1, %xmm3, %xmm0
; AVX1-NEXT: vcvttsd2si %xmm0, %rax
-; AVX1-NEXT: vmovq %rax, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: setbe %cl
+; AVX1-NEXT: movzbl %cl, %ecx
+; AVX1-NEXT: shlq $63, %rcx
+; AVX1-NEXT: xorq %rax, %rcx
+; AVX1-NEXT: vmovq %rcx, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: retq
;
; AVX512-LABEL: constrained_vector_fptoui_v3i64_v3f64:
@@ -4943,49 +5263,129 @@ entry:
define <4 x i64> @constrained_vector_fptoui_v4i64_v4f64() #0 {
; CHECK-LABEL: constrained_vector_fptoui_v4i64_v4f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xorpd %xmm1, %xmm1
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: subsd %xmm1, %xmm0
-; CHECK-NEXT: cvttsd2si %xmm0, %rax
-; CHECK-NEXT: movq %rax, %xmm2
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: subsd %xmm1, %xmm0
-; CHECK-NEXT: cvttsd2si %xmm0, %rax
-; CHECK-NEXT: movq %rax, %xmm0
-; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; CHECK-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
-; CHECK-NEXT: subsd %xmm1, %xmm2
-; CHECK-NEXT: cvttsd2si %xmm2, %rax
-; CHECK-NEXT: movq %rax, %xmm2
-; CHECK-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero
-; CHECK-NEXT: subsd %xmm1, %xmm3
-; CHECK-NEXT: cvttsd2si %xmm3, %rax
-; CHECK-NEXT: movq %rax, %xmm1
-; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; CHECK-NEXT: comisd %xmm0, %xmm2
+; CHECK-NEXT: xorpd %xmm1, %xmm1
+; CHECK-NEXT: xorpd %xmm3, %xmm3
+; CHECK-NEXT: ja .LBB126_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: movapd %xmm2, %xmm3
+; CHECK-NEXT: .LBB126_2: # %entry
+; CHECK-NEXT: subsd %xmm3, %xmm0
+; CHECK-NEXT: cvttsd2si %xmm0, %rcx
+; CHECK-NEXT: setbe %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: shlq $63, %rax
+; CHECK-NEXT: xorq %rcx, %rax
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT: comisd %xmm0, %xmm2
+; CHECK-NEXT: xorpd %xmm4, %xmm4
+; CHECK-NEXT: ja .LBB126_4
+; CHECK-NEXT: # %bb.3: # %entry
+; CHECK-NEXT: movapd %xmm2, %xmm4
+; CHECK-NEXT: .LBB126_4: # %entry
+; CHECK-NEXT: movq %rax, %xmm3
+; CHECK-NEXT: subsd %xmm4, %xmm0
+; CHECK-NEXT: cvttsd2si %xmm0, %rax
+; CHECK-NEXT: setbe %cl
+; CHECK-NEXT: movzbl %cl, %ecx
+; CHECK-NEXT: shlq $63, %rcx
+; CHECK-NEXT: xorq %rax, %rcx
+; CHECK-NEXT: movq %rcx, %xmm0
+; CHECK-NEXT: movsd {{.*#+}} xmm4 = mem[0],zero
+; CHECK-NEXT: comisd %xmm4, %xmm2
+; CHECK-NEXT: xorpd %xmm5, %xmm5
+; CHECK-NEXT: ja .LBB126_6
+; CHECK-NEXT: # %bb.5: # %entry
+; CHECK-NEXT: movapd %xmm2, %xmm5
+; CHECK-NEXT: .LBB126_6: # %entry
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
+; CHECK-NEXT: subsd %xmm5, %xmm4
+; CHECK-NEXT: cvttsd2si %xmm4, %rax
+; CHECK-NEXT: setbe %cl
+; CHECK-NEXT: movzbl %cl, %ecx
+; CHECK-NEXT: shlq $63, %rcx
+; CHECK-NEXT: xorq %rax, %rcx
+; CHECK-NEXT: movq %rcx, %xmm3
+; CHECK-NEXT: movsd {{.*#+}} xmm4 = mem[0],zero
+; CHECK-NEXT: comisd %xmm4, %xmm2
+; CHECK-NEXT: ja .LBB126_8
+; CHECK-NEXT: # %bb.7: # %entry
+; CHECK-NEXT: movapd %xmm2, %xmm1
+; CHECK-NEXT: .LBB126_8: # %entry
+; CHECK-NEXT: subsd %xmm1, %xmm4
+; CHECK-NEXT: cvttsd2si %xmm4, %rax
+; CHECK-NEXT: setbe %cl
+; CHECK-NEXT: movzbl %cl, %ecx
+; CHECK-NEXT: shlq $63, %rcx
+; CHECK-NEXT: xorq %rax, %rcx
+; CHECK-NEXT: movq %rcx, %xmm1
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; CHECK-NEXT: retq
;
; AVX1-LABEL: constrained_vector_fptoui_v4i64_v4f64:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; AVX1-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX1-NEXT: vsubsd %xmm0, %xmm1, %xmm1
-; AVX1-NEXT: vcvttsd2si %xmm1, %rax
-; AVX1-NEXT: vmovq %rax, %xmm1
; AVX1-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
-; AVX1-NEXT: vsubsd %xmm0, %xmm2, %xmm2
-; AVX1-NEXT: vcvttsd2si %xmm2, %rax
-; AVX1-NEXT: vmovq %rax, %xmm2
-; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
-; AVX1-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
-; AVX1-NEXT: vsubsd %xmm0, %xmm2, %xmm2
-; AVX1-NEXT: vcvttsd2si %xmm2, %rax
-; AVX1-NEXT: vmovq %rax, %xmm2
+; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX1-NEXT: vcomisd %xmm2, %xmm0
+; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vxorpd %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: ja .LBB126_2
+; AVX1-NEXT: # %bb.1: # %entry
+; AVX1-NEXT: vmovapd %xmm0, %xmm3
+; AVX1-NEXT: .LBB126_2: # %entry
+; AVX1-NEXT: vsubsd %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vcvttsd2si %xmm2, %rcx
+; AVX1-NEXT: setbe %al
+; AVX1-NEXT: movzbl %al, %eax
+; AVX1-NEXT: shlq $63, %rax
+; AVX1-NEXT: xorq %rcx, %rax
; AVX1-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero
-; AVX1-NEXT: vsubsd %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vcomisd %xmm3, %xmm0
+; AVX1-NEXT: vxorpd %xmm4, %xmm4, %xmm4
+; AVX1-NEXT: ja .LBB126_4
+; AVX1-NEXT: # %bb.3: # %entry
+; AVX1-NEXT: vmovapd %xmm0, %xmm4
+; AVX1-NEXT: .LBB126_4: # %entry
+; AVX1-NEXT: vmovq %rax, %xmm2
+; AVX1-NEXT: vsubsd %xmm4, %xmm3, %xmm3
+; AVX1-NEXT: vcvttsd2si %xmm3, %rax
+; AVX1-NEXT: setbe %cl
+; AVX1-NEXT: movzbl %cl, %ecx
+; AVX1-NEXT: shlq $63, %rcx
+; AVX1-NEXT: xorq %rax, %rcx
+; AVX1-NEXT: vmovq %rcx, %xmm3
+; AVX1-NEXT: vmovsd {{.*#+}} xmm4 = mem[0],zero
+; AVX1-NEXT: vcomisd %xmm4, %xmm0
+; AVX1-NEXT: vxorpd %xmm5, %xmm5, %xmm5
+; AVX1-NEXT: ja .LBB126_6
+; AVX1-NEXT: # %bb.5: # %entry
+; AVX1-NEXT: vmovapd %xmm0, %xmm5
+; AVX1-NEXT: .LBB126_6: # %entry
+; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
+; AVX1-NEXT: vsubsd %xmm5, %xmm4, %xmm3
+; AVX1-NEXT: vcvttsd2si %xmm3, %rax
+; AVX1-NEXT: setbe %cl
+; AVX1-NEXT: movzbl %cl, %ecx
+; AVX1-NEXT: shlq $63, %rcx
+; AVX1-NEXT: xorq %rax, %rcx
+; AVX1-NEXT: vmovq %rcx, %xmm3
+; AVX1-NEXT: vmovsd {{.*#+}} xmm4 = mem[0],zero
+; AVX1-NEXT: vcomisd %xmm4, %xmm0
+; AVX1-NEXT: ja .LBB126_8
+; AVX1-NEXT: # %bb.7: # %entry
+; AVX1-NEXT: vmovapd %xmm0, %xmm1
+; AVX1-NEXT: .LBB126_8: # %entry
+; AVX1-NEXT: vsubsd %xmm1, %xmm4, %xmm0
; AVX1-NEXT: vcvttsd2si %xmm0, %rax
-; AVX1-NEXT: vmovq %rax, %xmm0
-; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: setbe %cl
+; AVX1-NEXT: movzbl %cl, %ecx
+; AVX1-NEXT: shlq $63, %rcx
+; AVX1-NEXT: xorq %rax, %rcx
+; AVX1-NEXT: vmovq %rcx, %xmm0
+; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX512F-LABEL: constrained_vector_fptoui_v4i64_v4f64:
@@ -6384,34 +6784,34 @@ entry:
define <1 x float> @constrained_vector_uitofp_v1f32_v1i64(<1 x i64> %x) #0 {
; CHECK-LABEL: constrained_vector_uitofp_v1f32_v1i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: testq %rdi, %rdi
-; CHECK-NEXT: js .LBB170_1
-; CHECK-NEXT: # %bb.2: # %entry
-; CHECK-NEXT: cvtsi2ss %rdi, %xmm0
-; CHECK-NEXT: retq
-; CHECK-NEXT: .LBB170_1:
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: shrq %rax
-; CHECK-NEXT: andl $1, %edi
-; CHECK-NEXT: orq %rax, %rdi
-; CHECK-NEXT: cvtsi2ss %rdi, %xmm0
+; CHECK-NEXT: movl %edi, %ecx
+; CHECK-NEXT: andl $1, %ecx
+; CHECK-NEXT: orq %rax, %rcx
+; CHECK-NEXT: testq %rdi, %rdi
+; CHECK-NEXT: cmovnsq %rdi, %rcx
+; CHECK-NEXT: cvtsi2ss %rcx, %xmm0
+; CHECK-NEXT: jns .LBB170_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: addss %xmm0, %xmm0
+; CHECK-NEXT: .LBB170_2: # %entry
; CHECK-NEXT: retq
;
; AVX1-LABEL: constrained_vector_uitofp_v1f32_v1i64:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: testq %rdi, %rdi
-; AVX1-NEXT: js .LBB170_1
-; AVX1-NEXT: # %bb.2: # %entry
-; AVX1-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0
-; AVX1-NEXT: retq
-; AVX1-NEXT: .LBB170_1:
; AVX1-NEXT: movq %rdi, %rax
; AVX1-NEXT: shrq %rax
-; AVX1-NEXT: andl $1, %edi
-; AVX1-NEXT: orq %rax, %rdi
-; AVX1-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0
+; AVX1-NEXT: movl %edi, %ecx
+; AVX1-NEXT: andl $1, %ecx
+; AVX1-NEXT: orq %rax, %rcx
+; AVX1-NEXT: testq %rdi, %rdi
+; AVX1-NEXT: cmovnsq %rdi, %rcx
+; AVX1-NEXT: vcvtsi2ss %rcx, %xmm0, %xmm0
+; AVX1-NEXT: jns .LBB170_2
+; AVX1-NEXT: # %bb.1:
; AVX1-NEXT: vaddss %xmm0, %xmm0, %xmm0
+; AVX1-NEXT: .LBB170_2: # %entry
; AVX1-NEXT: retq
;
; AVX512-LABEL: constrained_vector_uitofp_v1f32_v1i64:
@@ -6548,74 +6948,65 @@ define <2 x float> @constrained_vector_uitofp_v2f32_v2i64(<2 x i64> %x) #0 {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movdqa %xmm0, %xmm1
; CHECK-NEXT: movq %xmm0, %rax
-; CHECK-NEXT: testq %rax, %rax
-; CHECK-NEXT: js .LBB174_1
-; CHECK-NEXT: # %bb.2: # %entry
-; CHECK-NEXT: xorps %xmm0, %xmm0
-; CHECK-NEXT: cvtsi2ss %rax, %xmm0
-; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; CHECK-NEXT: movq %xmm1, %rax
-; CHECK-NEXT: testq %rax, %rax
-; CHECK-NEXT: jns .LBB174_5
-; CHECK-NEXT: .LBB174_4:
; CHECK-NEXT: movq %rax, %rcx
; CHECK-NEXT: shrq %rcx
-; CHECK-NEXT: andl $1, %eax
-; CHECK-NEXT: orq %rcx, %rax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: cvtsi2ss %rax, %xmm1
-; CHECK-NEXT: addss %xmm1, %xmm1
-; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; CHECK-NEXT: retq
-; CHECK-NEXT: .LBB174_1:
-; CHECK-NEXT: movq %rax, %rcx
-; CHECK-NEXT: shrq %rcx
-; CHECK-NEXT: andl $1, %eax
-; CHECK-NEXT: orq %rcx, %rax
+; CHECK-NEXT: movl %eax, %edx
+; CHECK-NEXT: andl $1, %edx
+; CHECK-NEXT: orq %rcx, %rdx
+; CHECK-NEXT: testq %rax, %rax
+; CHECK-NEXT: cmovnsq %rax, %rdx
; CHECK-NEXT: xorps %xmm0, %xmm0
-; CHECK-NEXT: cvtsi2ss %rax, %xmm0
+; CHECK-NEXT: cvtsi2ss %rdx, %xmm0
+; CHECK-NEXT: jns .LBB174_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: addss %xmm0, %xmm0
+; CHECK-NEXT: .LBB174_2: # %entry
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; CHECK-NEXT: movq %xmm1, %rax
+; CHECK-NEXT: movq %rax, %rcx
+; CHECK-NEXT: shrq %rcx
+; CHECK-NEXT: movl %eax, %edx
+; CHECK-NEXT: andl $1, %edx
+; CHECK-NEXT: orq %rcx, %rdx
; CHECK-NEXT: testq %rax, %rax
-; CHECK-NEXT: js .LBB174_4
-; CHECK-NEXT: .LBB174_5: # %entry
+; CHECK-NEXT: cmovnsq %rax, %rdx
; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: cvtsi2ss %rax, %xmm1
+; CHECK-NEXT: cvtsi2ss %rdx, %xmm1
+; CHECK-NEXT: jns .LBB174_4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: addss %xmm1, %xmm1
+; CHECK-NEXT: .LBB174_4: # %entry
; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT: retq
;
; AVX1-LABEL: constrained_vector_uitofp_v2f32_v2i64:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vpextrq $1, %xmm0, %rax
-; AVX1-NEXT: testq %rax, %rax
-; AVX1-NEXT: js .LBB174_1
-; AVX1-NEXT: # %bb.2: # %entry
-; AVX1-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
-; AVX1-NEXT: vmovq %xmm0, %rax
-; AVX1-NEXT: testq %rax, %rax
-; AVX1-NEXT: jns .LBB174_5
-; AVX1-NEXT: .LBB174_4:
; AVX1-NEXT: movq %rax, %rcx
; AVX1-NEXT: shrq %rcx
-; AVX1-NEXT: andl $1, %eax
-; AVX1-NEXT: orq %rcx, %rax
-; AVX1-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
-; AVX1-NEXT: vaddss %xmm0, %xmm0, %xmm0
-; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX1-NEXT: retq
-; AVX1-NEXT: .LBB174_1:
-; AVX1-NEXT: movq %rax, %rcx
-; AVX1-NEXT: shrq %rcx
-; AVX1-NEXT: andl $1, %eax
-; AVX1-NEXT: orq %rcx, %rax
-; AVX1-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
+; AVX1-NEXT: movl %eax, %edx
+; AVX1-NEXT: andl $1, %edx
+; AVX1-NEXT: orq %rcx, %rdx
+; AVX1-NEXT: testq %rax, %rax
+; AVX1-NEXT: cmovnsq %rax, %rdx
+; AVX1-NEXT: vcvtsi2ss %rdx, %xmm1, %xmm1
+; AVX1-NEXT: jns .LBB174_2
+; AVX1-NEXT: # %bb.1:
; AVX1-NEXT: vaddss %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: .LBB174_2: # %entry
; AVX1-NEXT: vmovq %xmm0, %rax
+; AVX1-NEXT: movq %rax, %rcx
+; AVX1-NEXT: shrq %rcx
+; AVX1-NEXT: movl %eax, %edx
+; AVX1-NEXT: andl $1, %edx
+; AVX1-NEXT: orq %rcx, %rdx
; AVX1-NEXT: testq %rax, %rax
-; AVX1-NEXT: js .LBB174_4
-; AVX1-NEXT: .LBB174_5: # %entry
-; AVX1-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
+; AVX1-NEXT: cmovnsq %rax, %rdx
+; AVX1-NEXT: vcvtsi2ss %rdx, %xmm2, %xmm0
+; AVX1-NEXT: jns .LBB174_4
+; AVX1-NEXT: # %bb.3:
+; AVX1-NEXT: vaddss %xmm0, %xmm0, %xmm0
+; AVX1-NEXT: .LBB174_4: # %entry
; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; AVX1-NEXT: retq
;
@@ -6805,100 +7196,90 @@ entry:
define <3 x float> @constrained_vector_uitofp_v3f32_v3i64(<3 x i64> %x) #0 {
; CHECK-LABEL: constrained_vector_uitofp_v3f32_v3i64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movq %rsi, %rax
+; CHECK-NEXT: shrq %rax
+; CHECK-NEXT: movl %esi, %ecx
+; CHECK-NEXT: andl $1, %ecx
+; CHECK-NEXT: orq %rax, %rcx
; CHECK-NEXT: testq %rsi, %rsi
-; CHECK-NEXT: js .LBB178_1
-; CHECK-NEXT: # %bb.2: # %entry
-; CHECK-NEXT: cvtsi2ss %rsi, %xmm1
-; CHECK-NEXT: testq %rdi, %rdi
-; CHECK-NEXT: jns .LBB178_5
-; CHECK-NEXT: .LBB178_4:
+; CHECK-NEXT: cmovnsq %rsi, %rcx
+; CHECK-NEXT: cvtsi2ss %rcx, %xmm1
+; CHECK-NEXT: jns .LBB178_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: addss %xmm1, %xmm1
+; CHECK-NEXT: .LBB178_2: # %entry
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: shrq %rax
-; CHECK-NEXT: andl $1, %edi
-; CHECK-NEXT: orq %rax, %rdi
-; CHECK-NEXT: cvtsi2ss %rdi, %xmm0
+; CHECK-NEXT: movl %edi, %ecx
+; CHECK-NEXT: andl $1, %ecx
+; CHECK-NEXT: orq %rax, %rcx
+; CHECK-NEXT: testq %rdi, %rdi
+; CHECK-NEXT: cmovnsq %rdi, %rcx
+; CHECK-NEXT: cvtsi2ss %rcx, %xmm0
+; CHECK-NEXT: jns .LBB178_4
+; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: addss %xmm0, %xmm0
+; CHECK-NEXT: .LBB178_4: # %entry
; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; CHECK-NEXT: testq %rdx, %rdx
-; CHECK-NEXT: jns .LBB178_8
-; CHECK-NEXT: .LBB178_7:
; CHECK-NEXT: movq %rdx, %rax
; CHECK-NEXT: shrq %rax
-; CHECK-NEXT: andl $1, %edx
-; CHECK-NEXT: orq %rax, %rdx
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: cvtsi2ss %rdx, %xmm1
-; CHECK-NEXT: addss %xmm1, %xmm1
-; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; CHECK-NEXT: retq
-; CHECK-NEXT: .LBB178_1:
-; CHECK-NEXT: movq %rsi, %rax
-; CHECK-NEXT: shrq %rax
-; CHECK-NEXT: andl $1, %esi
-; CHECK-NEXT: orq %rax, %rsi
-; CHECK-NEXT: cvtsi2ss %rsi, %xmm1
-; CHECK-NEXT: addss %xmm1, %xmm1
-; CHECK-NEXT: testq %rdi, %rdi
-; CHECK-NEXT: js .LBB178_4
-; CHECK-NEXT: .LBB178_5: # %entry
-; CHECK-NEXT: cvtsi2ss %rdi, %xmm0
-; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; CHECK-NEXT: movl %edx, %ecx
+; CHECK-NEXT: andl $1, %ecx
+; CHECK-NEXT: orq %rax, %rcx
; CHECK-NEXT: testq %rdx, %rdx
-; CHECK-NEXT: js .LBB178_7
-; CHECK-NEXT: .LBB178_8: # %entry
+; CHECK-NEXT: cmovnsq %rdx, %rcx
; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: cvtsi2ss %rdx, %xmm1
+; CHECK-NEXT: cvtsi2ss %rcx, %xmm1
+; CHECK-NEXT: jns .LBB178_6
+; CHECK-NEXT: # %bb.5:
+; CHECK-NEXT: addss %xmm1, %xmm1
+; CHECK-NEXT: .LBB178_6: # %entry
; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: retq
;
; AVX1-LABEL: constrained_vector_uitofp_v3f32_v3i64:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vpextrq $1, %xmm0, %rax
-; AVX1-NEXT: testq %rax, %rax
-; AVX1-NEXT: js .LBB178_1
-; AVX1-NEXT: # %bb.2: # %entry
-; AVX1-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
-; AVX1-NEXT: vmovq %xmm0, %rax
-; AVX1-NEXT: testq %rax, %rax
-; AVX1-NEXT: jns .LBB178_5
-; AVX1-NEXT: .LBB178_4:
; AVX1-NEXT: movq %rax, %rcx
; AVX1-NEXT: shrq %rcx
-; AVX1-NEXT: andl $1, %eax
-; AVX1-NEXT: orq %rcx, %rax
-; AVX1-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
-; AVX1-NEXT: vaddss %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: jmp .LBB178_6
-; AVX1-NEXT: .LBB178_1:
-; AVX1-NEXT: movq %rax, %rcx
-; AVX1-NEXT: shrq %rcx
-; AVX1-NEXT: andl $1, %eax
-; AVX1-NEXT: orq %rcx, %rax
-; AVX1-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
+; AVX1-NEXT: movl %eax, %edx
+; AVX1-NEXT: andl $1, %edx
+; AVX1-NEXT: orq %rcx, %rdx
+; AVX1-NEXT: testq %rax, %rax
+; AVX1-NEXT: cmovnsq %rax, %rdx
+; AVX1-NEXT: vcvtsi2ss %rdx, %xmm1, %xmm1
+; AVX1-NEXT: jns .LBB178_2
+; AVX1-NEXT: # %bb.1:
; AVX1-NEXT: vaddss %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: .LBB178_2: # %entry
; AVX1-NEXT: vmovq %xmm0, %rax
+; AVX1-NEXT: movq %rax, %rcx
+; AVX1-NEXT: shrq %rcx
+; AVX1-NEXT: movl %eax, %edx
+; AVX1-NEXT: andl $1, %edx
+; AVX1-NEXT: orq %rcx, %rdx
; AVX1-NEXT: testq %rax, %rax
-; AVX1-NEXT: js .LBB178_4
-; AVX1-NEXT: .LBB178_5: # %entry
-; AVX1-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
-; AVX1-NEXT: .LBB178_6: # %entry
+; AVX1-NEXT: cmovnsq %rax, %rdx
+; AVX1-NEXT: vcvtsi2ss %rdx, %xmm2, %xmm2
+; AVX1-NEXT: jns .LBB178_4
+; AVX1-NEXT: # %bb.3:
+; AVX1-NEXT: vaddss %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: .LBB178_4: # %entry
; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vmovq %xmm0, %rax
-; AVX1-NEXT: testq %rax, %rax
-; AVX1-NEXT: js .LBB178_7
-; AVX1-NEXT: # %bb.8: # %entry
-; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
-; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
-; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: retq
-; AVX1-NEXT: .LBB178_7:
; AVX1-NEXT: movq %rax, %rcx
; AVX1-NEXT: shrq %rcx
-; AVX1-NEXT: andl $1, %eax
-; AVX1-NEXT: orq %rcx, %rax
-; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
+; AVX1-NEXT: movl %eax, %edx
+; AVX1-NEXT: andl $1, %edx
+; AVX1-NEXT: orq %rcx, %rdx
+; AVX1-NEXT: testq %rax, %rax
+; AVX1-NEXT: cmovnsq %rax, %rdx
+; AVX1-NEXT: vcvtsi2ss %rdx, %xmm3, %xmm0
+; AVX1-NEXT: jns .LBB178_6
+; AVX1-NEXT: # %bb.5:
; AVX1-NEXT: vaddss %xmm0, %xmm0, %xmm0
+; AVX1-NEXT: .LBB178_6: # %entry
; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -6949,10 +7330,10 @@ define <4 x double> @constrained_vector_uitofp_v4f64_v4i32(<4 x i32> %x) #0 {
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
-; AVX1-NEXT: vcvtdq2pd %xmm1, %ymm1
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0
; AVX1-NEXT: vmulpd {{.*}}(%rip), %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2pd %xmm1, %ymm1
; AVX1-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
@@ -6978,12 +7359,12 @@ entry:
define <4 x float> @constrained_vector_uitofp_v4f32_v4i32(<4 x i32> %x) #0 {
; CHECK-LABEL: constrained_vector_uitofp_v4f32_v4i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movaps {{.*#+}} xmm1 = [65535,65535,65535,65535]
-; CHECK-NEXT: andps %xmm0, %xmm1
+; CHECK-NEXT: movdqa %xmm0, %xmm1
+; CHECK-NEXT: psrld $16, %xmm1
; CHECK-NEXT: cvtdq2ps %xmm1, %xmm1
-; CHECK-NEXT: psrld $16, %xmm0
+; CHECK-NEXT: mulps {{.*}}(%rip), %xmm1
+; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
; CHECK-NEXT: cvtdq2ps %xmm0, %xmm0
-; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0
; CHECK-NEXT: addps %xmm1, %xmm0
; CHECK-NEXT: retq
;
@@ -6991,10 +7372,10 @@ define <4 x float> @constrained_vector_uitofp_v4f32_v4i32(<4 x i32> %x) #0 {
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
-; AVX1-NEXT: vcvtdq2ps %xmm1, %xmm1
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
; AVX1-NEXT: vcvtdq2ps %xmm0, %xmm0
; AVX1-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vcvtdq2ps %xmm1, %xmm1
; AVX1-NEXT: vaddps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
@@ -7078,73 +7459,62 @@ define <4 x float> @constrained_vector_uitofp_v4f32_v4i64(<4 x i64> %x) #0 {
; CHECK-LABEL: constrained_vector_uitofp_v4f32_v4i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movq %xmm1, %rax
+; CHECK-NEXT: movq %rax, %rcx
+; CHECK-NEXT: shrq %rcx
+; CHECK-NEXT: movl %eax, %edx
+; CHECK-NEXT: andl $1, %edx
+; CHECK-NEXT: orq %rcx, %rdx
; CHECK-NEXT: testq %rax, %rax
-; CHECK-NEXT: js .LBB182_1
-; CHECK-NEXT: # %bb.2: # %entry
-; CHECK-NEXT: cvtsi2ss %rax, %xmm2
+; CHECK-NEXT: cmovnsq %rax, %rdx
+; CHECK-NEXT: cvtsi2ss %rdx, %xmm2
+; CHECK-NEXT: jns .LBB182_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: addss %xmm2, %xmm2
+; CHECK-NEXT: .LBB182_2: # %entry
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; CHECK-NEXT: movq %xmm1, %rax
-; CHECK-NEXT: testq %rax, %rax
-; CHECK-NEXT: jns .LBB182_5
-; CHECK-NEXT: .LBB182_4:
; CHECK-NEXT: movq %rax, %rcx
; CHECK-NEXT: shrq %rcx
-; CHECK-NEXT: andl $1, %eax
-; CHECK-NEXT: orq %rcx, %rax
-; CHECK-NEXT: cvtsi2ss %rax, %xmm3
+; CHECK-NEXT: movl %eax, %edx
+; CHECK-NEXT: andl $1, %edx
+; CHECK-NEXT: orq %rcx, %rdx
+; CHECK-NEXT: testq %rax, %rax
+; CHECK-NEXT: cmovnsq %rax, %rdx
+; CHECK-NEXT: cvtsi2ss %rdx, %xmm3
+; CHECK-NEXT: jns .LBB182_4
+; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: addss %xmm3, %xmm3
+; CHECK-NEXT: .LBB182_4: # %entry
; CHECK-NEXT: movq %xmm0, %rax
-; CHECK-NEXT: testq %rax, %rax
-; CHECK-NEXT: jns .LBB182_8
-; CHECK-NEXT: .LBB182_7:
; CHECK-NEXT: movq %rax, %rcx
; CHECK-NEXT: shrq %rcx
-; CHECK-NEXT: andl $1, %eax
-; CHECK-NEXT: orq %rcx, %rax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: cvtsi2ss %rax, %xmm1
-; CHECK-NEXT: addss %xmm1, %xmm1
-; CHECK-NEXT: jmp .LBB182_9
-; CHECK-NEXT: .LBB182_1:
-; CHECK-NEXT: movq %rax, %rcx
-; CHECK-NEXT: shrq %rcx
-; CHECK-NEXT: andl $1, %eax
-; CHECK-NEXT: orq %rcx, %rax
-; CHECK-NEXT: cvtsi2ss %rax, %xmm2
-; CHECK-NEXT: addss %xmm2, %xmm2
-; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; CHECK-NEXT: movq %xmm1, %rax
-; CHECK-NEXT: testq %rax, %rax
-; CHECK-NEXT: js .LBB182_4
-; CHECK-NEXT: .LBB182_5: # %entry
-; CHECK-NEXT: cvtsi2ss %rax, %xmm3
-; CHECK-NEXT: movq %xmm0, %rax
+; CHECK-NEXT: movl %eax, %edx
+; CHECK-NEXT: andl $1, %edx
+; CHECK-NEXT: orq %rcx, %rdx
; CHECK-NEXT: testq %rax, %rax
-; CHECK-NEXT: js .LBB182_7
-; CHECK-NEXT: .LBB182_8: # %entry
+; CHECK-NEXT: cmovnsq %rax, %rdx
; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: cvtsi2ss %rax, %xmm1
-; CHECK-NEXT: .LBB182_9: # %entry
+; CHECK-NEXT: cvtsi2ss %rdx, %xmm1
+; CHECK-NEXT: jns .LBB182_6
+; CHECK-NEXT: # %bb.5:
+; CHECK-NEXT: addss %xmm1, %xmm1
+; CHECK-NEXT: .LBB182_6: # %entry
; CHECK-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; CHECK-NEXT: movq %xmm0, %rax
-; CHECK-NEXT: testq %rax, %rax
-; CHECK-NEXT: js .LBB182_10
-; CHECK-NEXT: # %bb.11: # %entry
-; CHECK-NEXT: xorps %xmm0, %xmm0
-; CHECK-NEXT: cvtsi2ss %rax, %xmm0
-; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
-; CHECK-NEXT: movaps %xmm1, %xmm0
-; CHECK-NEXT: retq
-; CHECK-NEXT: .LBB182_10:
; CHECK-NEXT: movq %rax, %rcx
; CHECK-NEXT: shrq %rcx
-; CHECK-NEXT: andl $1, %eax
-; CHECK-NEXT: orq %rcx, %rax
+; CHECK-NEXT: movl %eax, %edx
+; CHECK-NEXT: andl $1, %edx
+; CHECK-NEXT: orq %rcx, %rdx
+; CHECK-NEXT: testq %rax, %rax
+; CHECK-NEXT: cmovnsq %rax, %rdx
; CHECK-NEXT: xorps %xmm0, %xmm0
-; CHECK-NEXT: cvtsi2ss %rax, %xmm0
+; CHECK-NEXT: cvtsi2ss %rdx, %xmm0
+; CHECK-NEXT: jns .LBB182_8
+; CHECK-NEXT: # %bb.7:
; CHECK-NEXT: addss %xmm0, %xmm0
+; CHECK-NEXT: .LBB182_8: # %entry
; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; CHECK-NEXT: movaps %xmm1, %xmm0
@@ -7153,68 +7523,60 @@ define <4 x float> @constrained_vector_uitofp_v4f32_v4i64(<4 x i64> %x) #0 {
; AVX1-LABEL: constrained_vector_uitofp_v4f32_v4i64:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vpextrq $1, %xmm0, %rax
-; AVX1-NEXT: testq %rax, %rax
-; AVX1-NEXT: js .LBB182_1
-; AVX1-NEXT: # %bb.2: # %entry
-; AVX1-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
-; AVX1-NEXT: vmovq %xmm0, %rax
-; AVX1-NEXT: testq %rax, %rax
-; AVX1-NEXT: jns .LBB182_5
-; AVX1-NEXT: .LBB182_4:
-; AVX1-NEXT: movq %rax, %rcx
-; AVX1-NEXT: shrq %rcx
-; AVX1-NEXT: andl $1, %eax
-; AVX1-NEXT: orq %rcx, %rax
-; AVX1-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
-; AVX1-NEXT: vaddss %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: jmp .LBB182_6
-; AVX1-NEXT: .LBB182_1:
; AVX1-NEXT: movq %rax, %rcx
; AVX1-NEXT: shrq %rcx
-; AVX1-NEXT: andl $1, %eax
-; AVX1-NEXT: orq %rcx, %rax
-; AVX1-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
+; AVX1-NEXT: movl %eax, %edx
+; AVX1-NEXT: andl $1, %edx
+; AVX1-NEXT: orq %rcx, %rdx
+; AVX1-NEXT: testq %rax, %rax
+; AVX1-NEXT: cmovnsq %rax, %rdx
+; AVX1-NEXT: vcvtsi2ss %rdx, %xmm1, %xmm1
+; AVX1-NEXT: jns .LBB182_2
+; AVX1-NEXT: # %bb.1:
; AVX1-NEXT: vaddss %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: .LBB182_2: # %entry
; AVX1-NEXT: vmovq %xmm0, %rax
+; AVX1-NEXT: movq %rax, %rcx
+; AVX1-NEXT: shrq %rcx
+; AVX1-NEXT: movl %eax, %edx
+; AVX1-NEXT: andl $1, %edx
+; AVX1-NEXT: orq %rcx, %rdx
; AVX1-NEXT: testq %rax, %rax
-; AVX1-NEXT: js .LBB182_4
-; AVX1-NEXT: .LBB182_5: # %entry
-; AVX1-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
-; AVX1-NEXT: .LBB182_6: # %entry
+; AVX1-NEXT: cmovnsq %rax, %rdx
+; AVX1-NEXT: vcvtsi2ss %rdx, %xmm2, %xmm2
+; AVX1-NEXT: jns .LBB182_4
+; AVX1-NEXT: # %bb.3:
+; AVX1-NEXT: vaddss %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: .LBB182_4: # %entry
; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vmovq %xmm0, %rax
-; AVX1-NEXT: testq %rax, %rax
-; AVX1-NEXT: js .LBB182_7
-; AVX1-NEXT: # %bb.8: # %entry
-; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
-; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
-; AVX1-NEXT: vpextrq $1, %xmm0, %rax
-; AVX1-NEXT: testq %rax, %rax
-; AVX1-NEXT: jns .LBB182_11
-; AVX1-NEXT: .LBB182_10:
; AVX1-NEXT: movq %rax, %rcx
; AVX1-NEXT: shrq %rcx
-; AVX1-NEXT: andl $1, %eax
-; AVX1-NEXT: orq %rcx, %rax
-; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
-; AVX1-NEXT: vaddss %xmm0, %xmm0, %xmm0
-; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
-; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: retq
-; AVX1-NEXT: .LBB182_7:
-; AVX1-NEXT: movq %rax, %rcx
-; AVX1-NEXT: shrq %rcx
-; AVX1-NEXT: andl $1, %eax
-; AVX1-NEXT: orq %rcx, %rax
-; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
+; AVX1-NEXT: movl %eax, %edx
+; AVX1-NEXT: andl $1, %edx
+; AVX1-NEXT: orq %rcx, %rdx
+; AVX1-NEXT: testq %rax, %rax
+; AVX1-NEXT: cmovnsq %rax, %rdx
+; AVX1-NEXT: vcvtsi2ss %rdx, %xmm3, %xmm2
+; AVX1-NEXT: jns .LBB182_6
+; AVX1-NEXT: # %bb.5:
; AVX1-NEXT: vaddss %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: .LBB182_6: # %entry
; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; AVX1-NEXT: vpextrq $1, %xmm0, %rax
+; AVX1-NEXT: movq %rax, %rcx
+; AVX1-NEXT: shrq %rcx
+; AVX1-NEXT: movl %eax, %edx
+; AVX1-NEXT: andl $1, %edx
+; AVX1-NEXT: orq %rcx, %rdx
; AVX1-NEXT: testq %rax, %rax
-; AVX1-NEXT: js .LBB182_10
-; AVX1-NEXT: .LBB182_11: # %entry
-; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
+; AVX1-NEXT: cmovnsq %rax, %rdx
+; AVX1-NEXT: vcvtsi2ss %rdx, %xmm3, %xmm0
+; AVX1-NEXT: jns .LBB182_8
+; AVX1-NEXT: # %bb.7:
+; AVX1-NEXT: vaddss %xmm0, %xmm0, %xmm0
+; AVX1-NEXT: .LBB182_8: # %entry
; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -7238,39 +7600,28 @@ define <4 x float> @constrained_vector_uitofp_v4f32_v4i64(<4 x i64> %x) #0 {
;
; AVX512DQ-LABEL: constrained_vector_uitofp_v4f32_v4i64:
; AVX512DQ: # %bb.0: # %entry
-; AVX512DQ-NEXT: vpextrq $1, %xmm0, %rax
-; AVX512DQ-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
-; AVX512DQ-NEXT: vmovq %xmm0, %rax
-; AVX512DQ-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
-; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
-; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm2
-; AVX512DQ-NEXT: vmovq %xmm2, %rax
-; AVX512DQ-NEXT: vcvtsi2ss %rax, %xmm3, %xmm3
-; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm3[0],xmm1[3]
-; AVX512DQ-NEXT: vpextrq $1, %xmm2, %rax
-; AVX512DQ-NEXT: vcvtsi2ss %rax, %xmm4, %xmm2
-; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
+; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512DQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm1
; AVX512DQ-NEXT: vpbroadcastq {{.*#+}} ymm2 = [1,1,1,1]
; AVX512DQ-NEXT: vpand %ymm2, %ymm0, %ymm2
; AVX512DQ-NEXT: vpsrlq $1, %ymm0, %ymm3
; AVX512DQ-NEXT: vpor %ymm3, %ymm2, %ymm2
-; AVX512DQ-NEXT: vpextrq $1, %xmm2, %rax
+; AVX512DQ-NEXT: vblendvpd %ymm1, %ymm2, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpextrq $1, %xmm0, %rax
+; AVX512DQ-NEXT: vcvtsi2ss %rax, %xmm4, %xmm2
+; AVX512DQ-NEXT: vmovq %xmm0, %rax
; AVX512DQ-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3
-; AVX512DQ-NEXT: vmovq %xmm2, %rax
-; AVX512DQ-NEXT: vcvtsi2ss %rax, %xmm4, %xmm4
-; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3]
-; AVX512DQ-NEXT: vextracti128 $1, %ymm2, %xmm2
-; AVX512DQ-NEXT: vmovq %xmm2, %rax
-; AVX512DQ-NEXT: vcvtsi2ss %rax, %xmm5, %xmm4
-; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3]
-; AVX512DQ-NEXT: vpextrq $1, %xmm2, %rax
-; AVX512DQ-NEXT: vcvtsi2ss %rax, %xmm5, %xmm2
-; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0,1,2],xmm2[0]
-; AVX512DQ-NEXT: vaddps %xmm2, %xmm2, %xmm2
-; AVX512DQ-NEXT: vxorps %xmm3, %xmm3, %xmm3
-; AVX512DQ-NEXT: vpcmpgtq %ymm0, %ymm3, %ymm0
-; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512DQ-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
+; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
+; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX512DQ-NEXT: vmovq %xmm0, %rax
+; AVX512DQ-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3
+; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
+; AVX512DQ-NEXT: vpextrq $1, %xmm0, %rax
+; AVX512DQ-NEXT: vcvtsi2ss %rax, %xmm4, %xmm0
+; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
+; AVX512DQ-NEXT: vaddps %xmm0, %xmm0, %xmm2
+; AVX512DQ-NEXT: vpmovqd %zmm1, %ymm1
+; AVX512DQ-NEXT: vblendvps %xmm1, %xmm2, %xmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
entry: