[llvm-branch-commits] [llvm] release/22.x: [SelectionDAG] Fix fptoui.sat expansion using minnum/maxnum (#180178) (PR #184565)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Sat Mar 7 02:50:54 PST 2026
https://github.com/dyung updated https://github.com/llvm/llvm-project/pull/184565
>From dfb0ecaa8bfe29c599e2ab4448a390204181ced6 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov at redhat.com>
Date: Wed, 25 Feb 2026 13:23:42 +0100
Subject: [PATCH] [SelectionDAG] Fix fptoui.sat expansion using minnum/maxnum
(#180178)
fptoui.sat can currently use a minnum/maxnum based expansion, which
relies on NaNs not being propagated. Specifically, it relies on
minnum(maxnum(NaN, 0), MAX) to return 0. However, if the input is sNaN,
then maxnum(sNaN, 0) is allowed to return qNaN, in which case the final
result will be MAX rather than 0.
This PR does the following changes:
* Support the fold for minimumnum/maximumnum, which guarantees that NaN
is not propagated even for sNaN, so it can use the old lowering. Test
this using Hexagon which has legal minimumnum but illegal minnum.
* For the minnum/maxnum case, remove the special unsigned case and
instead always insert the explicit NaN check. In that case the NaN
propagation semantics don't matter.
* This also means that we can support this expansion for
minimum/maximum.
(cherry picked from commit cfca635efc11e4de10d560810fd4837510953c05)
---
.../CodeGen/SelectionDAG/TargetLowering.cpp | 38 +-
llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll | 238 ++---
llvm/test/CodeGen/AArch64/sve-fptoui-sat.ll | 219 +++--
llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll | 226 ++---
llvm/test/CodeGen/ARM/fptoui-sat-scalar.ll | 344 ++++---
llvm/test/CodeGen/Hexagon/fptoi.sat.ll | 247 +++++
.../CodeGen/Thumb2/mve-fptoui-sat-vector.ll | 872 ++++++++++++------
llvm/test/CodeGen/X86/fpclamptosat.ll | 24 +-
llvm/test/CodeGen/X86/fptosi-sat-scalar.ll | 157 ++--
.../test/CodeGen/X86/fptosi-sat-vector-128.ll | 291 +++---
llvm/test/CodeGen/X86/fptoui-sat-scalar.ll | 162 ++--
.../test/CodeGen/X86/fptoui-sat-vector-128.ll | 238 ++---
12 files changed, 1657 insertions(+), 1399 deletions(-)
create mode 100644 llvm/test/CodeGen/Hexagon/fptoi.sat.ll
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index ee489f9fc74f9..8f4e716c1d665 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -11868,22 +11868,27 @@ SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
// If the integer bounds are exactly representable as floats and min/max are
// legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
// of comparisons and selects.
- bool MinMaxLegal = isOperationLegal(ISD::FMINNUM, SrcVT) &&
- isOperationLegal(ISD::FMAXNUM, SrcVT);
- if (AreExactFloatBounds && MinMaxLegal) {
+ auto EmitMinMax = [&](unsigned MinOpcode, unsigned MaxOpcode,
+ bool MayPropagateNaN) {
+ bool MinMaxLegal = isOperationLegalOrCustom(MinOpcode, SrcVT) &&
+ isOperationLegalOrCustom(MaxOpcode, SrcVT);
+ if (!MinMaxLegal)
+ return SDValue();
+
SDValue Clamped = Src;
- // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
- Clamped = DAG.getNode(ISD::FMAXNUM, dl, SrcVT, Clamped, MinFloatNode);
- // Clamp by MaxFloat from above. NaN cannot occur.
- Clamped = DAG.getNode(ISD::FMINNUM, dl, SrcVT, Clamped, MaxFloatNode);
+ // Clamp Src by MinFloat from below. If !MayPropagateNaN and Src is NaN
+ // then the result is MinFloat.
+ Clamped = DAG.getNode(MaxOpcode, dl, SrcVT, Clamped, MinFloatNode);
+ // Clamp by MaxFloat from above. If !MayPropagateNaN then NaN cannot occur.
+ Clamped = DAG.getNode(MinOpcode, dl, SrcVT, Clamped, MaxFloatNode);
// Convert clamped value to integer.
SDValue FpToInt = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
dl, DstVT, Clamped);
- // In the unsigned case we're done, because we mapped NaN to MinFloat,
- // which will cast to zero.
- if (!IsSigned)
+ // If !MayPropagateNaN and the conversion is unsigned, we're done,
+ // because we mapped NaN to MinFloat, which will cast to zero.
+ if (!MayPropagateNaN && !IsSigned)
return FpToInt;
// Otherwise, select 0 if Src is NaN.
@@ -11892,6 +11897,19 @@ SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, FpToInt);
+ };
+ if (AreExactFloatBounds) {
+ if (SDValue Res = EmitMinMax(ISD::FMINIMUMNUM, ISD::FMAXIMUMNUM,
+ /*MayPropagateNaN=*/false))
+ return Res;
+ // These may propagate NaN for sNaN operands.
+ if (SDValue Res =
+ EmitMinMax(ISD::FMINNUM, ISD::FMAXNUM, /*MayPropagateNaN=*/true))
+ return Res;
+ // These always propagate NaN.
+ if (SDValue Res =
+ EmitMinMax(ISD::FMINIMUM, ISD::FMAXIMUM, /*MayPropagateNaN=*/true))
+ return Res;
}
SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
diff --git a/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll b/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll
index 897ade00320db..e9db8434869bb 100644
--- a/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll
@@ -86,18 +86,16 @@ define <vscale x 4 x i16> @test_signed_v4f32_v4i16(<vscale x 4 x float> %f) {
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #-956301312 // =0xc7000000
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: mov z2.s, #-32768 // =0xffffffffffff8000
; CHECK-NEXT: mov z1.s, w8
; CHECK-NEXT: mov w8, #65024 // =0xfe00
; CHECK-NEXT: movk w8, #18175, lsl #16
-; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: mov z1.s, w8
-; CHECK-NEXT: fcmgt p2.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: mov z1.s, #32767 // =0x7fff
-; CHECK-NEXT: fcvtzs z2.s, p1/m, z0.s
+; CHECK-NEXT: mov z2.s, w8
+; CHECK-NEXT: fmaxnm z1.s, p0/m, z1.s, z0.s
+; CHECK-NEXT: fminnm z1.s, p0/m, z1.s, z2.s
+; CHECK-NEXT: fcvtzs z1.s, p0/m, z1.s
; CHECK-NEXT: fcmuo p0.s, p0/z, z0.s, z0.s
-; CHECK-NEXT: sel z0.s, p2, z1.s, z2.s
-; CHECK-NEXT: mov z0.s, p0/m, #0 // =0x0
+; CHECK-NEXT: mov z1.s, p0/m, #0 // =0x0
+; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
%x = call <vscale x 4 x i16> @llvm.fptosi.sat.nxv4f32.nxv4i16(<vscale x 4 x float> %f)
ret <vscale x 4 x i16> %x
@@ -108,26 +106,22 @@ define <vscale x 8 x i16> @test_signed_v8f32_v8i16(<vscale x 8 x float> %f) {
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #-956301312 // =0xc7000000
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: mov z3.s, #-32768 // =0xffffffffffff8000
; CHECK-NEXT: mov z2.s, w8
; CHECK-NEXT: mov w8, #65024 // =0xfe00
-; CHECK-NEXT: mov z5.s, #32767 // =0x7fff
; CHECK-NEXT: movk w8, #18175, lsl #16
; CHECK-NEXT: mov z4.s, w8
-; CHECK-NEXT: fcmge p1.s, p0/z, z1.s, z2.s
-; CHECK-NEXT: fcmge p2.s, p0/z, z0.s, z2.s
-; CHECK-NEXT: mov z2.s, #-32768 // =0xffffffffffff8000
-; CHECK-NEXT: fcmgt p3.s, p0/z, z0.s, z4.s
-; CHECK-NEXT: fcvtzs z3.s, p1/m, z1.s
-; CHECK-NEXT: fcmgt p1.s, p0/z, z1.s, z4.s
-; CHECK-NEXT: fcvtzs z2.s, p2/m, z0.s
-; CHECK-NEXT: fcmuo p2.s, p0/z, z1.s, z1.s
+; CHECK-NEXT: movprfx z3, z1
+; CHECK-NEXT: fmaxnm z3.s, p0/m, z3.s, z2.s
+; CHECK-NEXT: fmaxnm z2.s, p0/m, z2.s, z0.s
+; CHECK-NEXT: fcmuo p1.s, p0/z, z1.s, z1.s
+; CHECK-NEXT: fminnm z3.s, p0/m, z3.s, z4.s
+; CHECK-NEXT: fminnm z2.s, p0/m, z2.s, z4.s
+; CHECK-NEXT: fcvtzs z3.s, p0/m, z3.s
+; CHECK-NEXT: fcvtzs z2.s, p0/m, z2.s
; CHECK-NEXT: fcmuo p0.s, p0/z, z0.s, z0.s
-; CHECK-NEXT: sel z0.s, p1, z5.s, z3.s
-; CHECK-NEXT: sel z1.s, p3, z5.s, z2.s
-; CHECK-NEXT: mov z0.s, p2/m, #0 // =0x0
-; CHECK-NEXT: mov z1.s, p0/m, #0 // =0x0
-; CHECK-NEXT: uzp1 z0.h, z1.h, z0.h
+; CHECK-NEXT: mov z3.s, p1/m, #0 // =0x0
+; CHECK-NEXT: mov z2.s, p0/m, #0 // =0x0
+; CHECK-NEXT: uzp1 z0.h, z2.h, z3.h
; CHECK-NEXT: ret
%x = call <vscale x 8 x i16> @llvm.fptosi.sat.nxv8f32.nxv8i16(<vscale x 8 x float> %f)
ret <vscale x 8 x i16> %x
@@ -203,14 +197,12 @@ define <vscale x 2 x i32> @test_signed_v2f64_v2i32(<vscale x 2 x double> %f) {
; CHECK-NEXT: mov x8, #281474972516352 // =0xffffffc00000
; CHECK-NEXT: movk x8, #16863, lsl #48
; CHECK-NEXT: mov z2.d, x8
-; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, z1.d
-; CHECK-NEXT: mov z1.d, #0xffffffff80000000
-; CHECK-NEXT: fcmgt p2.d, p0/z, z0.d, z2.d
-; CHECK-NEXT: mov z2.d, #0x7fffffff
+; CHECK-NEXT: fmaxnm z1.d, p0/m, z1.d, z0.d
+; CHECK-NEXT: fminnm z1.d, p0/m, z1.d, z2.d
+; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d
; CHECK-NEXT: fcmuo p0.d, p0/z, z0.d, z0.d
-; CHECK-NEXT: fcvtzs z1.d, p1/m, z0.d
-; CHECK-NEXT: sel z0.d, p2, z2.d, z1.d
-; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0
+; CHECK-NEXT: mov z1.d, p0/m, #0 // =0x0
+; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
%x = call <vscale x 2 x i32> @llvm.fptosi.sat.nxv2f64.nxv2i32(<vscale x 2 x double> %f)
ret <vscale x 2 x i32> %x
@@ -221,26 +213,22 @@ define <vscale x 4 x i32> @test_signed_v4f64_v4i32(<vscale x 4 x double> %f) {
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #-4476578029606273024 // =0xc1e0000000000000
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: mov z3.d, #0xffffffff80000000
; CHECK-NEXT: mov z2.d, x8
; CHECK-NEXT: mov x8, #281474972516352 // =0xffffffc00000
-; CHECK-NEXT: mov z4.d, #0xffffffff80000000
; CHECK-NEXT: movk x8, #16863, lsl #48
-; CHECK-NEXT: mov z5.d, x8
-; CHECK-NEXT: fcmge p1.d, p0/z, z1.d, z2.d
-; CHECK-NEXT: fcmge p2.d, p0/z, z0.d, z2.d
-; CHECK-NEXT: mov z2.d, #0x7fffffff
-; CHECK-NEXT: fcmgt p3.d, p0/z, z0.d, z5.d
-; CHECK-NEXT: fcvtzs z3.d, p1/m, z1.d
-; CHECK-NEXT: fcmgt p1.d, p0/z, z1.d, z5.d
-; CHECK-NEXT: fcvtzs z4.d, p2/m, z0.d
-; CHECK-NEXT: fcmuo p2.d, p0/z, z1.d, z1.d
+; CHECK-NEXT: mov z4.d, x8
+; CHECK-NEXT: movprfx z3, z1
+; CHECK-NEXT: fmaxnm z3.d, p0/m, z3.d, z2.d
+; CHECK-NEXT: fmaxnm z2.d, p0/m, z2.d, z0.d
+; CHECK-NEXT: fcmuo p1.d, p0/z, z1.d, z1.d
+; CHECK-NEXT: fminnm z3.d, p0/m, z3.d, z4.d
+; CHECK-NEXT: fminnm z2.d, p0/m, z2.d, z4.d
+; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.d
+; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.d
; CHECK-NEXT: fcmuo p0.d, p0/z, z0.d, z0.d
-; CHECK-NEXT: sel z0.d, p1, z2.d, z3.d
-; CHECK-NEXT: sel z1.d, p3, z2.d, z4.d
-; CHECK-NEXT: mov z0.d, p2/m, #0 // =0x0
-; CHECK-NEXT: mov z1.d, p0/m, #0 // =0x0
-; CHECK-NEXT: uzp1 z0.s, z1.s, z0.s
+; CHECK-NEXT: mov z3.d, p1/m, #0 // =0x0
+; CHECK-NEXT: mov z2.d, p0/m, #0 // =0x0
+; CHECK-NEXT: uzp1 z0.s, z2.s, z3.s
; CHECK-NEXT: ret
%x = call <vscale x 4 x i32> @llvm.fptosi.sat.nxv4f64.nxv4i32(<vscale x 4 x double> %f)
ret <vscale x 4 x i32> %x
@@ -249,55 +237,37 @@ define <vscale x 4 x i32> @test_signed_v4f64_v4i32(<vscale x 4 x double> %f) {
define <vscale x 8 x i32> @test_signed_v8f64_v8i32(<vscale x 8 x double> %f) {
; CHECK-LABEL: test_signed_v8f64_v8i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: addvl sp, sp, #-1
-; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Spill
-; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Spill
-; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Spill
-; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG
-; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: mov x8, #-4476578029606273024 // =0xc1e0000000000000
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: mov z5.d, #0xffffffff80000000
; CHECK-NEXT: mov z4.d, x8
; CHECK-NEXT: mov x8, #281474972516352 // =0xffffffc00000
-; CHECK-NEXT: mov z6.d, #0xffffffff80000000
; CHECK-NEXT: movk x8, #16863, lsl #48
-; CHECK-NEXT: mov z7.d, #0xffffffff80000000
-; CHECK-NEXT: mov z25.d, #0x7fffffff
-; CHECK-NEXT: mov z24.d, x8
-; CHECK-NEXT: fcmge p1.d, p0/z, z1.d, z4.d
-; CHECK-NEXT: fcmge p2.d, p0/z, z0.d, z4.d
-; CHECK-NEXT: fcmge p3.d, p0/z, z3.d, z4.d
-; CHECK-NEXT: fcmgt p4.d, p0/z, z0.d, z24.d
-; CHECK-NEXT: fcvtzs z5.d, p1/m, z1.d
-; CHECK-NEXT: fcmgt p1.d, p0/z, z1.d, z24.d
-; CHECK-NEXT: fcvtzs z6.d, p2/m, z0.d
-; CHECK-NEXT: fcmge p2.d, p0/z, z2.d, z4.d
-; CHECK-NEXT: mov z4.d, #0xffffffff80000000
-; CHECK-NEXT: fcvtzs z7.d, p3/m, z3.d
-; CHECK-NEXT: fcmuo p3.d, p0/z, z1.d, z1.d
-; CHECK-NEXT: fcmgt p5.d, p0/z, z2.d, z24.d
-; CHECK-NEXT: sel z1.d, p1, z25.d, z5.d
-; CHECK-NEXT: fcmgt p1.d, p0/z, z3.d, z24.d
-; CHECK-NEXT: fcvtzs z4.d, p2/m, z2.d
+; CHECK-NEXT: mov z5.d, x8
+; CHECK-NEXT: movprfx z6, z1
+; CHECK-NEXT: fmaxnm z6.d, p0/m, z6.d, z4.d
+; CHECK-NEXT: movprfx z7, z0
+; CHECK-NEXT: fmaxnm z7.d, p0/m, z7.d, z4.d
+; CHECK-NEXT: movprfx z24, z3
+; CHECK-NEXT: fmaxnm z24.d, p0/m, z24.d, z4.d
+; CHECK-NEXT: fmaxnm z4.d, p0/m, z4.d, z2.d
+; CHECK-NEXT: fcmuo p1.d, p0/z, z1.d, z1.d
; CHECK-NEXT: fcmuo p2.d, p0/z, z0.d, z0.d
-; CHECK-NEXT: sel z0.d, p4, z25.d, z6.d
-; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Reload
-; CHECK-NEXT: mov z1.d, p3/m, #0 // =0x0
-; CHECK-NEXT: fcmuo p6.d, p0/z, z3.d, z3.d
+; CHECK-NEXT: fminnm z6.d, p0/m, z6.d, z5.d
+; CHECK-NEXT: fminnm z7.d, p0/m, z7.d, z5.d
+; CHECK-NEXT: fminnm z24.d, p0/m, z24.d, z5.d
+; CHECK-NEXT: fminnm z4.d, p0/m, z4.d, z5.d
+; CHECK-NEXT: fcmuo p3.d, p0/z, z3.d, z3.d
+; CHECK-NEXT: fcvtzs z6.d, p0/m, z6.d
+; CHECK-NEXT: fcvtzs z7.d, p0/m, z7.d
+; CHECK-NEXT: fcvtzs z24.d, p0/m, z24.d
+; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.d
; CHECK-NEXT: fcmuo p0.d, p0/z, z2.d, z2.d
-; CHECK-NEXT: sel z2.d, p1, z25.d, z7.d
-; CHECK-NEXT: sel z3.d, p5, z25.d, z4.d
-; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Reload
-; CHECK-NEXT: mov z0.d, p2/m, #0 // =0x0
-; CHECK-NEXT: mov z2.d, p6/m, #0 // =0x0
-; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Reload
-; CHECK-NEXT: mov z3.d, p0/m, #0 // =0x0
-; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s
-; CHECK-NEXT: uzp1 z1.s, z3.s, z2.s
-; CHECK-NEXT: addvl sp, sp, #1
-; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: mov z6.d, p1/m, #0 // =0x0
+; CHECK-NEXT: mov z7.d, p2/m, #0 // =0x0
+; CHECK-NEXT: mov z24.d, p3/m, #0 // =0x0
+; CHECK-NEXT: mov z4.d, p0/m, #0 // =0x0
+; CHECK-NEXT: uzp1 z0.s, z7.s, z6.s
+; CHECK-NEXT: uzp1 z1.s, z4.s, z24.s
; CHECK-NEXT: ret
%x = call <vscale x 8 x i32> @llvm.fptosi.sat.nxv8f64.nxv8i32(<vscale x 8 x double> %f)
ret <vscale x 8 x i32> %x
@@ -308,26 +278,22 @@ define <vscale x 4 x i16> @test_signed_v4f64_v4i16(<vscale x 4 x double> %f) {
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #-4548635623644200960 // =0xc0e0000000000000
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: mov z3.d, #-32768 // =0xffffffffffff8000
; CHECK-NEXT: mov z2.d, x8
; CHECK-NEXT: mov x8, #281200098803712 // =0xffc000000000
-; CHECK-NEXT: mov z5.d, #32767 // =0x7fff
; CHECK-NEXT: movk x8, #16607, lsl #48
; CHECK-NEXT: mov z4.d, x8
-; CHECK-NEXT: fcmge p1.d, p0/z, z1.d, z2.d
-; CHECK-NEXT: fcmge p2.d, p0/z, z0.d, z2.d
-; CHECK-NEXT: mov z2.d, #-32768 // =0xffffffffffff8000
-; CHECK-NEXT: fcmgt p3.d, p0/z, z0.d, z4.d
-; CHECK-NEXT: fcvtzs z3.d, p1/m, z1.d
-; CHECK-NEXT: fcmgt p1.d, p0/z, z1.d, z4.d
-; CHECK-NEXT: fcvtzs z2.d, p2/m, z0.d
-; CHECK-NEXT: fcmuo p2.d, p0/z, z1.d, z1.d
+; CHECK-NEXT: movprfx z3, z1
+; CHECK-NEXT: fmaxnm z3.d, p0/m, z3.d, z2.d
+; CHECK-NEXT: fmaxnm z2.d, p0/m, z2.d, z0.d
+; CHECK-NEXT: fcmuo p1.d, p0/z, z1.d, z1.d
+; CHECK-NEXT: fminnm z3.d, p0/m, z3.d, z4.d
+; CHECK-NEXT: fminnm z2.d, p0/m, z2.d, z4.d
+; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.d
+; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.d
; CHECK-NEXT: fcmuo p0.d, p0/z, z0.d, z0.d
-; CHECK-NEXT: sel z0.d, p1, z5.d, z3.d
-; CHECK-NEXT: sel z1.d, p3, z5.d, z2.d
-; CHECK-NEXT: mov z0.d, p2/m, #0 // =0x0
-; CHECK-NEXT: mov z1.d, p0/m, #0 // =0x0
-; CHECK-NEXT: uzp1 z0.s, z1.s, z0.s
+; CHECK-NEXT: mov z3.d, p1/m, #0 // =0x0
+; CHECK-NEXT: mov z2.d, p0/m, #0 // =0x0
+; CHECK-NEXT: uzp1 z0.s, z2.s, z3.s
; CHECK-NEXT: ret
%x = call <vscale x 4 x i16> @llvm.fptosi.sat.nxv4f64.nxv4i16(<vscale x 4 x double> %f)
ret <vscale x 4 x i16> %x
@@ -336,56 +302,38 @@ define <vscale x 4 x i16> @test_signed_v4f64_v4i16(<vscale x 4 x double> %f) {
define <vscale x 8 x i16> @test_signed_v8f64_v8i16(<vscale x 8 x double> %f) {
; CHECK-LABEL: test_signed_v8f64_v8i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: addvl sp, sp, #-1
-; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Spill
-; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Spill
-; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Spill
-; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG
-; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: mov x8, #-4548635623644200960 // =0xc0e0000000000000
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: mov z5.d, #-32768 // =0xffffffffffff8000
; CHECK-NEXT: mov z4.d, x8
; CHECK-NEXT: mov x8, #281200098803712 // =0xffc000000000
-; CHECK-NEXT: mov z7.d, #-32768 // =0xffffffffffff8000
; CHECK-NEXT: movk x8, #16607, lsl #48
-; CHECK-NEXT: mov z24.d, #-32768 // =0xffffffffffff8000
-; CHECK-NEXT: mov z25.d, #32767 // =0x7fff
-; CHECK-NEXT: mov z6.d, x8
-; CHECK-NEXT: fcmge p1.d, p0/z, z3.d, z4.d
-; CHECK-NEXT: fcmge p2.d, p0/z, z2.d, z4.d
-; CHECK-NEXT: fcmge p3.d, p0/z, z1.d, z4.d
-; CHECK-NEXT: fcmgt p4.d, p0/z, z2.d, z6.d
-; CHECK-NEXT: fcvtzs z5.d, p1/m, z3.d
-; CHECK-NEXT: fcmgt p1.d, p0/z, z3.d, z6.d
-; CHECK-NEXT: fcvtzs z7.d, p2/m, z2.d
-; CHECK-NEXT: fcmge p2.d, p0/z, z0.d, z4.d
-; CHECK-NEXT: fcvtzs z24.d, p3/m, z1.d
-; CHECK-NEXT: fcmuo p3.d, p0/z, z3.d, z3.d
-; CHECK-NEXT: mov z3.d, #-32768 // =0xffffffffffff8000
-; CHECK-NEXT: fcmgt p5.d, p0/z, z0.d, z6.d
-; CHECK-NEXT: sel z4.d, p1, z25.d, z5.d
-; CHECK-NEXT: fcmgt p1.d, p0/z, z1.d, z6.d
-; CHECK-NEXT: fcvtzs z3.d, p2/m, z0.d
+; CHECK-NEXT: mov z5.d, x8
+; CHECK-NEXT: movprfx z6, z3
+; CHECK-NEXT: fmaxnm z6.d, p0/m, z6.d, z4.d
+; CHECK-NEXT: movprfx z7, z2
+; CHECK-NEXT: fmaxnm z7.d, p0/m, z7.d, z4.d
+; CHECK-NEXT: movprfx z24, z1
+; CHECK-NEXT: fmaxnm z24.d, p0/m, z24.d, z4.d
+; CHECK-NEXT: fmaxnm z4.d, p0/m, z4.d, z0.d
+; CHECK-NEXT: fcmuo p1.d, p0/z, z3.d, z3.d
; CHECK-NEXT: fcmuo p2.d, p0/z, z2.d, z2.d
-; CHECK-NEXT: fcmuo p6.d, p0/z, z1.d, z1.d
-; CHECK-NEXT: mov z4.d, p3/m, #0 // =0x0
+; CHECK-NEXT: fminnm z6.d, p0/m, z6.d, z5.d
+; CHECK-NEXT: fminnm z7.d, p0/m, z7.d, z5.d
+; CHECK-NEXT: fminnm z24.d, p0/m, z24.d, z5.d
+; CHECK-NEXT: fminnm z4.d, p0/m, z4.d, z5.d
+; CHECK-NEXT: fcmuo p3.d, p0/z, z1.d, z1.d
+; CHECK-NEXT: fcvtzs z6.d, p0/m, z6.d
+; CHECK-NEXT: fcvtzs z7.d, p0/m, z7.d
+; CHECK-NEXT: fcvtzs z24.d, p0/m, z24.d
+; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.d
; CHECK-NEXT: fcmuo p0.d, p0/z, z0.d, z0.d
-; CHECK-NEXT: sel z0.d, p4, z25.d, z7.d
-; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Reload
-; CHECK-NEXT: sel z1.d, p1, z25.d, z24.d
-; CHECK-NEXT: sel z2.d, p5, z25.d, z3.d
-; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Reload
-; CHECK-NEXT: mov z0.d, p2/m, #0 // =0x0
-; CHECK-NEXT: mov z1.d, p6/m, #0 // =0x0
-; CHECK-NEXT: mov z2.d, p0/m, #0 // =0x0
-; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Reload
-; CHECK-NEXT: uzp1 z0.s, z0.s, z4.s
-; CHECK-NEXT: uzp1 z1.s, z2.s, z1.s
+; CHECK-NEXT: mov z6.d, p1/m, #0 // =0x0
+; CHECK-NEXT: mov z7.d, p2/m, #0 // =0x0
+; CHECK-NEXT: mov z24.d, p3/m, #0 // =0x0
+; CHECK-NEXT: mov z4.d, p0/m, #0 // =0x0
+; CHECK-NEXT: uzp1 z0.s, z7.s, z6.s
+; CHECK-NEXT: uzp1 z1.s, z4.s, z24.s
; CHECK-NEXT: uzp1 z0.h, z1.h, z0.h
-; CHECK-NEXT: addvl sp, sp, #1
-; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%x = call <vscale x 8 x i16> @llvm.fptosi.sat.nxv8f64.nxv8i16(<vscale x 8 x double> %f)
ret <vscale x 8 x i16> %x
diff --git a/llvm/test/CodeGen/AArch64/sve-fptoui-sat.ll b/llvm/test/CodeGen/AArch64/sve-fptoui-sat.ll
index fe398921cf1b5..61620fbdf0826 100644
--- a/llvm/test/CodeGen/AArch64/sve-fptoui-sat.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fptoui-sat.ll
@@ -73,14 +73,15 @@ define <vscale x 4 x i16> @test_signed_v4f32_v4i16(<vscale x 4 x float> %f) {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov w8, #65280 // =0xff00
-; CHECK-NEXT: movi v1.2d, #0000000000000000
; CHECK-NEXT: movk w8, #18303, lsl #16
-; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, #0.0
+; CHECK-NEXT: movprfx z1, z0
+; CHECK-NEXT: fmaxnm z1.s, p0/m, z1.s, #0.0
; CHECK-NEXT: mov z2.s, w8
-; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z2.s
-; CHECK-NEXT: fcvtzu z1.s, p1/m, z0.s
-; CHECK-NEXT: mov z0.s, #65535 // =0xffff
-; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
+; CHECK-NEXT: fminnm z1.s, p0/m, z1.s, z2.s
+; CHECK-NEXT: fcvtzu z1.s, p0/m, z1.s
+; CHECK-NEXT: fcmuo p0.s, p0/z, z0.s, z0.s
+; CHECK-NEXT: mov z1.s, p0/m, #0 // =0x0
+; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
%x = call <vscale x 4 x i16> @llvm.fptoui.sat.nxv4f32.nxv4i16(<vscale x 4 x float> %f)
ret <vscale x 4 x i16> %x
@@ -91,20 +92,21 @@ define <vscale x 8 x i16> @test_signed_v8f32_v8i16(<vscale x 8 x float> %f) {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov w8, #65280 // =0xff00
-; CHECK-NEXT: movi v2.2d, #0000000000000000
; CHECK-NEXT: movk w8, #18303, lsl #16
-; CHECK-NEXT: movi v3.2d, #0000000000000000
-; CHECK-NEXT: fcmge p1.s, p0/z, z1.s, #0.0
+; CHECK-NEXT: movprfx z2, z1
+; CHECK-NEXT: fmaxnm z2.s, p0/m, z2.s, #0.0
+; CHECK-NEXT: movprfx z3, z0
+; CHECK-NEXT: fmaxnm z3.s, p0/m, z3.s, #0.0
; CHECK-NEXT: mov z4.s, w8
-; CHECK-NEXT: fcmge p2.s, p0/z, z0.s, #0.0
-; CHECK-NEXT: fcvtzu z2.s, p1/m, z1.s
-; CHECK-NEXT: fcmgt p1.s, p0/z, z1.s, z4.s
-; CHECK-NEXT: mov z1.s, #65535 // =0xffff
-; CHECK-NEXT: fcvtzu z3.s, p2/m, z0.s
-; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z4.s
-; CHECK-NEXT: sel z0.s, p1, z1.s, z2.s
-; CHECK-NEXT: sel z1.s, p0, z1.s, z3.s
-; CHECK-NEXT: uzp1 z0.h, z1.h, z0.h
+; CHECK-NEXT: fcmuo p1.s, p0/z, z1.s, z1.s
+; CHECK-NEXT: fminnm z2.s, p0/m, z2.s, z4.s
+; CHECK-NEXT: fminnm z3.s, p0/m, z3.s, z4.s
+; CHECK-NEXT: fcvtzu z2.s, p0/m, z2.s
+; CHECK-NEXT: fcvtzu z3.s, p0/m, z3.s
+; CHECK-NEXT: fcmuo p0.s, p0/z, z0.s, z0.s
+; CHECK-NEXT: mov z2.s, p1/m, #0 // =0x0
+; CHECK-NEXT: mov z3.s, p0/m, #0 // =0x0
+; CHECK-NEXT: uzp1 z0.h, z3.h, z2.h
; CHECK-NEXT: ret
%x = call <vscale x 8 x i16> @llvm.fptoui.sat.nxv8f32.nxv8i16(<vscale x 8 x float> %f)
ret <vscale x 8 x i16> %x
@@ -165,14 +167,15 @@ define <vscale x 2 x i32> @test_signed_v2f64_v2i32(<vscale x 2 x double> %f) {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov x8, #281474974613504 // =0xffffffe00000
-; CHECK-NEXT: movi v1.2d, #0000000000000000
; CHECK-NEXT: movk x8, #16879, lsl #48
-; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, #0.0
+; CHECK-NEXT: movprfx z1, z0
+; CHECK-NEXT: fmaxnm z1.d, p0/m, z1.d, #0.0
; CHECK-NEXT: mov z2.d, x8
-; CHECK-NEXT: fcmgt p0.d, p0/z, z0.d, z2.d
-; CHECK-NEXT: fcvtzu z1.d, p1/m, z0.d
-; CHECK-NEXT: mov z0.d, #0xffffffff
-; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: fminnm z1.d, p0/m, z1.d, z2.d
+; CHECK-NEXT: fcvtzu z1.d, p0/m, z1.d
+; CHECK-NEXT: fcmuo p0.d, p0/z, z0.d, z0.d
+; CHECK-NEXT: mov z1.d, p0/m, #0 // =0x0
+; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
%x = call <vscale x 2 x i32> @llvm.fptoui.sat.nxv2f64.nxv2i32(<vscale x 2 x double> %f)
ret <vscale x 2 x i32> %x
@@ -183,20 +186,21 @@ define <vscale x 4 x i32> @test_signed_v4f64_v4i32(<vscale x 4 x double> %f) {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov x8, #281474974613504 // =0xffffffe00000
-; CHECK-NEXT: movi v2.2d, #0000000000000000
; CHECK-NEXT: movk x8, #16879, lsl #48
-; CHECK-NEXT: movi v3.2d, #0000000000000000
-; CHECK-NEXT: fcmge p1.d, p0/z, z1.d, #0.0
+; CHECK-NEXT: movprfx z2, z1
+; CHECK-NEXT: fmaxnm z2.d, p0/m, z2.d, #0.0
+; CHECK-NEXT: movprfx z3, z0
+; CHECK-NEXT: fmaxnm z3.d, p0/m, z3.d, #0.0
; CHECK-NEXT: mov z4.d, x8
-; CHECK-NEXT: fcmge p2.d, p0/z, z0.d, #0.0
-; CHECK-NEXT: fcvtzu z2.d, p1/m, z1.d
-; CHECK-NEXT: fcmgt p1.d, p0/z, z1.d, z4.d
-; CHECK-NEXT: mov z1.d, #0xffffffff
-; CHECK-NEXT: fcvtzu z3.d, p2/m, z0.d
-; CHECK-NEXT: fcmgt p0.d, p0/z, z0.d, z4.d
-; CHECK-NEXT: sel z0.d, p1, z1.d, z2.d
-; CHECK-NEXT: sel z1.d, p0, z1.d, z3.d
-; CHECK-NEXT: uzp1 z0.s, z1.s, z0.s
+; CHECK-NEXT: fcmuo p1.d, p0/z, z1.d, z1.d
+; CHECK-NEXT: fminnm z2.d, p0/m, z2.d, z4.d
+; CHECK-NEXT: fminnm z3.d, p0/m, z3.d, z4.d
+; CHECK-NEXT: fcvtzu z2.d, p0/m, z2.d
+; CHECK-NEXT: fcvtzu z3.d, p0/m, z3.d
+; CHECK-NEXT: fcmuo p0.d, p0/z, z0.d, z0.d
+; CHECK-NEXT: mov z2.d, p1/m, #0 // =0x0
+; CHECK-NEXT: mov z3.d, p0/m, #0 // =0x0
+; CHECK-NEXT: uzp1 z0.s, z3.s, z2.s
; CHECK-NEXT: ret
%x = call <vscale x 4 x i32> @llvm.fptoui.sat.nxv4f64.nxv4i32(<vscale x 4 x double> %f)
ret <vscale x 4 x i32> %x
@@ -205,41 +209,36 @@ define <vscale x 4 x i32> @test_signed_v4f64_v4i32(<vscale x 4 x double> %f) {
define <vscale x 8 x i32> @test_signed_v8f64_v8i32(<vscale x 8 x double> %f) {
; CHECK-LABEL: test_signed_v8f64_v8i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: addvl sp, sp, #-1
-; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Spill
-; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG
-; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov x8, #281474974613504 // =0xffffffe00000
-; CHECK-NEXT: movi v4.2d, #0000000000000000
; CHECK-NEXT: movk x8, #16879, lsl #48
-; CHECK-NEXT: movi v5.2d, #0000000000000000
-; CHECK-NEXT: movi v6.2d, #0000000000000000
-; CHECK-NEXT: fcmge p1.d, p0/z, z1.d, #0.0
-; CHECK-NEXT: movi v24.2d, #0000000000000000
-; CHECK-NEXT: mov z7.d, x8
-; CHECK-NEXT: fcmge p2.d, p0/z, z0.d, #0.0
-; CHECK-NEXT: fcmge p3.d, p0/z, z3.d, #0.0
-; CHECK-NEXT: fcmge p4.d, p0/z, z2.d, #0.0
-; CHECK-NEXT: fcvtzu z4.d, p1/m, z1.d
-; CHECK-NEXT: fcvtzu z5.d, p2/m, z0.d
-; CHECK-NEXT: fcvtzu z6.d, p3/m, z3.d
-; CHECK-NEXT: fcmgt p1.d, p0/z, z1.d, z7.d
-; CHECK-NEXT: fcvtzu z24.d, p4/m, z2.d
-; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Reload
-; CHECK-NEXT: fcmgt p2.d, p0/z, z0.d, z7.d
-; CHECK-NEXT: mov z0.d, #0xffffffff
-; CHECK-NEXT: fcmgt p3.d, p0/z, z3.d, z7.d
-; CHECK-NEXT: fcmgt p0.d, p0/z, z2.d, z7.d
-; CHECK-NEXT: sel z1.d, p1, z0.d, z4.d
-; CHECK-NEXT: sel z2.d, p2, z0.d, z5.d
-; CHECK-NEXT: sel z3.d, p3, z0.d, z6.d
-; CHECK-NEXT: sel z4.d, p0, z0.d, z24.d
-; CHECK-NEXT: uzp1 z0.s, z2.s, z1.s
-; CHECK-NEXT: uzp1 z1.s, z4.s, z3.s
-; CHECK-NEXT: addvl sp, sp, #1
-; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: movprfx z5, z1
+; CHECK-NEXT: fmaxnm z5.d, p0/m, z5.d, #0.0
+; CHECK-NEXT: movprfx z6, z0
+; CHECK-NEXT: fmaxnm z6.d, p0/m, z6.d, #0.0
+; CHECK-NEXT: movprfx z7, z3
+; CHECK-NEXT: fmaxnm z7.d, p0/m, z7.d, #0.0
+; CHECK-NEXT: movprfx z24, z2
+; CHECK-NEXT: fmaxnm z24.d, p0/m, z24.d, #0.0
+; CHECK-NEXT: mov z4.d, x8
+; CHECK-NEXT: fcmuo p1.d, p0/z, z1.d, z1.d
+; CHECK-NEXT: fcmuo p2.d, p0/z, z0.d, z0.d
+; CHECK-NEXT: fminnm z5.d, p0/m, z5.d, z4.d
+; CHECK-NEXT: fminnm z6.d, p0/m, z6.d, z4.d
+; CHECK-NEXT: fminnm z7.d, p0/m, z7.d, z4.d
+; CHECK-NEXT: fminnm z4.d, p0/m, z4.d, z24.d
+; CHECK-NEXT: fcmuo p3.d, p0/z, z3.d, z3.d
+; CHECK-NEXT: fcvtzu z5.d, p0/m, z5.d
+; CHECK-NEXT: fcvtzu z6.d, p0/m, z6.d
+; CHECK-NEXT: fcvtzu z7.d, p0/m, z7.d
+; CHECK-NEXT: fcvtzu z4.d, p0/m, z4.d
+; CHECK-NEXT: fcmuo p0.d, p0/z, z2.d, z2.d
+; CHECK-NEXT: mov z5.d, p1/m, #0 // =0x0
+; CHECK-NEXT: mov z6.d, p2/m, #0 // =0x0
+; CHECK-NEXT: mov z7.d, p3/m, #0 // =0x0
+; CHECK-NEXT: mov z4.d, p0/m, #0 // =0x0
+; CHECK-NEXT: uzp1 z0.s, z6.s, z5.s
+; CHECK-NEXT: uzp1 z1.s, z4.s, z7.s
; CHECK-NEXT: ret
%x = call <vscale x 8 x i32> @llvm.fptoui.sat.nxv8f64.nxv8i32(<vscale x 8 x double> %f)
ret <vscale x 8 x i32> %x
@@ -250,20 +249,21 @@ define <vscale x 4 x i16> @test_signed_v4f64_v4i16(<vscale x 4 x double> %f) {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov x8, #281337537757184 // =0xffe000000000
-; CHECK-NEXT: movi v2.2d, #0000000000000000
; CHECK-NEXT: movk x8, #16623, lsl #48
-; CHECK-NEXT: movi v3.2d, #0000000000000000
-; CHECK-NEXT: fcmge p1.d, p0/z, z1.d, #0.0
+; CHECK-NEXT: movprfx z2, z1
+; CHECK-NEXT: fmaxnm z2.d, p0/m, z2.d, #0.0
+; CHECK-NEXT: movprfx z3, z0
+; CHECK-NEXT: fmaxnm z3.d, p0/m, z3.d, #0.0
; CHECK-NEXT: mov z4.d, x8
-; CHECK-NEXT: fcmge p2.d, p0/z, z0.d, #0.0
-; CHECK-NEXT: fcvtzu z2.d, p1/m, z1.d
-; CHECK-NEXT: fcmgt p1.d, p0/z, z1.d, z4.d
-; CHECK-NEXT: mov z1.d, #65535 // =0xffff
-; CHECK-NEXT: fcvtzu z3.d, p2/m, z0.d
-; CHECK-NEXT: fcmgt p0.d, p0/z, z0.d, z4.d
-; CHECK-NEXT: sel z0.d, p1, z1.d, z2.d
-; CHECK-NEXT: sel z1.d, p0, z1.d, z3.d
-; CHECK-NEXT: uzp1 z0.s, z1.s, z0.s
+; CHECK-NEXT: fcmuo p1.d, p0/z, z1.d, z1.d
+; CHECK-NEXT: fminnm z2.d, p0/m, z2.d, z4.d
+; CHECK-NEXT: fminnm z3.d, p0/m, z3.d, z4.d
+; CHECK-NEXT: fcvtzu z2.d, p0/m, z2.d
+; CHECK-NEXT: fcvtzu z3.d, p0/m, z3.d
+; CHECK-NEXT: fcmuo p0.d, p0/z, z0.d, z0.d
+; CHECK-NEXT: mov z2.d, p1/m, #0 // =0x0
+; CHECK-NEXT: mov z3.d, p0/m, #0 // =0x0
+; CHECK-NEXT: uzp1 z0.s, z3.s, z2.s
; CHECK-NEXT: ret
%x = call <vscale x 4 x i16> @llvm.fptoui.sat.nxv4f64.nxv4i16(<vscale x 4 x double> %f)
ret <vscale x 4 x i16> %x
@@ -272,42 +272,37 @@ define <vscale x 4 x i16> @test_signed_v4f64_v4i16(<vscale x 4 x double> %f) {
define <vscale x 8 x i16> @test_signed_v8f64_v8i16(<vscale x 8 x double> %f) {
; CHECK-LABEL: test_signed_v8f64_v8i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: addvl sp, sp, #-1
-; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Spill
-; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG
-; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov x8, #281337537757184 // =0xffe000000000
-; CHECK-NEXT: movi v4.2d, #0000000000000000
; CHECK-NEXT: movk x8, #16623, lsl #48
-; CHECK-NEXT: movi v5.2d, #0000000000000000
-; CHECK-NEXT: movi v6.2d, #0000000000000000
-; CHECK-NEXT: fcmge p1.d, p0/z, z3.d, #0.0
-; CHECK-NEXT: movi v24.2d, #0000000000000000
-; CHECK-NEXT: mov z7.d, x8
-; CHECK-NEXT: fcmge p2.d, p0/z, z2.d, #0.0
-; CHECK-NEXT: fcmge p3.d, p0/z, z1.d, #0.0
-; CHECK-NEXT: fcmge p4.d, p0/z, z0.d, #0.0
-; CHECK-NEXT: fcvtzu z4.d, p1/m, z3.d
-; CHECK-NEXT: fcvtzu z5.d, p2/m, z2.d
-; CHECK-NEXT: fcvtzu z6.d, p3/m, z1.d
-; CHECK-NEXT: fcmgt p1.d, p0/z, z3.d, z7.d
-; CHECK-NEXT: fcvtzu z24.d, p4/m, z0.d
-; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Reload
-; CHECK-NEXT: fcmgt p2.d, p0/z, z2.d, z7.d
-; CHECK-NEXT: mov z2.d, #65535 // =0xffff
-; CHECK-NEXT: fcmgt p3.d, p0/z, z1.d, z7.d
-; CHECK-NEXT: fcmgt p0.d, p0/z, z0.d, z7.d
-; CHECK-NEXT: sel z0.d, p1, z2.d, z4.d
-; CHECK-NEXT: sel z1.d, p2, z2.d, z5.d
-; CHECK-NEXT: sel z3.d, p3, z2.d, z6.d
-; CHECK-NEXT: sel z2.d, p0, z2.d, z24.d
-; CHECK-NEXT: uzp1 z0.s, z1.s, z0.s
-; CHECK-NEXT: uzp1 z1.s, z2.s, z3.s
+; CHECK-NEXT: movprfx z5, z3
+; CHECK-NEXT: fmaxnm z5.d, p0/m, z5.d, #0.0
+; CHECK-NEXT: movprfx z6, z2
+; CHECK-NEXT: fmaxnm z6.d, p0/m, z6.d, #0.0
+; CHECK-NEXT: movprfx z7, z1
+; CHECK-NEXT: fmaxnm z7.d, p0/m, z7.d, #0.0
+; CHECK-NEXT: movprfx z24, z0
+; CHECK-NEXT: fmaxnm z24.d, p0/m, z24.d, #0.0
+; CHECK-NEXT: mov z4.d, x8
+; CHECK-NEXT: fcmuo p1.d, p0/z, z3.d, z3.d
+; CHECK-NEXT: fcmuo p2.d, p0/z, z2.d, z2.d
+; CHECK-NEXT: fminnm z5.d, p0/m, z5.d, z4.d
+; CHECK-NEXT: fminnm z6.d, p0/m, z6.d, z4.d
+; CHECK-NEXT: fminnm z7.d, p0/m, z7.d, z4.d
+; CHECK-NEXT: fminnm z4.d, p0/m, z4.d, z24.d
+; CHECK-NEXT: fcmuo p3.d, p0/z, z1.d, z1.d
+; CHECK-NEXT: fcvtzu z5.d, p0/m, z5.d
+; CHECK-NEXT: fcvtzu z6.d, p0/m, z6.d
+; CHECK-NEXT: fcvtzu z7.d, p0/m, z7.d
+; CHECK-NEXT: fcvtzu z4.d, p0/m, z4.d
+; CHECK-NEXT: fcmuo p0.d, p0/z, z0.d, z0.d
+; CHECK-NEXT: mov z5.d, p1/m, #0 // =0x0
+; CHECK-NEXT: mov z6.d, p2/m, #0 // =0x0
+; CHECK-NEXT: mov z7.d, p3/m, #0 // =0x0
+; CHECK-NEXT: mov z4.d, p0/m, #0 // =0x0
+; CHECK-NEXT: uzp1 z0.s, z6.s, z5.s
+; CHECK-NEXT: uzp1 z1.s, z4.s, z7.s
; CHECK-NEXT: uzp1 z0.h, z1.h, z0.h
-; CHECK-NEXT: addvl sp, sp, #1
-; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%x = call <vscale x 8 x i16> @llvm.fptoui.sat.nxv8f64.nxv8i16(<vscale x 8 x double> %f)
ret <vscale x 8 x i16> %x
diff --git a/llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll b/llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll
index 5179f97624489..bccb0ad150509 100644
--- a/llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll
+++ b/llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll
@@ -67,23 +67,22 @@ define i1 @test_signed_i1_f32(float %f) nounwind {
;
; VFP2-LABEL: test_signed_i1_f32:
; VFP2: @ %bb.0:
-; VFP2-NEXT: vmov s2, r0
; VFP2-NEXT: vmov.f32 s0, #-1.000000e+00
-; VFP2-NEXT: vcvt.s32.f32 s4, s2
-; VFP2-NEXT: vcmp.f32 s2, s0
-; VFP2-NEXT: vmov r0, s4
-; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: it lt
-; VFP2-NEXT: movlt.w r0, #-1
-; VFP2-NEXT: vcmp.f32 s2, #0
-; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: it gt
-; VFP2-NEXT: movgt r0, #0
+; VFP2-NEXT: vldr s4, .LCPI0_0
+; VFP2-NEXT: vmov s2, r0
; VFP2-NEXT: vcmp.f32 s2, s2
; VFP2-NEXT: vmrs APSR_nzcv, fpscr
+; VFP2-NEXT: vmax.f32 d16, d1, d0
+; VFP2-NEXT: vmin.f32 d0, d16, d2
+; VFP2-NEXT: vcvt.s32.f32 s0, s0
+; VFP2-NEXT: vmov r0, s0
; VFP2-NEXT: it vs
; VFP2-NEXT: movvs r0, #0
; VFP2-NEXT: bx lr
+; VFP2-NEXT: .p2align 2
+; VFP2-NEXT: @ %bb.1:
+; VFP2-NEXT: .LCPI0_0:
+; VFP2-NEXT: .long 0x00000000 @ float 0
;
; FP16-LABEL: test_signed_i1_f32:
; FP16: @ %bb.0:
@@ -157,21 +156,15 @@ define i8 @test_signed_i8_f32(float %f) nounwind {
;
; VFP2-LABEL: test_signed_i8_f32:
; VFP2: @ %bb.0:
-; VFP2-NEXT: vmov s0, r0
-; VFP2-NEXT: vldr s2, .LCPI1_0
-; VFP2-NEXT: vldr s6, .LCPI1_1
-; VFP2-NEXT: vcvt.s32.f32 s4, s0
-; VFP2-NEXT: vcmp.f32 s0, s2
-; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: vcmp.f32 s0, s6
-; VFP2-NEXT: vmov r0, s4
-; VFP2-NEXT: it lt
-; VFP2-NEXT: mvnlt r0, #127
-; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: it gt
-; VFP2-NEXT: movgt r0, #127
-; VFP2-NEXT: vcmp.f32 s0, s0
+; VFP2-NEXT: vmov s2, r0
+; VFP2-NEXT: vldr s0, .LCPI1_0
+; VFP2-NEXT: vmax.f32 d16, d1, d0
+; VFP2-NEXT: vldr s4, .LCPI1_1
+; VFP2-NEXT: vcmp.f32 s2, s2
; VFP2-NEXT: vmrs APSR_nzcv, fpscr
+; VFP2-NEXT: vmin.f32 d0, d16, d2
+; VFP2-NEXT: vcvt.s32.f32 s0, s0
+; VFP2-NEXT: vmov r0, s0
; VFP2-NEXT: it vs
; VFP2-NEXT: movvs r0, #0
; VFP2-NEXT: bx lr
@@ -256,22 +249,15 @@ define i13 @test_signed_i13_f32(float %f) nounwind {
;
; VFP2-LABEL: test_signed_i13_f32:
; VFP2: @ %bb.0:
-; VFP2-NEXT: vmov s0, r0
-; VFP2-NEXT: vldr s2, .LCPI2_0
-; VFP2-NEXT: vldr s6, .LCPI2_1
-; VFP2-NEXT: vcvt.s32.f32 s4, s0
-; VFP2-NEXT: vcmp.f32 s0, s2
-; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: vcmp.f32 s0, s6
-; VFP2-NEXT: vmov r0, s4
-; VFP2-NEXT: itt lt
-; VFP2-NEXT: movwlt r0, #61440
-; VFP2-NEXT: movtlt r0, #65535
-; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: it gt
-; VFP2-NEXT: movwgt r0, #4095
-; VFP2-NEXT: vcmp.f32 s0, s0
+; VFP2-NEXT: vmov s2, r0
+; VFP2-NEXT: vldr s0, .LCPI2_0
+; VFP2-NEXT: vmax.f32 d16, d1, d0
+; VFP2-NEXT: vldr s4, .LCPI2_1
+; VFP2-NEXT: vcmp.f32 s2, s2
; VFP2-NEXT: vmrs APSR_nzcv, fpscr
+; VFP2-NEXT: vmin.f32 d0, d16, d2
+; VFP2-NEXT: vcvt.s32.f32 s0, s0
+; VFP2-NEXT: vmov r0, s0
; VFP2-NEXT: it vs
; VFP2-NEXT: movvs r0, #0
; VFP2-NEXT: bx lr
@@ -356,22 +342,15 @@ define i16 @test_signed_i16_f32(float %f) nounwind {
;
; VFP2-LABEL: test_signed_i16_f32:
; VFP2: @ %bb.0:
-; VFP2-NEXT: vmov s0, r0
-; VFP2-NEXT: vldr s2, .LCPI3_0
-; VFP2-NEXT: vldr s6, .LCPI3_1
-; VFP2-NEXT: vcvt.s32.f32 s4, s0
-; VFP2-NEXT: vcmp.f32 s0, s2
-; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: vcmp.f32 s0, s6
-; VFP2-NEXT: vmov r0, s4
-; VFP2-NEXT: itt lt
-; VFP2-NEXT: movwlt r0, #32768
-; VFP2-NEXT: movtlt r0, #65535
-; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: it gt
-; VFP2-NEXT: movwgt r0, #32767
-; VFP2-NEXT: vcmp.f32 s0, s0
+; VFP2-NEXT: vmov s2, r0
+; VFP2-NEXT: vldr s0, .LCPI3_0
+; VFP2-NEXT: vmax.f32 d16, d1, d0
+; VFP2-NEXT: vldr s4, .LCPI3_1
+; VFP2-NEXT: vcmp.f32 s2, s2
; VFP2-NEXT: vmrs APSR_nzcv, fpscr
+; VFP2-NEXT: vmin.f32 d0, d16, d2
+; VFP2-NEXT: vcvt.s32.f32 s0, s0
+; VFP2-NEXT: vmov r0, s0
; VFP2-NEXT: it vs
; VFP2-NEXT: movvs r0, #0
; VFP2-NEXT: bx lr
@@ -456,23 +435,15 @@ define i19 @test_signed_i19_f32(float %f) nounwind {
;
; VFP2-LABEL: test_signed_i19_f32:
; VFP2: @ %bb.0:
-; VFP2-NEXT: vmov s0, r0
-; VFP2-NEXT: vldr s2, .LCPI4_0
-; VFP2-NEXT: vldr s6, .LCPI4_1
-; VFP2-NEXT: vcvt.s32.f32 s4, s0
-; VFP2-NEXT: vcmp.f32 s0, s2
-; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: vcmp.f32 s0, s6
-; VFP2-NEXT: vmov r0, s4
-; VFP2-NEXT: itt lt
-; VFP2-NEXT: movlt r0, #0
-; VFP2-NEXT: movtlt r0, #65532
-; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: vcmp.f32 s0, s0
-; VFP2-NEXT: itt gt
-; VFP2-NEXT: movwgt r0, #65535
-; VFP2-NEXT: movtgt r0, #3
+; VFP2-NEXT: vmov s2, r0
+; VFP2-NEXT: vldr s0, .LCPI4_0
+; VFP2-NEXT: vmax.f32 d16, d1, d0
+; VFP2-NEXT: vldr s4, .LCPI4_1
+; VFP2-NEXT: vcmp.f32 s2, s2
; VFP2-NEXT: vmrs APSR_nzcv, fpscr
+; VFP2-NEXT: vmin.f32 d0, d16, d2
+; VFP2-NEXT: vcvt.s32.f32 s0, s0
+; VFP2-NEXT: vmov r0, s0
; VFP2-NEXT: it vs
; VFP2-NEXT: movvs r0, #0
; VFP2-NEXT: bx lr
@@ -2492,23 +2463,22 @@ define i1 @test_signed_i1_f16(half %f) nounwind {
; VFP2-NEXT: .save {r7, lr}
; VFP2-NEXT: push {r7, lr}
; VFP2-NEXT: bl __aeabi_h2f
-; VFP2-NEXT: vmov s2, r0
; VFP2-NEXT: vmov.f32 s0, #-1.000000e+00
-; VFP2-NEXT: vcvt.s32.f32 s4, s2
-; VFP2-NEXT: vcmp.f32 s2, s0
-; VFP2-NEXT: vmov r0, s4
-; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: it lt
-; VFP2-NEXT: movlt.w r0, #-1
-; VFP2-NEXT: vcmp.f32 s2, #0
-; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: it gt
-; VFP2-NEXT: movgt r0, #0
+; VFP2-NEXT: vldr s4, .LCPI20_0
+; VFP2-NEXT: vmov s2, r0
; VFP2-NEXT: vcmp.f32 s2, s2
; VFP2-NEXT: vmrs APSR_nzcv, fpscr
+; VFP2-NEXT: vmax.f32 d16, d1, d0
+; VFP2-NEXT: vmin.f32 d0, d16, d2
+; VFP2-NEXT: vcvt.s32.f32 s0, s0
+; VFP2-NEXT: vmov r0, s0
; VFP2-NEXT: it vs
; VFP2-NEXT: movvs r0, #0
; VFP2-NEXT: pop {r7, pc}
+; VFP2-NEXT: .p2align 2
+; VFP2-NEXT: @ %bb.1:
+; VFP2-NEXT: .LCPI20_0:
+; VFP2-NEXT: .long 0x00000000 @ float 0
;
; FP16-LABEL: test_signed_i1_f16:
; FP16: @ %bb.0:
@@ -2588,21 +2558,15 @@ define i8 @test_signed_i8_f16(half %f) nounwind {
; VFP2-NEXT: .save {r7, lr}
; VFP2-NEXT: push {r7, lr}
; VFP2-NEXT: bl __aeabi_h2f
-; VFP2-NEXT: vmov s0, r0
-; VFP2-NEXT: vldr s2, .LCPI21_0
-; VFP2-NEXT: vldr s6, .LCPI21_1
-; VFP2-NEXT: vcvt.s32.f32 s4, s0
-; VFP2-NEXT: vcmp.f32 s0, s2
-; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: vcmp.f32 s0, s6
-; VFP2-NEXT: vmov r0, s4
-; VFP2-NEXT: it lt
-; VFP2-NEXT: mvnlt r0, #127
-; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: it gt
-; VFP2-NEXT: movgt r0, #127
-; VFP2-NEXT: vcmp.f32 s0, s0
+; VFP2-NEXT: vmov s2, r0
+; VFP2-NEXT: vldr s0, .LCPI21_0
+; VFP2-NEXT: vmax.f32 d16, d1, d0
+; VFP2-NEXT: vldr s4, .LCPI21_1
+; VFP2-NEXT: vcmp.f32 s2, s2
; VFP2-NEXT: vmrs APSR_nzcv, fpscr
+; VFP2-NEXT: vmin.f32 d0, d16, d2
+; VFP2-NEXT: vcvt.s32.f32 s0, s0
+; VFP2-NEXT: vmov r0, s0
; VFP2-NEXT: it vs
; VFP2-NEXT: movvs r0, #0
; VFP2-NEXT: pop {r7, pc}
@@ -2693,22 +2657,15 @@ define i13 @test_signed_i13_f16(half %f) nounwind {
; VFP2-NEXT: .save {r7, lr}
; VFP2-NEXT: push {r7, lr}
; VFP2-NEXT: bl __aeabi_h2f
-; VFP2-NEXT: vmov s0, r0
-; VFP2-NEXT: vldr s2, .LCPI22_0
-; VFP2-NEXT: vldr s6, .LCPI22_1
-; VFP2-NEXT: vcvt.s32.f32 s4, s0
-; VFP2-NEXT: vcmp.f32 s0, s2
-; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: vcmp.f32 s0, s6
-; VFP2-NEXT: vmov r0, s4
-; VFP2-NEXT: itt lt
-; VFP2-NEXT: movwlt r0, #61440
-; VFP2-NEXT: movtlt r0, #65535
-; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: it gt
-; VFP2-NEXT: movwgt r0, #4095
-; VFP2-NEXT: vcmp.f32 s0, s0
+; VFP2-NEXT: vmov s2, r0
+; VFP2-NEXT: vldr s0, .LCPI22_0
+; VFP2-NEXT: vmax.f32 d16, d1, d0
+; VFP2-NEXT: vldr s4, .LCPI22_1
+; VFP2-NEXT: vcmp.f32 s2, s2
; VFP2-NEXT: vmrs APSR_nzcv, fpscr
+; VFP2-NEXT: vmin.f32 d0, d16, d2
+; VFP2-NEXT: vcvt.s32.f32 s0, s0
+; VFP2-NEXT: vmov r0, s0
; VFP2-NEXT: it vs
; VFP2-NEXT: movvs r0, #0
; VFP2-NEXT: pop {r7, pc}
@@ -2799,22 +2756,15 @@ define i16 @test_signed_i16_f16(half %f) nounwind {
; VFP2-NEXT: .save {r7, lr}
; VFP2-NEXT: push {r7, lr}
; VFP2-NEXT: bl __aeabi_h2f
-; VFP2-NEXT: vmov s0, r0
-; VFP2-NEXT: vldr s2, .LCPI23_0
-; VFP2-NEXT: vldr s6, .LCPI23_1
-; VFP2-NEXT: vcvt.s32.f32 s4, s0
-; VFP2-NEXT: vcmp.f32 s0, s2
-; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: vcmp.f32 s0, s6
-; VFP2-NEXT: vmov r0, s4
-; VFP2-NEXT: itt lt
-; VFP2-NEXT: movwlt r0, #32768
-; VFP2-NEXT: movtlt r0, #65535
-; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: it gt
-; VFP2-NEXT: movwgt r0, #32767
-; VFP2-NEXT: vcmp.f32 s0, s0
+; VFP2-NEXT: vmov s2, r0
+; VFP2-NEXT: vldr s0, .LCPI23_0
+; VFP2-NEXT: vmax.f32 d16, d1, d0
+; VFP2-NEXT: vldr s4, .LCPI23_1
+; VFP2-NEXT: vcmp.f32 s2, s2
; VFP2-NEXT: vmrs APSR_nzcv, fpscr
+; VFP2-NEXT: vmin.f32 d0, d16, d2
+; VFP2-NEXT: vcvt.s32.f32 s0, s0
+; VFP2-NEXT: vmov r0, s0
; VFP2-NEXT: it vs
; VFP2-NEXT: movvs r0, #0
; VFP2-NEXT: pop {r7, pc}
@@ -2905,23 +2855,15 @@ define i19 @test_signed_i19_f16(half %f) nounwind {
; VFP2-NEXT: .save {r7, lr}
; VFP2-NEXT: push {r7, lr}
; VFP2-NEXT: bl __aeabi_h2f
-; VFP2-NEXT: vmov s0, r0
-; VFP2-NEXT: vldr s2, .LCPI24_0
-; VFP2-NEXT: vldr s6, .LCPI24_1
-; VFP2-NEXT: vcvt.s32.f32 s4, s0
-; VFP2-NEXT: vcmp.f32 s0, s2
-; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: vcmp.f32 s0, s6
-; VFP2-NEXT: vmov r0, s4
-; VFP2-NEXT: itt lt
-; VFP2-NEXT: movlt r0, #0
-; VFP2-NEXT: movtlt r0, #65532
-; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: vcmp.f32 s0, s0
-; VFP2-NEXT: itt gt
-; VFP2-NEXT: movwgt r0, #65535
-; VFP2-NEXT: movtgt r0, #3
+; VFP2-NEXT: vmov s2, r0
+; VFP2-NEXT: vldr s0, .LCPI24_0
+; VFP2-NEXT: vmax.f32 d16, d1, d0
+; VFP2-NEXT: vldr s4, .LCPI24_1
+; VFP2-NEXT: vcmp.f32 s2, s2
; VFP2-NEXT: vmrs APSR_nzcv, fpscr
+; VFP2-NEXT: vmin.f32 d0, d16, d2
+; VFP2-NEXT: vcvt.s32.f32 s0, s0
+; VFP2-NEXT: vmov r0, s0
; VFP2-NEXT: it vs
; VFP2-NEXT: movvs r0, #0
; VFP2-NEXT: pop {r7, pc}
diff --git a/llvm/test/CodeGen/ARM/fptoui-sat-scalar.ll b/llvm/test/CodeGen/ARM/fptoui-sat-scalar.ll
index 4cc5f943dadff..5ea9a79483873 100644
--- a/llvm/test/CodeGen/ARM/fptoui-sat-scalar.ll
+++ b/llvm/test/CodeGen/ARM/fptoui-sat-scalar.ll
@@ -49,19 +49,22 @@ define i1 @test_signed_i1_f32(float %f) nounwind {
;
; VFP2-LABEL: test_signed_i1_f32:
; VFP2: @ %bb.0:
-; VFP2-NEXT: vmov s0, r0
; VFP2-NEXT: vmov.f32 s4, #1.000000e+00
-; VFP2-NEXT: vcvt.u32.f32 s2, s0
-; VFP2-NEXT: vcmp.f32 s0, #0
+; VFP2-NEXT: vldr s2, .LCPI0_0
+; VFP2-NEXT: vmov s0, r0
+; VFP2-NEXT: vmax.f32 d16, d0, d1
+; VFP2-NEXT: vcmp.f32 s0, s0
; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: vcmp.f32 s0, s4
+; VFP2-NEXT: vmin.f32 d1, d16, d2
+; VFP2-NEXT: vcvt.u32.f32 s2, s2
; VFP2-NEXT: vmov r0, s2
-; VFP2-NEXT: it lt
-; VFP2-NEXT: movlt r0, #0
-; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: it gt
-; VFP2-NEXT: movgt r0, #1
+; VFP2-NEXT: it vs
+; VFP2-NEXT: movvs r0, #0
; VFP2-NEXT: bx lr
+; VFP2-NEXT: .p2align 2
+; VFP2-NEXT: @ %bb.1:
+; VFP2-NEXT: .LCPI0_0:
+; VFP2-NEXT: .long 0x00000000 @ float 0
;
; FP16-LABEL: test_signed_i1_f32:
; FP16: @ %bb.0:
@@ -69,9 +72,13 @@ define i1 @test_signed_i1_f32(float %f) nounwind {
; FP16-NEXT: vmov s2, r0
; FP16-NEXT: vmov.f32 s4, #1.000000e+00
; FP16-NEXT: vmaxnm.f32 s0, s2, s0
+; FP16-NEXT: vcmp.f32 s2, s2
; FP16-NEXT: vminnm.f32 s0, s0, s4
+; FP16-NEXT: vmrs APSR_nzcv, fpscr
; FP16-NEXT: vcvt.u32.f32 s0, s0
; FP16-NEXT: vmov r0, s0
+; FP16-NEXT: it vs
+; FP16-NEXT: movvs r0, #0
; FP16-NEXT: bx lr
; FP16-NEXT: .p2align 2
; FP16-NEXT: @ %bb.1:
@@ -115,22 +122,23 @@ define i8 @test_signed_i8_f32(float %f) nounwind {
;
; VFP2-LABEL: test_signed_i8_f32:
; VFP2: @ %bb.0:
-; VFP2-NEXT: vmov s0, r0
-; VFP2-NEXT: vldr s4, .LCPI1_0
-; VFP2-NEXT: vcvt.u32.f32 s2, s0
-; VFP2-NEXT: vcmp.f32 s0, #0
-; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: vcmp.f32 s0, s4
-; VFP2-NEXT: vmov r0, s2
-; VFP2-NEXT: it lt
-; VFP2-NEXT: movlt r0, #0
+; VFP2-NEXT: vmov s2, r0
+; VFP2-NEXT: vldr s0, .LCPI1_0
+; VFP2-NEXT: vmax.f32 d16, d1, d0
+; VFP2-NEXT: vldr s4, .LCPI1_1
+; VFP2-NEXT: vcmp.f32 s2, s2
; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: it gt
-; VFP2-NEXT: movgt r0, #255
+; VFP2-NEXT: vmin.f32 d0, d16, d2
+; VFP2-NEXT: vcvt.u32.f32 s0, s0
+; VFP2-NEXT: vmov r0, s0
+; VFP2-NEXT: it vs
+; VFP2-NEXT: movvs r0, #0
; VFP2-NEXT: bx lr
; VFP2-NEXT: .p2align 2
; VFP2-NEXT: @ %bb.1:
; VFP2-NEXT: .LCPI1_0:
+; VFP2-NEXT: .long 0x00000000 @ float 0
+; VFP2-NEXT: .LCPI1_1:
; VFP2-NEXT: .long 0x437f0000 @ float 255
;
; FP16-LABEL: test_signed_i8_f32:
@@ -139,9 +147,13 @@ define i8 @test_signed_i8_f32(float %f) nounwind {
; FP16-NEXT: vmov s2, r0
; FP16-NEXT: vldr s4, .LCPI1_1
; FP16-NEXT: vmaxnm.f32 s0, s2, s0
+; FP16-NEXT: vcmp.f32 s2, s2
; FP16-NEXT: vminnm.f32 s0, s0, s4
+; FP16-NEXT: vmrs APSR_nzcv, fpscr
; FP16-NEXT: vcvt.u32.f32 s0, s0
; FP16-NEXT: vmov r0, s0
+; FP16-NEXT: it vs
+; FP16-NEXT: movvs r0, #0
; FP16-NEXT: bx lr
; FP16-NEXT: .p2align 2
; FP16-NEXT: @ %bb.1:
@@ -189,22 +201,23 @@ define i13 @test_signed_i13_f32(float %f) nounwind {
;
; VFP2-LABEL: test_signed_i13_f32:
; VFP2: @ %bb.0:
-; VFP2-NEXT: vmov s0, r0
-; VFP2-NEXT: vldr s4, .LCPI2_0
-; VFP2-NEXT: vcvt.u32.f32 s2, s0
-; VFP2-NEXT: vcmp.f32 s0, #0
+; VFP2-NEXT: vmov s2, r0
+; VFP2-NEXT: vldr s0, .LCPI2_0
+; VFP2-NEXT: vmax.f32 d16, d1, d0
+; VFP2-NEXT: vldr s4, .LCPI2_1
+; VFP2-NEXT: vcmp.f32 s2, s2
; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: vcmp.f32 s0, s4
-; VFP2-NEXT: vmov r0, s2
-; VFP2-NEXT: it lt
-; VFP2-NEXT: movlt r0, #0
-; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: it gt
-; VFP2-NEXT: movwgt r0, #8191
+; VFP2-NEXT: vmin.f32 d0, d16, d2
+; VFP2-NEXT: vcvt.u32.f32 s0, s0
+; VFP2-NEXT: vmov r0, s0
+; VFP2-NEXT: it vs
+; VFP2-NEXT: movvs r0, #0
; VFP2-NEXT: bx lr
; VFP2-NEXT: .p2align 2
; VFP2-NEXT: @ %bb.1:
; VFP2-NEXT: .LCPI2_0:
+; VFP2-NEXT: .long 0x00000000 @ float 0
+; VFP2-NEXT: .LCPI2_1:
; VFP2-NEXT: .long 0x45fff800 @ float 8191
;
; FP16-LABEL: test_signed_i13_f32:
@@ -213,9 +226,13 @@ define i13 @test_signed_i13_f32(float %f) nounwind {
; FP16-NEXT: vmov s2, r0
; FP16-NEXT: vldr s4, .LCPI2_1
; FP16-NEXT: vmaxnm.f32 s0, s2, s0
+; FP16-NEXT: vcmp.f32 s2, s2
; FP16-NEXT: vminnm.f32 s0, s0, s4
+; FP16-NEXT: vmrs APSR_nzcv, fpscr
; FP16-NEXT: vcvt.u32.f32 s0, s0
; FP16-NEXT: vmov r0, s0
+; FP16-NEXT: it vs
+; FP16-NEXT: movvs r0, #0
; FP16-NEXT: bx lr
; FP16-NEXT: .p2align 2
; FP16-NEXT: @ %bb.1:
@@ -263,22 +280,23 @@ define i16 @test_signed_i16_f32(float %f) nounwind {
;
; VFP2-LABEL: test_signed_i16_f32:
; VFP2: @ %bb.0:
-; VFP2-NEXT: vmov s0, r0
-; VFP2-NEXT: vldr s4, .LCPI3_0
-; VFP2-NEXT: vcvt.u32.f32 s2, s0
-; VFP2-NEXT: vcmp.f32 s0, #0
-; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: vcmp.f32 s0, s4
-; VFP2-NEXT: vmov r0, s2
-; VFP2-NEXT: it lt
-; VFP2-NEXT: movlt r0, #0
+; VFP2-NEXT: vmov s2, r0
+; VFP2-NEXT: vldr s0, .LCPI3_0
+; VFP2-NEXT: vmax.f32 d16, d1, d0
+; VFP2-NEXT: vldr s4, .LCPI3_1
+; VFP2-NEXT: vcmp.f32 s2, s2
; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: it gt
-; VFP2-NEXT: movwgt r0, #65535
+; VFP2-NEXT: vmin.f32 d0, d16, d2
+; VFP2-NEXT: vcvt.u32.f32 s0, s0
+; VFP2-NEXT: vmov r0, s0
+; VFP2-NEXT: it vs
+; VFP2-NEXT: movvs r0, #0
; VFP2-NEXT: bx lr
; VFP2-NEXT: .p2align 2
; VFP2-NEXT: @ %bb.1:
; VFP2-NEXT: .LCPI3_0:
+; VFP2-NEXT: .long 0x00000000 @ float 0
+; VFP2-NEXT: .LCPI3_1:
; VFP2-NEXT: .long 0x477fff00 @ float 65535
;
; FP16-LABEL: test_signed_i16_f32:
@@ -287,9 +305,13 @@ define i16 @test_signed_i16_f32(float %f) nounwind {
; FP16-NEXT: vmov s2, r0
; FP16-NEXT: vldr s4, .LCPI3_1
; FP16-NEXT: vmaxnm.f32 s0, s2, s0
+; FP16-NEXT: vcmp.f32 s2, s2
; FP16-NEXT: vminnm.f32 s0, s0, s4
+; FP16-NEXT: vmrs APSR_nzcv, fpscr
; FP16-NEXT: vcvt.u32.f32 s0, s0
; FP16-NEXT: vmov r0, s0
+; FP16-NEXT: it vs
+; FP16-NEXT: movvs r0, #0
; FP16-NEXT: bx lr
; FP16-NEXT: .p2align 2
; FP16-NEXT: @ %bb.1:
@@ -337,23 +359,23 @@ define i19 @test_signed_i19_f32(float %f) nounwind {
;
; VFP2-LABEL: test_signed_i19_f32:
; VFP2: @ %bb.0:
-; VFP2-NEXT: vmov s0, r0
-; VFP2-NEXT: vldr s4, .LCPI4_0
-; VFP2-NEXT: vcvt.u32.f32 s2, s0
-; VFP2-NEXT: vcmp.f32 s0, #0
-; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: vcmp.f32 s0, s4
-; VFP2-NEXT: vmov r0, s2
-; VFP2-NEXT: it lt
-; VFP2-NEXT: movlt r0, #0
+; VFP2-NEXT: vmov s2, r0
+; VFP2-NEXT: vldr s0, .LCPI4_0
+; VFP2-NEXT: vmax.f32 d16, d1, d0
+; VFP2-NEXT: vldr s4, .LCPI4_1
+; VFP2-NEXT: vcmp.f32 s2, s2
; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: itt gt
-; VFP2-NEXT: movwgt r0, #65535
-; VFP2-NEXT: movtgt r0, #7
+; VFP2-NEXT: vmin.f32 d0, d16, d2
+; VFP2-NEXT: vcvt.u32.f32 s0, s0
+; VFP2-NEXT: vmov r0, s0
+; VFP2-NEXT: it vs
+; VFP2-NEXT: movvs r0, #0
; VFP2-NEXT: bx lr
; VFP2-NEXT: .p2align 2
; VFP2-NEXT: @ %bb.1:
; VFP2-NEXT: .LCPI4_0:
+; VFP2-NEXT: .long 0x00000000 @ float 0
+; VFP2-NEXT: .LCPI4_1:
; VFP2-NEXT: .long 0x48ffffe0 @ float 524287
;
; FP16-LABEL: test_signed_i19_f32:
@@ -362,9 +384,13 @@ define i19 @test_signed_i19_f32(float %f) nounwind {
; FP16-NEXT: vmov s2, r0
; FP16-NEXT: vldr s4, .LCPI4_1
; FP16-NEXT: vmaxnm.f32 s0, s2, s0
+; FP16-NEXT: vcmp.f32 s2, s2
; FP16-NEXT: vminnm.f32 s0, s0, s4
+; FP16-NEXT: vmrs APSR_nzcv, fpscr
; FP16-NEXT: vcvt.u32.f32 s0, s0
; FP16-NEXT: vmov r0, s0
+; FP16-NEXT: it vs
+; FP16-NEXT: movvs r0, #0
; FP16-NEXT: bx lr
; FP16-NEXT: .p2align 2
; FP16-NEXT: @ %bb.1:
@@ -876,9 +902,13 @@ define i1 @test_signed_i1_f64(double %f) nounwind {
; FP16-NEXT: vldr d1, .LCPI10_0
; FP16-NEXT: vmov d2, r0, r1
; FP16-NEXT: vmaxnm.f64 d1, d2, d1
+; FP16-NEXT: vcmp.f64 d2, d2
; FP16-NEXT: vminnm.f64 d0, d1, d0
; FP16-NEXT: vcvt.u32.f64 s0, d0
; FP16-NEXT: vmov r0, s0
+; FP16-NEXT: vmrs APSR_nzcv, fpscr
+; FP16-NEXT: it vs
+; FP16-NEXT: movvs r0, #0
; FP16-NEXT: bx lr
; FP16-NEXT: .p2align 3
; FP16-NEXT: @ %bb.1:
@@ -959,9 +989,13 @@ define i8 @test_signed_i8_f64(double %f) nounwind {
; FP16-NEXT: vmov d1, r0, r1
; FP16-NEXT: vldr d2, .LCPI11_1
; FP16-NEXT: vmaxnm.f64 d0, d1, d0
+; FP16-NEXT: vcmp.f64 d1, d1
; FP16-NEXT: vminnm.f64 d0, d0, d2
; FP16-NEXT: vcvt.u32.f64 s0, d0
; FP16-NEXT: vmov r0, s0
+; FP16-NEXT: vmrs APSR_nzcv, fpscr
+; FP16-NEXT: it vs
+; FP16-NEXT: movvs r0, #0
; FP16-NEXT: bx lr
; FP16-NEXT: .p2align 3
; FP16-NEXT: @ %bb.1:
@@ -1047,9 +1081,13 @@ define i13 @test_signed_i13_f64(double %f) nounwind {
; FP16-NEXT: vmov d1, r0, r1
; FP16-NEXT: vldr d2, .LCPI12_1
; FP16-NEXT: vmaxnm.f64 d0, d1, d0
+; FP16-NEXT: vcmp.f64 d1, d1
; FP16-NEXT: vminnm.f64 d0, d0, d2
; FP16-NEXT: vcvt.u32.f64 s0, d0
; FP16-NEXT: vmov r0, s0
+; FP16-NEXT: vmrs APSR_nzcv, fpscr
+; FP16-NEXT: it vs
+; FP16-NEXT: movvs r0, #0
; FP16-NEXT: bx lr
; FP16-NEXT: .p2align 3
; FP16-NEXT: @ %bb.1:
@@ -1135,9 +1173,13 @@ define i16 @test_signed_i16_f64(double %f) nounwind {
; FP16-NEXT: vmov d1, r0, r1
; FP16-NEXT: vldr d2, .LCPI13_1
; FP16-NEXT: vmaxnm.f64 d0, d1, d0
+; FP16-NEXT: vcmp.f64 d1, d1
; FP16-NEXT: vminnm.f64 d0, d0, d2
; FP16-NEXT: vcvt.u32.f64 s0, d0
; FP16-NEXT: vmov r0, s0
+; FP16-NEXT: vmrs APSR_nzcv, fpscr
+; FP16-NEXT: it vs
+; FP16-NEXT: movvs r0, #0
; FP16-NEXT: bx lr
; FP16-NEXT: .p2align 3
; FP16-NEXT: @ %bb.1:
@@ -1224,9 +1266,13 @@ define i19 @test_signed_i19_f64(double %f) nounwind {
; FP16-NEXT: vmov d1, r0, r1
; FP16-NEXT: vldr d2, .LCPI14_1
; FP16-NEXT: vmaxnm.f64 d0, d1, d0
+; FP16-NEXT: vcmp.f64 d1, d1
; FP16-NEXT: vminnm.f64 d0, d0, d2
; FP16-NEXT: vcvt.u32.f64 s0, d0
; FP16-NEXT: vmov r0, s0
+; FP16-NEXT: vmrs APSR_nzcv, fpscr
+; FP16-NEXT: it vs
+; FP16-NEXT: movvs r0, #0
; FP16-NEXT: bx lr
; FP16-NEXT: .p2align 3
; FP16-NEXT: @ %bb.1:
@@ -1393,13 +1439,21 @@ define i50 @test_signed_i50_f64(double %f) nounwind {
; FP16: @ %bb.0:
; FP16-NEXT: .save {r7, lr}
; FP16-NEXT: push {r7, lr}
+; FP16-NEXT: .vsave {d8}
+; FP16-NEXT: vpush {d8}
; FP16-NEXT: vldr d0, .LCPI16_0
-; FP16-NEXT: vmov d1, r0, r1
-; FP16-NEXT: vldr d2, .LCPI16_1
-; FP16-NEXT: vmaxnm.f64 d0, d1, d0
-; FP16-NEXT: vminnm.f64 d0, d0, d2
+; FP16-NEXT: vmov d8, r0, r1
+; FP16-NEXT: vldr d1, .LCPI16_1
+; FP16-NEXT: vmaxnm.f64 d0, d8, d0
+; FP16-NEXT: vminnm.f64 d0, d0, d1
; FP16-NEXT: vmov r0, r1, d0
; FP16-NEXT: bl __aeabi_d2ulz
+; FP16-NEXT: vcmp.f64 d8, d8
+; FP16-NEXT: vmrs APSR_nzcv, fpscr
+; FP16-NEXT: itt vs
+; FP16-NEXT: movvs r0, #0
+; FP16-NEXT: movvs r1, #0
+; FP16-NEXT: vpop {d8}
; FP16-NEXT: pop {r7, pc}
; FP16-NEXT: .p2align 3
; FP16-NEXT: @ %bb.1:
@@ -1864,19 +1918,22 @@ define i1 @test_signed_i1_f16(half %f) nounwind {
; VFP2-NEXT: .save {r7, lr}
; VFP2-NEXT: push {r7, lr}
; VFP2-NEXT: bl __aeabi_h2f
-; VFP2-NEXT: vmov s0, r0
; VFP2-NEXT: vmov.f32 s4, #1.000000e+00
-; VFP2-NEXT: vcvt.u32.f32 s2, s0
-; VFP2-NEXT: vcmp.f32 s0, #0
+; VFP2-NEXT: vldr s2, .LCPI20_0
+; VFP2-NEXT: vmov s0, r0
+; VFP2-NEXT: vmax.f32 d16, d0, d1
+; VFP2-NEXT: vcmp.f32 s0, s0
; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: vcmp.f32 s0, s4
+; VFP2-NEXT: vmin.f32 d1, d16, d2
+; VFP2-NEXT: vcvt.u32.f32 s2, s2
; VFP2-NEXT: vmov r0, s2
-; VFP2-NEXT: it lt
-; VFP2-NEXT: movlt r0, #0
-; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: it gt
-; VFP2-NEXT: movgt r0, #1
+; VFP2-NEXT: it vs
+; VFP2-NEXT: movvs r0, #0
; VFP2-NEXT: pop {r7, pc}
+; VFP2-NEXT: .p2align 2
+; VFP2-NEXT: @ %bb.1:
+; VFP2-NEXT: .LCPI20_0:
+; VFP2-NEXT: .long 0x00000000 @ float 0
;
; FP16-LABEL: test_signed_i1_f16:
; FP16: @ %bb.0:
@@ -1884,10 +1941,14 @@ define i1 @test_signed_i1_f16(half %f) nounwind {
; FP16-NEXT: vldr s4, .LCPI20_0
; FP16-NEXT: vcvtb.f32.f16 s0, s0
; FP16-NEXT: vmov.f32 s2, #1.000000e+00
-; FP16-NEXT: vmaxnm.f32 s0, s0, s4
-; FP16-NEXT: vminnm.f32 s0, s0, s2
-; FP16-NEXT: vcvt.u32.f32 s0, s0
-; FP16-NEXT: vmov r0, s0
+; FP16-NEXT: vmaxnm.f32 s4, s0, s4
+; FP16-NEXT: vcmp.f32 s0, s0
+; FP16-NEXT: vminnm.f32 s2, s4, s2
+; FP16-NEXT: vmrs APSR_nzcv, fpscr
+; FP16-NEXT: vcvt.u32.f32 s2, s2
+; FP16-NEXT: vmov r0, s2
+; FP16-NEXT: it vs
+; FP16-NEXT: movvs r0, #0
; FP16-NEXT: bx lr
; FP16-NEXT: .p2align 2
; FP16-NEXT: @ %bb.1:
@@ -1936,22 +1997,23 @@ define i8 @test_signed_i8_f16(half %f) nounwind {
; VFP2-NEXT: .save {r7, lr}
; VFP2-NEXT: push {r7, lr}
; VFP2-NEXT: bl __aeabi_h2f
-; VFP2-NEXT: vmov s0, r0
-; VFP2-NEXT: vldr s4, .LCPI21_0
-; VFP2-NEXT: vcvt.u32.f32 s2, s0
-; VFP2-NEXT: vcmp.f32 s0, #0
-; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: vcmp.f32 s0, s4
-; VFP2-NEXT: vmov r0, s2
-; VFP2-NEXT: it lt
-; VFP2-NEXT: movlt r0, #0
+; VFP2-NEXT: vmov s2, r0
+; VFP2-NEXT: vldr s0, .LCPI21_0
+; VFP2-NEXT: vmax.f32 d16, d1, d0
+; VFP2-NEXT: vldr s4, .LCPI21_1
+; VFP2-NEXT: vcmp.f32 s2, s2
; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: it gt
-; VFP2-NEXT: movgt r0, #255
+; VFP2-NEXT: vmin.f32 d0, d16, d2
+; VFP2-NEXT: vcvt.u32.f32 s0, s0
+; VFP2-NEXT: vmov r0, s0
+; VFP2-NEXT: it vs
+; VFP2-NEXT: movvs r0, #0
; VFP2-NEXT: pop {r7, pc}
; VFP2-NEXT: .p2align 2
; VFP2-NEXT: @ %bb.1:
; VFP2-NEXT: .LCPI21_0:
+; VFP2-NEXT: .long 0x00000000 @ float 0
+; VFP2-NEXT: .LCPI21_1:
; VFP2-NEXT: .long 0x437f0000 @ float 255
;
; FP16-LABEL: test_signed_i8_f16:
@@ -1960,10 +2022,14 @@ define i8 @test_signed_i8_f16(half %f) nounwind {
; FP16-NEXT: vldr s4, .LCPI21_1
; FP16-NEXT: vcvtb.f32.f16 s0, s0
; FP16-NEXT: vldr s2, .LCPI21_0
-; FP16-NEXT: vmaxnm.f32 s0, s0, s4
-; FP16-NEXT: vminnm.f32 s0, s0, s2
-; FP16-NEXT: vcvt.u32.f32 s0, s0
-; FP16-NEXT: vmov r0, s0
+; FP16-NEXT: vmaxnm.f32 s4, s0, s4
+; FP16-NEXT: vcmp.f32 s0, s0
+; FP16-NEXT: vminnm.f32 s2, s4, s2
+; FP16-NEXT: vmrs APSR_nzcv, fpscr
+; FP16-NEXT: vcvt.u32.f32 s2, s2
+; FP16-NEXT: vmov r0, s2
+; FP16-NEXT: it vs
+; FP16-NEXT: movvs r0, #0
; FP16-NEXT: bx lr
; FP16-NEXT: .p2align 2
; FP16-NEXT: @ %bb.1:
@@ -2016,22 +2082,23 @@ define i13 @test_signed_i13_f16(half %f) nounwind {
; VFP2-NEXT: .save {r7, lr}
; VFP2-NEXT: push {r7, lr}
; VFP2-NEXT: bl __aeabi_h2f
-; VFP2-NEXT: vmov s0, r0
-; VFP2-NEXT: vldr s4, .LCPI22_0
-; VFP2-NEXT: vcvt.u32.f32 s2, s0
-; VFP2-NEXT: vcmp.f32 s0, #0
-; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: vcmp.f32 s0, s4
-; VFP2-NEXT: vmov r0, s2
-; VFP2-NEXT: it lt
-; VFP2-NEXT: movlt r0, #0
+; VFP2-NEXT: vmov s2, r0
+; VFP2-NEXT: vldr s0, .LCPI22_0
+; VFP2-NEXT: vmax.f32 d16, d1, d0
+; VFP2-NEXT: vldr s4, .LCPI22_1
+; VFP2-NEXT: vcmp.f32 s2, s2
; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: it gt
-; VFP2-NEXT: movwgt r0, #8191
+; VFP2-NEXT: vmin.f32 d0, d16, d2
+; VFP2-NEXT: vcvt.u32.f32 s0, s0
+; VFP2-NEXT: vmov r0, s0
+; VFP2-NEXT: it vs
+; VFP2-NEXT: movvs r0, #0
; VFP2-NEXT: pop {r7, pc}
; VFP2-NEXT: .p2align 2
; VFP2-NEXT: @ %bb.1:
; VFP2-NEXT: .LCPI22_0:
+; VFP2-NEXT: .long 0x00000000 @ float 0
+; VFP2-NEXT: .LCPI22_1:
; VFP2-NEXT: .long 0x45fff800 @ float 8191
;
; FP16-LABEL: test_signed_i13_f16:
@@ -2040,10 +2107,14 @@ define i13 @test_signed_i13_f16(half %f) nounwind {
; FP16-NEXT: vldr s4, .LCPI22_1
; FP16-NEXT: vcvtb.f32.f16 s0, s0
; FP16-NEXT: vldr s2, .LCPI22_0
-; FP16-NEXT: vmaxnm.f32 s0, s0, s4
-; FP16-NEXT: vminnm.f32 s0, s0, s2
-; FP16-NEXT: vcvt.u32.f32 s0, s0
-; FP16-NEXT: vmov r0, s0
+; FP16-NEXT: vmaxnm.f32 s4, s0, s4
+; FP16-NEXT: vcmp.f32 s0, s0
+; FP16-NEXT: vminnm.f32 s2, s4, s2
+; FP16-NEXT: vmrs APSR_nzcv, fpscr
+; FP16-NEXT: vcvt.u32.f32 s2, s2
+; FP16-NEXT: vmov r0, s2
+; FP16-NEXT: it vs
+; FP16-NEXT: movvs r0, #0
; FP16-NEXT: bx lr
; FP16-NEXT: .p2align 2
; FP16-NEXT: @ %bb.1:
@@ -2096,22 +2167,23 @@ define i16 @test_signed_i16_f16(half %f) nounwind {
; VFP2-NEXT: .save {r7, lr}
; VFP2-NEXT: push {r7, lr}
; VFP2-NEXT: bl __aeabi_h2f
-; VFP2-NEXT: vmov s0, r0
-; VFP2-NEXT: vldr s4, .LCPI23_0
-; VFP2-NEXT: vcvt.u32.f32 s2, s0
-; VFP2-NEXT: vcmp.f32 s0, #0
+; VFP2-NEXT: vmov s2, r0
+; VFP2-NEXT: vldr s0, .LCPI23_0
+; VFP2-NEXT: vmax.f32 d16, d1, d0
+; VFP2-NEXT: vldr s4, .LCPI23_1
+; VFP2-NEXT: vcmp.f32 s2, s2
; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: vcmp.f32 s0, s4
-; VFP2-NEXT: vmov r0, s2
-; VFP2-NEXT: it lt
-; VFP2-NEXT: movlt r0, #0
-; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: it gt
-; VFP2-NEXT: movwgt r0, #65535
+; VFP2-NEXT: vmin.f32 d0, d16, d2
+; VFP2-NEXT: vcvt.u32.f32 s0, s0
+; VFP2-NEXT: vmov r0, s0
+; VFP2-NEXT: it vs
+; VFP2-NEXT: movvs r0, #0
; VFP2-NEXT: pop {r7, pc}
; VFP2-NEXT: .p2align 2
; VFP2-NEXT: @ %bb.1:
; VFP2-NEXT: .LCPI23_0:
+; VFP2-NEXT: .long 0x00000000 @ float 0
+; VFP2-NEXT: .LCPI23_1:
; VFP2-NEXT: .long 0x477fff00 @ float 65535
;
; FP16-LABEL: test_signed_i16_f16:
@@ -2120,10 +2192,14 @@ define i16 @test_signed_i16_f16(half %f) nounwind {
; FP16-NEXT: vldr s4, .LCPI23_1
; FP16-NEXT: vcvtb.f32.f16 s0, s0
; FP16-NEXT: vldr s2, .LCPI23_0
-; FP16-NEXT: vmaxnm.f32 s0, s0, s4
-; FP16-NEXT: vminnm.f32 s0, s0, s2
-; FP16-NEXT: vcvt.u32.f32 s0, s0
-; FP16-NEXT: vmov r0, s0
+; FP16-NEXT: vmaxnm.f32 s4, s0, s4
+; FP16-NEXT: vcmp.f32 s0, s0
+; FP16-NEXT: vminnm.f32 s2, s4, s2
+; FP16-NEXT: vmrs APSR_nzcv, fpscr
+; FP16-NEXT: vcvt.u32.f32 s2, s2
+; FP16-NEXT: vmov r0, s2
+; FP16-NEXT: it vs
+; FP16-NEXT: movvs r0, #0
; FP16-NEXT: bx lr
; FP16-NEXT: .p2align 2
; FP16-NEXT: @ %bb.1:
@@ -2176,23 +2252,23 @@ define i19 @test_signed_i19_f16(half %f) nounwind {
; VFP2-NEXT: .save {r7, lr}
; VFP2-NEXT: push {r7, lr}
; VFP2-NEXT: bl __aeabi_h2f
-; VFP2-NEXT: vmov s0, r0
-; VFP2-NEXT: vldr s4, .LCPI24_0
-; VFP2-NEXT: vcvt.u32.f32 s2, s0
-; VFP2-NEXT: vcmp.f32 s0, #0
-; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: vcmp.f32 s0, s4
-; VFP2-NEXT: vmov r0, s2
-; VFP2-NEXT: it lt
-; VFP2-NEXT: movlt r0, #0
+; VFP2-NEXT: vmov s2, r0
+; VFP2-NEXT: vldr s0, .LCPI24_0
+; VFP2-NEXT: vmax.f32 d16, d1, d0
+; VFP2-NEXT: vldr s4, .LCPI24_1
+; VFP2-NEXT: vcmp.f32 s2, s2
; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: itt gt
-; VFP2-NEXT: movwgt r0, #65535
-; VFP2-NEXT: movtgt r0, #7
+; VFP2-NEXT: vmin.f32 d0, d16, d2
+; VFP2-NEXT: vcvt.u32.f32 s0, s0
+; VFP2-NEXT: vmov r0, s0
+; VFP2-NEXT: it vs
+; VFP2-NEXT: movvs r0, #0
; VFP2-NEXT: pop {r7, pc}
; VFP2-NEXT: .p2align 2
; VFP2-NEXT: @ %bb.1:
; VFP2-NEXT: .LCPI24_0:
+; VFP2-NEXT: .long 0x00000000 @ float 0
+; VFP2-NEXT: .LCPI24_1:
; VFP2-NEXT: .long 0x48ffffe0 @ float 524287
;
; FP16-LABEL: test_signed_i19_f16:
@@ -2201,10 +2277,14 @@ define i19 @test_signed_i19_f16(half %f) nounwind {
; FP16-NEXT: vldr s4, .LCPI24_1
; FP16-NEXT: vcvtb.f32.f16 s0, s0
; FP16-NEXT: vldr s2, .LCPI24_0
-; FP16-NEXT: vmaxnm.f32 s0, s0, s4
-; FP16-NEXT: vminnm.f32 s0, s0, s2
-; FP16-NEXT: vcvt.u32.f32 s0, s0
-; FP16-NEXT: vmov r0, s0
+; FP16-NEXT: vmaxnm.f32 s4, s0, s4
+; FP16-NEXT: vcmp.f32 s0, s0
+; FP16-NEXT: vminnm.f32 s2, s4, s2
+; FP16-NEXT: vmrs APSR_nzcv, fpscr
+; FP16-NEXT: vcvt.u32.f32 s2, s2
+; FP16-NEXT: vmov r0, s2
+; FP16-NEXT: it vs
+; FP16-NEXT: movvs r0, #0
; FP16-NEXT: bx lr
; FP16-NEXT: .p2align 2
; FP16-NEXT: @ %bb.1:
diff --git a/llvm/test/CodeGen/Hexagon/fptoi.sat.ll b/llvm/test/CodeGen/Hexagon/fptoi.sat.ll
new file mode 100644
index 0000000000000..864b3b62379b6
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/fptoi.sat.ll
@@ -0,0 +1,247 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=hexagon < %s | FileCheck %s
+
+define i32 @fptoui.sat.i32.f32(float %x) {
+; CHECK-LABEL: fptoui.sat.i32.f32:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r1 = convert_sf2uw(r0):chop
+; CHECK-NEXT: r3:2 = combine(##1333788671,#0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = sfcmp.gt(r2,r0)
+; CHECK-NEXT: p1 = sfcmp.uo(r2,r0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = or(p1,p0)
+; CHECK-NEXT: p2 = sfcmp.uo(r0,r3)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: p3 = sfcmp.gt(r0,r3)
+; CHECK-NEXT: if (p0) r1 = #0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: p1 = and(p3,!p2)
+; CHECK-NEXT: if (p1.new) r1 = #-1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = r1
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %res = call i32 @llvm.fptoui.sat(float %x)
+ ret i32 %res
+}
+
+define i16 @fptoui.sat.i16.f32(float %x) {
+; CHECK-LABEL: fptoui.sat.i16.f32:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r2 = ##1199570688
+; CHECK-NEXT: r1 = #0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = sfmax(r0,r1)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = sfmin(r0,r2)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = convert_sf2uw(r0):chop
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %res = call i16 @llvm.fptoui.sat(float %x)
+ ret i16 %res
+}
+
+define i64 @fptoui.sat.i64.f64(double %x) {
+; CHECK-LABEL: fptoui.sat.i64.f64:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r9:8 = convert_df2ud(r1:0):chop
+; CHECK-NEXT: r3:2 = combine(#0,#0)
+; CHECK-NEXT: r5:4 = CONST64(#4895412794951729151)
+; CHECK-NEXT: r6 = #0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = dfcmp.gt(r3:2,r1:0)
+; CHECK-NEXT: p2 = dfcmp.uo(r3:2,r1:0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = or(p2,p0)
+; CHECK-NEXT: p1 = dfcmp.gt(r1:0,r5:4)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: p2 = dfcmp.uo(r1:0,r5:4)
+; CHECK-NEXT: r1 = #-1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: p1 = and(p1,!p2)
+; CHECK-NEXT: r2 = mux(p0,r6,r8)
+; CHECK-NEXT: r3 = mux(p0,r6,r9)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = mux(p1,r1,r2)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: if (!p1) r1 = add(r3,#0)
+; CHECK-NEXT: }
+ %res = call i64 @llvm.fptoui.sat(double %x)
+ ret i64 %res
+}
+
+define i32 @fptoui.sat.i32.f64(double %x) {
+; CHECK-LABEL: fptoui.sat.i32.f64:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r3:2 = combine(#0,#0)
+; CHECK-NEXT: r5:4 = CONST64(#4751297606873776128)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r1:0 = dfmax(r1:0,r3:2)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r1:0 = dfmin(r1:0,r5:4)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = convert_df2uw(r1:0):chop
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %res = call i32 @llvm.fptoui.sat(double %x)
+ ret i32 %res
+}
+
+define i32 @fptosi.sat.i32.f32(float %x) {
+; CHECK-LABEL: fptosi.sat.i32.f32:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r2 = ##-822083584
+; CHECK-NEXT: r3 = ##1325400063
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r1 = convert_sf2w(r0):chop
+; CHECK-NEXT: p1 = sfcmp.gt(r2,r0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: p2 = sfcmp.uo(r2,r0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: p1 = or(p2,p1)
+; CHECK-NEXT: p0 = sfcmp.uo(r0,r3)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: p2 = sfcmp.gt(r0,r3)
+; CHECK-NEXT: p1 = sfcmp.uo(r0,r0)
+; CHECK-NEXT: if (p1) r1 = ##-2147483648
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = and(p2,!p0)
+; CHECK-NEXT: if (p0.new) r1 = ##2147483647
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (p1) r1 = #0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = r1
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %res = call i32 @llvm.fptosi.sat(float %x)
+ ret i32 %res
+}
+
+define i16 @fptosi.sat.i16.f32(float %x) {
+; CHECK-LABEL: fptosi.sat.i16.f32:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r1 = ##-956301312
+; CHECK-NEXT: r2 = ##1191181824
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r1 = sfmax(r0,r1)
+; CHECK-NEXT: p0 = sfcmp.uo(r0,r0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r1 = sfmin(r1,r2)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = convert_sf2w(r1):chop
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (p0) r0 = #0
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %res = call i16 @llvm.fptosi.sat(float %x)
+ ret i16 %res
+}
+
+define i64 @fptosi.sat.i64.f64(double %x) {
+; CHECK-LABEL: fptosi.sat.i64.f64:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = dfcmp.uo(r1:0,r1:0)
+; CHECK-NEXT: r6 = #0
+; CHECK-NEXT: r3:2 = CONST64(#-4332462841530417152)
+; CHECK-NEXT: r5:4 = CONST64(#4890909195324358655)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: p1 = dfcmp.gt(r3:2,r1:0)
+; CHECK-NEXT: p2 = dfcmp.uo(r3:2,r1:0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: p1 = or(p2,p1)
+; CHECK-NEXT: r3:2 = convert_df2d(r1:0):chop
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: p2 = dfcmp.uo(r1:0,r5:4)
+; CHECK-NEXT: p3 = dfcmp.gt(r1:0,r5:4)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: p2 = and(p3,!p2)
+; CHECK-NEXT: r1 = mux(p1,r6,r2)
+; CHECK-NEXT: r2 = mux(p1,##-2147483648,r3)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (p2) r1 = #-1
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = mux(p0,r6,r1)
+; CHECK-NEXT: if (p2) r2 = ##2147483647
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r1 = mux(p0,r6,r2)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %res = call i64 @llvm.fptosi.sat(double %x)
+ ret i64 %res
+}
+
+define i32 @fptosi.sat.i32.f64(double %x) {
+; CHECK-LABEL: fptosi.sat.i32.f64:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = dfcmp.uo(r1:0,r1:0)
+; CHECK-NEXT: r3:2 = CONST64(#-4476578029606273024)
+; CHECK-NEXT: r5:4 = CONST64(#4746794007244308480)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r3:2 = dfmax(r1:0,r3:2)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r3:2 = dfmin(r3:2,r5:4)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = convert_df2w(r3:2):chop
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: if (p0) r0 = #0
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %res = call i32 @llvm.fptosi.sat(double %x)
+ ret i32 %res
+}
diff --git a/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll b/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll
index ee040feca4240..1a22270ea0ebe 100644
--- a/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll
@@ -1191,32 +1191,48 @@ define arm_aapcs_vfpcc <4 x i1> @test_unsigned_v4f32_v4i1(<4 x float> %f) {
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr s4, .LCPI22_0
; CHECK-NEXT: vmov.f32 s6, #1.000000e+00
+; CHECK-NEXT: vcmp.f32 s0, s0
; CHECK-NEXT: movs r1, #0
-; CHECK-NEXT: vmaxnm.f32 s0, s0, s4
+; CHECK-NEXT: vmaxnm.f32 s12, s0, s4
; CHECK-NEXT: vmaxnm.f32 s8, s3, s4
-; CHECK-NEXT: vminnm.f32 s0, s0, s6
-; CHECK-NEXT: vmaxnm.f32 s2, s2, s4
-; CHECK-NEXT: vcvt.u32.f32 s0, s0
+; CHECK-NEXT: vminnm.f32 s12, s12, s6
+; CHECK-NEXT: vmaxnm.f32 s10, s2, s4
+; CHECK-NEXT: vcvt.u32.f32 s12, s12
; CHECK-NEXT: vmaxnm.f32 s4, s1, s4
; CHECK-NEXT: vminnm.f32 s4, s4, s6
-; CHECK-NEXT: vminnm.f32 s2, s2, s6
+; CHECK-NEXT: vminnm.f32 s10, s10, s6
; CHECK-NEXT: vcvt.u32.f32 s4, s4
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcvt.u32.f32 s10, s10
; CHECK-NEXT: vminnm.f32 s8, s8, s6
-; CHECK-NEXT: vcvt.u32.f32 s2, s2
+; CHECK-NEXT: vcmp.f32 s1, s1
; CHECK-NEXT: vcvt.u32.f32 s8, s8
-; CHECK-NEXT: vmov r2, s0
+; CHECK-NEXT: vmov r2, s12
+; CHECK-NEXT: it vs
+; CHECK-NEXT: movvs r2, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: and r2, r2, #1
-; CHECK-NEXT: rsbs r2, r2, #0
+; CHECK-NEXT: vcmp.f32 s2, s2
+; CHECK-NEXT: rsb.w r2, r2, #0
; CHECK-NEXT: bfi r1, r2, #0, #1
; CHECK-NEXT: vmov r2, s4
+; CHECK-NEXT: it vs
+; CHECK-NEXT: movvs r2, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: and r2, r2, #1
-; CHECK-NEXT: rsbs r2, r2, #0
+; CHECK-NEXT: vcmp.f32 s3, s3
+; CHECK-NEXT: rsb.w r2, r2, #0
; CHECK-NEXT: bfi r1, r2, #1, #1
-; CHECK-NEXT: vmov r2, s2
+; CHECK-NEXT: vmov r2, s10
+; CHECK-NEXT: it vs
+; CHECK-NEXT: movvs r2, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: and r2, r2, #1
-; CHECK-NEXT: rsbs r2, r2, #0
+; CHECK-NEXT: rsb.w r2, r2, #0
; CHECK-NEXT: bfi r1, r2, #2, #1
; CHECK-NEXT: vmov r2, s8
+; CHECK-NEXT: it vs
+; CHECK-NEXT: movvs r2, #0
; CHECK-NEXT: and r2, r2, #1
; CHECK-NEXT: rsbs r2, r2, #0
; CHECK-NEXT: bfi r1, r2, #3, #1
@@ -1234,25 +1250,41 @@ define arm_aapcs_vfpcc <4 x i8> @test_unsigned_v4f32_v4i8(<4 x float> %f) {
; CHECK-MVE-LABEL: test_unsigned_v4f32_v4i8:
; CHECK-MVE: @ %bb.0:
; CHECK-MVE-NEXT: vldr s4, .LCPI23_0
+; CHECK-MVE-NEXT: vcmp.f32 s2, s2
; CHECK-MVE-NEXT: vldr s6, .LCPI23_1
-; CHECK-MVE-NEXT: vmaxnm.f32 s2, s2, s4
-; CHECK-MVE-NEXT: vmaxnm.f32 s0, s0, s4
+; CHECK-MVE-NEXT: vmaxnm.f32 s12, s2, s4
+; CHECK-MVE-NEXT: vmaxnm.f32 s10, s0, s4
+; CHECK-MVE-NEXT: vminnm.f32 s12, s12, s6
; CHECK-MVE-NEXT: vmaxnm.f32 s8, s3, s4
-; CHECK-MVE-NEXT: vminnm.f32 s2, s2, s6
-; CHECK-MVE-NEXT: vminnm.f32 s0, s0, s6
+; CHECK-MVE-NEXT: vminnm.f32 s10, s10, s6
; CHECK-MVE-NEXT: vmaxnm.f32 s4, s1, s4
+; CHECK-MVE-NEXT: vcvt.u32.f32 s12, s12
; CHECK-MVE-NEXT: vminnm.f32 s8, s8, s6
; CHECK-MVE-NEXT: vminnm.f32 s4, s4, s6
-; CHECK-MVE-NEXT: vcvt.u32.f32 s2, s2
-; CHECK-MVE-NEXT: vcvt.u32.f32 s0, s0
+; CHECK-MVE-NEXT: vcvt.u32.f32 s10, s10
; CHECK-MVE-NEXT: vcvt.u32.f32 s8, s8
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcvt.u32.f32 s4, s4
-; CHECK-MVE-NEXT: vmov r0, s2
-; CHECK-MVE-NEXT: vmov r1, s0
+; CHECK-MVE-NEXT: vcmp.f32 s0, s0
+; CHECK-MVE-NEXT: vmov r0, s12
+; CHECK-MVE-NEXT: it vs
+; CHECK-MVE-NEXT: movvs r0, #0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vmov r1, s10
+; CHECK-MVE-NEXT: vcmp.f32 s3, s3
+; CHECK-MVE-NEXT: it vs
+; CHECK-MVE-NEXT: movvs r1, #0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vmov r2, s8
+; CHECK-MVE-NEXT: vcmp.f32 s1, s1
; CHECK-MVE-NEXT: vmov q0[2], q0[0], r1, r0
-; CHECK-MVE-NEXT: vmov r0, s8
-; CHECK-MVE-NEXT: vmov r1, s4
-; CHECK-MVE-NEXT: vmov q0[3], q0[1], r1, r0
+; CHECK-MVE-NEXT: vmov r3, s4
+; CHECK-MVE-NEXT: it vs
+; CHECK-MVE-NEXT: movvs r2, #0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it vs
+; CHECK-MVE-NEXT: movvs r3, #0
+; CHECK-MVE-NEXT: vmov q0[3], q0[1], r3, r2
; CHECK-MVE-NEXT: bx lr
; CHECK-MVE-NEXT: .p2align 2
; CHECK-MVE-NEXT: @ %bb.1:
@@ -1275,25 +1307,41 @@ define arm_aapcs_vfpcc <4 x i13> @test_unsigned_v4f32_v4i13(<4 x float> %f) {
; CHECK-MVE-LABEL: test_unsigned_v4f32_v4i13:
; CHECK-MVE: @ %bb.0:
; CHECK-MVE-NEXT: vldr s4, .LCPI24_0
+; CHECK-MVE-NEXT: vcmp.f32 s2, s2
; CHECK-MVE-NEXT: vldr s6, .LCPI24_1
-; CHECK-MVE-NEXT: vmaxnm.f32 s2, s2, s4
-; CHECK-MVE-NEXT: vmaxnm.f32 s0, s0, s4
+; CHECK-MVE-NEXT: vmaxnm.f32 s12, s2, s4
+; CHECK-MVE-NEXT: vmaxnm.f32 s10, s0, s4
+; CHECK-MVE-NEXT: vminnm.f32 s12, s12, s6
; CHECK-MVE-NEXT: vmaxnm.f32 s8, s3, s4
-; CHECK-MVE-NEXT: vminnm.f32 s2, s2, s6
-; CHECK-MVE-NEXT: vminnm.f32 s0, s0, s6
+; CHECK-MVE-NEXT: vminnm.f32 s10, s10, s6
; CHECK-MVE-NEXT: vmaxnm.f32 s4, s1, s4
+; CHECK-MVE-NEXT: vcvt.u32.f32 s12, s12
; CHECK-MVE-NEXT: vminnm.f32 s8, s8, s6
; CHECK-MVE-NEXT: vminnm.f32 s4, s4, s6
-; CHECK-MVE-NEXT: vcvt.u32.f32 s2, s2
-; CHECK-MVE-NEXT: vcvt.u32.f32 s0, s0
+; CHECK-MVE-NEXT: vcvt.u32.f32 s10, s10
; CHECK-MVE-NEXT: vcvt.u32.f32 s8, s8
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcvt.u32.f32 s4, s4
-; CHECK-MVE-NEXT: vmov r0, s2
-; CHECK-MVE-NEXT: vmov r1, s0
+; CHECK-MVE-NEXT: vcmp.f32 s0, s0
+; CHECK-MVE-NEXT: vmov r0, s12
+; CHECK-MVE-NEXT: it vs
+; CHECK-MVE-NEXT: movvs r0, #0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vmov r1, s10
+; CHECK-MVE-NEXT: vcmp.f32 s3, s3
+; CHECK-MVE-NEXT: it vs
+; CHECK-MVE-NEXT: movvs r1, #0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vmov r2, s8
+; CHECK-MVE-NEXT: vcmp.f32 s1, s1
; CHECK-MVE-NEXT: vmov q0[2], q0[0], r1, r0
-; CHECK-MVE-NEXT: vmov r0, s8
-; CHECK-MVE-NEXT: vmov r1, s4
-; CHECK-MVE-NEXT: vmov q0[3], q0[1], r1, r0
+; CHECK-MVE-NEXT: vmov r3, s4
+; CHECK-MVE-NEXT: it vs
+; CHECK-MVE-NEXT: movvs r2, #0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it vs
+; CHECK-MVE-NEXT: movvs r3, #0
+; CHECK-MVE-NEXT: vmov q0[3], q0[1], r3, r2
; CHECK-MVE-NEXT: bx lr
; CHECK-MVE-NEXT: .p2align 2
; CHECK-MVE-NEXT: @ %bb.1:
@@ -1316,25 +1364,41 @@ define arm_aapcs_vfpcc <4 x i16> @test_unsigned_v4f32_v4i16(<4 x float> %f) {
; CHECK-MVE-LABEL: test_unsigned_v4f32_v4i16:
; CHECK-MVE: @ %bb.0:
; CHECK-MVE-NEXT: vldr s4, .LCPI25_0
+; CHECK-MVE-NEXT: vcmp.f32 s2, s2
; CHECK-MVE-NEXT: vldr s6, .LCPI25_1
-; CHECK-MVE-NEXT: vmaxnm.f32 s2, s2, s4
-; CHECK-MVE-NEXT: vmaxnm.f32 s0, s0, s4
+; CHECK-MVE-NEXT: vmaxnm.f32 s12, s2, s4
+; CHECK-MVE-NEXT: vmaxnm.f32 s10, s0, s4
+; CHECK-MVE-NEXT: vminnm.f32 s12, s12, s6
; CHECK-MVE-NEXT: vmaxnm.f32 s8, s3, s4
-; CHECK-MVE-NEXT: vminnm.f32 s2, s2, s6
-; CHECK-MVE-NEXT: vminnm.f32 s0, s0, s6
+; CHECK-MVE-NEXT: vminnm.f32 s10, s10, s6
; CHECK-MVE-NEXT: vmaxnm.f32 s4, s1, s4
+; CHECK-MVE-NEXT: vcvt.u32.f32 s12, s12
; CHECK-MVE-NEXT: vminnm.f32 s8, s8, s6
; CHECK-MVE-NEXT: vminnm.f32 s4, s4, s6
-; CHECK-MVE-NEXT: vcvt.u32.f32 s2, s2
-; CHECK-MVE-NEXT: vcvt.u32.f32 s0, s0
+; CHECK-MVE-NEXT: vcvt.u32.f32 s10, s10
; CHECK-MVE-NEXT: vcvt.u32.f32 s8, s8
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcvt.u32.f32 s4, s4
-; CHECK-MVE-NEXT: vmov r0, s2
-; CHECK-MVE-NEXT: vmov r1, s0
+; CHECK-MVE-NEXT: vcmp.f32 s0, s0
+; CHECK-MVE-NEXT: vmov r0, s12
+; CHECK-MVE-NEXT: it vs
+; CHECK-MVE-NEXT: movvs r0, #0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vmov r1, s10
+; CHECK-MVE-NEXT: vcmp.f32 s3, s3
+; CHECK-MVE-NEXT: it vs
+; CHECK-MVE-NEXT: movvs r1, #0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vmov r2, s8
+; CHECK-MVE-NEXT: vcmp.f32 s1, s1
; CHECK-MVE-NEXT: vmov q0[2], q0[0], r1, r0
-; CHECK-MVE-NEXT: vmov r0, s8
-; CHECK-MVE-NEXT: vmov r1, s4
-; CHECK-MVE-NEXT: vmov q0[3], q0[1], r1, r0
+; CHECK-MVE-NEXT: vmov r3, s4
+; CHECK-MVE-NEXT: it vs
+; CHECK-MVE-NEXT: movvs r2, #0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it vs
+; CHECK-MVE-NEXT: movvs r3, #0
+; CHECK-MVE-NEXT: vmov q0[3], q0[1], r3, r2
; CHECK-MVE-NEXT: bx lr
; CHECK-MVE-NEXT: .p2align 2
; CHECK-MVE-NEXT: @ %bb.1:
@@ -1357,25 +1421,41 @@ define arm_aapcs_vfpcc <4 x i19> @test_unsigned_v4f32_v4i19(<4 x float> %f) {
; CHECK-MVE-LABEL: test_unsigned_v4f32_v4i19:
; CHECK-MVE: @ %bb.0:
; CHECK-MVE-NEXT: vldr s4, .LCPI26_0
+; CHECK-MVE-NEXT: vcmp.f32 s2, s2
; CHECK-MVE-NEXT: vldr s6, .LCPI26_1
-; CHECK-MVE-NEXT: vmaxnm.f32 s2, s2, s4
-; CHECK-MVE-NEXT: vmaxnm.f32 s0, s0, s4
+; CHECK-MVE-NEXT: vmaxnm.f32 s12, s2, s4
+; CHECK-MVE-NEXT: vmaxnm.f32 s10, s0, s4
+; CHECK-MVE-NEXT: vminnm.f32 s12, s12, s6
; CHECK-MVE-NEXT: vmaxnm.f32 s8, s3, s4
-; CHECK-MVE-NEXT: vminnm.f32 s2, s2, s6
-; CHECK-MVE-NEXT: vminnm.f32 s0, s0, s6
+; CHECK-MVE-NEXT: vminnm.f32 s10, s10, s6
; CHECK-MVE-NEXT: vmaxnm.f32 s4, s1, s4
+; CHECK-MVE-NEXT: vcvt.u32.f32 s12, s12
; CHECK-MVE-NEXT: vminnm.f32 s8, s8, s6
; CHECK-MVE-NEXT: vminnm.f32 s4, s4, s6
-; CHECK-MVE-NEXT: vcvt.u32.f32 s2, s2
-; CHECK-MVE-NEXT: vcvt.u32.f32 s0, s0
+; CHECK-MVE-NEXT: vcvt.u32.f32 s10, s10
; CHECK-MVE-NEXT: vcvt.u32.f32 s8, s8
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vcvt.u32.f32 s4, s4
-; CHECK-MVE-NEXT: vmov r0, s2
-; CHECK-MVE-NEXT: vmov r1, s0
+; CHECK-MVE-NEXT: vcmp.f32 s0, s0
+; CHECK-MVE-NEXT: vmov r0, s12
+; CHECK-MVE-NEXT: it vs
+; CHECK-MVE-NEXT: movvs r0, #0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vmov r1, s10
+; CHECK-MVE-NEXT: vcmp.f32 s3, s3
+; CHECK-MVE-NEXT: it vs
+; CHECK-MVE-NEXT: movvs r1, #0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vmov r2, s8
+; CHECK-MVE-NEXT: vcmp.f32 s1, s1
; CHECK-MVE-NEXT: vmov q0[2], q0[0], r1, r0
-; CHECK-MVE-NEXT: vmov r0, s8
-; CHECK-MVE-NEXT: vmov r1, s4
-; CHECK-MVE-NEXT: vmov q0[3], q0[1], r1, r0
+; CHECK-MVE-NEXT: vmov r3, s4
+; CHECK-MVE-NEXT: it vs
+; CHECK-MVE-NEXT: movvs r2, #0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it vs
+; CHECK-MVE-NEXT: movvs r3, #0
+; CHECK-MVE-NEXT: vmov q0[3], q0[1], r3, r2
; CHECK-MVE-NEXT: bx lr
; CHECK-MVE-NEXT: .p2align 2
; CHECK-MVE-NEXT: @ %bb.1:
@@ -2910,74 +2990,109 @@ declare <8 x i128> @llvm.fptoui.sat.v8f16.v8i128(<8 x half>)
define arm_aapcs_vfpcc <8 x i1> @test_unsigned_v8f16_v8i1(<8 x half> %f) {
; CHECK-LABEL: test_unsigned_v8f16_v8i1:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vldr s4, .LCPI42_0
-; CHECK-NEXT: vcvtt.f32.f16 s8, s3
-; CHECK-NEXT: vcvtb.f32.f16 s10, s3
-; CHECK-NEXT: vcvtb.f32.f16 s3, s0
-; CHECK-NEXT: vmov.f32 s6, #1.000000e+00
-; CHECK-NEXT: vmaxnm.f32 s3, s3, s4
-; CHECK-NEXT: vminnm.f32 s3, s3, s6
-; CHECK-NEXT: vcvtt.f32.f16 s0, s0
-; CHECK-NEXT: vcvt.u32.f32 s3, s3
-; CHECK-NEXT: vmaxnm.f32 s0, s0, s4
-; CHECK-NEXT: vminnm.f32 s0, s0, s6
-; CHECK-NEXT: movs r1, #0
-; CHECK-NEXT: vcvt.u32.f32 s0, s0
-; CHECK-NEXT: vcvtt.f32.f16 s14, s1
-; CHECK-NEXT: vcvtb.f32.f16 s1, s1
-; CHECK-NEXT: vmaxnm.f32 s14, s14, s4
-; CHECK-NEXT: vmaxnm.f32 s1, s1, s4
-; CHECK-NEXT: vminnm.f32 s14, s14, s6
-; CHECK-NEXT: vminnm.f32 s1, s1, s6
-; CHECK-NEXT: vcvt.u32.f32 s14, s14
-; CHECK-NEXT: vcvt.u32.f32 s1, s1
+; CHECK-NEXT: .vsave {d8}
+; CHECK-NEXT: vpush {d8}
+; CHECK-NEXT: vldr s7, .LCPI42_0
+; CHECK-NEXT: vcvtb.f32.f16 s15, s0
+; CHECK-NEXT: vmov.f32 s5, #1.000000e+00
; CHECK-NEXT: vcvtt.f32.f16 s12, s2
-; CHECK-NEXT: vmov r2, s3
+; CHECK-NEXT: vmaxnm.f32 s16, s15, s7
+; CHECK-NEXT: vcvtt.f32.f16 s9, s1
+; CHECK-NEXT: vminnm.f32 s16, s16, s5
+; CHECK-NEXT: vcvtt.f32.f16 s4, s3
+; CHECK-NEXT: vcvt.u32.f32 s16, s16
+; CHECK-NEXT: vcvtb.f32.f16 s8, s3
; CHECK-NEXT: vcvtb.f32.f16 s2, s2
-; CHECK-NEXT: vmaxnm.f32 s2, s2, s4
-; CHECK-NEXT: vmaxnm.f32 s12, s12, s4
-; CHECK-NEXT: vminnm.f32 s2, s2, s6
-; CHECK-NEXT: vminnm.f32 s12, s12, s6
-; CHECK-NEXT: vcvt.u32.f32 s2, s2
-; CHECK-NEXT: vmaxnm.f32 s10, s10, s4
-; CHECK-NEXT: vcvt.u32.f32 s12, s12
-; CHECK-NEXT: vminnm.f32 s10, s10, s6
-; CHECK-NEXT: vcvt.u32.f32 s10, s10
-; CHECK-NEXT: vmaxnm.f32 s8, s8, s4
-; CHECK-NEXT: vminnm.f32 s8, s8, s6
-; CHECK-NEXT: vcvt.u32.f32 s8, s8
+; CHECK-NEXT: vcvtb.f32.f16 s1, s1
+; CHECK-NEXT: vcvtt.f32.f16 s0, s0
+; CHECK-NEXT: vmaxnm.f32 s6, s4, s7
+; CHECK-NEXT: vmaxnm.f32 s10, s8, s7
+; CHECK-NEXT: vmaxnm.f32 s14, s12, s7
+; CHECK-NEXT: vmaxnm.f32 s3, s2, s7
+; CHECK-NEXT: vmaxnm.f32 s11, s9, s7
+; CHECK-NEXT: vmaxnm.f32 s13, s1, s7
+; CHECK-NEXT: vmaxnm.f32 s7, s0, s7
+; CHECK-NEXT: vminnm.f32 s6, s6, s5
+; CHECK-NEXT: vminnm.f32 s10, s10, s5
+; CHECK-NEXT: vminnm.f32 s14, s14, s5
+; CHECK-NEXT: vminnm.f32 s3, s3, s5
+; CHECK-NEXT: vminnm.f32 s11, s11, s5
+; CHECK-NEXT: vminnm.f32 s13, s13, s5
+; CHECK-NEXT: vminnm.f32 s5, s7, s5
+; CHECK-NEXT: vcmp.f32 s15, s15
+; CHECK-NEXT: vcvt.u32.f32 s5, s5
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vmov r2, s16
+; CHECK-NEXT: mov.w r1, #0
+; CHECK-NEXT: it vs
+; CHECK-NEXT: movvs r2, #0
+; CHECK-NEXT: vcvt.u32.f32 s13, s13
; CHECK-NEXT: and r2, r2, #1
+; CHECK-NEXT: vcmp.f32 s0, s0
; CHECK-NEXT: rsbs r2, r2, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: bfi r1, r2, #0, #1
-; CHECK-NEXT: vmov r2, s0
+; CHECK-NEXT: vcvt.u32.f32 s11, s11
+; CHECK-NEXT: vcmp.f32 s1, s1
+; CHECK-NEXT: vmov r2, s5
+; CHECK-NEXT: it vs
+; CHECK-NEXT: movvs r2, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: and r2, r2, #1
-; CHECK-NEXT: rsbs r2, r2, #0
+; CHECK-NEXT: vcvt.u32.f32 s3, s3
+; CHECK-NEXT: rsb.w r2, r2, #0
+; CHECK-NEXT: vcmp.f32 s9, s9
; CHECK-NEXT: bfi r1, r2, #1, #1
-; CHECK-NEXT: vmov r2, s1
+; CHECK-NEXT: vmov r2, s13
+; CHECK-NEXT: it vs
+; CHECK-NEXT: movvs r2, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: and r2, r2, #1
-; CHECK-NEXT: rsbs r2, r2, #0
+; CHECK-NEXT: vcvt.u32.f32 s14, s14
+; CHECK-NEXT: rsb.w r2, r2, #0
+; CHECK-NEXT: vcmp.f32 s2, s2
; CHECK-NEXT: bfi r1, r2, #2, #1
-; CHECK-NEXT: vmov r2, s14
+; CHECK-NEXT: vmov r2, s11
+; CHECK-NEXT: it vs
+; CHECK-NEXT: movvs r2, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: and r2, r2, #1
-; CHECK-NEXT: rsbs r2, r2, #0
+; CHECK-NEXT: vcvt.u32.f32 s10, s10
+; CHECK-NEXT: rsb.w r2, r2, #0
+; CHECK-NEXT: vcmp.f32 s12, s12
; CHECK-NEXT: bfi r1, r2, #3, #1
-; CHECK-NEXT: vmov r2, s2
+; CHECK-NEXT: vmov r2, s3
+; CHECK-NEXT: it vs
+; CHECK-NEXT: movvs r2, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: and r2, r2, #1
-; CHECK-NEXT: rsbs r2, r2, #0
+; CHECK-NEXT: vcvt.u32.f32 s6, s6
+; CHECK-NEXT: rsb.w r2, r2, #0
+; CHECK-NEXT: vcmp.f32 s8, s8
; CHECK-NEXT: bfi r1, r2, #4, #1
-; CHECK-NEXT: vmov r2, s12
+; CHECK-NEXT: vmov r2, s14
+; CHECK-NEXT: it vs
+; CHECK-NEXT: movvs r2, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: and r2, r2, #1
-; CHECK-NEXT: rsbs r2, r2, #0
+; CHECK-NEXT: vcmp.f32 s4, s4
+; CHECK-NEXT: rsb.w r2, r2, #0
; CHECK-NEXT: bfi r1, r2, #5, #1
; CHECK-NEXT: vmov r2, s10
+; CHECK-NEXT: it vs
+; CHECK-NEXT: movvs r2, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: and r2, r2, #1
-; CHECK-NEXT: rsbs r2, r2, #0
+; CHECK-NEXT: rsb.w r2, r2, #0
; CHECK-NEXT: bfi r1, r2, #6, #1
-; CHECK-NEXT: vmov r2, s8
+; CHECK-NEXT: vmov r2, s6
+; CHECK-NEXT: it vs
+; CHECK-NEXT: movvs r2, #0
; CHECK-NEXT: and r2, r2, #1
; CHECK-NEXT: rsbs r2, r2, #0
; CHECK-NEXT: bfi r1, r2, #7, #1
; CHECK-NEXT: strb r1, [r0]
+; CHECK-NEXT: vpop {d8}
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
; CHECK-NEXT: @ %bb.1:
@@ -2990,57 +3105,94 @@ define arm_aapcs_vfpcc <8 x i1> @test_unsigned_v8f16_v8i1(<8 x half> %f) {
define arm_aapcs_vfpcc <8 x i8> @test_unsigned_v8f16_v8i8(<8 x half> %f) {
; CHECK-MVE-LABEL: test_unsigned_v8f16_v8i8:
; CHECK-MVE: @ %bb.0:
+; CHECK-MVE-NEXT: .save {r4, r5, r7, lr}
+; CHECK-MVE-NEXT: push {r4, r5, r7, lr}
+; CHECK-MVE-NEXT: .vsave {d8}
+; CHECK-MVE-NEXT: vpush {d8}
; CHECK-MVE-NEXT: vldr s6, .LCPI43_1
-; CHECK-MVE-NEXT: vcvtt.f32.f16 s10, s2
-; CHECK-MVE-NEXT: vcvtb.f32.f16 s2, s2
+; CHECK-MVE-NEXT: vcvtt.f32.f16 s13, s3
+; CHECK-MVE-NEXT: vcvtb.f32.f16 s3, s3
; CHECK-MVE-NEXT: vldr s4, .LCPI43_0
-; CHECK-MVE-NEXT: vmaxnm.f32 s2, s2, s6
-; CHECK-MVE-NEXT: vcvtt.f32.f16 s8, s3
-; CHECK-MVE-NEXT: vminnm.f32 s2, s2, s4
-; CHECK-MVE-NEXT: vcvtb.f32.f16 s12, s3
-; CHECK-MVE-NEXT: vcvt.u32.f32 s5, s2
-; CHECK-MVE-NEXT: vcvtt.f32.f16 s2, s0
-; CHECK-MVE-NEXT: vmaxnm.f32 s2, s2, s6
-; CHECK-MVE-NEXT: vcvtb.f32.f16 s0, s0
-; CHECK-MVE-NEXT: vmaxnm.f32 s0, s0, s6
-; CHECK-MVE-NEXT: vminnm.f32 s2, s2, s4
-; CHECK-MVE-NEXT: vminnm.f32 s0, s0, s4
-; CHECK-MVE-NEXT: vcvt.u32.f32 s7, s2
-; CHECK-MVE-NEXT: vcvtb.f32.f16 s2, s1
-; CHECK-MVE-NEXT: vcvtt.f32.f16 s14, s1
-; CHECK-MVE-NEXT: vmaxnm.f32 s2, s2, s6
-; CHECK-MVE-NEXT: vcvt.u32.f32 s0, s0
-; CHECK-MVE-NEXT: vmaxnm.f32 s8, s8, s6
-; CHECK-MVE-NEXT: vmaxnm.f32 s10, s10, s6
-; CHECK-MVE-NEXT: vmaxnm.f32 s12, s12, s6
-; CHECK-MVE-NEXT: vmaxnm.f32 s14, s14, s6
-; CHECK-MVE-NEXT: vminnm.f32 s2, s2, s4
-; CHECK-MVE-NEXT: vminnm.f32 s8, s8, s4
+; CHECK-MVE-NEXT: vmaxnm.f32 s16, s3, s6
+; CHECK-MVE-NEXT: vcvtt.f32.f16 s7, s2
+; CHECK-MVE-NEXT: vmaxnm.f32 s15, s13, s6
+; CHECK-MVE-NEXT: vminnm.f32 s16, s16, s4
+; CHECK-MVE-NEXT: vcvtb.f32.f16 s2, s2
+; CHECK-MVE-NEXT: vminnm.f32 s15, s15, s4
+; CHECK-MVE-NEXT: vmaxnm.f32 s11, s2, s6
+; CHECK-MVE-NEXT: vcvt.u32.f32 s16, s16
+; CHECK-MVE-NEXT: vcvtt.f32.f16 s12, s1
+; CHECK-MVE-NEXT: vmaxnm.f32 s9, s7, s6
+; CHECK-MVE-NEXT: vminnm.f32 s11, s11, s4
+; CHECK-MVE-NEXT: vcvtb.f32.f16 s1, s1
+; CHECK-MVE-NEXT: vcvt.u32.f32 s15, s15
+; CHECK-MVE-NEXT: vcvtb.f32.f16 s8, s0
+; CHECK-MVE-NEXT: vmaxnm.f32 s5, s1, s6
+; CHECK-MVE-NEXT: vminnm.f32 s9, s9, s4
+; CHECK-MVE-NEXT: vcvt.u32.f32 s11, s11
+; CHECK-MVE-NEXT: vmaxnm.f32 s10, s8, s6
+; CHECK-MVE-NEXT: vmaxnm.f32 s14, s12, s6
+; CHECK-MVE-NEXT: vminnm.f32 s5, s5, s4
+; CHECK-MVE-NEXT: vcvt.u32.f32 s9, s9
+; CHECK-MVE-NEXT: vcvtt.f32.f16 s0, s0
; CHECK-MVE-NEXT: vminnm.f32 s10, s10, s4
-; CHECK-MVE-NEXT: vminnm.f32 s12, s12, s4
; CHECK-MVE-NEXT: vminnm.f32 s14, s14, s4
-; CHECK-MVE-NEXT: vcvt.u32.f32 s4, s2
-; CHECK-MVE-NEXT: vcvt.u32.f32 s14, s14
+; CHECK-MVE-NEXT: vcvt.u32.f32 s5, s5
+; CHECK-MVE-NEXT: vmaxnm.f32 s6, s0, s6
+; CHECK-MVE-NEXT: vminnm.f32 s4, s6, s4
; CHECK-MVE-NEXT: vcvt.u32.f32 s10, s10
-; CHECK-MVE-NEXT: vcvt.u32.f32 s12, s12
-; CHECK-MVE-NEXT: vmov r0, s0
-; CHECK-MVE-NEXT: vcvt.u32.f32 s8, s8
-; CHECK-MVE-NEXT: vmov.16 q0[0], r0
-; CHECK-MVE-NEXT: vmov r0, s7
-; CHECK-MVE-NEXT: vmov.16 q0[1], r0
-; CHECK-MVE-NEXT: vmov r0, s4
-; CHECK-MVE-NEXT: vmov.16 q0[2], r0
-; CHECK-MVE-NEXT: vmov r0, s14
-; CHECK-MVE-NEXT: vmov.16 q0[3], r0
+; CHECK-MVE-NEXT: vcvt.u32.f32 s14, s14
+; CHECK-MVE-NEXT: vcvt.u32.f32 s4, s4
+; CHECK-MVE-NEXT: vcmp.f32 s3, s3
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vmov r12, s16
+; CHECK-MVE-NEXT: vcmp.f32 s13, s13
+; CHECK-MVE-NEXT: it vs
+; CHECK-MVE-NEXT: movvs.w r12, #0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vmov lr, s15
+; CHECK-MVE-NEXT: vcmp.f32 s2, s2
+; CHECK-MVE-NEXT: it vs
+; CHECK-MVE-NEXT: movvs.w lr, #0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vmov r2, s11
+; CHECK-MVE-NEXT: vcmp.f32 s7, s7
+; CHECK-MVE-NEXT: it vs
+; CHECK-MVE-NEXT: movvs r2, #0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vmov r3, s9
+; CHECK-MVE-NEXT: vcmp.f32 s1, s1
+; CHECK-MVE-NEXT: it vs
+; CHECK-MVE-NEXT: movvs r3, #0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov r0, s5
-; CHECK-MVE-NEXT: vmov.16 q0[4], r0
-; CHECK-MVE-NEXT: vmov r0, s10
-; CHECK-MVE-NEXT: vmov.16 q0[5], r0
-; CHECK-MVE-NEXT: vmov r0, s12
-; CHECK-MVE-NEXT: vmov.16 q0[6], r0
-; CHECK-MVE-NEXT: vmov r0, s8
-; CHECK-MVE-NEXT: vmov.16 q0[7], r0
-; CHECK-MVE-NEXT: bx lr
+; CHECK-MVE-NEXT: vcmp.f32 s12, s12
+; CHECK-MVE-NEXT: it vs
+; CHECK-MVE-NEXT: movvs r0, #0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vmov r1, s14
+; CHECK-MVE-NEXT: vmov r4, s10
+; CHECK-MVE-NEXT: it vs
+; CHECK-MVE-NEXT: movvs r1, #0
+; CHECK-MVE-NEXT: vcmp.f32 s8, s8
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it vs
+; CHECK-MVE-NEXT: movvs r4, #0
+; CHECK-MVE-NEXT: vcmp.f32 s0, s0
+; CHECK-MVE-NEXT: vmov.16 q0[0], r4
+; CHECK-MVE-NEXT: vmov r5, s4
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it vs
+; CHECK-MVE-NEXT: movvs r5, #0
+; CHECK-MVE-NEXT: vmov.16 q0[1], r5
+; CHECK-MVE-NEXT: vmov.16 q0[2], r0
+; CHECK-MVE-NEXT: vmov.16 q0[3], r1
+; CHECK-MVE-NEXT: vmov.16 q0[4], r2
+; CHECK-MVE-NEXT: vmov.16 q0[5], r3
+; CHECK-MVE-NEXT: vmov.16 q0[6], r12
+; CHECK-MVE-NEXT: vmov.16 q0[7], lr
+; CHECK-MVE-NEXT: vpop {d8}
+; CHECK-MVE-NEXT: pop {r4, r5, r7, pc}
; CHECK-MVE-NEXT: .p2align 2
; CHECK-MVE-NEXT: @ %bb.1:
; CHECK-MVE-NEXT: .LCPI43_0:
@@ -3061,57 +3213,94 @@ define arm_aapcs_vfpcc <8 x i8> @test_unsigned_v8f16_v8i8(<8 x half> %f) {
define arm_aapcs_vfpcc <8 x i13> @test_unsigned_v8f16_v8i13(<8 x half> %f) {
; CHECK-MVE-LABEL: test_unsigned_v8f16_v8i13:
; CHECK-MVE: @ %bb.0:
+; CHECK-MVE-NEXT: .save {r4, r5, r7, lr}
+; CHECK-MVE-NEXT: push {r4, r5, r7, lr}
+; CHECK-MVE-NEXT: .vsave {d8}
+; CHECK-MVE-NEXT: vpush {d8}
; CHECK-MVE-NEXT: vldr s6, .LCPI44_1
-; CHECK-MVE-NEXT: vcvtt.f32.f16 s10, s2
-; CHECK-MVE-NEXT: vcvtb.f32.f16 s2, s2
+; CHECK-MVE-NEXT: vcvtt.f32.f16 s13, s3
+; CHECK-MVE-NEXT: vcvtb.f32.f16 s3, s3
; CHECK-MVE-NEXT: vldr s4, .LCPI44_0
-; CHECK-MVE-NEXT: vmaxnm.f32 s2, s2, s6
-; CHECK-MVE-NEXT: vcvtt.f32.f16 s8, s3
-; CHECK-MVE-NEXT: vminnm.f32 s2, s2, s4
-; CHECK-MVE-NEXT: vcvtb.f32.f16 s12, s3
-; CHECK-MVE-NEXT: vcvt.u32.f32 s5, s2
-; CHECK-MVE-NEXT: vcvtt.f32.f16 s2, s0
-; CHECK-MVE-NEXT: vmaxnm.f32 s2, s2, s6
-; CHECK-MVE-NEXT: vcvtb.f32.f16 s0, s0
-; CHECK-MVE-NEXT: vmaxnm.f32 s0, s0, s6
-; CHECK-MVE-NEXT: vminnm.f32 s2, s2, s4
-; CHECK-MVE-NEXT: vminnm.f32 s0, s0, s4
-; CHECK-MVE-NEXT: vcvt.u32.f32 s7, s2
-; CHECK-MVE-NEXT: vcvtb.f32.f16 s2, s1
-; CHECK-MVE-NEXT: vcvtt.f32.f16 s14, s1
-; CHECK-MVE-NEXT: vmaxnm.f32 s2, s2, s6
-; CHECK-MVE-NEXT: vcvt.u32.f32 s0, s0
-; CHECK-MVE-NEXT: vmaxnm.f32 s8, s8, s6
-; CHECK-MVE-NEXT: vmaxnm.f32 s10, s10, s6
-; CHECK-MVE-NEXT: vmaxnm.f32 s12, s12, s6
-; CHECK-MVE-NEXT: vmaxnm.f32 s14, s14, s6
-; CHECK-MVE-NEXT: vminnm.f32 s2, s2, s4
-; CHECK-MVE-NEXT: vminnm.f32 s8, s8, s4
+; CHECK-MVE-NEXT: vmaxnm.f32 s16, s3, s6
+; CHECK-MVE-NEXT: vcvtt.f32.f16 s7, s2
+; CHECK-MVE-NEXT: vmaxnm.f32 s15, s13, s6
+; CHECK-MVE-NEXT: vminnm.f32 s16, s16, s4
+; CHECK-MVE-NEXT: vcvtb.f32.f16 s2, s2
+; CHECK-MVE-NEXT: vminnm.f32 s15, s15, s4
+; CHECK-MVE-NEXT: vmaxnm.f32 s11, s2, s6
+; CHECK-MVE-NEXT: vcvt.u32.f32 s16, s16
+; CHECK-MVE-NEXT: vcvtt.f32.f16 s12, s1
+; CHECK-MVE-NEXT: vmaxnm.f32 s9, s7, s6
+; CHECK-MVE-NEXT: vminnm.f32 s11, s11, s4
+; CHECK-MVE-NEXT: vcvtb.f32.f16 s1, s1
+; CHECK-MVE-NEXT: vcvt.u32.f32 s15, s15
+; CHECK-MVE-NEXT: vcvtb.f32.f16 s8, s0
+; CHECK-MVE-NEXT: vmaxnm.f32 s5, s1, s6
+; CHECK-MVE-NEXT: vminnm.f32 s9, s9, s4
+; CHECK-MVE-NEXT: vcvt.u32.f32 s11, s11
+; CHECK-MVE-NEXT: vmaxnm.f32 s10, s8, s6
+; CHECK-MVE-NEXT: vmaxnm.f32 s14, s12, s6
+; CHECK-MVE-NEXT: vminnm.f32 s5, s5, s4
+; CHECK-MVE-NEXT: vcvt.u32.f32 s9, s9
+; CHECK-MVE-NEXT: vcvtt.f32.f16 s0, s0
; CHECK-MVE-NEXT: vminnm.f32 s10, s10, s4
-; CHECK-MVE-NEXT: vminnm.f32 s12, s12, s4
; CHECK-MVE-NEXT: vminnm.f32 s14, s14, s4
-; CHECK-MVE-NEXT: vcvt.u32.f32 s4, s2
-; CHECK-MVE-NEXT: vcvt.u32.f32 s14, s14
+; CHECK-MVE-NEXT: vcvt.u32.f32 s5, s5
+; CHECK-MVE-NEXT: vmaxnm.f32 s6, s0, s6
+; CHECK-MVE-NEXT: vminnm.f32 s4, s6, s4
; CHECK-MVE-NEXT: vcvt.u32.f32 s10, s10
-; CHECK-MVE-NEXT: vcvt.u32.f32 s12, s12
-; CHECK-MVE-NEXT: vmov r0, s0
-; CHECK-MVE-NEXT: vcvt.u32.f32 s8, s8
-; CHECK-MVE-NEXT: vmov.16 q0[0], r0
-; CHECK-MVE-NEXT: vmov r0, s7
-; CHECK-MVE-NEXT: vmov.16 q0[1], r0
-; CHECK-MVE-NEXT: vmov r0, s4
-; CHECK-MVE-NEXT: vmov.16 q0[2], r0
-; CHECK-MVE-NEXT: vmov r0, s14
-; CHECK-MVE-NEXT: vmov.16 q0[3], r0
+; CHECK-MVE-NEXT: vcvt.u32.f32 s14, s14
+; CHECK-MVE-NEXT: vcvt.u32.f32 s4, s4
+; CHECK-MVE-NEXT: vcmp.f32 s3, s3
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vmov r12, s16
+; CHECK-MVE-NEXT: vcmp.f32 s13, s13
+; CHECK-MVE-NEXT: it vs
+; CHECK-MVE-NEXT: movvs.w r12, #0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vmov lr, s15
+; CHECK-MVE-NEXT: vcmp.f32 s2, s2
+; CHECK-MVE-NEXT: it vs
+; CHECK-MVE-NEXT: movvs.w lr, #0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vmov r2, s11
+; CHECK-MVE-NEXT: vcmp.f32 s7, s7
+; CHECK-MVE-NEXT: it vs
+; CHECK-MVE-NEXT: movvs r2, #0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vmov r3, s9
+; CHECK-MVE-NEXT: vcmp.f32 s1, s1
+; CHECK-MVE-NEXT: it vs
+; CHECK-MVE-NEXT: movvs r3, #0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov r0, s5
-; CHECK-MVE-NEXT: vmov.16 q0[4], r0
-; CHECK-MVE-NEXT: vmov r0, s10
-; CHECK-MVE-NEXT: vmov.16 q0[5], r0
-; CHECK-MVE-NEXT: vmov r0, s12
-; CHECK-MVE-NEXT: vmov.16 q0[6], r0
-; CHECK-MVE-NEXT: vmov r0, s8
-; CHECK-MVE-NEXT: vmov.16 q0[7], r0
-; CHECK-MVE-NEXT: bx lr
+; CHECK-MVE-NEXT: vcmp.f32 s12, s12
+; CHECK-MVE-NEXT: it vs
+; CHECK-MVE-NEXT: movvs r0, #0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vmov r1, s14
+; CHECK-MVE-NEXT: vmov r4, s10
+; CHECK-MVE-NEXT: it vs
+; CHECK-MVE-NEXT: movvs r1, #0
+; CHECK-MVE-NEXT: vcmp.f32 s8, s8
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it vs
+; CHECK-MVE-NEXT: movvs r4, #0
+; CHECK-MVE-NEXT: vcmp.f32 s0, s0
+; CHECK-MVE-NEXT: vmov.16 q0[0], r4
+; CHECK-MVE-NEXT: vmov r5, s4
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it vs
+; CHECK-MVE-NEXT: movvs r5, #0
+; CHECK-MVE-NEXT: vmov.16 q0[1], r5
+; CHECK-MVE-NEXT: vmov.16 q0[2], r0
+; CHECK-MVE-NEXT: vmov.16 q0[3], r1
+; CHECK-MVE-NEXT: vmov.16 q0[4], r2
+; CHECK-MVE-NEXT: vmov.16 q0[5], r3
+; CHECK-MVE-NEXT: vmov.16 q0[6], r12
+; CHECK-MVE-NEXT: vmov.16 q0[7], lr
+; CHECK-MVE-NEXT: vpop {d8}
+; CHECK-MVE-NEXT: pop {r4, r5, r7, pc}
; CHECK-MVE-NEXT: .p2align 2
; CHECK-MVE-NEXT: @ %bb.1:
; CHECK-MVE-NEXT: .LCPI44_0:
@@ -3132,57 +3321,94 @@ define arm_aapcs_vfpcc <8 x i13> @test_unsigned_v8f16_v8i13(<8 x half> %f) {
define arm_aapcs_vfpcc <8 x i16> @test_unsigned_v8f16_v8i16(<8 x half> %f) {
; CHECK-MVE-LABEL: test_unsigned_v8f16_v8i16:
; CHECK-MVE: @ %bb.0:
+; CHECK-MVE-NEXT: .save {r4, r5, r7, lr}
+; CHECK-MVE-NEXT: push {r4, r5, r7, lr}
+; CHECK-MVE-NEXT: .vsave {d8}
+; CHECK-MVE-NEXT: vpush {d8}
; CHECK-MVE-NEXT: vldr s6, .LCPI45_1
-; CHECK-MVE-NEXT: vcvtt.f32.f16 s10, s2
-; CHECK-MVE-NEXT: vcvtb.f32.f16 s2, s2
+; CHECK-MVE-NEXT: vcvtt.f32.f16 s13, s3
+; CHECK-MVE-NEXT: vcvtb.f32.f16 s3, s3
; CHECK-MVE-NEXT: vldr s4, .LCPI45_0
-; CHECK-MVE-NEXT: vmaxnm.f32 s2, s2, s6
-; CHECK-MVE-NEXT: vcvtt.f32.f16 s8, s3
-; CHECK-MVE-NEXT: vminnm.f32 s2, s2, s4
-; CHECK-MVE-NEXT: vcvtb.f32.f16 s12, s3
-; CHECK-MVE-NEXT: vcvt.u32.f32 s5, s2
-; CHECK-MVE-NEXT: vcvtt.f32.f16 s2, s0
-; CHECK-MVE-NEXT: vmaxnm.f32 s2, s2, s6
-; CHECK-MVE-NEXT: vcvtb.f32.f16 s0, s0
-; CHECK-MVE-NEXT: vmaxnm.f32 s0, s0, s6
-; CHECK-MVE-NEXT: vminnm.f32 s2, s2, s4
-; CHECK-MVE-NEXT: vminnm.f32 s0, s0, s4
-; CHECK-MVE-NEXT: vcvt.u32.f32 s7, s2
-; CHECK-MVE-NEXT: vcvtb.f32.f16 s2, s1
-; CHECK-MVE-NEXT: vcvtt.f32.f16 s14, s1
-; CHECK-MVE-NEXT: vmaxnm.f32 s2, s2, s6
-; CHECK-MVE-NEXT: vcvt.u32.f32 s0, s0
-; CHECK-MVE-NEXT: vmaxnm.f32 s8, s8, s6
-; CHECK-MVE-NEXT: vmaxnm.f32 s10, s10, s6
-; CHECK-MVE-NEXT: vmaxnm.f32 s12, s12, s6
-; CHECK-MVE-NEXT: vmaxnm.f32 s14, s14, s6
-; CHECK-MVE-NEXT: vminnm.f32 s2, s2, s4
-; CHECK-MVE-NEXT: vminnm.f32 s8, s8, s4
+; CHECK-MVE-NEXT: vmaxnm.f32 s16, s3, s6
+; CHECK-MVE-NEXT: vcvtt.f32.f16 s7, s2
+; CHECK-MVE-NEXT: vmaxnm.f32 s15, s13, s6
+; CHECK-MVE-NEXT: vminnm.f32 s16, s16, s4
+; CHECK-MVE-NEXT: vcvtb.f32.f16 s2, s2
+; CHECK-MVE-NEXT: vminnm.f32 s15, s15, s4
+; CHECK-MVE-NEXT: vmaxnm.f32 s11, s2, s6
+; CHECK-MVE-NEXT: vcvt.u32.f32 s16, s16
+; CHECK-MVE-NEXT: vcvtt.f32.f16 s12, s1
+; CHECK-MVE-NEXT: vmaxnm.f32 s9, s7, s6
+; CHECK-MVE-NEXT: vminnm.f32 s11, s11, s4
+; CHECK-MVE-NEXT: vcvtb.f32.f16 s1, s1
+; CHECK-MVE-NEXT: vcvt.u32.f32 s15, s15
+; CHECK-MVE-NEXT: vcvtb.f32.f16 s8, s0
+; CHECK-MVE-NEXT: vmaxnm.f32 s5, s1, s6
+; CHECK-MVE-NEXT: vminnm.f32 s9, s9, s4
+; CHECK-MVE-NEXT: vcvt.u32.f32 s11, s11
+; CHECK-MVE-NEXT: vmaxnm.f32 s10, s8, s6
+; CHECK-MVE-NEXT: vmaxnm.f32 s14, s12, s6
+; CHECK-MVE-NEXT: vminnm.f32 s5, s5, s4
+; CHECK-MVE-NEXT: vcvt.u32.f32 s9, s9
+; CHECK-MVE-NEXT: vcvtt.f32.f16 s0, s0
; CHECK-MVE-NEXT: vminnm.f32 s10, s10, s4
-; CHECK-MVE-NEXT: vminnm.f32 s12, s12, s4
; CHECK-MVE-NEXT: vminnm.f32 s14, s14, s4
-; CHECK-MVE-NEXT: vcvt.u32.f32 s4, s2
-; CHECK-MVE-NEXT: vcvt.u32.f32 s14, s14
+; CHECK-MVE-NEXT: vcvt.u32.f32 s5, s5
+; CHECK-MVE-NEXT: vmaxnm.f32 s6, s0, s6
+; CHECK-MVE-NEXT: vminnm.f32 s4, s6, s4
; CHECK-MVE-NEXT: vcvt.u32.f32 s10, s10
-; CHECK-MVE-NEXT: vcvt.u32.f32 s12, s12
-; CHECK-MVE-NEXT: vmov r0, s0
-; CHECK-MVE-NEXT: vcvt.u32.f32 s8, s8
-; CHECK-MVE-NEXT: vmov.16 q0[0], r0
-; CHECK-MVE-NEXT: vmov r0, s7
-; CHECK-MVE-NEXT: vmov.16 q0[1], r0
-; CHECK-MVE-NEXT: vmov r0, s4
-; CHECK-MVE-NEXT: vmov.16 q0[2], r0
-; CHECK-MVE-NEXT: vmov r0, s14
-; CHECK-MVE-NEXT: vmov.16 q0[3], r0
+; CHECK-MVE-NEXT: vcvt.u32.f32 s14, s14
+; CHECK-MVE-NEXT: vcvt.u32.f32 s4, s4
+; CHECK-MVE-NEXT: vcmp.f32 s3, s3
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vmov r12, s16
+; CHECK-MVE-NEXT: vcmp.f32 s13, s13
+; CHECK-MVE-NEXT: it vs
+; CHECK-MVE-NEXT: movvs.w r12, #0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vmov lr, s15
+; CHECK-MVE-NEXT: vcmp.f32 s2, s2
+; CHECK-MVE-NEXT: it vs
+; CHECK-MVE-NEXT: movvs.w lr, #0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vmov r2, s11
+; CHECK-MVE-NEXT: vcmp.f32 s7, s7
+; CHECK-MVE-NEXT: it vs
+; CHECK-MVE-NEXT: movvs r2, #0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vmov r3, s9
+; CHECK-MVE-NEXT: vcmp.f32 s1, s1
+; CHECK-MVE-NEXT: it vs
+; CHECK-MVE-NEXT: movvs r3, #0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov r0, s5
-; CHECK-MVE-NEXT: vmov.16 q0[4], r0
-; CHECK-MVE-NEXT: vmov r0, s10
-; CHECK-MVE-NEXT: vmov.16 q0[5], r0
-; CHECK-MVE-NEXT: vmov r0, s12
-; CHECK-MVE-NEXT: vmov.16 q0[6], r0
-; CHECK-MVE-NEXT: vmov r0, s8
-; CHECK-MVE-NEXT: vmov.16 q0[7], r0
-; CHECK-MVE-NEXT: bx lr
+; CHECK-MVE-NEXT: vcmp.f32 s12, s12
+; CHECK-MVE-NEXT: it vs
+; CHECK-MVE-NEXT: movvs r0, #0
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: vmov r1, s14
+; CHECK-MVE-NEXT: vmov r4, s10
+; CHECK-MVE-NEXT: it vs
+; CHECK-MVE-NEXT: movvs r1, #0
+; CHECK-MVE-NEXT: vcmp.f32 s8, s8
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it vs
+; CHECK-MVE-NEXT: movvs r4, #0
+; CHECK-MVE-NEXT: vcmp.f32 s0, s0
+; CHECK-MVE-NEXT: vmov.16 q0[0], r4
+; CHECK-MVE-NEXT: vmov r5, s4
+; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-MVE-NEXT: it vs
+; CHECK-MVE-NEXT: movvs r5, #0
+; CHECK-MVE-NEXT: vmov.16 q0[1], r5
+; CHECK-MVE-NEXT: vmov.16 q0[2], r0
+; CHECK-MVE-NEXT: vmov.16 q0[3], r1
+; CHECK-MVE-NEXT: vmov.16 q0[4], r2
+; CHECK-MVE-NEXT: vmov.16 q0[5], r3
+; CHECK-MVE-NEXT: vmov.16 q0[6], r12
+; CHECK-MVE-NEXT: vmov.16 q0[7], lr
+; CHECK-MVE-NEXT: vpop {d8}
+; CHECK-MVE-NEXT: pop {r4, r5, r7, pc}
; CHECK-MVE-NEXT: .p2align 2
; CHECK-MVE-NEXT: @ %bb.1:
; CHECK-MVE-NEXT: .LCPI45_0:
@@ -3201,90 +3427,124 @@ define arm_aapcs_vfpcc <8 x i16> @test_unsigned_v8f16_v8i16(<8 x half> %f) {
define arm_aapcs_vfpcc <8 x i19> @test_unsigned_v8f16_v8i19(<8 x half> %f) {
; CHECK-LABEL: test_unsigned_v8f16_v8i19:
; CHECK: @ %bb.0:
-; CHECK-NEXT: .save {r4, r5, r6, r7, r9, r11, lr}
-; CHECK-NEXT: push.w {r4, r5, r6, r7, r9, r11, lr}
-; CHECK-NEXT: vldr s4, .LCPI46_0
-; CHECK-NEXT: vcvtb.f32.f16 s14, s1
+; CHECK-NEXT: .save {r4, r5, r7, r9, r11, lr}
+; CHECK-NEXT: push.w {r4, r5, r7, r9, r11, lr}
; CHECK-NEXT: vldr s6, .LCPI46_1
-; CHECK-NEXT: vcvtt.f32.f16 s12, s1
-; CHECK-NEXT: vmaxnm.f32 s14, s14, s4
-; CHECK-NEXT: vmaxnm.f32 s12, s12, s4
-; CHECK-NEXT: vminnm.f32 s14, s14, s6
-; CHECK-NEXT: vminnm.f32 s12, s12, s6
-; CHECK-NEXT: vcvt.u32.f32 s14, s14
-; CHECK-NEXT: vcvtb.f32.f16 s10, s0
-; CHECK-NEXT: vcvt.u32.f32 s12, s12
+; CHECK-NEXT: vcvtb.f32.f16 s12, s0
; CHECK-NEXT: vcvtt.f32.f16 s0, s0
-; CHECK-NEXT: vmaxnm.f32 s0, s0, s4
-; CHECK-NEXT: vmaxnm.f32 s10, s10, s4
-; CHECK-NEXT: vminnm.f32 s0, s0, s6
-; CHECK-NEXT: vminnm.f32 s10, s10, s6
-; CHECK-NEXT: vcvt.u32.f32 s0, s0
-; CHECK-NEXT: movs r1, #0
+; CHECK-NEXT: vldr s4, .LCPI46_0
+; CHECK-NEXT: vmaxnm.f32 s5, s0, s6
+; CHECK-NEXT: vcvtb.f32.f16 s8, s1
+; CHECK-NEXT: vmaxnm.f32 s14, s12, s6
+; CHECK-NEXT: vminnm.f32 s5, s5, s4
+; CHECK-NEXT: vmaxnm.f32 s10, s8, s6
+; CHECK-NEXT: vminnm.f32 s14, s14, s4
+; CHECK-NEXT: vcvt.u32.f32 s5, s5
+; CHECK-NEXT: vminnm.f32 s10, s10, s4
+; CHECK-NEXT: vcvt.u32.f32 s14, s14
+; CHECK-NEXT: vcvtt.f32.f16 s1, s1
; CHECK-NEXT: vcvt.u32.f32 s10, s10
-; CHECK-NEXT: vcvtt.f32.f16 s8, s2
-; CHECK-NEXT: vcvtb.f32.f16 s2, s2
-; CHECK-NEXT: vmaxnm.f32 s8, s8, s4
-; CHECK-NEXT: vmov r2, s14
-; CHECK-NEXT: vmaxnm.f32 s2, s2, s4
-; CHECK-NEXT: vmov r4, s12
-; CHECK-NEXT: vminnm.f32 s2, s2, s6
-; CHECK-NEXT: vcvt.u32.f32 s2, s2
-; CHECK-NEXT: vminnm.f32 s8, s8, s6
-; CHECK-NEXT: vcvt.u32.f32 s8, s8
-; CHECK-NEXT: mov.w r11, #0
-; CHECK-NEXT: vmov r12, s0
-; CHECK-NEXT: vcvtt.f32.f16 s0, s3
-; CHECK-NEXT: lsll r12, r1, #19
-; CHECK-NEXT: vmaxnm.f32 s0, s0, s4
-; CHECK-NEXT: vminnm.f32 s0, s0, s6
+; CHECK-NEXT: vmaxnm.f32 s7, s1, s6
+; CHECK-NEXT: vcmp.f32 s0, s0
; CHECK-NEXT: movs r5, #0
-; CHECK-NEXT: vcvt.u32.f32 s0, s0
-; CHECK-NEXT: movs r7, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s12, s12
+; CHECK-NEXT: vminnm.f32 s7, s7, s4
+; CHECK-NEXT: vcvtb.f32.f16 s0, s2
+; CHECK-NEXT: vcvt.u32.f32 s7, s7
+; CHECK-NEXT: mov.w r11, #0
+; CHECK-NEXT: vmov r2, s5
; CHECK-NEXT: mov.w r9, #0
-; CHECK-NEXT: movs r3, #0
-; CHECK-NEXT: orr.w r1, r1, r2, lsl #6
-; CHECK-NEXT: lsrl r2, r5, #26
-; CHECK-NEXT: orr.w r1, r1, r4, lsl #25
-; CHECK-NEXT: str r1, [r0, #4]
-; CHECK-NEXT: vmov r1, s10
-; CHECK-NEXT: lsrl r4, r11, #7
-; CHECK-NEXT: orr.w r1, r1, r12
+; CHECK-NEXT: it vs
+; CHECK-NEXT: movvs r2, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vmov r1, s14
+; CHECK-NEXT: lsll r2, r5, #19
+; CHECK-NEXT: vcmp.f32 s8, s8
+; CHECK-NEXT: it vs
+; CHECK-NEXT: movvs r1, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vmov r3, s10
+; CHECK-NEXT: vcmp.f32 s1, s1
+; CHECK-NEXT: vmaxnm.f32 s8, s0, s6
+; CHECK-NEXT: orr.w r1, r1, r2
; CHECK-NEXT: str r1, [r0]
-; CHECK-NEXT: orr.w r1, r2, r4
-; CHECK-NEXT: vmov r2, s2
+; CHECK-NEXT: it vs
+; CHECK-NEXT: movvs r3, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s0, s0
+; CHECK-NEXT: vcvtt.f32.f16 s0, s2
+; CHECK-NEXT: vmaxnm.f32 s2, s0, s6
+; CHECK-NEXT: vminnm.f32 s8, s8, s4
+; CHECK-NEXT: vminnm.f32 s2, s2, s4
+; CHECK-NEXT: vcvt.u32.f32 s8, s8
+; CHECK-NEXT: vcvt.u32.f32 s2, s2
+; CHECK-NEXT: mov r4, r3
+; CHECK-NEXT: vmov r12, s7
+; CHECK-NEXT: lsrl r4, r9, #26
+; CHECK-NEXT: it vs
+; CHECK-NEXT: movvs.w r12, #0
+; CHECK-NEXT: mov r2, r12
+; CHECK-NEXT: lsrl r2, r11, #7
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s0, s0
+; CHECK-NEXT: vcvtb.f32.f16 s0, s3
+; CHECK-NEXT: orr.w r1, r4, r2
+; CHECK-NEXT: mov.w r7, #0
+; CHECK-NEXT: vmov r2, s8
+; CHECK-NEXT: vmov r4, s2
+; CHECK-NEXT: vmaxnm.f32 s2, s0, s6
+; CHECK-NEXT: vminnm.f32 s2, s2, s4
+; CHECK-NEXT: it vs
+; CHECK-NEXT: movvs r2, #0
+; CHECK-NEXT: vcvt.u32.f32 s2, s2
; CHECK-NEXT: lsll r2, r7, #12
-; CHECK-NEXT: vmov r4, s8
-; CHECK-NEXT: orrs r2, r1
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: orr.w r2, r2, r1
+; CHECK-NEXT: it vs
+; CHECK-NEXT: movvs r4, #0
; CHECK-NEXT: movs r1, #0
; CHECK-NEXT: lsll r4, r1, #31
-; CHECK-NEXT: orr.w r12, r2, r4
-; CHECK-NEXT: vmov r4, s0
-; CHECK-NEXT: vcvtb.f32.f16 s0, s3
-; CHECK-NEXT: lsll r4, r3, #5
-; CHECK-NEXT: vmaxnm.f32 s0, s0, s4
-; CHECK-NEXT: vminnm.f32 s0, s0, s6
-; CHECK-NEXT: vcvt.u32.f32 s0, s0
-; CHECK-NEXT: vmov r2, s0
-; CHECK-NEXT: mov r6, r2
-; CHECK-NEXT: lsrl r6, r9, #14
-; CHECK-NEXT: orr.w r3, r6, r4
-; CHECK-NEXT: strh r3, [r0, #16]
-; CHECK-NEXT: str.w r12, [r0, #8]
-; CHECK-NEXT: lsrs r3, r3, #16
-; CHECK-NEXT: strb r3, [r0, #18]
-; CHECK-NEXT: orr.w r3, r5, r11
-; CHECK-NEXT: orrs r3, r7
-; CHECK-NEXT: orrs r1, r3
-; CHECK-NEXT: orr.w r1, r1, r2, lsl #18
+; CHECK-NEXT: vcmp.f32 s0, s0
+; CHECK-NEXT: orrs r2, r4
+; CHECK-NEXT: vcvtt.f32.f16 s0, s3
+; CHECK-NEXT: str r2, [r0, #8]
+; CHECK-NEXT: orr.w r2, r5, r3, lsl #6
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s0, s0
+; CHECK-NEXT: vmov r3, s2
+; CHECK-NEXT: vmaxnm.f32 s2, s0, s6
+; CHECK-NEXT: vminnm.f32 s2, s2, s4
+; CHECK-NEXT: it vs
+; CHECK-NEXT: movvs r3, #0
+; CHECK-NEXT: vcvt.u32.f32 s2, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: mov.w r5, #0
+; CHECK-NEXT: orr.w r12, r2, r12, lsl #25
+; CHECK-NEXT: mov r2, r3
+; CHECK-NEXT: vmov r4, s2
+; CHECK-NEXT: it vs
+; CHECK-NEXT: movvs r4, #0
+; CHECK-NEXT: lsll r4, r5, #5
+; CHECK-NEXT: movs r5, #0
+; CHECK-NEXT: lsrl r2, r5, #14
+; CHECK-NEXT: orrs r2, r4
+; CHECK-NEXT: strh r2, [r0, #16]
+; CHECK-NEXT: str.w r12, [r0, #4]
+; CHECK-NEXT: lsrs r2, r2, #16
+; CHECK-NEXT: strb r2, [r0, #18]
+; CHECK-NEXT: orr.w r2, r9, r11
+; CHECK-NEXT: orrs r2, r7
+; CHECK-NEXT: orrs r1, r2
+; CHECK-NEXT: orr.w r1, r1, r3, lsl #18
; CHECK-NEXT: str r1, [r0, #12]
-; CHECK-NEXT: pop.w {r4, r5, r6, r7, r9, r11, pc}
+; CHECK-NEXT: pop.w {r4, r5, r7, r9, r11, pc}
; CHECK-NEXT: .p2align 2
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI46_0:
-; CHECK-NEXT: .long 0x00000000 @ float 0
-; CHECK-NEXT: .LCPI46_1:
; CHECK-NEXT: .long 0x48ffffe0 @ float 524287
+; CHECK-NEXT: .LCPI46_1:
+; CHECK-NEXT: .long 0x00000000 @ float 0
%x = call <8 x i19> @llvm.fptoui.sat.v8f16.v8i19(<8 x half> %f)
ret <8 x i19> %x
}
diff --git a/llvm/test/CodeGen/X86/fpclamptosat.ll b/llvm/test/CodeGen/X86/fpclamptosat.ll
index 67483be7a3fde..1a1d3da844263 100644
--- a/llvm/test/CodeGen/X86/fpclamptosat.ll
+++ b/llvm/test/CodeGen/X86/fpclamptosat.ll
@@ -337,16 +337,12 @@ define i16 @stest_f16i16(half %x) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq __extendhfsf2 at PLT
-; CHECK-NEXT: cvttss2si %xmm0, %eax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: movl $32768, %ecx # imm = 0x8000
-; CHECK-NEXT: cmovael %eax, %ecx
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: movl $32767, %edx # imm = 0x7FFF
-; CHECK-NEXT: cmovbel %ecx, %edx
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: ucomiss %xmm0, %xmm0
-; CHECK-NEXT: cmovnpl %edx, %eax
+; CHECK-NEXT: maxss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: minss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %ecx
+; CHECK-NEXT: cmovnpl %ecx, %eax
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: popq %rcx
; CHECK-NEXT: retq
@@ -925,16 +921,12 @@ define i16 @stest_f16i16_mm(half %x) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq __extendhfsf2 at PLT
-; CHECK-NEXT: cvttss2si %xmm0, %eax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: movl $32768, %ecx # imm = 0x8000
-; CHECK-NEXT: cmovael %eax, %ecx
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: movl $32767, %edx # imm = 0x7FFF
-; CHECK-NEXT: cmovbel %ecx, %edx
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: ucomiss %xmm0, %xmm0
-; CHECK-NEXT: cmovnpl %edx, %eax
+; CHECK-NEXT: maxss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: minss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %ecx
+; CHECK-NEXT: cmovnpl %ecx, %eax
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: popq %rcx
; CHECK-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/fptosi-sat-scalar.ll b/llvm/test/CodeGen/X86/fptosi-sat-scalar.ll
index 85f4c945230e1..9b7a43a29a942 100644
--- a/llvm/test/CodeGen/X86/fptosi-sat-scalar.ll
+++ b/llvm/test/CodeGen/X86/fptosi-sat-scalar.ll
@@ -1542,10 +1542,12 @@ define i50 @test_signed_i50_f64(double %f) nounwind {
;
; X86-SSE-LABEL: test_signed_i50_f64:
; X86-SSE: # %bb.0:
-; X86-SSE-NEXT: pushl %esi
-; X86-SSE-NEXT: subl $16, %esp
+; X86-SSE-NEXT: subl $20, %esp
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; X86-SSE-NEXT: movsd %xmm0, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT: movapd %xmm0, %xmm1
+; X86-SSE-NEXT: maxsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; X86-SSE-NEXT: minsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; X86-SSE-NEXT: movsd %xmm1, {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fldl {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fnstcw {{[0-9]+}}(%esp)
; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
@@ -1554,22 +1556,15 @@ define i50 @test_signed_i50_f64(double %f) nounwind {
; X86-SSE-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fldcw {{[0-9]+}}(%esp)
-; X86-SSE-NEXT: xorl %ecx, %ecx
-; X86-SSE-NEXT: ucomisd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-SSE-NEXT: cmovbl %ecx, %esi
-; X86-SSE-NEXT: movl $-131072, %eax # imm = 0xFFFE0000
-; X86-SSE-NEXT: cmovael {{[0-9]+}}(%esp), %eax
-; X86-SSE-NEXT: ucomisd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE-NEXT: movl $131071, %edx # imm = 0x1FFFF
-; X86-SSE-NEXT: cmovbel %eax, %edx
-; X86-SSE-NEXT: movl $-1, %eax
-; X86-SSE-NEXT: cmovbel %esi, %eax
+; X86-SSE-NEXT: xorl %eax, %eax
; X86-SSE-NEXT: ucomisd %xmm0, %xmm0
-; X86-SSE-NEXT: cmovpl %ecx, %eax
-; X86-SSE-NEXT: cmovpl %ecx, %edx
-; X86-SSE-NEXT: addl $16, %esp
-; X86-SSE-NEXT: popl %esi
+; X86-SSE-NEXT: movl $0, %edx
+; X86-SSE-NEXT: jp .LBB16_2
+; X86-SSE-NEXT: # %bb.1:
+; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-SSE-NEXT: .LBB16_2:
+; X86-SSE-NEXT: addl $20, %esp
; X86-SSE-NEXT: retl
;
; X64-LABEL: test_signed_i50_f64:
@@ -2116,16 +2111,13 @@ define i1 @test_signed_i1_f16(half %f) nounwind {
; X86-SSE-NEXT: calll __extendhfsf2
; X86-SSE-NEXT: fstps {{[0-9]+}}(%esp)
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-SSE-NEXT: cvttss2si %xmm0, %ecx
-; X86-SSE-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE-NEXT: movl $255, %eax
-; X86-SSE-NEXT: cmovael %ecx, %eax
-; X86-SSE-NEXT: xorl %ecx, %ecx
-; X86-SSE-NEXT: xorps %xmm1, %xmm1
-; X86-SSE-NEXT: ucomiss %xmm1, %xmm0
-; X86-SSE-NEXT: cmoval %ecx, %eax
+; X86-SSE-NEXT: xorl %eax, %eax
; X86-SSE-NEXT: ucomiss %xmm0, %xmm0
-; X86-SSE-NEXT: cmovpl %ecx, %eax
+; X86-SSE-NEXT: maxss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE-NEXT: xorps %xmm1, %xmm1
+; X86-SSE-NEXT: minss %xmm0, %xmm1
+; X86-SSE-NEXT: cvttss2si %xmm1, %ecx
+; X86-SSE-NEXT: cmovnpl %ecx, %eax
; X86-SSE-NEXT: # kill: def $al killed $al killed $eax
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: retl
@@ -2134,16 +2126,13 @@ define i1 @test_signed_i1_f16(half %f) nounwind {
; X64: # %bb.0:
; X64-NEXT: pushq %rax
; X64-NEXT: callq __extendhfsf2 at PLT
-; X64-NEXT: cvttss2si %xmm0, %ecx
-; X64-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-NEXT: movl $255, %eax
-; X64-NEXT: cmovael %ecx, %eax
-; X64-NEXT: xorl %ecx, %ecx
-; X64-NEXT: xorps %xmm1, %xmm1
-; X64-NEXT: ucomiss %xmm1, %xmm0
-; X64-NEXT: cmoval %ecx, %eax
+; X64-NEXT: xorl %eax, %eax
; X64-NEXT: ucomiss %xmm0, %xmm0
-; X64-NEXT: cmovpl %ecx, %eax
+; X64-NEXT: maxss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT: xorps %xmm1, %xmm1
+; X64-NEXT: minss %xmm0, %xmm1
+; X64-NEXT: cvttss2si %xmm1, %ecx
+; X64-NEXT: cmovnpl %ecx, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: popq %rcx
; X64-NEXT: retq
@@ -2211,16 +2200,12 @@ define i8 @test_signed_i8_f16(half %f) nounwind {
; X86-SSE-NEXT: calll __extendhfsf2
; X86-SSE-NEXT: fstps {{[0-9]+}}(%esp)
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-SSE-NEXT: cvttss2si %xmm0, %eax
-; X86-SSE-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE-NEXT: movl $128, %ecx
-; X86-SSE-NEXT: cmovael %eax, %ecx
-; X86-SSE-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE-NEXT: movl $127, %edx
-; X86-SSE-NEXT: cmovbel %ecx, %edx
; X86-SSE-NEXT: xorl %eax, %eax
; X86-SSE-NEXT: ucomiss %xmm0, %xmm0
-; X86-SSE-NEXT: cmovnpl %edx, %eax
+; X86-SSE-NEXT: maxss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE-NEXT: minss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE-NEXT: cvttss2si %xmm0, %ecx
+; X86-SSE-NEXT: cmovnpl %ecx, %eax
; X86-SSE-NEXT: # kill: def $al killed $al killed $eax
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: retl
@@ -2229,16 +2214,12 @@ define i8 @test_signed_i8_f16(half %f) nounwind {
; X64: # %bb.0:
; X64-NEXT: pushq %rax
; X64-NEXT: callq __extendhfsf2 at PLT
-; X64-NEXT: cvttss2si %xmm0, %eax
-; X64-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-NEXT: movl $128, %ecx
-; X64-NEXT: cmovael %eax, %ecx
-; X64-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-NEXT: movl $127, %edx
-; X64-NEXT: cmovbel %ecx, %edx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: ucomiss %xmm0, %xmm0
-; X64-NEXT: cmovnpl %edx, %eax
+; X64-NEXT: maxss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT: minss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT: cvttss2si %xmm0, %ecx
+; X64-NEXT: cmovnpl %ecx, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: popq %rcx
; X64-NEXT: retq
@@ -2307,16 +2288,12 @@ define i13 @test_signed_i13_f16(half %f) nounwind {
; X86-SSE-NEXT: calll __extendhfsf2
; X86-SSE-NEXT: fstps {{[0-9]+}}(%esp)
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-SSE-NEXT: cvttss2si %xmm0, %eax
-; X86-SSE-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE-NEXT: movl $61440, %ecx # imm = 0xF000
-; X86-SSE-NEXT: cmovael %eax, %ecx
-; X86-SSE-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE-NEXT: movl $4095, %edx # imm = 0xFFF
-; X86-SSE-NEXT: cmovbel %ecx, %edx
; X86-SSE-NEXT: xorl %eax, %eax
; X86-SSE-NEXT: ucomiss %xmm0, %xmm0
-; X86-SSE-NEXT: cmovnpl %edx, %eax
+; X86-SSE-NEXT: maxss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE-NEXT: minss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE-NEXT: cvttss2si %xmm0, %ecx
+; X86-SSE-NEXT: cmovnpl %ecx, %eax
; X86-SSE-NEXT: # kill: def $ax killed $ax killed $eax
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: retl
@@ -2325,16 +2302,12 @@ define i13 @test_signed_i13_f16(half %f) nounwind {
; X64: # %bb.0:
; X64-NEXT: pushq %rax
; X64-NEXT: callq __extendhfsf2 at PLT
-; X64-NEXT: cvttss2si %xmm0, %eax
-; X64-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-NEXT: movl $61440, %ecx # imm = 0xF000
-; X64-NEXT: cmovael %eax, %ecx
-; X64-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-NEXT: movl $4095, %edx # imm = 0xFFF
-; X64-NEXT: cmovbel %ecx, %edx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: ucomiss %xmm0, %xmm0
-; X64-NEXT: cmovnpl %edx, %eax
+; X64-NEXT: maxss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT: minss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT: cvttss2si %xmm0, %ecx
+; X64-NEXT: cmovnpl %ecx, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: popq %rcx
; X64-NEXT: retq
@@ -2403,16 +2376,12 @@ define i16 @test_signed_i16_f16(half %f) nounwind {
; X86-SSE-NEXT: calll __extendhfsf2
; X86-SSE-NEXT: fstps {{[0-9]+}}(%esp)
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-SSE-NEXT: cvttss2si %xmm0, %eax
-; X86-SSE-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE-NEXT: movl $32768, %ecx # imm = 0x8000
-; X86-SSE-NEXT: cmovael %eax, %ecx
-; X86-SSE-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE-NEXT: movl $32767, %edx # imm = 0x7FFF
-; X86-SSE-NEXT: cmovbel %ecx, %edx
; X86-SSE-NEXT: xorl %eax, %eax
; X86-SSE-NEXT: ucomiss %xmm0, %xmm0
-; X86-SSE-NEXT: cmovnpl %edx, %eax
+; X86-SSE-NEXT: maxss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE-NEXT: minss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE-NEXT: cvttss2si %xmm0, %ecx
+; X86-SSE-NEXT: cmovnpl %ecx, %eax
; X86-SSE-NEXT: # kill: def $ax killed $ax killed $eax
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: retl
@@ -2421,16 +2390,12 @@ define i16 @test_signed_i16_f16(half %f) nounwind {
; X64: # %bb.0:
; X64-NEXT: pushq %rax
; X64-NEXT: callq __extendhfsf2 at PLT
-; X64-NEXT: cvttss2si %xmm0, %eax
-; X64-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-NEXT: movl $32768, %ecx # imm = 0x8000
-; X64-NEXT: cmovael %eax, %ecx
-; X64-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-NEXT: movl $32767, %edx # imm = 0x7FFF
-; X64-NEXT: cmovbel %ecx, %edx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: ucomiss %xmm0, %xmm0
-; X64-NEXT: cmovnpl %edx, %eax
+; X64-NEXT: maxss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT: minss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT: cvttss2si %xmm0, %ecx
+; X64-NEXT: cmovnpl %ecx, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: popq %rcx
; X64-NEXT: retq
@@ -2499,16 +2464,12 @@ define i19 @test_signed_i19_f16(half %f) nounwind {
; X86-SSE-NEXT: calll __extendhfsf2
; X86-SSE-NEXT: fstps {{[0-9]+}}(%esp)
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-SSE-NEXT: cvttss2si %xmm0, %eax
-; X86-SSE-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE-NEXT: movl $-262144, %ecx # imm = 0xFFFC0000
-; X86-SSE-NEXT: cmovael %eax, %ecx
-; X86-SSE-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE-NEXT: movl $262143, %edx # imm = 0x3FFFF
-; X86-SSE-NEXT: cmovbel %ecx, %edx
; X86-SSE-NEXT: xorl %eax, %eax
; X86-SSE-NEXT: ucomiss %xmm0, %xmm0
-; X86-SSE-NEXT: cmovnpl %edx, %eax
+; X86-SSE-NEXT: maxss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE-NEXT: minss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE-NEXT: cvttss2si %xmm0, %ecx
+; X86-SSE-NEXT: cmovnpl %ecx, %eax
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: retl
;
@@ -2516,16 +2477,12 @@ define i19 @test_signed_i19_f16(half %f) nounwind {
; X64: # %bb.0:
; X64-NEXT: pushq %rax
; X64-NEXT: callq __extendhfsf2 at PLT
-; X64-NEXT: cvttss2si %xmm0, %eax
-; X64-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-NEXT: movl $-262144, %ecx # imm = 0xFFFC0000
-; X64-NEXT: cmovael %eax, %ecx
-; X64-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-NEXT: movl $262143, %edx # imm = 0x3FFFF
-; X64-NEXT: cmovbel %ecx, %edx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: ucomiss %xmm0, %xmm0
-; X64-NEXT: cmovnpl %edx, %eax
+; X64-NEXT: maxss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT: minss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT: cvttss2si %xmm0, %ecx
+; X64-NEXT: cmovnpl %ecx, %eax
; X64-NEXT: popq %rcx
; X64-NEXT: retq
%x = call i19 @llvm.fptosi.sat.i19.f16(half %f)
diff --git a/llvm/test/CodeGen/X86/fptosi-sat-vector-128.ll b/llvm/test/CodeGen/X86/fptosi-sat-vector-128.ll
index a251e43e4295b..0183d7a7d0026 100644
--- a/llvm/test/CodeGen/X86/fptosi-sat-vector-128.ll
+++ b/llvm/test/CodeGen/X86/fptosi-sat-vector-128.ll
@@ -589,34 +589,28 @@ declare <8 x i128> @llvm.fptosi.sat.v8i128.v8f16(<8 x half>)
define <8 x i1> @test_signed_v8i1_v8f16(<8 x half> %f) nounwind {
; CHECK-LABEL: test_signed_v8i1_v8f16:
; CHECK: # %bb.0:
-; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: pushq %rbx
-; CHECK-NEXT: subq $72, %rsp
+; CHECK-NEXT: subq $64, %rsp
; CHECK-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT: callq __extendhfsf2 at PLT
-; CHECK-NEXT: cvttss2si %xmm0, %eax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: movl $65535, %ebp # imm = 0xFFFF
-; CHECK-NEXT: cmovbl %ebp, %eax
; CHECK-NEXT: xorl %ebx, %ebx
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
-; CHECK-NEXT: cmoval %ebx, %eax
; CHECK-NEXT: ucomiss %xmm0, %xmm0
+; CHECK-NEXT: maxss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: xorps %xmm1, %xmm1
+; CHECK-NEXT: minss %xmm0, %xmm1
+; CHECK-NEXT: cvttss2si %xmm1, %eax
; CHECK-NEXT: cmovpl %ebx, %eax
; CHECK-NEXT: movd %eax, %xmm0
; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; CHECK-NEXT: callq __extendhfsf2 at PLT
-; CHECK-NEXT: cvttss2si %xmm0, %eax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmovbl %ebp, %eax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
-; CHECK-NEXT: cmoval %ebx, %eax
; CHECK-NEXT: ucomiss %xmm0, %xmm0
+; CHECK-NEXT: maxss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: xorps %xmm1, %xmm1
+; CHECK-NEXT: minss %xmm0, %xmm1
+; CHECK-NEXT: cvttss2si %xmm1, %eax
; CHECK-NEXT: cmovpl %ebx, %eax
; CHECK-NEXT: movd %eax, %xmm0
; CHECK-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
@@ -625,26 +619,22 @@ define <8 x i1> @test_signed_v8i1_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT: callq __extendhfsf2 at PLT
-; CHECK-NEXT: cvttss2si %xmm0, %eax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmovbl %ebp, %eax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
-; CHECK-NEXT: cmoval %ebx, %eax
; CHECK-NEXT: ucomiss %xmm0, %xmm0
+; CHECK-NEXT: maxss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: xorps %xmm1, %xmm1
+; CHECK-NEXT: minss %xmm0, %xmm1
+; CHECK-NEXT: cvttss2si %xmm1, %eax
; CHECK-NEXT: cmovpl %ebx, %eax
; CHECK-NEXT: movd %eax, %xmm0
; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: callq __extendhfsf2 at PLT
-; CHECK-NEXT: cvttss2si %xmm0, %eax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmovbl %ebp, %eax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
-; CHECK-NEXT: cmoval %ebx, %eax
; CHECK-NEXT: ucomiss %xmm0, %xmm0
+; CHECK-NEXT: maxss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: xorps %xmm1, %xmm1
+; CHECK-NEXT: minss %xmm0, %xmm1
+; CHECK-NEXT: cvttss2si %xmm1, %eax
; CHECK-NEXT: cmovpl %ebx, %eax
; CHECK-NEXT: movd %eax, %xmm0
; CHECK-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
@@ -655,26 +645,22 @@ define <8 x i1> @test_signed_v8i1_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: psrlq $48, %xmm0
; CHECK-NEXT: callq __extendhfsf2 at PLT
-; CHECK-NEXT: cvttss2si %xmm0, %eax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmovbl %ebp, %eax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
-; CHECK-NEXT: cmoval %ebx, %eax
; CHECK-NEXT: ucomiss %xmm0, %xmm0
+; CHECK-NEXT: maxss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: xorps %xmm1, %xmm1
+; CHECK-NEXT: minss %xmm0, %xmm1
+; CHECK-NEXT: cvttss2si %xmm1, %eax
; CHECK-NEXT: cmovpl %ebx, %eax
; CHECK-NEXT: movd %eax, %xmm0
; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; CHECK-NEXT: callq __extendhfsf2 at PLT
-; CHECK-NEXT: cvttss2si %xmm0, %eax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmovbl %ebp, %eax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
-; CHECK-NEXT: cmoval %ebx, %eax
; CHECK-NEXT: ucomiss %xmm0, %xmm0
+; CHECK-NEXT: maxss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: xorps %xmm1, %xmm1
+; CHECK-NEXT: minss %xmm0, %xmm1
+; CHECK-NEXT: cvttss2si %xmm1, %eax
; CHECK-NEXT: cmovpl %ebx, %eax
; CHECK-NEXT: movd %eax, %xmm0
; CHECK-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
@@ -682,26 +668,22 @@ define <8 x i1> @test_signed_v8i1_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: callq __extendhfsf2 at PLT
-; CHECK-NEXT: cvttss2si %xmm0, %eax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmovbl %ebp, %eax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
-; CHECK-NEXT: cmoval %ebx, %eax
; CHECK-NEXT: ucomiss %xmm0, %xmm0
+; CHECK-NEXT: maxss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: xorps %xmm1, %xmm1
+; CHECK-NEXT: minss %xmm0, %xmm1
+; CHECK-NEXT: cvttss2si %xmm1, %eax
; CHECK-NEXT: cmovpl %ebx, %eax
; CHECK-NEXT: movd %eax, %xmm0
; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: psrld $16, %xmm0
; CHECK-NEXT: callq __extendhfsf2 at PLT
-; CHECK-NEXT: cvttss2si %xmm0, %eax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmovbl %ebp, %eax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
-; CHECK-NEXT: cmoval %ebx, %eax
; CHECK-NEXT: ucomiss %xmm0, %xmm0
+; CHECK-NEXT: maxss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: xorps %xmm1, %xmm1
+; CHECK-NEXT: minss %xmm0, %xmm1
+; CHECK-NEXT: cvttss2si %xmm1, %eax
; CHECK-NEXT: cmovpl %ebx, %eax
; CHECK-NEXT: movd %eax, %xmm1
; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
@@ -710,9 +692,8 @@ define <8 x i1> @test_signed_v8i1_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; CHECK-NEXT: punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
-; CHECK-NEXT: addq $72, %rsp
+; CHECK-NEXT: addq $64, %rsp
; CHECK-NEXT: popq %rbx
-; CHECK-NEXT: popq %rbp
; CHECK-NEXT: retq
%x = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f16(<8 x half> %f)
ret <8 x i1> %x
@@ -724,116 +705,94 @@ define <8 x i8> @test_signed_v8i8_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: pushq %r14
-; CHECK-NEXT: pushq %r13
-; CHECK-NEXT: pushq %r12
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT: callq __extendhfsf2 at PLT
-; CHECK-NEXT: cvttss2si %xmm0, %r12d
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: movl $128, %ebx
-; CHECK-NEXT: cmovbl %ebx, %r12d
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: movl $127, %ebp
-; CHECK-NEXT: cmoval %ebp, %r12d
-; CHECK-NEXT: xorl %r14d, %r14d
+; CHECK-NEXT: xorl %ebx, %ebx
; CHECK-NEXT: ucomiss %xmm0, %xmm0
-; CHECK-NEXT: cmovpl %r14d, %r12d
-; CHECK-NEXT: shll $8, %r12d
+; CHECK-NEXT: maxss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: minss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %r14d
+; CHECK-NEXT: cmovpl %ebx, %r14d
+; CHECK-NEXT: shll $8, %r14d
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: callq __extendhfsf2 at PLT
-; CHECK-NEXT: cvttss2si %xmm0, %eax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmovbl %ebx, %eax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmoval %ebp, %eax
; CHECK-NEXT: ucomiss %xmm0, %xmm0
-; CHECK-NEXT: cmovpl %r14d, %eax
-; CHECK-NEXT: movzbl %al, %r15d
-; CHECK-NEXT: orl %r12d, %r15d
+; CHECK-NEXT: maxss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: minss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %eax
+; CHECK-NEXT: cmovpl %ebx, %eax
+; CHECK-NEXT: movzbl %al, %ebp
+; CHECK-NEXT: orl %r14d, %ebp
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: callq __extendhfsf2 at PLT
-; CHECK-NEXT: cvttss2si %xmm0, %eax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmovbl %ebx, %eax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmoval %ebp, %eax
; CHECK-NEXT: ucomiss %xmm0, %xmm0
-; CHECK-NEXT: cmovpl %r14d, %eax
-; CHECK-NEXT: movzbl %al, %r12d
+; CHECK-NEXT: maxss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: minss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %eax
+; CHECK-NEXT: cmovpl %ebx, %eax
+; CHECK-NEXT: movzbl %al, %r14d
; CHECK-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: psrld $16, %xmm0
; CHECK-NEXT: callq __extendhfsf2 at PLT
-; CHECK-NEXT: cvttss2si %xmm0, %eax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmovbl %ebx, %eax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmoval %ebp, %eax
; CHECK-NEXT: ucomiss %xmm0, %xmm0
-; CHECK-NEXT: cmovpl %r14d, %eax
-; CHECK-NEXT: movzbl %al, %r13d
-; CHECK-NEXT: shll $8, %r13d
-; CHECK-NEXT: orl %r12d, %r13d
+; CHECK-NEXT: maxss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: minss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %eax
+; CHECK-NEXT: cmovpl %ebx, %eax
+; CHECK-NEXT: movzbl %al, %r15d
+; CHECK-NEXT: shll $8, %r15d
+; CHECK-NEXT: orl %r14d, %r15d
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; CHECK-NEXT: callq __extendhfsf2 at PLT
-; CHECK-NEXT: cvttss2si %xmm0, %eax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmovbl %ebx, %eax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmoval %ebp, %eax
; CHECK-NEXT: ucomiss %xmm0, %xmm0
-; CHECK-NEXT: cmovpl %r14d, %eax
-; CHECK-NEXT: movzbl %al, %r12d
-; CHECK-NEXT: shll $16, %r12d
-; CHECK-NEXT: orl %r13d, %r12d
+; CHECK-NEXT: maxss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: minss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %eax
+; CHECK-NEXT: cmovpl %ebx, %eax
+; CHECK-NEXT: movzbl %al, %r14d
+; CHECK-NEXT: shll $16, %r14d
+; CHECK-NEXT: orl %r15d, %r14d
; CHECK-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: psrlq $48, %xmm0
; CHECK-NEXT: callq __extendhfsf2 at PLT
-; CHECK-NEXT: cvttss2si %xmm0, %eax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmovbl %ebx, %eax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmoval %ebp, %eax
; CHECK-NEXT: ucomiss %xmm0, %xmm0
-; CHECK-NEXT: cmovpl %r14d, %eax
+; CHECK-NEXT: maxss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: minss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %eax
+; CHECK-NEXT: cmovpl %ebx, %eax
; CHECK-NEXT: shll $24, %eax
-; CHECK-NEXT: orl %r12d, %eax
+; CHECK-NEXT: orl %r14d, %eax
; CHECK-NEXT: movd %eax, %xmm0
-; CHECK-NEXT: pinsrw $2, %r15d, %xmm0
+; CHECK-NEXT: pinsrw $2, %ebp, %xmm0
; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT: callq __extendhfsf2 at PLT
-; CHECK-NEXT: cvttss2si %xmm0, %r15d
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmovbl %ebx, %r15d
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmoval %ebp, %r15d
; CHECK-NEXT: ucomiss %xmm0, %xmm0
-; CHECK-NEXT: cmovpl %r14d, %r15d
-; CHECK-NEXT: shll $8, %r15d
+; CHECK-NEXT: maxss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: minss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %ebp
+; CHECK-NEXT: cmovpl %ebx, %ebp
+; CHECK-NEXT: shll $8, %ebp
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; CHECK-NEXT: callq __extendhfsf2 at PLT
-; CHECK-NEXT: cvttss2si %xmm0, %eax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmovbl %ebx, %eax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmoval %ebp, %eax
; CHECK-NEXT: ucomiss %xmm0, %xmm0
-; CHECK-NEXT: cmovpl %r14d, %eax
+; CHECK-NEXT: maxss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: minss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %eax
+; CHECK-NEXT: cmovpl %ebx, %eax
; CHECK-NEXT: movzbl %al, %eax
-; CHECK-NEXT: orl %r15d, %eax
+; CHECK-NEXT: orl %ebp, %eax
; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: pinsrw $3, %eax, %xmm0
; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: popq %rbx
-; CHECK-NEXT: popq %r12
-; CHECK-NEXT: popq %r13
; CHECK-NEXT: popq %r14
; CHECK-NEXT: popq %r15
; CHECK-NEXT: popq %rbp
@@ -845,35 +804,27 @@ define <8 x i8> @test_signed_v8i8_v8f16(<8 x half> %f) nounwind {
define <8 x i16> @test_signed_v8i16_v8f16(<8 x half> %f) nounwind {
; CHECK-LABEL: test_signed_v8i16_v8f16:
; CHECK: # %bb.0:
-; CHECK-NEXT: pushq %rbp
-; CHECK-NEXT: pushq %r14
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: subq $64, %rsp
; CHECK-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT: callq __extendhfsf2 at PLT
-; CHECK-NEXT: cvttss2si %xmm0, %eax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: movl $32768, %ebx # imm = 0x8000
-; CHECK-NEXT: cmovbl %ebx, %eax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: movl $32767, %ebp # imm = 0x7FFF
-; CHECK-NEXT: cmoval %ebp, %eax
-; CHECK-NEXT: xorl %r14d, %r14d
+; CHECK-NEXT: xorl %ebx, %ebx
; CHECK-NEXT: ucomiss %xmm0, %xmm0
-; CHECK-NEXT: cmovpl %r14d, %eax
+; CHECK-NEXT: maxss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: minss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %eax
+; CHECK-NEXT: cmovpl %ebx, %eax
; CHECK-NEXT: movd %eax, %xmm0
; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; CHECK-NEXT: callq __extendhfsf2 at PLT
-; CHECK-NEXT: cvttss2si %xmm0, %eax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmovbl %ebx, %eax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmoval %ebp, %eax
; CHECK-NEXT: ucomiss %xmm0, %xmm0
-; CHECK-NEXT: cmovpl %r14d, %eax
+; CHECK-NEXT: maxss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: minss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %eax
+; CHECK-NEXT: cmovpl %ebx, %eax
; CHECK-NEXT: movd %eax, %xmm0
; CHECK-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
@@ -881,25 +832,21 @@ define <8 x i16> @test_signed_v8i16_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT: callq __extendhfsf2 at PLT
-; CHECK-NEXT: cvttss2si %xmm0, %eax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmovbl %ebx, %eax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmoval %ebp, %eax
; CHECK-NEXT: ucomiss %xmm0, %xmm0
-; CHECK-NEXT: cmovpl %r14d, %eax
+; CHECK-NEXT: maxss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: minss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %eax
+; CHECK-NEXT: cmovpl %ebx, %eax
; CHECK-NEXT: movd %eax, %xmm0
; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: callq __extendhfsf2 at PLT
-; CHECK-NEXT: cvttss2si %xmm0, %eax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmovbl %ebx, %eax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmoval %ebp, %eax
; CHECK-NEXT: ucomiss %xmm0, %xmm0
-; CHECK-NEXT: cmovpl %r14d, %eax
+; CHECK-NEXT: maxss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: minss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %eax
+; CHECK-NEXT: cmovpl %ebx, %eax
; CHECK-NEXT: movd %eax, %xmm0
; CHECK-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
@@ -909,50 +856,42 @@ define <8 x i16> @test_signed_v8i16_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: psrlq $48, %xmm0
; CHECK-NEXT: callq __extendhfsf2 at PLT
-; CHECK-NEXT: cvttss2si %xmm0, %eax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmovbl %ebx, %eax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmoval %ebp, %eax
; CHECK-NEXT: ucomiss %xmm0, %xmm0
-; CHECK-NEXT: cmovpl %r14d, %eax
+; CHECK-NEXT: maxss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: minss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %eax
+; CHECK-NEXT: cmovpl %ebx, %eax
; CHECK-NEXT: movd %eax, %xmm0
; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; CHECK-NEXT: callq __extendhfsf2 at PLT
-; CHECK-NEXT: cvttss2si %xmm0, %eax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmovbl %ebx, %eax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmoval %ebp, %eax
; CHECK-NEXT: ucomiss %xmm0, %xmm0
-; CHECK-NEXT: cmovpl %r14d, %eax
+; CHECK-NEXT: maxss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: minss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %eax
+; CHECK-NEXT: cmovpl %ebx, %eax
; CHECK-NEXT: movd %eax, %xmm0
; CHECK-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: callq __extendhfsf2 at PLT
-; CHECK-NEXT: cvttss2si %xmm0, %eax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmovbl %ebx, %eax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmoval %ebp, %eax
; CHECK-NEXT: ucomiss %xmm0, %xmm0
-; CHECK-NEXT: cmovpl %r14d, %eax
+; CHECK-NEXT: maxss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: minss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %eax
+; CHECK-NEXT: cmovpl %ebx, %eax
; CHECK-NEXT: movd %eax, %xmm0
; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: psrld $16, %xmm0
; CHECK-NEXT: callq __extendhfsf2 at PLT
-; CHECK-NEXT: cvttss2si %xmm0, %eax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmovbl %ebx, %eax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmoval %ebp, %eax
; CHECK-NEXT: ucomiss %xmm0, %xmm0
-; CHECK-NEXT: cmovpl %r14d, %eax
+; CHECK-NEXT: maxss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: minss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %eax
+; CHECK-NEXT: cmovpl %ebx, %eax
; CHECK-NEXT: movd %eax, %xmm1
; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
@@ -962,8 +901,6 @@ define <8 x i16> @test_signed_v8i16_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT: addq $64, %rsp
; CHECK-NEXT: popq %rbx
-; CHECK-NEXT: popq %r14
-; CHECK-NEXT: popq %rbp
; CHECK-NEXT: retq
%x = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> %f)
ret <8 x i16> %x
diff --git a/llvm/test/CodeGen/X86/fptoui-sat-scalar.ll b/llvm/test/CodeGen/X86/fptoui-sat-scalar.ll
index 47dc3ca3616ea..a074c78d512f5 100644
--- a/llvm/test/CodeGen/X86/fptoui-sat-scalar.ll
+++ b/llvm/test/CodeGen/X86/fptoui-sat-scalar.ll
@@ -1399,45 +1399,33 @@ define i50 @test_unsigned_i50_f64(double %f) nounwind {
;
; X86-SSE-LABEL: test_unsigned_i50_f64:
; X86-SSE: # %bb.0:
-; X86-SSE-NEXT: pushl %esi
-; X86-SSE-NEXT: subl $16, %esp
+; X86-SSE-NEXT: subl $20, %esp
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; X86-SSE-NEXT: movsd {{.*#+}} xmm2 = [9.2233720368547758E+18,0.0E+0]
-; X86-SSE-NEXT: ucomisd %xmm0, %xmm2
; X86-SSE-NEXT: xorpd %xmm1, %xmm1
-; X86-SSE-NEXT: jbe .LBB16_2
+; X86-SSE-NEXT: maxsd %xmm1, %xmm0
+; X86-SSE-NEXT: minsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE-NEXT: movsd {{.*#+}} xmm1 = [9.2233720368547758E+18,0.0E+0]
+; X86-SSE-NEXT: ucomisd %xmm1, %xmm0
+; X86-SSE-NEXT: jae .LBB16_2
; X86-SSE-NEXT: # %bb.1:
-; X86-SSE-NEXT: xorpd %xmm2, %xmm2
+; X86-SSE-NEXT: xorpd %xmm1, %xmm1
; X86-SSE-NEXT: .LBB16_2:
-; X86-SSE-NEXT: movapd %xmm0, %xmm3
-; X86-SSE-NEXT: subsd %xmm2, %xmm3
-; X86-SSE-NEXT: movsd %xmm3, {{[0-9]+}}(%esp)
-; X86-SSE-NEXT: setbe %cl
+; X86-SSE-NEXT: subsd %xmm1, %xmm0
+; X86-SSE-NEXT: movsd %xmm0, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT: setae %al
; X86-SSE-NEXT: fldl {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fnstcw {{[0-9]+}}(%esp)
-; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-SSE-NEXT: orl $3072, %eax # imm = 0xC00
-; X86-SSE-NEXT: movw %ax, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-SSE-NEXT: orl $3072, %ecx # imm = 0xC00
+; X86-SSE-NEXT: movw %cx, {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fldcw {{[0-9]+}}(%esp)
-; X86-SSE-NEXT: xorl %eax, %eax
-; X86-SSE-NEXT: ucomisd %xmm1, %xmm0
-; X86-SSE-NEXT: movl $0, %esi
-; X86-SSE-NEXT: jb .LBB16_4
-; X86-SSE-NEXT: # %bb.3:
-; X86-SSE-NEXT: movzbl %cl, %eax
-; X86-SSE-NEXT: shll $31, %eax
-; X86-SSE-NEXT: xorl {{[0-9]+}}(%esp), %eax
-; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-SSE-NEXT: .LBB16_4:
-; X86-SSE-NEXT: ucomisd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE-NEXT: movl $262143, %edx # imm = 0x3FFFF
-; X86-SSE-NEXT: cmovbel %eax, %edx
-; X86-SSE-NEXT: movl $-1, %eax
-; X86-SSE-NEXT: cmovbel %esi, %eax
-; X86-SSE-NEXT: addl $16, %esp
-; X86-SSE-NEXT: popl %esi
+; X86-SSE-NEXT: movzbl %al, %edx
+; X86-SSE-NEXT: shll $31, %edx
+; X86-SSE-NEXT: xorl {{[0-9]+}}(%esp), %edx
+; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT: addl $20, %esp
; X86-SSE-NEXT: retl
;
; X64-LABEL: test_unsigned_i50_f64:
@@ -1928,14 +1916,10 @@ define i1 @test_unsigned_i1_f16(half %f) nounwind {
; X86-SSE-NEXT: calll __extendhfsf2
; X86-SSE-NEXT: fstps {{[0-9]+}}(%esp)
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-SSE-NEXT: cvttss2si %xmm0, %eax
-; X86-SSE-NEXT: xorl %ecx, %ecx
; X86-SSE-NEXT: xorps %xmm1, %xmm1
-; X86-SSE-NEXT: ucomiss %xmm1, %xmm0
-; X86-SSE-NEXT: cmovael %eax, %ecx
-; X86-SSE-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE-NEXT: movl $1, %eax
-; X86-SSE-NEXT: cmovbel %ecx, %eax
+; X86-SSE-NEXT: maxss %xmm1, %xmm0
+; X86-SSE-NEXT: minss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE-NEXT: cvttss2si %xmm0, %eax
; X86-SSE-NEXT: # kill: def $al killed $al killed $eax
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: retl
@@ -1944,14 +1928,10 @@ define i1 @test_unsigned_i1_f16(half %f) nounwind {
; X64: # %bb.0:
; X64-NEXT: pushq %rax
; X64-NEXT: callq __extendhfsf2 at PLT
-; X64-NEXT: cvttss2si %xmm0, %eax
-; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: xorps %xmm1, %xmm1
-; X64-NEXT: ucomiss %xmm1, %xmm0
-; X64-NEXT: cmovael %eax, %ecx
-; X64-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-NEXT: movl $1, %eax
-; X64-NEXT: cmovbel %ecx, %eax
+; X64-NEXT: maxss %xmm1, %xmm0
+; X64-NEXT: minss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT: cvttss2si %xmm0, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: popq %rcx
; X64-NEXT: retq
@@ -2010,14 +1990,10 @@ define i8 @test_unsigned_i8_f16(half %f) nounwind {
; X86-SSE-NEXT: calll __extendhfsf2
; X86-SSE-NEXT: fstps {{[0-9]+}}(%esp)
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-SSE-NEXT: cvttss2si %xmm0, %eax
-; X86-SSE-NEXT: xorl %ecx, %ecx
; X86-SSE-NEXT: xorps %xmm1, %xmm1
-; X86-SSE-NEXT: ucomiss %xmm1, %xmm0
-; X86-SSE-NEXT: cmovael %eax, %ecx
-; X86-SSE-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE-NEXT: movl $255, %eax
-; X86-SSE-NEXT: cmovbel %ecx, %eax
+; X86-SSE-NEXT: maxss %xmm1, %xmm0
+; X86-SSE-NEXT: minss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE-NEXT: cvttss2si %xmm0, %eax
; X86-SSE-NEXT: # kill: def $al killed $al killed $eax
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: retl
@@ -2026,14 +2002,10 @@ define i8 @test_unsigned_i8_f16(half %f) nounwind {
; X64: # %bb.0:
; X64-NEXT: pushq %rax
; X64-NEXT: callq __extendhfsf2 at PLT
-; X64-NEXT: cvttss2si %xmm0, %eax
-; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: xorps %xmm1, %xmm1
-; X64-NEXT: ucomiss %xmm1, %xmm0
-; X64-NEXT: cmovael %eax, %ecx
-; X64-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-NEXT: movl $255, %eax
-; X64-NEXT: cmovbel %ecx, %eax
+; X64-NEXT: maxss %xmm1, %xmm0
+; X64-NEXT: minss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT: cvttss2si %xmm0, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: popq %rcx
; X64-NEXT: retq
@@ -2091,14 +2063,10 @@ define i13 @test_unsigned_i13_f16(half %f) nounwind {
; X86-SSE-NEXT: calll __extendhfsf2
; X86-SSE-NEXT: fstps {{[0-9]+}}(%esp)
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-SSE-NEXT: cvttss2si %xmm0, %eax
-; X86-SSE-NEXT: xorl %ecx, %ecx
; X86-SSE-NEXT: xorps %xmm1, %xmm1
-; X86-SSE-NEXT: ucomiss %xmm1, %xmm0
-; X86-SSE-NEXT: cmovael %eax, %ecx
-; X86-SSE-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE-NEXT: movl $8191, %eax # imm = 0x1FFF
-; X86-SSE-NEXT: cmovbel %ecx, %eax
+; X86-SSE-NEXT: maxss %xmm1, %xmm0
+; X86-SSE-NEXT: minss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE-NEXT: cvttss2si %xmm0, %eax
; X86-SSE-NEXT: # kill: def $ax killed $ax killed $eax
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: retl
@@ -2107,14 +2075,10 @@ define i13 @test_unsigned_i13_f16(half %f) nounwind {
; X64: # %bb.0:
; X64-NEXT: pushq %rax
; X64-NEXT: callq __extendhfsf2 at PLT
-; X64-NEXT: cvttss2si %xmm0, %eax
-; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: xorps %xmm1, %xmm1
-; X64-NEXT: ucomiss %xmm1, %xmm0
-; X64-NEXT: cmovael %eax, %ecx
-; X64-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-NEXT: movl $8191, %eax # imm = 0x1FFF
-; X64-NEXT: cmovbel %ecx, %eax
+; X64-NEXT: maxss %xmm1, %xmm0
+; X64-NEXT: minss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT: cvttss2si %xmm0, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: popq %rcx
; X64-NEXT: retq
@@ -2172,14 +2136,10 @@ define i16 @test_unsigned_i16_f16(half %f) nounwind {
; X86-SSE-NEXT: calll __extendhfsf2
; X86-SSE-NEXT: fstps {{[0-9]+}}(%esp)
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-SSE-NEXT: cvttss2si %xmm0, %eax
-; X86-SSE-NEXT: xorl %ecx, %ecx
; X86-SSE-NEXT: xorps %xmm1, %xmm1
-; X86-SSE-NEXT: ucomiss %xmm1, %xmm0
-; X86-SSE-NEXT: cmovael %eax, %ecx
-; X86-SSE-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE-NEXT: movl $65535, %eax # imm = 0xFFFF
-; X86-SSE-NEXT: cmovbel %ecx, %eax
+; X86-SSE-NEXT: maxss %xmm1, %xmm0
+; X86-SSE-NEXT: minss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE-NEXT: cvttss2si %xmm0, %eax
; X86-SSE-NEXT: # kill: def $ax killed $ax killed $eax
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: retl
@@ -2188,14 +2148,10 @@ define i16 @test_unsigned_i16_f16(half %f) nounwind {
; X64: # %bb.0:
; X64-NEXT: pushq %rax
; X64-NEXT: callq __extendhfsf2 at PLT
-; X64-NEXT: cvttss2si %xmm0, %eax
-; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: xorps %xmm1, %xmm1
-; X64-NEXT: ucomiss %xmm1, %xmm0
-; X64-NEXT: cmovael %eax, %ecx
-; X64-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-NEXT: movl $65535, %eax # imm = 0xFFFF
-; X64-NEXT: cmovbel %ecx, %eax
+; X64-NEXT: maxss %xmm1, %xmm0
+; X64-NEXT: minss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT: cvttss2si %xmm0, %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: popq %rcx
; X64-NEXT: retq
@@ -2253,21 +2209,16 @@ define i19 @test_unsigned_i19_f16(half %f) nounwind {
; X86-SSE-NEXT: calll __extendhfsf2
; X86-SSE-NEXT: fstps {{[0-9]+}}(%esp)
; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-SSE-NEXT: cvttss2si %xmm0, %eax
-; X86-SSE-NEXT: movl %eax, %ecx
-; X86-SSE-NEXT: sarl $31, %ecx
-; X86-SSE-NEXT: movaps %xmm0, %xmm1
-; X86-SSE-NEXT: subss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
-; X86-SSE-NEXT: cvttss2si %xmm1, %edx
-; X86-SSE-NEXT: andl %ecx, %edx
-; X86-SSE-NEXT: orl %eax, %edx
-; X86-SSE-NEXT: xorl %ecx, %ecx
; X86-SSE-NEXT: xorps %xmm1, %xmm1
-; X86-SSE-NEXT: ucomiss %xmm1, %xmm0
-; X86-SSE-NEXT: cmovael %edx, %ecx
-; X86-SSE-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE-NEXT: movl $524287, %eax # imm = 0x7FFFF
-; X86-SSE-NEXT: cmovbel %ecx, %eax
+; X86-SSE-NEXT: maxss %xmm1, %xmm0
+; X86-SSE-NEXT: minss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE-NEXT: cvttss2si %xmm0, %ecx
+; X86-SSE-NEXT: movl %ecx, %edx
+; X86-SSE-NEXT: sarl $31, %edx
+; X86-SSE-NEXT: subss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE-NEXT: cvttss2si %xmm0, %eax
+; X86-SSE-NEXT: andl %edx, %eax
+; X86-SSE-NEXT: orl %ecx, %eax
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: retl
;
@@ -2275,14 +2226,11 @@ define i19 @test_unsigned_i19_f16(half %f) nounwind {
; X64: # %bb.0:
; X64-NEXT: pushq %rax
; X64-NEXT: callq __extendhfsf2 at PLT
-; X64-NEXT: cvttss2si %xmm0, %rax
-; X64-NEXT: xorl %ecx, %ecx
; X64-NEXT: xorps %xmm1, %xmm1
-; X64-NEXT: ucomiss %xmm1, %xmm0
-; X64-NEXT: cmovael %eax, %ecx
-; X64-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; X64-NEXT: movl $524287, %eax # imm = 0x7FFFF
-; X64-NEXT: cmovbel %ecx, %eax
+; X64-NEXT: maxss %xmm1, %xmm0
+; X64-NEXT: minss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; X64-NEXT: cvttss2si %xmm0, %rax
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: popq %rcx
; X64-NEXT: retq
%x = call i19 @llvm.fptoui.sat.i19.f16(half %f)
diff --git a/llvm/test/CodeGen/X86/fptoui-sat-vector-128.ll b/llvm/test/CodeGen/X86/fptoui-sat-vector-128.ll
index 7b1db5c29abaf..578eaa06ca7d3 100644
--- a/llvm/test/CodeGen/X86/fptoui-sat-vector-128.ll
+++ b/llvm/test/CodeGen/X86/fptoui-sat-vector-128.ll
@@ -583,31 +583,23 @@ declare <8 x i128> @llvm.fptoui.sat.v8i128.v8f16(<8 x half>)
define <8 x i1> @test_unsigned_v8i1_v8f16(<8 x half> %f) nounwind {
; CHECK-LABEL: test_unsigned_v8i1_v8f16:
; CHECK: # %bb.0:
-; CHECK-NEXT: pushq %rbp
-; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: subq $72, %rsp
; CHECK-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT: callq __extendhfsf2 at PLT
-; CHECK-NEXT: cvttss2si %xmm0, %eax
-; CHECK-NEXT: xorl %ebx, %ebx
; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
-; CHECK-NEXT: cmovbl %ebx, %eax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: movl $1, %ebp
-; CHECK-NEXT: cmoval %ebp, %eax
+; CHECK-NEXT: maxss %xmm1, %xmm0
+; CHECK-NEXT: minss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %eax
; CHECK-NEXT: movd %eax, %xmm0
; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; CHECK-NEXT: callq __extendhfsf2 at PLT
-; CHECK-NEXT: cvttss2si %xmm0, %eax
; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
-; CHECK-NEXT: cmovbl %ebx, %eax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmoval %ebp, %eax
+; CHECK-NEXT: maxss %xmm1, %xmm0
+; CHECK-NEXT: minss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %eax
; CHECK-NEXT: movd %eax, %xmm0
; CHECK-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
@@ -615,23 +607,19 @@ define <8 x i1> @test_unsigned_v8i1_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT: callq __extendhfsf2 at PLT
-; CHECK-NEXT: cvttss2si %xmm0, %eax
; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
-; CHECK-NEXT: cmovbl %ebx, %eax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmoval %ebp, %eax
+; CHECK-NEXT: maxss %xmm1, %xmm0
+; CHECK-NEXT: minss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %eax
; CHECK-NEXT: movd %eax, %xmm0
; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: callq __extendhfsf2 at PLT
-; CHECK-NEXT: cvttss2si %xmm0, %eax
; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
-; CHECK-NEXT: cmovbl %ebx, %eax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmoval %ebp, %eax
+; CHECK-NEXT: maxss %xmm1, %xmm0
+; CHECK-NEXT: minss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %eax
; CHECK-NEXT: movd %eax, %xmm0
; CHECK-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
@@ -641,46 +629,38 @@ define <8 x i1> @test_unsigned_v8i1_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: psrlq $48, %xmm0
; CHECK-NEXT: callq __extendhfsf2 at PLT
-; CHECK-NEXT: cvttss2si %xmm0, %eax
; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
-; CHECK-NEXT: cmovbl %ebx, %eax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmoval %ebp, %eax
+; CHECK-NEXT: maxss %xmm1, %xmm0
+; CHECK-NEXT: minss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %eax
; CHECK-NEXT: movd %eax, %xmm0
; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; CHECK-NEXT: callq __extendhfsf2 at PLT
-; CHECK-NEXT: cvttss2si %xmm0, %eax
; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
-; CHECK-NEXT: cmovbl %ebx, %eax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmoval %ebp, %eax
+; CHECK-NEXT: maxss %xmm1, %xmm0
+; CHECK-NEXT: minss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %eax
; CHECK-NEXT: movd %eax, %xmm0
; CHECK-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: callq __extendhfsf2 at PLT
-; CHECK-NEXT: cvttss2si %xmm0, %eax
; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
-; CHECK-NEXT: cmovbl %ebx, %eax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmoval %ebp, %eax
+; CHECK-NEXT: maxss %xmm1, %xmm0
+; CHECK-NEXT: minss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %eax
; CHECK-NEXT: movd %eax, %xmm0
; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: psrld $16, %xmm0
; CHECK-NEXT: callq __extendhfsf2 at PLT
-; CHECK-NEXT: cvttss2si %xmm0, %eax
; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
-; CHECK-NEXT: cmovbl %ebx, %eax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmoval %ebp, %eax
+; CHECK-NEXT: maxss %xmm1, %xmm0
+; CHECK-NEXT: minss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %eax
; CHECK-NEXT: movd %eax, %xmm1
; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
@@ -689,8 +669,6 @@ define <8 x i1> @test_unsigned_v8i1_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT: addq $72, %rsp
-; CHECK-NEXT: popq %rbx
-; CHECK-NEXT: popq %rbp
; CHECK-NEXT: retq
%x = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f16(<8 x half> %f)
ret <8 x i1> %x
@@ -700,109 +678,87 @@ define <8 x i8> @test_unsigned_v8i8_v8f16(<8 x half> %f) nounwind {
; CHECK-LABEL: test_unsigned_v8i8_v8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
-; CHECK-NEXT: pushq %r15
; CHECK-NEXT: pushq %r14
-; CHECK-NEXT: pushq %r12
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: subq $32, %rsp
; CHECK-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT: callq __extendhfsf2 at PLT
-; CHECK-NEXT: cvttss2si %xmm0, %r15d
-; CHECK-NEXT: xorl %ebx, %ebx
; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
-; CHECK-NEXT: cmovbl %ebx, %r15d
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: movl $255, %ebp
-; CHECK-NEXT: cmoval %ebp, %r15d
-; CHECK-NEXT: shll $8, %r15d
+; CHECK-NEXT: maxss %xmm1, %xmm0
+; CHECK-NEXT: minss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %ebp
+; CHECK-NEXT: shll $8, %ebp
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: callq __extendhfsf2 at PLT
-; CHECK-NEXT: cvttss2si %xmm0, %eax
; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
-; CHECK-NEXT: cmovbl %ebx, %eax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmoval %ebp, %eax
-; CHECK-NEXT: movzbl %al, %r14d
-; CHECK-NEXT: orl %r15d, %r14d
+; CHECK-NEXT: maxss %xmm1, %xmm0
+; CHECK-NEXT: minss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %eax
+; CHECK-NEXT: movzbl %al, %ebx
+; CHECK-NEXT: orl %ebp, %ebx
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: callq __extendhfsf2 at PLT
-; CHECK-NEXT: cvttss2si %xmm0, %eax
; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
-; CHECK-NEXT: cmovbl %ebx, %eax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmoval %ebp, %eax
-; CHECK-NEXT: movzbl %al, %r15d
+; CHECK-NEXT: maxss %xmm1, %xmm0
+; CHECK-NEXT: minss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %eax
+; CHECK-NEXT: movzbl %al, %ebp
; CHECK-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: psrld $16, %xmm0
; CHECK-NEXT: callq __extendhfsf2 at PLT
-; CHECK-NEXT: cvttss2si %xmm0, %eax
; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
-; CHECK-NEXT: cmovbl %ebx, %eax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmoval %ebp, %eax
-; CHECK-NEXT: movzbl %al, %r12d
-; CHECK-NEXT: shll $8, %r12d
-; CHECK-NEXT: orl %r15d, %r12d
+; CHECK-NEXT: maxss %xmm1, %xmm0
+; CHECK-NEXT: minss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %eax
+; CHECK-NEXT: movzbl %al, %r14d
+; CHECK-NEXT: shll $8, %r14d
+; CHECK-NEXT: orl %ebp, %r14d
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; CHECK-NEXT: callq __extendhfsf2 at PLT
-; CHECK-NEXT: cvttss2si %xmm0, %eax
; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
-; CHECK-NEXT: cmovbl %ebx, %eax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmoval %ebp, %eax
-; CHECK-NEXT: movzbl %al, %r15d
-; CHECK-NEXT: shll $16, %r15d
-; CHECK-NEXT: orl %r12d, %r15d
+; CHECK-NEXT: maxss %xmm1, %xmm0
+; CHECK-NEXT: minss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %eax
+; CHECK-NEXT: movzbl %al, %ebp
+; CHECK-NEXT: shll $16, %ebp
+; CHECK-NEXT: orl %r14d, %ebp
; CHECK-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: psrlq $48, %xmm0
; CHECK-NEXT: callq __extendhfsf2 at PLT
-; CHECK-NEXT: cvttss2si %xmm0, %eax
; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
-; CHECK-NEXT: cmovbl %ebx, %eax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmoval %ebp, %eax
+; CHECK-NEXT: maxss %xmm1, %xmm0
+; CHECK-NEXT: minss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %eax
; CHECK-NEXT: shll $24, %eax
-; CHECK-NEXT: orl %r15d, %eax
+; CHECK-NEXT: orl %ebp, %eax
; CHECK-NEXT: movd %eax, %xmm0
-; CHECK-NEXT: pinsrw $2, %r14d, %xmm0
+; CHECK-NEXT: pinsrw $2, %ebx, %xmm0
; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT: callq __extendhfsf2 at PLT
-; CHECK-NEXT: cvttss2si %xmm0, %r14d
; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
-; CHECK-NEXT: cmovbl %ebx, %r14d
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmoval %ebp, %r14d
-; CHECK-NEXT: shll $8, %r14d
+; CHECK-NEXT: maxss %xmm1, %xmm0
+; CHECK-NEXT: minss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %ebx
+; CHECK-NEXT: shll $8, %ebx
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; CHECK-NEXT: callq __extendhfsf2 at PLT
-; CHECK-NEXT: cvttss2si %xmm0, %eax
; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
-; CHECK-NEXT: cmovbl %ebx, %eax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmoval %ebp, %eax
+; CHECK-NEXT: maxss %xmm1, %xmm0
+; CHECK-NEXT: minss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %eax
; CHECK-NEXT: movzbl %al, %eax
-; CHECK-NEXT: orl %r14d, %eax
+; CHECK-NEXT: orl %ebx, %eax
; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: pinsrw $3, %eax, %xmm0
; CHECK-NEXT: addq $32, %rsp
; CHECK-NEXT: popq %rbx
-; CHECK-NEXT: popq %r12
; CHECK-NEXT: popq %r14
-; CHECK-NEXT: popq %r15
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: retq
%x = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f16(<8 x half> %f)
@@ -812,31 +768,23 @@ define <8 x i8> @test_unsigned_v8i8_v8f16(<8 x half> %f) nounwind {
define <8 x i16> @test_unsigned_v8i16_v8f16(<8 x half> %f) nounwind {
; CHECK-LABEL: test_unsigned_v8i16_v8f16:
; CHECK: # %bb.0:
-; CHECK-NEXT: pushq %rbp
-; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: subq $72, %rsp
; CHECK-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT: callq __extendhfsf2 at PLT
-; CHECK-NEXT: cvttss2si %xmm0, %eax
-; CHECK-NEXT: xorl %ebx, %ebx
; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
-; CHECK-NEXT: cmovbl %ebx, %eax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: movl $65535, %ebp # imm = 0xFFFF
-; CHECK-NEXT: cmoval %ebp, %eax
+; CHECK-NEXT: maxss %xmm1, %xmm0
+; CHECK-NEXT: minss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %eax
; CHECK-NEXT: movd %eax, %xmm0
; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; CHECK-NEXT: callq __extendhfsf2 at PLT
-; CHECK-NEXT: cvttss2si %xmm0, %eax
; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
-; CHECK-NEXT: cmovbl %ebx, %eax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmoval %ebp, %eax
+; CHECK-NEXT: maxss %xmm1, %xmm0
+; CHECK-NEXT: minss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %eax
; CHECK-NEXT: movd %eax, %xmm0
; CHECK-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
@@ -844,23 +792,19 @@ define <8 x i16> @test_unsigned_v8i16_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT: callq __extendhfsf2 at PLT
-; CHECK-NEXT: cvttss2si %xmm0, %eax
; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
-; CHECK-NEXT: cmovbl %ebx, %eax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmoval %ebp, %eax
+; CHECK-NEXT: maxss %xmm1, %xmm0
+; CHECK-NEXT: minss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %eax
; CHECK-NEXT: movd %eax, %xmm0
; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: callq __extendhfsf2 at PLT
-; CHECK-NEXT: cvttss2si %xmm0, %eax
; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
-; CHECK-NEXT: cmovbl %ebx, %eax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmoval %ebp, %eax
+; CHECK-NEXT: maxss %xmm1, %xmm0
+; CHECK-NEXT: minss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %eax
; CHECK-NEXT: movd %eax, %xmm0
; CHECK-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
@@ -870,46 +814,38 @@ define <8 x i16> @test_unsigned_v8i16_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: psrlq $48, %xmm0
; CHECK-NEXT: callq __extendhfsf2 at PLT
-; CHECK-NEXT: cvttss2si %xmm0, %eax
; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
-; CHECK-NEXT: cmovbl %ebx, %eax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmoval %ebp, %eax
+; CHECK-NEXT: maxss %xmm1, %xmm0
+; CHECK-NEXT: minss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %eax
; CHECK-NEXT: movd %eax, %xmm0
; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; CHECK-NEXT: callq __extendhfsf2 at PLT
-; CHECK-NEXT: cvttss2si %xmm0, %eax
; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
-; CHECK-NEXT: cmovbl %ebx, %eax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmoval %ebp, %eax
+; CHECK-NEXT: maxss %xmm1, %xmm0
+; CHECK-NEXT: minss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %eax
; CHECK-NEXT: movd %eax, %xmm0
; CHECK-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: callq __extendhfsf2 at PLT
-; CHECK-NEXT: cvttss2si %xmm0, %eax
; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
-; CHECK-NEXT: cmovbl %ebx, %eax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmoval %ebp, %eax
+; CHECK-NEXT: maxss %xmm1, %xmm0
+; CHECK-NEXT: minss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %eax
; CHECK-NEXT: movd %eax, %xmm0
; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: psrld $16, %xmm0
; CHECK-NEXT: callq __extendhfsf2 at PLT
-; CHECK-NEXT: cvttss2si %xmm0, %eax
; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
-; CHECK-NEXT: cmovbl %ebx, %eax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: cmoval %ebp, %eax
+; CHECK-NEXT: maxss %xmm1, %xmm0
+; CHECK-NEXT: minss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %eax
; CHECK-NEXT: movd %eax, %xmm1
; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
@@ -918,8 +854,6 @@ define <8 x i16> @test_unsigned_v8i16_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT: addq $72, %rsp
-; CHECK-NEXT: popq %rbx
-; CHECK-NEXT: popq %rbp
; CHECK-NEXT: retq
%x = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> %f)
ret <8 x i16> %x
More information about the llvm-branch-commits
mailing list