[llvm] SelectionDAG: Improve expandFP_TO_INT_SAT (PR #139217)
YunQiang Su via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 2 22:52:18 PDT 2025
https://github.com/wzssyqa updated https://github.com/llvm/llvm-project/pull/139217
>From 3ab1db883f4e3f408080a6e8d18dabac68335102 Mon Sep 17 00:00:00 2001
From: YunQiang Su <yunqiang at isrc.iscas.ac.cn>
Date: Fri, 9 May 2025 15:04:23 +0800
Subject: [PATCH 1/2] SelectionDAG: Improve expandFP_TO_INT_SAT
Currently, expandFP_TO_INT_SAT uses FMAXNUM and FMINNUM, which is
not correct if Src is an sNaN.
Let's try all 3 flavors of Max/Min for it (a sketch of the IR being expanded follows the list):
1) FMAXIMUMNUM/FMINIMUMNUM
See test/CodeGen/RISCV/bfloat-convert.ll
2) FMAXNUM/FMINNUM
See test/CodeGen/Mips/Half2Int16.ll
3) FMAXIMUM/FMINIMUM
See test/CodeGen/WebAssembly/Half2Int16.ll
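For reference, a minimal sketch of the kind of IR this expansion handles, modeled on the
new Half2Int16.ll tests; the function name here is illustrative, and any fptosi.sat/fptoui.sat
call goes down this path when the target has no native saturating conversion:

; Saturating float->i16 conversion. expandFP_TO_INT_SAT lowers this to a
; clamp using one of the three min/max flavors above, followed by
; FP_TO_SINT, plus a select of 0 when the chosen flavor can propagate NaN.
define i16 @example_fptosi_sat(float %a) {
  %r = call i16 @llvm.fptosi.sat.i16.f32(float %a)
  ret i16 %r
}
declare i16 @llvm.fptosi.sat.i16.f32(float)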
---
.../CodeGen/SelectionDAG/TargetLowering.cpp | 45 ++++--
llvm/test/CodeGen/Mips/Half2Int16.ll | 145 ++++++++++++++++++
llvm/test/CodeGen/WebAssembly/Half2Int16.ll | 119 ++++++++++++++
3 files changed, 300 insertions(+), 9 deletions(-)
create mode 100644 llvm/test/CodeGen/Mips/Half2Int16.ll
create mode 100644 llvm/test/CodeGen/WebAssembly/Half2Int16.ll
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index a476b191abf62..276e3b09d914d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -11543,22 +11543,49 @@ SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
// If the integer bounds are exactly representable as floats and min/max are
// legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
// of comparisons and selects.
- bool MinMaxLegal = isOperationLegal(ISD::FMINNUM, SrcVT) &&
- isOperationLegal(ISD::FMAXNUM, SrcVT);
- if (AreExactFloatBounds && MinMaxLegal) {
+ bool MinMax2019NumLegal = isOperationLegal(ISD::FMINIMUMNUM, SrcVT) &&
+ isOperationLegal(ISD::FMAXIMUMNUM, SrcVT);
+ bool MinMax2019Legal = isOperationLegal(ISD::FMINIMUM, SrcVT) &&
+ isOperationLegal(ISD::FMAXIMUM, SrcVT);
+ bool MinMax2008Legal = isOperationLegal(ISD::FMINNUM, SrcVT) &&
+ isOperationLegal(ISD::FMAXNUM, SrcVT);
+
+ if (AreExactFloatBounds &&
+ (MinMax2019NumLegal || MinMax2019Legal || MinMax2008Legal)) {
SDValue Clamped = Src;
-
- // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
- Clamped = DAG.getNode(ISD::FMAXNUM, dl, SrcVT, Clamped, MinFloatNode);
- // Clamp by MaxFloat from above. NaN cannot occur.
- Clamped = DAG.getNode(ISD::FMINNUM, dl, SrcVT, Clamped, MaxFloatNode);
+ bool Use2019 = false;
+
+ if (MinMax2019NumLegal) {
+ // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
+ Clamped = DAG.getNode(ISD::FMAXIMUMNUM, dl, SrcVT, Clamped, MinFloatNode);
+ // Clamp by MaxFloat from above. NaN cannot occur.
+ Clamped = DAG.getNode(ISD::FMINIMUMNUM, dl, SrcVT, Clamped, MaxFloatNode);
+ } else if (MinMax2008Legal) {
+ // Try 2008 first as it has better performance for converting SNaN to
+ // unsigned.
+ if (!IsSigned && !DAG.isKnownNeverSNaN(Clamped)) {
+ Clamped = DAG.getNode(ISD::FMAXNUM, dl, SrcVT, Clamped, Clamped);
+ }
+ // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
+ Clamped = DAG.getNode(ISD::FMAXNUM, dl, SrcVT, Clamped, MinFloatNode);
+ // Clamp by MaxFloat from above. NaN cannot occur.
+ Clamped = DAG.getNode(ISD::FMINNUM, dl, SrcVT, Clamped, MaxFloatNode);
+ } else if (MinMax2019Legal) {
+ // Clamp Src by MinFloat from below. If Src is NaN the result is qNaN.
+ Clamped = DAG.getNode(ISD::FMAXIMUM, dl, SrcVT, Clamped, MinFloatNode);
+ // Clamp by MaxFloat from above. NaN may occur.
+ Clamped = DAG.getNode(ISD::FMINIMUM, dl, SrcVT, Clamped, MaxFloatNode);
+ Use2019 = true;
+ } else {
+ llvm_unreachable("No Min/Max supported?");
+ }
// Convert clamped value to integer.
SDValue FpToInt = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
dl, DstVT, Clamped);
// In the unsigned case we're done, because we mapped NaN to MinFloat,
// which will cast to zero.
- if (!IsSigned)
+ if ((!IsSigned && !Use2019) || DAG.isKnownNeverNaN(Src))
return FpToInt;
// Otherwise, select 0 if Src is NaN.
diff --git a/llvm/test/CodeGen/Mips/Half2Int16.ll b/llvm/test/CodeGen/Mips/Half2Int16.ll
new file mode 100644
index 0000000000000..9ef54b516754f
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/Half2Int16.ll
@@ -0,0 +1,145 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=mipsisa32r6 -mattr=-soft-float | FileCheck %s
+
+define i16 @fcvt_h_s_sat(float %a) {
+; CHECK-LABEL: fcvt_h_s_sat:
+; CHECK: # %bb.0: # %start
+; CHECK-NEXT: lui $1, %hi($CPI0_0)
+; CHECK-NEXT: lwc1 $f0, %lo($CPI0_0)($1)
+; CHECK-NEXT: max.s $f0, $f12, $f0
+; CHECK-NEXT: lui $1, %hi($CPI0_1)
+; CHECK-NEXT: lwc1 $f1, %lo($CPI0_1)($1)
+; CHECK-NEXT: min.s $f0, $f0, $f1
+; CHECK-NEXT: trunc.w.s $f0, $f0
+; CHECK-NEXT: mfc1 $1, $f0
+; CHECK-NEXT: cmp.un.s $f0, $f12, $f12
+; CHECK-NEXT: mfc1 $2, $f0
+; CHECK-NEXT: jr $ra
+; CHECK-NEXT: seleqz $2, $1, $2
+start:
+ %0 = tail call i16 @llvm.fptosi.sat.i16.f32(float %a)
+ ret i16 %0
+}
+
+define i16 @fcvt_hu_s_sat(float %a) {
+; CHECK-LABEL: fcvt_hu_s_sat:
+; CHECK: # %bb.0: # %start
+; CHECK-NEXT: max.s $f0, $f12, $f12
+; CHECK-NEXT: mtc1 $zero, $f1
+; CHECK-NEXT: max.s $f0, $f0, $f1
+; CHECK-NEXT: lui $1, %hi($CPI1_0)
+; CHECK-NEXT: lwc1 $f1, %lo($CPI1_0)($1)
+; CHECK-NEXT: min.s $f0, $f0, $f1
+; CHECK-NEXT: lui $1, %hi($CPI1_1)
+; CHECK-NEXT: lwc1 $f1, %lo($CPI1_1)($1)
+; CHECK-NEXT: cmp.lt.s $f2, $f0, $f1
+; CHECK-NEXT: trunc.w.s $f3, $f0
+; CHECK-NEXT: mfc1 $1, $f3
+; CHECK-NEXT: mfc1 $2, $f2
+; CHECK-NEXT: selnez $1, $1, $2
+; CHECK-NEXT: sub.s $f0, $f0, $f1
+; CHECK-NEXT: trunc.w.s $f0, $f0
+; CHECK-NEXT: mfc1 $3, $f0
+; CHECK-NEXT: lui $4, 32768
+; CHECK-NEXT: xor $3, $3, $4
+; CHECK-NEXT: seleqz $2, $3, $2
+; CHECK-NEXT: jr $ra
+; CHECK-NEXT: or $2, $1, $2
+start:
+ %0 = tail call i16 @llvm.fptoui.sat.i16.f32(float %a)
+ ret i16 %0
+}
+
+define i16 @fcvt_h_s_sat_nnan(float nofpclass(nan) %a) {
+; CHECK-LABEL: fcvt_h_s_sat_nnan:
+; CHECK: # %bb.0: # %start
+; CHECK-NEXT: lui $1, %hi($CPI2_0)
+; CHECK-NEXT: lwc1 $f0, %lo($CPI2_0)($1)
+; CHECK-NEXT: max.s $f0, $f12, $f0
+; CHECK-NEXT: lui $1, %hi($CPI2_1)
+; CHECK-NEXT: lwc1 $f1, %lo($CPI2_1)($1)
+; CHECK-NEXT: min.s $f0, $f0, $f1
+; CHECK-NEXT: trunc.w.s $f0, $f0
+; CHECK-NEXT: jr $ra
+; CHECK-NEXT: mfc1 $2, $f0
+start:
+ %0 = tail call i16 @llvm.fptosi.sat.i16.f32(float %a)
+ ret i16 %0
+}
+
+define i16 @fcvt_hu_s_sat_nnan(float nofpclass(nan) %a) {
+; CHECK-LABEL: fcvt_hu_s_sat_nnan:
+; CHECK: # %bb.0: # %start
+; CHECK-NEXT: mtc1 $zero, $f0
+; CHECK-NEXT: max.s $f0, $f12, $f0
+; CHECK-NEXT: lui $1, %hi($CPI3_0)
+; CHECK-NEXT: lwc1 $f1, %lo($CPI3_0)($1)
+; CHECK-NEXT: min.s $f0, $f0, $f1
+; CHECK-NEXT: lui $1, %hi($CPI3_1)
+; CHECK-NEXT: lwc1 $f1, %lo($CPI3_1)($1)
+; CHECK-NEXT: cmp.lt.s $f2, $f0, $f1
+; CHECK-NEXT: trunc.w.s $f3, $f0
+; CHECK-NEXT: mfc1 $1, $f3
+; CHECK-NEXT: mfc1 $2, $f2
+; CHECK-NEXT: selnez $1, $1, $2
+; CHECK-NEXT: sub.s $f0, $f0, $f1
+; CHECK-NEXT: trunc.w.s $f0, $f0
+; CHECK-NEXT: mfc1 $3, $f0
+; CHECK-NEXT: lui $4, 32768
+; CHECK-NEXT: xor $3, $3, $4
+; CHECK-NEXT: seleqz $2, $3, $2
+; CHECK-NEXT: jr $ra
+; CHECK-NEXT: or $2, $1, $2
+start:
+ %0 = tail call i16 @llvm.fptoui.sat.i16.f32(float %a)
+ ret i16 %0
+}
+
+define i16 @fcvt_h_s_sat_nsnan(float nofpclass(snan) %a) {
+; CHECK-LABEL: fcvt_h_s_sat_nsnan:
+; CHECK: # %bb.0: # %start
+; CHECK-NEXT: lui $1, %hi($CPI4_0)
+; CHECK-NEXT: lwc1 $f0, %lo($CPI4_0)($1)
+; CHECK-NEXT: max.s $f0, $f12, $f0
+; CHECK-NEXT: lui $1, %hi($CPI4_1)
+; CHECK-NEXT: lwc1 $f1, %lo($CPI4_1)($1)
+; CHECK-NEXT: min.s $f0, $f0, $f1
+; CHECK-NEXT: trunc.w.s $f0, $f0
+; CHECK-NEXT: mfc1 $1, $f0
+; CHECK-NEXT: cmp.un.s $f0, $f12, $f12
+; CHECK-NEXT: mfc1 $2, $f0
+; CHECK-NEXT: jr $ra
+; CHECK-NEXT: seleqz $2, $1, $2
+start:
+ %0 = tail call i16 @llvm.fptosi.sat.i16.f32(float %a)
+ ret i16 %0
+}
+
+define i16 @fcvt_hu_s_sat_nsnan(float nofpclass(snan) %a) {
+; CHECK-LABEL: fcvt_hu_s_sat_nsnan:
+; CHECK: # %bb.0: # %start
+; CHECK-NEXT: mtc1 $zero, $f0
+; CHECK-NEXT: max.s $f0, $f12, $f0
+; CHECK-NEXT: lui $1, %hi($CPI5_0)
+; CHECK-NEXT: lwc1 $f1, %lo($CPI5_0)($1)
+; CHECK-NEXT: min.s $f0, $f0, $f1
+; CHECK-NEXT: lui $1, %hi($CPI5_1)
+; CHECK-NEXT: lwc1 $f1, %lo($CPI5_1)($1)
+; CHECK-NEXT: cmp.lt.s $f2, $f0, $f1
+; CHECK-NEXT: trunc.w.s $f3, $f0
+; CHECK-NEXT: mfc1 $1, $f3
+; CHECK-NEXT: mfc1 $2, $f2
+; CHECK-NEXT: selnez $1, $1, $2
+; CHECK-NEXT: sub.s $f0, $f0, $f1
+; CHECK-NEXT: trunc.w.s $f0, $f0
+; CHECK-NEXT: mfc1 $3, $f0
+; CHECK-NEXT: lui $4, 32768
+; CHECK-NEXT: xor $3, $3, $4
+; CHECK-NEXT: seleqz $2, $3, $2
+; CHECK-NEXT: jr $ra
+; CHECK-NEXT: or $2, $1, $2
+start:
+ %0 = tail call i16 @llvm.fptoui.sat.i16.f32(float %a)
+ ret i16 %0
+}
+
diff --git a/llvm/test/CodeGen/WebAssembly/Half2Int16.ll b/llvm/test/CodeGen/WebAssembly/Half2Int16.ll
new file mode 100644
index 0000000000000..7c08609f970ef
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/Half2Int16.ll
@@ -0,0 +1,119 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=wasm32 | FileCheck %s
+
+define i16 @fcvt_h_s_sat(float %a) {
+; CHECK-LABEL: fcvt_h_s_sat:
+; CHECK: .functype fcvt_h_s_sat (f32) -> (i32)
+; CHECK-NEXT: # %bb.0: # %start
+; CHECK-NEXT: i32.const 0
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f32.const -0x1p15
+; CHECK-NEXT: f32.max
+; CHECK-NEXT: f32.const 0x1.fffcp14
+; CHECK-NEXT: f32.min
+; CHECK-NEXT: i32.trunc_sat_f32_s
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f32.ne
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: # fallthrough-return
+start:
+ %0 = tail call i16 @llvm.fptosi.sat.i16.f32(float %a)
+ ret i16 %0
+}
+
+define i16 @fcvt_hu_s_sat(float %a) {
+; CHECK-LABEL: fcvt_hu_s_sat:
+; CHECK: .functype fcvt_hu_s_sat (f32) -> (i32)
+; CHECK-NEXT: # %bb.0: # %start
+; CHECK-NEXT: i32.const 0
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f32.const 0x0p0
+; CHECK-NEXT: f32.max
+; CHECK-NEXT: f32.const 0x1.fffep15
+; CHECK-NEXT: f32.min
+; CHECK-NEXT: i32.trunc_sat_f32_u
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f32.ne
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: # fallthrough-return
+start:
+ %0 = tail call i16 @llvm.fptoui.sat.i16.f32(float %a)
+ ret i16 %0
+}
+
+define i16 @fcvt_h_s_sat_nnan(float nofpclass(nan) %a) {
+; CHECK-LABEL: fcvt_h_s_sat_nnan:
+; CHECK: .functype fcvt_h_s_sat_nnan (f32) -> (i32)
+; CHECK-NEXT: # %bb.0: # %start
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f32.const -0x1p15
+; CHECK-NEXT: f32.max
+; CHECK-NEXT: f32.const 0x1.fffcp14
+; CHECK-NEXT: f32.min
+; CHECK-NEXT: i32.trunc_sat_f32_s
+; CHECK-NEXT: # fallthrough-return
+start:
+ %0 = tail call i16 @llvm.fptosi.sat.i16.f32(float %a)
+ ret i16 %0
+}
+
+define i16 @fcvt_hu_s_sat_nnan(float nofpclass(nan) %a) {
+; CHECK-LABEL: fcvt_hu_s_sat_nnan:
+; CHECK: .functype fcvt_hu_s_sat_nnan (f32) -> (i32)
+; CHECK-NEXT: # %bb.0: # %start
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f32.const 0x0p0
+; CHECK-NEXT: f32.max
+; CHECK-NEXT: f32.const 0x1.fffep15
+; CHECK-NEXT: f32.min
+; CHECK-NEXT: i32.trunc_sat_f32_u
+; CHECK-NEXT: # fallthrough-return
+start:
+ %0 = tail call i16 @llvm.fptoui.sat.i16.f32(float %a)
+ ret i16 %0
+}
+
+define i16 @fcvt_h_s_sat_nsnan(float nofpclass(snan) %a) {
+; CHECK-LABEL: fcvt_h_s_sat_nsnan:
+; CHECK: .functype fcvt_h_s_sat_nsnan (f32) -> (i32)
+; CHECK-NEXT: # %bb.0: # %start
+; CHECK-NEXT: i32.const 0
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f32.const -0x1p15
+; CHECK-NEXT: f32.max
+; CHECK-NEXT: f32.const 0x1.fffcp14
+; CHECK-NEXT: f32.min
+; CHECK-NEXT: i32.trunc_sat_f32_s
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f32.ne
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: # fallthrough-return
+start:
+ %0 = tail call i16 @llvm.fptosi.sat.i16.f32(float %a)
+ ret i16 %0
+}
+
+define i16 @fcvt_hu_s_sat_nsnan(float nofpclass(snan) %a) {
+; CHECK-LABEL: fcvt_hu_s_sat_nsnan:
+; CHECK: .functype fcvt_hu_s_sat_nsnan (f32) -> (i32)
+; CHECK-NEXT: # %bb.0: # %start
+; CHECK-NEXT: i32.const 0
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f32.const 0x0p0
+; CHECK-NEXT: f32.max
+; CHECK-NEXT: f32.const 0x1.fffep15
+; CHECK-NEXT: f32.min
+; CHECK-NEXT: i32.trunc_sat_f32_u
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f32.ne
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: # fallthrough-return
+start:
+ %0 = tail call i16 @llvm.fptoui.sat.i16.f32(float %a)
+ ret i16 %0
+}
+
>From aa08a78e548465abf958e1093a43fd3f75fdc2d3 Mon Sep 17 00:00:00 2001
From: Your Name <you at example.com>
Date: Thu, 15 May 2025 16:06:16 +0800
Subject: [PATCH 2/2] Update testcase
---
llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll | 226 ++++-------
llvm/test/CodeGen/ARM/fptoui-sat-scalar.ll | 369 ++++++++++--------
.../CodeGen/Thumb2/mve-fptoui-sat-vector.ll | 120 +++---
3 files changed, 350 insertions(+), 365 deletions(-)
diff --git a/llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll b/llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll
index 5179f97624489..bccb0ad150509 100644
--- a/llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll
+++ b/llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll
@@ -67,23 +67,22 @@ define i1 @test_signed_i1_f32(float %f) nounwind {
;
; VFP2-LABEL: test_signed_i1_f32:
; VFP2: @ %bb.0:
-; VFP2-NEXT: vmov s2, r0
; VFP2-NEXT: vmov.f32 s0, #-1.000000e+00
-; VFP2-NEXT: vcvt.s32.f32 s4, s2
-; VFP2-NEXT: vcmp.f32 s2, s0
-; VFP2-NEXT: vmov r0, s4
-; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: it lt
-; VFP2-NEXT: movlt.w r0, #-1
-; VFP2-NEXT: vcmp.f32 s2, #0
-; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: it gt
-; VFP2-NEXT: movgt r0, #0
+; VFP2-NEXT: vldr s4, .LCPI0_0
+; VFP2-NEXT: vmov s2, r0
; VFP2-NEXT: vcmp.f32 s2, s2
; VFP2-NEXT: vmrs APSR_nzcv, fpscr
+; VFP2-NEXT: vmax.f32 d16, d1, d0
+; VFP2-NEXT: vmin.f32 d0, d16, d2
+; VFP2-NEXT: vcvt.s32.f32 s0, s0
+; VFP2-NEXT: vmov r0, s0
; VFP2-NEXT: it vs
; VFP2-NEXT: movvs r0, #0
; VFP2-NEXT: bx lr
+; VFP2-NEXT: .p2align 2
+; VFP2-NEXT: @ %bb.1:
+; VFP2-NEXT: .LCPI0_0:
+; VFP2-NEXT: .long 0x00000000 @ float 0
;
; FP16-LABEL: test_signed_i1_f32:
; FP16: @ %bb.0:
@@ -157,21 +156,15 @@ define i8 @test_signed_i8_f32(float %f) nounwind {
;
; VFP2-LABEL: test_signed_i8_f32:
; VFP2: @ %bb.0:
-; VFP2-NEXT: vmov s0, r0
-; VFP2-NEXT: vldr s2, .LCPI1_0
-; VFP2-NEXT: vldr s6, .LCPI1_1
-; VFP2-NEXT: vcvt.s32.f32 s4, s0
-; VFP2-NEXT: vcmp.f32 s0, s2
-; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: vcmp.f32 s0, s6
-; VFP2-NEXT: vmov r0, s4
-; VFP2-NEXT: it lt
-; VFP2-NEXT: mvnlt r0, #127
-; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: it gt
-; VFP2-NEXT: movgt r0, #127
-; VFP2-NEXT: vcmp.f32 s0, s0
+; VFP2-NEXT: vmov s2, r0
+; VFP2-NEXT: vldr s0, .LCPI1_0
+; VFP2-NEXT: vmax.f32 d16, d1, d0
+; VFP2-NEXT: vldr s4, .LCPI1_1
+; VFP2-NEXT: vcmp.f32 s2, s2
; VFP2-NEXT: vmrs APSR_nzcv, fpscr
+; VFP2-NEXT: vmin.f32 d0, d16, d2
+; VFP2-NEXT: vcvt.s32.f32 s0, s0
+; VFP2-NEXT: vmov r0, s0
; VFP2-NEXT: it vs
; VFP2-NEXT: movvs r0, #0
; VFP2-NEXT: bx lr
@@ -256,22 +249,15 @@ define i13 @test_signed_i13_f32(float %f) nounwind {
;
; VFP2-LABEL: test_signed_i13_f32:
; VFP2: @ %bb.0:
-; VFP2-NEXT: vmov s0, r0
-; VFP2-NEXT: vldr s2, .LCPI2_0
-; VFP2-NEXT: vldr s6, .LCPI2_1
-; VFP2-NEXT: vcvt.s32.f32 s4, s0
-; VFP2-NEXT: vcmp.f32 s0, s2
-; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: vcmp.f32 s0, s6
-; VFP2-NEXT: vmov r0, s4
-; VFP2-NEXT: itt lt
-; VFP2-NEXT: movwlt r0, #61440
-; VFP2-NEXT: movtlt r0, #65535
-; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: it gt
-; VFP2-NEXT: movwgt r0, #4095
-; VFP2-NEXT: vcmp.f32 s0, s0
+; VFP2-NEXT: vmov s2, r0
+; VFP2-NEXT: vldr s0, .LCPI2_0
+; VFP2-NEXT: vmax.f32 d16, d1, d0
+; VFP2-NEXT: vldr s4, .LCPI2_1
+; VFP2-NEXT: vcmp.f32 s2, s2
; VFP2-NEXT: vmrs APSR_nzcv, fpscr
+; VFP2-NEXT: vmin.f32 d0, d16, d2
+; VFP2-NEXT: vcvt.s32.f32 s0, s0
+; VFP2-NEXT: vmov r0, s0
; VFP2-NEXT: it vs
; VFP2-NEXT: movvs r0, #0
; VFP2-NEXT: bx lr
@@ -356,22 +342,15 @@ define i16 @test_signed_i16_f32(float %f) nounwind {
;
; VFP2-LABEL: test_signed_i16_f32:
; VFP2: @ %bb.0:
-; VFP2-NEXT: vmov s0, r0
-; VFP2-NEXT: vldr s2, .LCPI3_0
-; VFP2-NEXT: vldr s6, .LCPI3_1
-; VFP2-NEXT: vcvt.s32.f32 s4, s0
-; VFP2-NEXT: vcmp.f32 s0, s2
-; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: vcmp.f32 s0, s6
-; VFP2-NEXT: vmov r0, s4
-; VFP2-NEXT: itt lt
-; VFP2-NEXT: movwlt r0, #32768
-; VFP2-NEXT: movtlt r0, #65535
-; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: it gt
-; VFP2-NEXT: movwgt r0, #32767
-; VFP2-NEXT: vcmp.f32 s0, s0
+; VFP2-NEXT: vmov s2, r0
+; VFP2-NEXT: vldr s0, .LCPI3_0
+; VFP2-NEXT: vmax.f32 d16, d1, d0
+; VFP2-NEXT: vldr s4, .LCPI3_1
+; VFP2-NEXT: vcmp.f32 s2, s2
; VFP2-NEXT: vmrs APSR_nzcv, fpscr
+; VFP2-NEXT: vmin.f32 d0, d16, d2
+; VFP2-NEXT: vcvt.s32.f32 s0, s0
+; VFP2-NEXT: vmov r0, s0
; VFP2-NEXT: it vs
; VFP2-NEXT: movvs r0, #0
; VFP2-NEXT: bx lr
@@ -456,23 +435,15 @@ define i19 @test_signed_i19_f32(float %f) nounwind {
;
; VFP2-LABEL: test_signed_i19_f32:
; VFP2: @ %bb.0:
-; VFP2-NEXT: vmov s0, r0
-; VFP2-NEXT: vldr s2, .LCPI4_0
-; VFP2-NEXT: vldr s6, .LCPI4_1
-; VFP2-NEXT: vcvt.s32.f32 s4, s0
-; VFP2-NEXT: vcmp.f32 s0, s2
-; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: vcmp.f32 s0, s6
-; VFP2-NEXT: vmov r0, s4
-; VFP2-NEXT: itt lt
-; VFP2-NEXT: movlt r0, #0
-; VFP2-NEXT: movtlt r0, #65532
-; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: vcmp.f32 s0, s0
-; VFP2-NEXT: itt gt
-; VFP2-NEXT: movwgt r0, #65535
-; VFP2-NEXT: movtgt r0, #3
+; VFP2-NEXT: vmov s2, r0
+; VFP2-NEXT: vldr s0, .LCPI4_0
+; VFP2-NEXT: vmax.f32 d16, d1, d0
+; VFP2-NEXT: vldr s4, .LCPI4_1
+; VFP2-NEXT: vcmp.f32 s2, s2
; VFP2-NEXT: vmrs APSR_nzcv, fpscr
+; VFP2-NEXT: vmin.f32 d0, d16, d2
+; VFP2-NEXT: vcvt.s32.f32 s0, s0
+; VFP2-NEXT: vmov r0, s0
; VFP2-NEXT: it vs
; VFP2-NEXT: movvs r0, #0
; VFP2-NEXT: bx lr
@@ -2492,23 +2463,22 @@ define i1 @test_signed_i1_f16(half %f) nounwind {
; VFP2-NEXT: .save {r7, lr}
; VFP2-NEXT: push {r7, lr}
; VFP2-NEXT: bl __aeabi_h2f
-; VFP2-NEXT: vmov s2, r0
; VFP2-NEXT: vmov.f32 s0, #-1.000000e+00
-; VFP2-NEXT: vcvt.s32.f32 s4, s2
-; VFP2-NEXT: vcmp.f32 s2, s0
-; VFP2-NEXT: vmov r0, s4
-; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: it lt
-; VFP2-NEXT: movlt.w r0, #-1
-; VFP2-NEXT: vcmp.f32 s2, #0
-; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: it gt
-; VFP2-NEXT: movgt r0, #0
+; VFP2-NEXT: vldr s4, .LCPI20_0
+; VFP2-NEXT: vmov s2, r0
; VFP2-NEXT: vcmp.f32 s2, s2
; VFP2-NEXT: vmrs APSR_nzcv, fpscr
+; VFP2-NEXT: vmax.f32 d16, d1, d0
+; VFP2-NEXT: vmin.f32 d0, d16, d2
+; VFP2-NEXT: vcvt.s32.f32 s0, s0
+; VFP2-NEXT: vmov r0, s0
; VFP2-NEXT: it vs
; VFP2-NEXT: movvs r0, #0
; VFP2-NEXT: pop {r7, pc}
+; VFP2-NEXT: .p2align 2
+; VFP2-NEXT: @ %bb.1:
+; VFP2-NEXT: .LCPI20_0:
+; VFP2-NEXT: .long 0x00000000 @ float 0
;
; FP16-LABEL: test_signed_i1_f16:
; FP16: @ %bb.0:
@@ -2588,21 +2558,15 @@ define i8 @test_signed_i8_f16(half %f) nounwind {
; VFP2-NEXT: .save {r7, lr}
; VFP2-NEXT: push {r7, lr}
; VFP2-NEXT: bl __aeabi_h2f
-; VFP2-NEXT: vmov s0, r0
-; VFP2-NEXT: vldr s2, .LCPI21_0
-; VFP2-NEXT: vldr s6, .LCPI21_1
-; VFP2-NEXT: vcvt.s32.f32 s4, s0
-; VFP2-NEXT: vcmp.f32 s0, s2
-; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: vcmp.f32 s0, s6
-; VFP2-NEXT: vmov r0, s4
-; VFP2-NEXT: it lt
-; VFP2-NEXT: mvnlt r0, #127
-; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: it gt
-; VFP2-NEXT: movgt r0, #127
-; VFP2-NEXT: vcmp.f32 s0, s0
+; VFP2-NEXT: vmov s2, r0
+; VFP2-NEXT: vldr s0, .LCPI21_0
+; VFP2-NEXT: vmax.f32 d16, d1, d0
+; VFP2-NEXT: vldr s4, .LCPI21_1
+; VFP2-NEXT: vcmp.f32 s2, s2
; VFP2-NEXT: vmrs APSR_nzcv, fpscr
+; VFP2-NEXT: vmin.f32 d0, d16, d2
+; VFP2-NEXT: vcvt.s32.f32 s0, s0
+; VFP2-NEXT: vmov r0, s0
; VFP2-NEXT: it vs
; VFP2-NEXT: movvs r0, #0
; VFP2-NEXT: pop {r7, pc}
@@ -2693,22 +2657,15 @@ define i13 @test_signed_i13_f16(half %f) nounwind {
; VFP2-NEXT: .save {r7, lr}
; VFP2-NEXT: push {r7, lr}
; VFP2-NEXT: bl __aeabi_h2f
-; VFP2-NEXT: vmov s0, r0
-; VFP2-NEXT: vldr s2, .LCPI22_0
-; VFP2-NEXT: vldr s6, .LCPI22_1
-; VFP2-NEXT: vcvt.s32.f32 s4, s0
-; VFP2-NEXT: vcmp.f32 s0, s2
-; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: vcmp.f32 s0, s6
-; VFP2-NEXT: vmov r0, s4
-; VFP2-NEXT: itt lt
-; VFP2-NEXT: movwlt r0, #61440
-; VFP2-NEXT: movtlt r0, #65535
-; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: it gt
-; VFP2-NEXT: movwgt r0, #4095
-; VFP2-NEXT: vcmp.f32 s0, s0
+; VFP2-NEXT: vmov s2, r0
+; VFP2-NEXT: vldr s0, .LCPI22_0
+; VFP2-NEXT: vmax.f32 d16, d1, d0
+; VFP2-NEXT: vldr s4, .LCPI22_1
+; VFP2-NEXT: vcmp.f32 s2, s2
; VFP2-NEXT: vmrs APSR_nzcv, fpscr
+; VFP2-NEXT: vmin.f32 d0, d16, d2
+; VFP2-NEXT: vcvt.s32.f32 s0, s0
+; VFP2-NEXT: vmov r0, s0
; VFP2-NEXT: it vs
; VFP2-NEXT: movvs r0, #0
; VFP2-NEXT: pop {r7, pc}
@@ -2799,22 +2756,15 @@ define i16 @test_signed_i16_f16(half %f) nounwind {
; VFP2-NEXT: .save {r7, lr}
; VFP2-NEXT: push {r7, lr}
; VFP2-NEXT: bl __aeabi_h2f
-; VFP2-NEXT: vmov s0, r0
-; VFP2-NEXT: vldr s2, .LCPI23_0
-; VFP2-NEXT: vldr s6, .LCPI23_1
-; VFP2-NEXT: vcvt.s32.f32 s4, s0
-; VFP2-NEXT: vcmp.f32 s0, s2
-; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: vcmp.f32 s0, s6
-; VFP2-NEXT: vmov r0, s4
-; VFP2-NEXT: itt lt
-; VFP2-NEXT: movwlt r0, #32768
-; VFP2-NEXT: movtlt r0, #65535
-; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: it gt
-; VFP2-NEXT: movwgt r0, #32767
-; VFP2-NEXT: vcmp.f32 s0, s0
+; VFP2-NEXT: vmov s2, r0
+; VFP2-NEXT: vldr s0, .LCPI23_0
+; VFP2-NEXT: vmax.f32 d16, d1, d0
+; VFP2-NEXT: vldr s4, .LCPI23_1
+; VFP2-NEXT: vcmp.f32 s2, s2
; VFP2-NEXT: vmrs APSR_nzcv, fpscr
+; VFP2-NEXT: vmin.f32 d0, d16, d2
+; VFP2-NEXT: vcvt.s32.f32 s0, s0
+; VFP2-NEXT: vmov r0, s0
; VFP2-NEXT: it vs
; VFP2-NEXT: movvs r0, #0
; VFP2-NEXT: pop {r7, pc}
@@ -2905,23 +2855,15 @@ define i19 @test_signed_i19_f16(half %f) nounwind {
; VFP2-NEXT: .save {r7, lr}
; VFP2-NEXT: push {r7, lr}
; VFP2-NEXT: bl __aeabi_h2f
-; VFP2-NEXT: vmov s0, r0
-; VFP2-NEXT: vldr s2, .LCPI24_0
-; VFP2-NEXT: vldr s6, .LCPI24_1
-; VFP2-NEXT: vcvt.s32.f32 s4, s0
-; VFP2-NEXT: vcmp.f32 s0, s2
-; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: vcmp.f32 s0, s6
-; VFP2-NEXT: vmov r0, s4
-; VFP2-NEXT: itt lt
-; VFP2-NEXT: movlt r0, #0
-; VFP2-NEXT: movtlt r0, #65532
-; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: vcmp.f32 s0, s0
-; VFP2-NEXT: itt gt
-; VFP2-NEXT: movwgt r0, #65535
-; VFP2-NEXT: movtgt r0, #3
+; VFP2-NEXT: vmov s2, r0
+; VFP2-NEXT: vldr s0, .LCPI24_0
+; VFP2-NEXT: vmax.f32 d16, d1, d0
+; VFP2-NEXT: vldr s4, .LCPI24_1
+; VFP2-NEXT: vcmp.f32 s2, s2
; VFP2-NEXT: vmrs APSR_nzcv, fpscr
+; VFP2-NEXT: vmin.f32 d0, d16, d2
+; VFP2-NEXT: vcvt.s32.f32 s0, s0
+; VFP2-NEXT: vmov r0, s0
; VFP2-NEXT: it vs
; VFP2-NEXT: movvs r0, #0
; VFP2-NEXT: pop {r7, pc}
diff --git a/llvm/test/CodeGen/ARM/fptoui-sat-scalar.ll b/llvm/test/CodeGen/ARM/fptoui-sat-scalar.ll
index 4cc5f943dadff..fdf2348ee416d 100644
--- a/llvm/test/CodeGen/ARM/fptoui-sat-scalar.ll
+++ b/llvm/test/CodeGen/ARM/fptoui-sat-scalar.ll
@@ -49,27 +49,31 @@ define i1 @test_signed_i1_f32(float %f) nounwind {
;
; VFP2-LABEL: test_signed_i1_f32:
; VFP2: @ %bb.0:
-; VFP2-NEXT: vmov s0, r0
; VFP2-NEXT: vmov.f32 s4, #1.000000e+00
-; VFP2-NEXT: vcvt.u32.f32 s2, s0
-; VFP2-NEXT: vcmp.f32 s0, #0
+; VFP2-NEXT: vldr s2, .LCPI0_0
+; VFP2-NEXT: vmov s0, r0
+; VFP2-NEXT: vmax.f32 d16, d0, d1
+; VFP2-NEXT: vcmp.f32 s0, s0
; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: vcmp.f32 s0, s4
+; VFP2-NEXT: vmin.f32 d1, d16, d2
+; VFP2-NEXT: vcvt.u32.f32 s2, s2
; VFP2-NEXT: vmov r0, s2
-; VFP2-NEXT: it lt
-; VFP2-NEXT: movlt r0, #0
-; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: it gt
-; VFP2-NEXT: movgt r0, #1
+; VFP2-NEXT: it vs
+; VFP2-NEXT: movvs r0, #0
; VFP2-NEXT: bx lr
+; VFP2-NEXT: .p2align 2
+; VFP2-NEXT: @ %bb.1:
+; VFP2-NEXT: .LCPI0_0:
+; VFP2-NEXT: .long 0x00000000 @ float 0
;
; FP16-LABEL: test_signed_i1_f32:
; FP16: @ %bb.0:
-; FP16-NEXT: vldr s0, .LCPI0_0
-; FP16-NEXT: vmov s2, r0
-; FP16-NEXT: vmov.f32 s4, #1.000000e+00
-; FP16-NEXT: vmaxnm.f32 s0, s2, s0
-; FP16-NEXT: vminnm.f32 s0, s0, s4
+; FP16-NEXT: vmov s4, r0
+; FP16-NEXT: vldr s2, .LCPI0_0
+; FP16-NEXT: vmaxnm.f32 s4, s4, s4
+; FP16-NEXT: vmov.f32 s0, #1.000000e+00
+; FP16-NEXT: vmaxnm.f32 s2, s4, s2
+; FP16-NEXT: vminnm.f32 s0, s2, s0
; FP16-NEXT: vcvt.u32.f32 s0, s0
; FP16-NEXT: vmov r0, s0
; FP16-NEXT: bx lr
@@ -115,40 +119,42 @@ define i8 @test_signed_i8_f32(float %f) nounwind {
;
; VFP2-LABEL: test_signed_i8_f32:
; VFP2: @ %bb.0:
-; VFP2-NEXT: vmov s0, r0
-; VFP2-NEXT: vldr s4, .LCPI1_0
-; VFP2-NEXT: vcvt.u32.f32 s2, s0
-; VFP2-NEXT: vcmp.f32 s0, #0
-; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: vcmp.f32 s0, s4
-; VFP2-NEXT: vmov r0, s2
-; VFP2-NEXT: it lt
-; VFP2-NEXT: movlt r0, #0
+; VFP2-NEXT: vmov s2, r0
+; VFP2-NEXT: vldr s0, .LCPI1_0
+; VFP2-NEXT: vmax.f32 d16, d1, d0
+; VFP2-NEXT: vldr s4, .LCPI1_1
+; VFP2-NEXT: vcmp.f32 s2, s2
; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: it gt
-; VFP2-NEXT: movgt r0, #255
+; VFP2-NEXT: vmin.f32 d0, d16, d2
+; VFP2-NEXT: vcvt.u32.f32 s0, s0
+; VFP2-NEXT: vmov r0, s0
+; VFP2-NEXT: it vs
+; VFP2-NEXT: movvs r0, #0
; VFP2-NEXT: bx lr
; VFP2-NEXT: .p2align 2
; VFP2-NEXT: @ %bb.1:
; VFP2-NEXT: .LCPI1_0:
+; VFP2-NEXT: .long 0x00000000 @ float 0
+; VFP2-NEXT: .LCPI1_1:
; VFP2-NEXT: .long 0x437f0000 @ float 255
;
; FP16-LABEL: test_signed_i8_f32:
; FP16: @ %bb.0:
+; FP16-NEXT: vmov s4, r0
+; FP16-NEXT: vldr s2, .LCPI1_1
+; FP16-NEXT: vmaxnm.f32 s4, s4, s4
; FP16-NEXT: vldr s0, .LCPI1_0
-; FP16-NEXT: vmov s2, r0
-; FP16-NEXT: vldr s4, .LCPI1_1
-; FP16-NEXT: vmaxnm.f32 s0, s2, s0
-; FP16-NEXT: vminnm.f32 s0, s0, s4
+; FP16-NEXT: vmaxnm.f32 s2, s4, s2
+; FP16-NEXT: vminnm.f32 s0, s2, s0
; FP16-NEXT: vcvt.u32.f32 s0, s0
; FP16-NEXT: vmov r0, s0
; FP16-NEXT: bx lr
; FP16-NEXT: .p2align 2
; FP16-NEXT: @ %bb.1:
; FP16-NEXT: .LCPI1_0:
-; FP16-NEXT: .long 0x00000000 @ float 0
-; FP16-NEXT: .LCPI1_1:
; FP16-NEXT: .long 0x437f0000 @ float 255
+; FP16-NEXT: .LCPI1_1:
+; FP16-NEXT: .long 0x00000000 @ float 0
%x = call i8 @llvm.fptoui.sat.i8.f32(float %f)
ret i8 %x
}
@@ -189,40 +195,42 @@ define i13 @test_signed_i13_f32(float %f) nounwind {
;
; VFP2-LABEL: test_signed_i13_f32:
; VFP2: @ %bb.0:
-; VFP2-NEXT: vmov s0, r0
-; VFP2-NEXT: vldr s4, .LCPI2_0
-; VFP2-NEXT: vcvt.u32.f32 s2, s0
-; VFP2-NEXT: vcmp.f32 s0, #0
+; VFP2-NEXT: vmov s2, r0
+; VFP2-NEXT: vldr s0, .LCPI2_0
+; VFP2-NEXT: vmax.f32 d16, d1, d0
+; VFP2-NEXT: vldr s4, .LCPI2_1
+; VFP2-NEXT: vcmp.f32 s2, s2
; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: vcmp.f32 s0, s4
-; VFP2-NEXT: vmov r0, s2
-; VFP2-NEXT: it lt
-; VFP2-NEXT: movlt r0, #0
-; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: it gt
-; VFP2-NEXT: movwgt r0, #8191
+; VFP2-NEXT: vmin.f32 d0, d16, d2
+; VFP2-NEXT: vcvt.u32.f32 s0, s0
+; VFP2-NEXT: vmov r0, s0
+; VFP2-NEXT: it vs
+; VFP2-NEXT: movvs r0, #0
; VFP2-NEXT: bx lr
; VFP2-NEXT: .p2align 2
; VFP2-NEXT: @ %bb.1:
; VFP2-NEXT: .LCPI2_0:
+; VFP2-NEXT: .long 0x00000000 @ float 0
+; VFP2-NEXT: .LCPI2_1:
; VFP2-NEXT: .long 0x45fff800 @ float 8191
;
; FP16-LABEL: test_signed_i13_f32:
; FP16: @ %bb.0:
+; FP16-NEXT: vmov s4, r0
+; FP16-NEXT: vldr s2, .LCPI2_1
+; FP16-NEXT: vmaxnm.f32 s4, s4, s4
; FP16-NEXT: vldr s0, .LCPI2_0
-; FP16-NEXT: vmov s2, r0
-; FP16-NEXT: vldr s4, .LCPI2_1
-; FP16-NEXT: vmaxnm.f32 s0, s2, s0
-; FP16-NEXT: vminnm.f32 s0, s0, s4
+; FP16-NEXT: vmaxnm.f32 s2, s4, s2
+; FP16-NEXT: vminnm.f32 s0, s2, s0
; FP16-NEXT: vcvt.u32.f32 s0, s0
; FP16-NEXT: vmov r0, s0
; FP16-NEXT: bx lr
; FP16-NEXT: .p2align 2
; FP16-NEXT: @ %bb.1:
; FP16-NEXT: .LCPI2_0:
-; FP16-NEXT: .long 0x00000000 @ float 0
-; FP16-NEXT: .LCPI2_1:
; FP16-NEXT: .long 0x45fff800 @ float 8191
+; FP16-NEXT: .LCPI2_1:
+; FP16-NEXT: .long 0x00000000 @ float 0
%x = call i13 @llvm.fptoui.sat.i13.f32(float %f)
ret i13 %x
}
@@ -263,40 +271,42 @@ define i16 @test_signed_i16_f32(float %f) nounwind {
;
; VFP2-LABEL: test_signed_i16_f32:
; VFP2: @ %bb.0:
-; VFP2-NEXT: vmov s0, r0
-; VFP2-NEXT: vldr s4, .LCPI3_0
-; VFP2-NEXT: vcvt.u32.f32 s2, s0
-; VFP2-NEXT: vcmp.f32 s0, #0
+; VFP2-NEXT: vmov s2, r0
+; VFP2-NEXT: vldr s0, .LCPI3_0
+; VFP2-NEXT: vmax.f32 d16, d1, d0
+; VFP2-NEXT: vldr s4, .LCPI3_1
+; VFP2-NEXT: vcmp.f32 s2, s2
; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: vcmp.f32 s0, s4
-; VFP2-NEXT: vmov r0, s2
-; VFP2-NEXT: it lt
-; VFP2-NEXT: movlt r0, #0
-; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: it gt
-; VFP2-NEXT: movwgt r0, #65535
+; VFP2-NEXT: vmin.f32 d0, d16, d2
+; VFP2-NEXT: vcvt.u32.f32 s0, s0
+; VFP2-NEXT: vmov r0, s0
+; VFP2-NEXT: it vs
+; VFP2-NEXT: movvs r0, #0
; VFP2-NEXT: bx lr
; VFP2-NEXT: .p2align 2
; VFP2-NEXT: @ %bb.1:
; VFP2-NEXT: .LCPI3_0:
+; VFP2-NEXT: .long 0x00000000 @ float 0
+; VFP2-NEXT: .LCPI3_1:
; VFP2-NEXT: .long 0x477fff00 @ float 65535
;
; FP16-LABEL: test_signed_i16_f32:
; FP16: @ %bb.0:
+; FP16-NEXT: vmov s4, r0
+; FP16-NEXT: vldr s2, .LCPI3_1
+; FP16-NEXT: vmaxnm.f32 s4, s4, s4
; FP16-NEXT: vldr s0, .LCPI3_0
-; FP16-NEXT: vmov s2, r0
-; FP16-NEXT: vldr s4, .LCPI3_1
-; FP16-NEXT: vmaxnm.f32 s0, s2, s0
-; FP16-NEXT: vminnm.f32 s0, s0, s4
+; FP16-NEXT: vmaxnm.f32 s2, s4, s2
+; FP16-NEXT: vminnm.f32 s0, s2, s0
; FP16-NEXT: vcvt.u32.f32 s0, s0
; FP16-NEXT: vmov r0, s0
; FP16-NEXT: bx lr
; FP16-NEXT: .p2align 2
; FP16-NEXT: @ %bb.1:
; FP16-NEXT: .LCPI3_0:
-; FP16-NEXT: .long 0x00000000 @ float 0
-; FP16-NEXT: .LCPI3_1:
; FP16-NEXT: .long 0x477fff00 @ float 65535
+; FP16-NEXT: .LCPI3_1:
+; FP16-NEXT: .long 0x00000000 @ float 0
%x = call i16 @llvm.fptoui.sat.i16.f32(float %f)
ret i16 %x
}
@@ -337,41 +347,42 @@ define i19 @test_signed_i19_f32(float %f) nounwind {
;
; VFP2-LABEL: test_signed_i19_f32:
; VFP2: @ %bb.0:
-; VFP2-NEXT: vmov s0, r0
-; VFP2-NEXT: vldr s4, .LCPI4_0
-; VFP2-NEXT: vcvt.u32.f32 s2, s0
-; VFP2-NEXT: vcmp.f32 s0, #0
-; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: vcmp.f32 s0, s4
-; VFP2-NEXT: vmov r0, s2
-; VFP2-NEXT: it lt
-; VFP2-NEXT: movlt r0, #0
+; VFP2-NEXT: vmov s2, r0
+; VFP2-NEXT: vldr s0, .LCPI4_0
+; VFP2-NEXT: vmax.f32 d16, d1, d0
+; VFP2-NEXT: vldr s4, .LCPI4_1
+; VFP2-NEXT: vcmp.f32 s2, s2
; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: itt gt
-; VFP2-NEXT: movwgt r0, #65535
-; VFP2-NEXT: movtgt r0, #7
+; VFP2-NEXT: vmin.f32 d0, d16, d2
+; VFP2-NEXT: vcvt.u32.f32 s0, s0
+; VFP2-NEXT: vmov r0, s0
+; VFP2-NEXT: it vs
+; VFP2-NEXT: movvs r0, #0
; VFP2-NEXT: bx lr
; VFP2-NEXT: .p2align 2
; VFP2-NEXT: @ %bb.1:
; VFP2-NEXT: .LCPI4_0:
+; VFP2-NEXT: .long 0x00000000 @ float 0
+; VFP2-NEXT: .LCPI4_1:
; VFP2-NEXT: .long 0x48ffffe0 @ float 524287
;
; FP16-LABEL: test_signed_i19_f32:
; FP16: @ %bb.0:
+; FP16-NEXT: vmov s4, r0
+; FP16-NEXT: vldr s2, .LCPI4_1
+; FP16-NEXT: vmaxnm.f32 s4, s4, s4
; FP16-NEXT: vldr s0, .LCPI4_0
-; FP16-NEXT: vmov s2, r0
-; FP16-NEXT: vldr s4, .LCPI4_1
-; FP16-NEXT: vmaxnm.f32 s0, s2, s0
-; FP16-NEXT: vminnm.f32 s0, s0, s4
+; FP16-NEXT: vmaxnm.f32 s2, s4, s2
+; FP16-NEXT: vminnm.f32 s0, s2, s0
; FP16-NEXT: vcvt.u32.f32 s0, s0
; FP16-NEXT: vmov r0, s0
; FP16-NEXT: bx lr
; FP16-NEXT: .p2align 2
; FP16-NEXT: @ %bb.1:
; FP16-NEXT: .LCPI4_0:
-; FP16-NEXT: .long 0x00000000 @ float 0
-; FP16-NEXT: .LCPI4_1:
; FP16-NEXT: .long 0x48ffffe0 @ float 524287
+; FP16-NEXT: .LCPI4_1:
+; FP16-NEXT: .long 0x00000000 @ float 0
%x = call i19 @llvm.fptoui.sat.i19.f32(float %f)
ret i19 %x
}
@@ -873,8 +884,9 @@ define i1 @test_signed_i1_f64(double %f) nounwind {
; FP16-LABEL: test_signed_i1_f64:
; FP16: @ %bb.0:
; FP16-NEXT: vmov.f64 d0, #1.000000e+00
-; FP16-NEXT: vldr d1, .LCPI10_0
; FP16-NEXT: vmov d2, r0, r1
+; FP16-NEXT: vldr d1, .LCPI10_0
+; FP16-NEXT: vmaxnm.f64 d2, d2, d2
; FP16-NEXT: vmaxnm.f64 d1, d2, d1
; FP16-NEXT: vminnm.f64 d0, d1, d0
; FP16-NEXT: vcvt.u32.f64 s0, d0
@@ -955,22 +967,23 @@ define i8 @test_signed_i8_f64(double %f) nounwind {
;
; FP16-LABEL: test_signed_i8_f64:
; FP16: @ %bb.0:
+; FP16-NEXT: vmov d2, r0, r1
+; FP16-NEXT: vldr d1, .LCPI11_1
+; FP16-NEXT: vmaxnm.f64 d2, d2, d2
; FP16-NEXT: vldr d0, .LCPI11_0
-; FP16-NEXT: vmov d1, r0, r1
-; FP16-NEXT: vldr d2, .LCPI11_1
-; FP16-NEXT: vmaxnm.f64 d0, d1, d0
-; FP16-NEXT: vminnm.f64 d0, d0, d2
+; FP16-NEXT: vmaxnm.f64 d1, d2, d1
+; FP16-NEXT: vminnm.f64 d0, d1, d0
; FP16-NEXT: vcvt.u32.f64 s0, d0
; FP16-NEXT: vmov r0, s0
; FP16-NEXT: bx lr
; FP16-NEXT: .p2align 3
; FP16-NEXT: @ %bb.1:
; FP16-NEXT: .LCPI11_0:
-; FP16-NEXT: .long 0 @ double 0
-; FP16-NEXT: .long 0
-; FP16-NEXT: .LCPI11_1:
; FP16-NEXT: .long 0 @ double 255
; FP16-NEXT: .long 1081073664
+; FP16-NEXT: .LCPI11_1:
+; FP16-NEXT: .long 0 @ double 0
+; FP16-NEXT: .long 0
%x = call i8 @llvm.fptoui.sat.i8.f64(double %f)
ret i8 %x
}
@@ -1043,22 +1056,23 @@ define i13 @test_signed_i13_f64(double %f) nounwind {
;
; FP16-LABEL: test_signed_i13_f64:
; FP16: @ %bb.0:
+; FP16-NEXT: vmov d2, r0, r1
+; FP16-NEXT: vldr d1, .LCPI12_1
+; FP16-NEXT: vmaxnm.f64 d2, d2, d2
; FP16-NEXT: vldr d0, .LCPI12_0
-; FP16-NEXT: vmov d1, r0, r1
-; FP16-NEXT: vldr d2, .LCPI12_1
-; FP16-NEXT: vmaxnm.f64 d0, d1, d0
-; FP16-NEXT: vminnm.f64 d0, d0, d2
+; FP16-NEXT: vmaxnm.f64 d1, d2, d1
+; FP16-NEXT: vminnm.f64 d0, d1, d0
; FP16-NEXT: vcvt.u32.f64 s0, d0
; FP16-NEXT: vmov r0, s0
; FP16-NEXT: bx lr
; FP16-NEXT: .p2align 3
; FP16-NEXT: @ %bb.1:
; FP16-NEXT: .LCPI12_0:
-; FP16-NEXT: .long 0 @ double 0
-; FP16-NEXT: .long 0
-; FP16-NEXT: .LCPI12_1:
; FP16-NEXT: .long 0 @ double 8191
; FP16-NEXT: .long 1086324480
+; FP16-NEXT: .LCPI12_1:
+; FP16-NEXT: .long 0 @ double 0
+; FP16-NEXT: .long 0
%x = call i13 @llvm.fptoui.sat.i13.f64(double %f)
ret i13 %x
}
@@ -1131,22 +1145,23 @@ define i16 @test_signed_i16_f64(double %f) nounwind {
;
; FP16-LABEL: test_signed_i16_f64:
; FP16: @ %bb.0:
+; FP16-NEXT: vmov d2, r0, r1
+; FP16-NEXT: vldr d1, .LCPI13_1
+; FP16-NEXT: vmaxnm.f64 d2, d2, d2
; FP16-NEXT: vldr d0, .LCPI13_0
-; FP16-NEXT: vmov d1, r0, r1
-; FP16-NEXT: vldr d2, .LCPI13_1
-; FP16-NEXT: vmaxnm.f64 d0, d1, d0
-; FP16-NEXT: vminnm.f64 d0, d0, d2
+; FP16-NEXT: vmaxnm.f64 d1, d2, d1
+; FP16-NEXT: vminnm.f64 d0, d1, d0
; FP16-NEXT: vcvt.u32.f64 s0, d0
; FP16-NEXT: vmov r0, s0
; FP16-NEXT: bx lr
; FP16-NEXT: .p2align 3
; FP16-NEXT: @ %bb.1:
; FP16-NEXT: .LCPI13_0:
-; FP16-NEXT: .long 0 @ double 0
-; FP16-NEXT: .long 0
-; FP16-NEXT: .LCPI13_1:
; FP16-NEXT: .long 0 @ double 65535
; FP16-NEXT: .long 1089470432
+; FP16-NEXT: .LCPI13_1:
+; FP16-NEXT: .long 0 @ double 0
+; FP16-NEXT: .long 0
%x = call i16 @llvm.fptoui.sat.i16.f64(double %f)
ret i16 %x
}
@@ -1220,22 +1235,23 @@ define i19 @test_signed_i19_f64(double %f) nounwind {
;
; FP16-LABEL: test_signed_i19_f64:
; FP16: @ %bb.0:
+; FP16-NEXT: vmov d2, r0, r1
+; FP16-NEXT: vldr d1, .LCPI14_1
+; FP16-NEXT: vmaxnm.f64 d2, d2, d2
; FP16-NEXT: vldr d0, .LCPI14_0
-; FP16-NEXT: vmov d1, r0, r1
-; FP16-NEXT: vldr d2, .LCPI14_1
-; FP16-NEXT: vmaxnm.f64 d0, d1, d0
-; FP16-NEXT: vminnm.f64 d0, d0, d2
+; FP16-NEXT: vmaxnm.f64 d1, d2, d1
+; FP16-NEXT: vminnm.f64 d0, d1, d0
; FP16-NEXT: vcvt.u32.f64 s0, d0
; FP16-NEXT: vmov r0, s0
; FP16-NEXT: bx lr
; FP16-NEXT: .p2align 3
; FP16-NEXT: @ %bb.1:
; FP16-NEXT: .LCPI14_0:
-; FP16-NEXT: .long 0 @ double 0
-; FP16-NEXT: .long 0
-; FP16-NEXT: .LCPI14_1:
; FP16-NEXT: .long 0 @ double 524287
; FP16-NEXT: .long 1092616188
+; FP16-NEXT: .LCPI14_1:
+; FP16-NEXT: .long 0 @ double 0
+; FP16-NEXT: .long 0
%x = call i19 @llvm.fptoui.sat.i19.f64(double %f)
ret i19 %x
}
@@ -1393,22 +1409,23 @@ define i50 @test_signed_i50_f64(double %f) nounwind {
; FP16: @ %bb.0:
; FP16-NEXT: .save {r7, lr}
; FP16-NEXT: push {r7, lr}
+; FP16-NEXT: vmov d2, r0, r1
+; FP16-NEXT: vldr d1, .LCPI16_1
+; FP16-NEXT: vmaxnm.f64 d2, d2, d2
; FP16-NEXT: vldr d0, .LCPI16_0
-; FP16-NEXT: vmov d1, r0, r1
-; FP16-NEXT: vldr d2, .LCPI16_1
-; FP16-NEXT: vmaxnm.f64 d0, d1, d0
-; FP16-NEXT: vminnm.f64 d0, d0, d2
+; FP16-NEXT: vmaxnm.f64 d1, d2, d1
+; FP16-NEXT: vminnm.f64 d0, d1, d0
; FP16-NEXT: vmov r0, r1, d0
; FP16-NEXT: bl __aeabi_d2ulz
; FP16-NEXT: pop {r7, pc}
; FP16-NEXT: .p2align 3
; FP16-NEXT: @ %bb.1:
; FP16-NEXT: .LCPI16_0:
-; FP16-NEXT: .long 0 @ double 0
-; FP16-NEXT: .long 0
-; FP16-NEXT: .LCPI16_1:
; FP16-NEXT: .long 4294967288 @ double 1125899906842623
; FP16-NEXT: .long 1125122047
+; FP16-NEXT: .LCPI16_1:
+; FP16-NEXT: .long 0 @ double 0
+; FP16-NEXT: .long 0
%x = call i50 @llvm.fptoui.sat.i50.f64(double %f)
ret i50 %x
}
@@ -1864,19 +1881,22 @@ define i1 @test_signed_i1_f16(half %f) nounwind {
; VFP2-NEXT: .save {r7, lr}
; VFP2-NEXT: push {r7, lr}
; VFP2-NEXT: bl __aeabi_h2f
-; VFP2-NEXT: vmov s0, r0
; VFP2-NEXT: vmov.f32 s4, #1.000000e+00
-; VFP2-NEXT: vcvt.u32.f32 s2, s0
-; VFP2-NEXT: vcmp.f32 s0, #0
+; VFP2-NEXT: vldr s2, .LCPI20_0
+; VFP2-NEXT: vmov s0, r0
+; VFP2-NEXT: vmax.f32 d16, d0, d1
+; VFP2-NEXT: vcmp.f32 s0, s0
; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: vcmp.f32 s0, s4
+; VFP2-NEXT: vmin.f32 d1, d16, d2
+; VFP2-NEXT: vcvt.u32.f32 s2, s2
; VFP2-NEXT: vmov r0, s2
-; VFP2-NEXT: it lt
-; VFP2-NEXT: movlt r0, #0
-; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: it gt
-; VFP2-NEXT: movgt r0, #1
+; VFP2-NEXT: it vs
+; VFP2-NEXT: movvs r0, #0
; VFP2-NEXT: pop {r7, pc}
+; VFP2-NEXT: .p2align 2
+; VFP2-NEXT: @ %bb.1:
+; VFP2-NEXT: .LCPI20_0:
+; VFP2-NEXT: .long 0x00000000 @ float 0
;
; FP16-LABEL: test_signed_i1_f16:
; FP16: @ %bb.0:
@@ -1936,22 +1956,23 @@ define i8 @test_signed_i8_f16(half %f) nounwind {
; VFP2-NEXT: .save {r7, lr}
; VFP2-NEXT: push {r7, lr}
; VFP2-NEXT: bl __aeabi_h2f
-; VFP2-NEXT: vmov s0, r0
-; VFP2-NEXT: vldr s4, .LCPI21_0
-; VFP2-NEXT: vcvt.u32.f32 s2, s0
-; VFP2-NEXT: vcmp.f32 s0, #0
-; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: vcmp.f32 s0, s4
-; VFP2-NEXT: vmov r0, s2
-; VFP2-NEXT: it lt
-; VFP2-NEXT: movlt r0, #0
+; VFP2-NEXT: vmov s2, r0
+; VFP2-NEXT: vldr s0, .LCPI21_0
+; VFP2-NEXT: vmax.f32 d16, d1, d0
+; VFP2-NEXT: vldr s4, .LCPI21_1
+; VFP2-NEXT: vcmp.f32 s2, s2
; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: it gt
-; VFP2-NEXT: movgt r0, #255
+; VFP2-NEXT: vmin.f32 d0, d16, d2
+; VFP2-NEXT: vcvt.u32.f32 s0, s0
+; VFP2-NEXT: vmov r0, s0
+; VFP2-NEXT: it vs
+; VFP2-NEXT: movvs r0, #0
; VFP2-NEXT: pop {r7, pc}
; VFP2-NEXT: .p2align 2
; VFP2-NEXT: @ %bb.1:
; VFP2-NEXT: .LCPI21_0:
+; VFP2-NEXT: .long 0x00000000 @ float 0
+; VFP2-NEXT: .LCPI21_1:
; VFP2-NEXT: .long 0x437f0000 @ float 255
;
; FP16-LABEL: test_signed_i8_f16:
@@ -2016,22 +2037,23 @@ define i13 @test_signed_i13_f16(half %f) nounwind {
; VFP2-NEXT: .save {r7, lr}
; VFP2-NEXT: push {r7, lr}
; VFP2-NEXT: bl __aeabi_h2f
-; VFP2-NEXT: vmov s0, r0
-; VFP2-NEXT: vldr s4, .LCPI22_0
-; VFP2-NEXT: vcvt.u32.f32 s2, s0
-; VFP2-NEXT: vcmp.f32 s0, #0
-; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: vcmp.f32 s0, s4
-; VFP2-NEXT: vmov r0, s2
-; VFP2-NEXT: it lt
-; VFP2-NEXT: movlt r0, #0
+; VFP2-NEXT: vmov s2, r0
+; VFP2-NEXT: vldr s0, .LCPI22_0
+; VFP2-NEXT: vmax.f32 d16, d1, d0
+; VFP2-NEXT: vldr s4, .LCPI22_1
+; VFP2-NEXT: vcmp.f32 s2, s2
; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: it gt
-; VFP2-NEXT: movwgt r0, #8191
+; VFP2-NEXT: vmin.f32 d0, d16, d2
+; VFP2-NEXT: vcvt.u32.f32 s0, s0
+; VFP2-NEXT: vmov r0, s0
+; VFP2-NEXT: it vs
+; VFP2-NEXT: movvs r0, #0
; VFP2-NEXT: pop {r7, pc}
; VFP2-NEXT: .p2align 2
; VFP2-NEXT: @ %bb.1:
; VFP2-NEXT: .LCPI22_0:
+; VFP2-NEXT: .long 0x00000000 @ float 0
+; VFP2-NEXT: .LCPI22_1:
; VFP2-NEXT: .long 0x45fff800 @ float 8191
;
; FP16-LABEL: test_signed_i13_f16:
@@ -2096,22 +2118,23 @@ define i16 @test_signed_i16_f16(half %f) nounwind {
; VFP2-NEXT: .save {r7, lr}
; VFP2-NEXT: push {r7, lr}
; VFP2-NEXT: bl __aeabi_h2f
-; VFP2-NEXT: vmov s0, r0
-; VFP2-NEXT: vldr s4, .LCPI23_0
-; VFP2-NEXT: vcvt.u32.f32 s2, s0
-; VFP2-NEXT: vcmp.f32 s0, #0
-; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: vcmp.f32 s0, s4
-; VFP2-NEXT: vmov r0, s2
-; VFP2-NEXT: it lt
-; VFP2-NEXT: movlt r0, #0
+; VFP2-NEXT: vmov s2, r0
+; VFP2-NEXT: vldr s0, .LCPI23_0
+; VFP2-NEXT: vmax.f32 d16, d1, d0
+; VFP2-NEXT: vldr s4, .LCPI23_1
+; VFP2-NEXT: vcmp.f32 s2, s2
; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: it gt
-; VFP2-NEXT: movwgt r0, #65535
+; VFP2-NEXT: vmin.f32 d0, d16, d2
+; VFP2-NEXT: vcvt.u32.f32 s0, s0
+; VFP2-NEXT: vmov r0, s0
+; VFP2-NEXT: it vs
+; VFP2-NEXT: movvs r0, #0
; VFP2-NEXT: pop {r7, pc}
; VFP2-NEXT: .p2align 2
; VFP2-NEXT: @ %bb.1:
; VFP2-NEXT: .LCPI23_0:
+; VFP2-NEXT: .long 0x00000000 @ float 0
+; VFP2-NEXT: .LCPI23_1:
; VFP2-NEXT: .long 0x477fff00 @ float 65535
;
; FP16-LABEL: test_signed_i16_f16:
@@ -2176,23 +2199,23 @@ define i19 @test_signed_i19_f16(half %f) nounwind {
; VFP2-NEXT: .save {r7, lr}
; VFP2-NEXT: push {r7, lr}
; VFP2-NEXT: bl __aeabi_h2f
-; VFP2-NEXT: vmov s0, r0
-; VFP2-NEXT: vldr s4, .LCPI24_0
-; VFP2-NEXT: vcvt.u32.f32 s2, s0
-; VFP2-NEXT: vcmp.f32 s0, #0
-; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: vcmp.f32 s0, s4
-; VFP2-NEXT: vmov r0, s2
-; VFP2-NEXT: it lt
-; VFP2-NEXT: movlt r0, #0
+; VFP2-NEXT: vmov s2, r0
+; VFP2-NEXT: vldr s0, .LCPI24_0
+; VFP2-NEXT: vmax.f32 d16, d1, d0
+; VFP2-NEXT: vldr s4, .LCPI24_1
+; VFP2-NEXT: vcmp.f32 s2, s2
; VFP2-NEXT: vmrs APSR_nzcv, fpscr
-; VFP2-NEXT: itt gt
-; VFP2-NEXT: movwgt r0, #65535
-; VFP2-NEXT: movtgt r0, #7
+; VFP2-NEXT: vmin.f32 d0, d16, d2
+; VFP2-NEXT: vcvt.u32.f32 s0, s0
+; VFP2-NEXT: vmov r0, s0
+; VFP2-NEXT: it vs
+; VFP2-NEXT: movvs r0, #0
; VFP2-NEXT: pop {r7, pc}
; VFP2-NEXT: .p2align 2
; VFP2-NEXT: @ %bb.1:
; VFP2-NEXT: .LCPI24_0:
+; VFP2-NEXT: .long 0x00000000 @ float 0
+; VFP2-NEXT: .LCPI24_1:
; VFP2-NEXT: .long 0x48ffffe0 @ float 524287
;
; FP16-LABEL: test_signed_i19_f16:
diff --git a/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll b/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll
index ee040feca4240..6ab88c3b4a88c 100644
--- a/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll
@@ -1190,19 +1190,23 @@ define arm_aapcs_vfpcc <4 x i1> @test_unsigned_v4f32_v4i1(<4 x float> %f) {
; CHECK-LABEL: test_unsigned_v4f32_v4i1:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr s4, .LCPI22_0
+; CHECK-NEXT: vmaxnm.f32 s0, s0, s0
; CHECK-NEXT: vmov.f32 s6, #1.000000e+00
-; CHECK-NEXT: movs r1, #0
+; CHECK-NEXT: vmaxnm.f32 s8, s3, s3
; CHECK-NEXT: vmaxnm.f32 s0, s0, s4
-; CHECK-NEXT: vmaxnm.f32 s8, s3, s4
+; CHECK-NEXT: vmaxnm.f32 s2, s2, s2
; CHECK-NEXT: vminnm.f32 s0, s0, s6
-; CHECK-NEXT: vmaxnm.f32 s2, s2, s4
+; CHECK-NEXT: vmaxnm.f32 s10, s1, s1
; CHECK-NEXT: vcvt.u32.f32 s0, s0
-; CHECK-NEXT: vmaxnm.f32 s4, s1, s4
+; CHECK-NEXT: vmaxnm.f32 s8, s8, s4
+; CHECK-NEXT: vmaxnm.f32 s2, s2, s4
+; CHECK-NEXT: vmaxnm.f32 s4, s10, s4
; CHECK-NEXT: vminnm.f32 s4, s4, s6
-; CHECK-NEXT: vminnm.f32 s2, s2, s6
+; CHECK-NEXT: movs r1, #0
; CHECK-NEXT: vcvt.u32.f32 s4, s4
-; CHECK-NEXT: vminnm.f32 s8, s8, s6
+; CHECK-NEXT: vminnm.f32 s2, s2, s6
; CHECK-NEXT: vcvt.u32.f32 s2, s2
+; CHECK-NEXT: vminnm.f32 s8, s8, s6
; CHECK-NEXT: vcvt.u32.f32 s8, s8
; CHECK-NEXT: vmov r2, s0
; CHECK-NEXT: and r2, r2, #1
@@ -1233,16 +1237,20 @@ define arm_aapcs_vfpcc <4 x i1> @test_unsigned_v4f32_v4i1(<4 x float> %f) {
define arm_aapcs_vfpcc <4 x i8> @test_unsigned_v4f32_v4i8(<4 x float> %f) {
; CHECK-MVE-LABEL: test_unsigned_v4f32_v4i8:
; CHECK-MVE: @ %bb.0:
-; CHECK-MVE-NEXT: vldr s4, .LCPI23_0
; CHECK-MVE-NEXT: vldr s6, .LCPI23_1
-; CHECK-MVE-NEXT: vmaxnm.f32 s2, s2, s4
-; CHECK-MVE-NEXT: vmaxnm.f32 s0, s0, s4
-; CHECK-MVE-NEXT: vmaxnm.f32 s8, s3, s4
-; CHECK-MVE-NEXT: vminnm.f32 s2, s2, s6
-; CHECK-MVE-NEXT: vminnm.f32 s0, s0, s6
-; CHECK-MVE-NEXT: vmaxnm.f32 s4, s1, s4
-; CHECK-MVE-NEXT: vminnm.f32 s8, s8, s6
-; CHECK-MVE-NEXT: vminnm.f32 s4, s4, s6
+; CHECK-MVE-NEXT: vmaxnm.f32 s2, s2, s2
+; CHECK-MVE-NEXT: vmaxnm.f32 s0, s0, s0
+; CHECK-MVE-NEXT: vldr s4, .LCPI23_0
+; CHECK-MVE-NEXT: vmaxnm.f32 s8, s3, s3
+; CHECK-MVE-NEXT: vmaxnm.f32 s2, s2, s6
+; CHECK-MVE-NEXT: vmaxnm.f32 s0, s0, s6
+; CHECK-MVE-NEXT: vmaxnm.f32 s10, s1, s1
+; CHECK-MVE-NEXT: vmaxnm.f32 s8, s8, s6
+; CHECK-MVE-NEXT: vminnm.f32 s2, s2, s4
+; CHECK-MVE-NEXT: vminnm.f32 s0, s0, s4
+; CHECK-MVE-NEXT: vmaxnm.f32 s6, s10, s6
+; CHECK-MVE-NEXT: vminnm.f32 s8, s8, s4
+; CHECK-MVE-NEXT: vminnm.f32 s4, s6, s4
; CHECK-MVE-NEXT: vcvt.u32.f32 s2, s2
; CHECK-MVE-NEXT: vcvt.u32.f32 s0, s0
; CHECK-MVE-NEXT: vcvt.u32.f32 s8, s8
@@ -1257,9 +1265,9 @@ define arm_aapcs_vfpcc <4 x i8> @test_unsigned_v4f32_v4i8(<4 x float> %f) {
; CHECK-MVE-NEXT: .p2align 2
; CHECK-MVE-NEXT: @ %bb.1:
; CHECK-MVE-NEXT: .LCPI23_0:
-; CHECK-MVE-NEXT: .long 0x00000000 @ float 0
-; CHECK-MVE-NEXT: .LCPI23_1:
; CHECK-MVE-NEXT: .long 0x437f0000 @ float 255
+; CHECK-MVE-NEXT: .LCPI23_1:
+; CHECK-MVE-NEXT: .long 0x00000000 @ float 0
;
; CHECK-MVEFP-LABEL: test_unsigned_v4f32_v4i8:
; CHECK-MVEFP: @ %bb.0:
@@ -1274,16 +1282,20 @@ define arm_aapcs_vfpcc <4 x i8> @test_unsigned_v4f32_v4i8(<4 x float> %f) {
define arm_aapcs_vfpcc <4 x i13> @test_unsigned_v4f32_v4i13(<4 x float> %f) {
; CHECK-MVE-LABEL: test_unsigned_v4f32_v4i13:
; CHECK-MVE: @ %bb.0:
-; CHECK-MVE-NEXT: vldr s4, .LCPI24_0
; CHECK-MVE-NEXT: vldr s6, .LCPI24_1
-; CHECK-MVE-NEXT: vmaxnm.f32 s2, s2, s4
-; CHECK-MVE-NEXT: vmaxnm.f32 s0, s0, s4
-; CHECK-MVE-NEXT: vmaxnm.f32 s8, s3, s4
-; CHECK-MVE-NEXT: vminnm.f32 s2, s2, s6
-; CHECK-MVE-NEXT: vminnm.f32 s0, s0, s6
-; CHECK-MVE-NEXT: vmaxnm.f32 s4, s1, s4
-; CHECK-MVE-NEXT: vminnm.f32 s8, s8, s6
-; CHECK-MVE-NEXT: vminnm.f32 s4, s4, s6
+; CHECK-MVE-NEXT: vmaxnm.f32 s2, s2, s2
+; CHECK-MVE-NEXT: vmaxnm.f32 s0, s0, s0
+; CHECK-MVE-NEXT: vldr s4, .LCPI24_0
+; CHECK-MVE-NEXT: vmaxnm.f32 s8, s3, s3
+; CHECK-MVE-NEXT: vmaxnm.f32 s2, s2, s6
+; CHECK-MVE-NEXT: vmaxnm.f32 s0, s0, s6
+; CHECK-MVE-NEXT: vmaxnm.f32 s10, s1, s1
+; CHECK-MVE-NEXT: vmaxnm.f32 s8, s8, s6
+; CHECK-MVE-NEXT: vminnm.f32 s2, s2, s4
+; CHECK-MVE-NEXT: vminnm.f32 s0, s0, s4
+; CHECK-MVE-NEXT: vmaxnm.f32 s6, s10, s6
+; CHECK-MVE-NEXT: vminnm.f32 s8, s8, s4
+; CHECK-MVE-NEXT: vminnm.f32 s4, s6, s4
; CHECK-MVE-NEXT: vcvt.u32.f32 s2, s2
; CHECK-MVE-NEXT: vcvt.u32.f32 s0, s0
; CHECK-MVE-NEXT: vcvt.u32.f32 s8, s8
@@ -1298,9 +1310,9 @@ define arm_aapcs_vfpcc <4 x i13> @test_unsigned_v4f32_v4i13(<4 x float> %f) {
; CHECK-MVE-NEXT: .p2align 2
; CHECK-MVE-NEXT: @ %bb.1:
; CHECK-MVE-NEXT: .LCPI24_0:
-; CHECK-MVE-NEXT: .long 0x00000000 @ float 0
-; CHECK-MVE-NEXT: .LCPI24_1:
; CHECK-MVE-NEXT: .long 0x45fff800 @ float 8191
+; CHECK-MVE-NEXT: .LCPI24_1:
+; CHECK-MVE-NEXT: .long 0x00000000 @ float 0
;
; CHECK-MVEFP-LABEL: test_unsigned_v4f32_v4i13:
; CHECK-MVEFP: @ %bb.0:
@@ -1315,16 +1327,20 @@ define arm_aapcs_vfpcc <4 x i13> @test_unsigned_v4f32_v4i13(<4 x float> %f) {
define arm_aapcs_vfpcc <4 x i16> @test_unsigned_v4f32_v4i16(<4 x float> %f) {
; CHECK-MVE-LABEL: test_unsigned_v4f32_v4i16:
; CHECK-MVE: @ %bb.0:
-; CHECK-MVE-NEXT: vldr s4, .LCPI25_0
; CHECK-MVE-NEXT: vldr s6, .LCPI25_1
-; CHECK-MVE-NEXT: vmaxnm.f32 s2, s2, s4
-; CHECK-MVE-NEXT: vmaxnm.f32 s0, s0, s4
-; CHECK-MVE-NEXT: vmaxnm.f32 s8, s3, s4
-; CHECK-MVE-NEXT: vminnm.f32 s2, s2, s6
-; CHECK-MVE-NEXT: vminnm.f32 s0, s0, s6
-; CHECK-MVE-NEXT: vmaxnm.f32 s4, s1, s4
-; CHECK-MVE-NEXT: vminnm.f32 s8, s8, s6
-; CHECK-MVE-NEXT: vminnm.f32 s4, s4, s6
+; CHECK-MVE-NEXT: vmaxnm.f32 s2, s2, s2
+; CHECK-MVE-NEXT: vmaxnm.f32 s0, s0, s0
+; CHECK-MVE-NEXT: vldr s4, .LCPI25_0
+; CHECK-MVE-NEXT: vmaxnm.f32 s8, s3, s3
+; CHECK-MVE-NEXT: vmaxnm.f32 s2, s2, s6
+; CHECK-MVE-NEXT: vmaxnm.f32 s0, s0, s6
+; CHECK-MVE-NEXT: vmaxnm.f32 s10, s1, s1
+; CHECK-MVE-NEXT: vmaxnm.f32 s8, s8, s6
+; CHECK-MVE-NEXT: vminnm.f32 s2, s2, s4
+; CHECK-MVE-NEXT: vminnm.f32 s0, s0, s4
+; CHECK-MVE-NEXT: vmaxnm.f32 s6, s10, s6
+; CHECK-MVE-NEXT: vminnm.f32 s8, s8, s4
+; CHECK-MVE-NEXT: vminnm.f32 s4, s6, s4
; CHECK-MVE-NEXT: vcvt.u32.f32 s2, s2
; CHECK-MVE-NEXT: vcvt.u32.f32 s0, s0
; CHECK-MVE-NEXT: vcvt.u32.f32 s8, s8
@@ -1339,9 +1355,9 @@ define arm_aapcs_vfpcc <4 x i16> @test_unsigned_v4f32_v4i16(<4 x float> %f) {
; CHECK-MVE-NEXT: .p2align 2
; CHECK-MVE-NEXT: @ %bb.1:
; CHECK-MVE-NEXT: .LCPI25_0:
-; CHECK-MVE-NEXT: .long 0x00000000 @ float 0
-; CHECK-MVE-NEXT: .LCPI25_1:
; CHECK-MVE-NEXT: .long 0x477fff00 @ float 65535
+; CHECK-MVE-NEXT: .LCPI25_1:
+; CHECK-MVE-NEXT: .long 0x00000000 @ float 0
;
; CHECK-MVEFP-LABEL: test_unsigned_v4f32_v4i16:
; CHECK-MVEFP: @ %bb.0:
@@ -1356,16 +1372,20 @@ define arm_aapcs_vfpcc <4 x i16> @test_unsigned_v4f32_v4i16(<4 x float> %f) {
define arm_aapcs_vfpcc <4 x i19> @test_unsigned_v4f32_v4i19(<4 x float> %f) {
; CHECK-MVE-LABEL: test_unsigned_v4f32_v4i19:
; CHECK-MVE: @ %bb.0:
-; CHECK-MVE-NEXT: vldr s4, .LCPI26_0
; CHECK-MVE-NEXT: vldr s6, .LCPI26_1
-; CHECK-MVE-NEXT: vmaxnm.f32 s2, s2, s4
-; CHECK-MVE-NEXT: vmaxnm.f32 s0, s0, s4
-; CHECK-MVE-NEXT: vmaxnm.f32 s8, s3, s4
-; CHECK-MVE-NEXT: vminnm.f32 s2, s2, s6
-; CHECK-MVE-NEXT: vminnm.f32 s0, s0, s6
-; CHECK-MVE-NEXT: vmaxnm.f32 s4, s1, s4
-; CHECK-MVE-NEXT: vminnm.f32 s8, s8, s6
-; CHECK-MVE-NEXT: vminnm.f32 s4, s4, s6
+; CHECK-MVE-NEXT: vmaxnm.f32 s2, s2, s2
+; CHECK-MVE-NEXT: vmaxnm.f32 s0, s0, s0
+; CHECK-MVE-NEXT: vldr s4, .LCPI26_0
+; CHECK-MVE-NEXT: vmaxnm.f32 s8, s3, s3
+; CHECK-MVE-NEXT: vmaxnm.f32 s2, s2, s6
+; CHECK-MVE-NEXT: vmaxnm.f32 s0, s0, s6
+; CHECK-MVE-NEXT: vmaxnm.f32 s10, s1, s1
+; CHECK-MVE-NEXT: vmaxnm.f32 s8, s8, s6
+; CHECK-MVE-NEXT: vminnm.f32 s2, s2, s4
+; CHECK-MVE-NEXT: vminnm.f32 s0, s0, s4
+; CHECK-MVE-NEXT: vmaxnm.f32 s6, s10, s6
+; CHECK-MVE-NEXT: vminnm.f32 s8, s8, s4
+; CHECK-MVE-NEXT: vminnm.f32 s4, s6, s4
; CHECK-MVE-NEXT: vcvt.u32.f32 s2, s2
; CHECK-MVE-NEXT: vcvt.u32.f32 s0, s0
; CHECK-MVE-NEXT: vcvt.u32.f32 s8, s8
@@ -1380,9 +1400,9 @@ define arm_aapcs_vfpcc <4 x i19> @test_unsigned_v4f32_v4i19(<4 x float> %f) {
; CHECK-MVE-NEXT: .p2align 2
; CHECK-MVE-NEXT: @ %bb.1:
; CHECK-MVE-NEXT: .LCPI26_0:
-; CHECK-MVE-NEXT: .long 0x00000000 @ float 0
-; CHECK-MVE-NEXT: .LCPI26_1:
; CHECK-MVE-NEXT: .long 0x48ffffe0 @ float 524287
+; CHECK-MVE-NEXT: .LCPI26_1:
+; CHECK-MVE-NEXT: .long 0x00000000 @ float 0
;
; CHECK-MVEFP-LABEL: test_unsigned_v4f32_v4i19:
; CHECK-MVEFP: @ %bb.0: