[llvm] [LLVM][CodeGen] Flooring BW to 1 in isSaturatingMinMax (PR #160637)

Yatao Wang via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 29 14:08:16 PDT 2025


https://github.com/ningxinr updated https://github.com/llvm/llvm-project/pull/160637

From 9b4a9c64277af5a8eb0daebdb2fc1bcae9bac567 Mon Sep 17 00:00:00 2001
From: ningxinr <ningxinr at live.cn>
Date: Wed, 24 Sep 2025 20:43:22 -0700
Subject: [PATCH 1/3] Fix Issue #160611

---
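Editorial note (not part of the patch; placed between the "---" marker and the diffstat, where git-am ignores it): isSaturatingMinMax treats smax(smin(x, MinC), MaxC) with MaxC == 0 as an unsigned saturate of width log2(MinC + 1). When MinC is also 0, as in the smin/smax pair added by the test below for issue #160611, MinC + 1 == 1 is still a power of two, APInt::exactLogBase2() returns 0, and the caller is handed a saturation width of zero. The standalone C++ sketch below models that arithmetic and the flooring this commit applies; the local exactLogBase2() helper is only a stand-in for the APInt method, not LLVM code.

  #include <algorithm>
  #include <cassert>
  #include <cstdint>
  #include <iostream>

  // Stand-in for APInt::exactLogBase2(): log2 of a value known to be a power of two.
  static int exactLogBase2(uint64_t V) {
    assert(V != 0 && (V & (V - 1)) == 0 && "not a power of two");
    int Log = 0;
    while (V >>= 1)
      ++Log;
    return Log;
  }

  int main() {
    // The clamp from issue #160611: smax(smin(fptosi(x), 0), 0).
    uint64_t MaxC = 0;             // constant of the outer smax
    uint64_t MinC = 0;             // constant of the inner smin
    uint64_t MinCPlus1 = MinC + 1; // == 1, still a power of two

    if (MaxC == 0 && (MinCPlus1 & (MinCPlus1 - 1)) == 0) {
      int BW = exactLogBase2(MinCPlus1); // 0: a zero-bit saturation width
      std::cout << "unpatched BW = " << BW << '\n';
      BW = std::max(BW, 1);              // the flooring applied by this commit
      std::cout << "floored   BW = " << BW << '\n';
    }
    return 0;
  }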
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  2 +-
 llvm/test/CodeGen/AArch64/fpclamptosat.ll     | 24 ++++++++++++
 llvm/test/CodeGen/RISCV/fpclamptosat.ll       | 39 +++++++++++++++++++
 llvm/test/CodeGen/X86/fpclamptosat.ll         | 16 ++++++++
 4 files changed, 80 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index a6ba6e518899f..c8c7882895ca6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6047,7 +6047,7 @@ static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2,
   }
 
   if (MaxC == 0 && MinCPlus1.isPowerOf2()) {
-    BW = MinCPlus1.exactLogBase2();
+    BW = std::max(MinCPlus1.exactLogBase2(), 1);
     Unsigned = true;
     return N02;
   }
diff --git a/llvm/test/CodeGen/AArch64/fpclamptosat.ll b/llvm/test/CodeGen/AArch64/fpclamptosat.ll
index 00de1530fb72c..8b1f4e92fd339 100644
--- a/llvm/test/CodeGen/AArch64/fpclamptosat.ll
+++ b/llvm/test/CodeGen/AArch64/fpclamptosat.ll
@@ -1026,6 +1026,30 @@ entry:
   ret i64 %conv6
 }
 
+define i32 @ustest_f16const_i32() {
+; CHECK-CVT-LABEL: ustest_f16const_i32:
+; CHECK-CVT:       // %bb.0:
+; CHECK-CVT-NEXT:    mov w8, #2143289344 // =0x7fc00000
+; CHECK-CVT-NEXT:    fmov s0, w8
+; CHECK-CVT-NEXT:    fcvtzs w8, s0
+; CHECK-CVT-NEXT:    and w8, w8, w8, asr #31
+; CHECK-CVT-NEXT:    bic w0, w8, w8, asr #31
+; CHECK-CVT-NEXT:    ret
+;
+; CHECK-FP16-LABEL: ustest_f16const_i32:
+; CHECK-FP16:       // %bb.0:
+; CHECK-FP16-NEXT:    mov w8, #32256 // =0x7e00
+; CHECK-FP16-NEXT:    fmov h0, w8
+; CHECK-FP16-NEXT:    fcvtzs w8, h0
+; CHECK-FP16-NEXT:    and w8, w8, w8, asr #31
+; CHECK-FP16-NEXT:    bic w0, w8, w8, asr #31
+; CHECK-FP16-NEXT:    ret
+  %1 = fptosi half 0xH7E00 to i32
+  %2 = call i32 @llvm.smin.i32(i32 0, i32 %1)
+  %3 = call i32 @llvm.smax.i32(i32 %2, i32 0)
+  ret i32 %3
+}
+
 declare i32 @llvm.smin.i32(i32, i32)
 declare i32 @llvm.smax.i32(i32, i32)
 declare i32 @llvm.umin.i32(i32, i32)
diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat.ll b/llvm/test/CodeGen/RISCV/fpclamptosat.ll
index 18d071cc39bb6..7bc58c43beb78 100644
--- a/llvm/test/CodeGen/RISCV/fpclamptosat.ll
+++ b/llvm/test/CodeGen/RISCV/fpclamptosat.ll
@@ -3829,6 +3829,45 @@ entry:
   ret i64 %conv6
 }
 
+define i32 @ustest_f16const_i32() {
+; CHECK-LABEL: ustest_f16const_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ecx
+; CHECK-NEXT:    movl %ecx, %eax
+; CHECK-NEXT:    sarl $31, %eax
+; CHECK-NEXT:    xorl %edx, %edx
+; CHECK-NEXT:    andl %ecx, %eax
+; CHECK-NEXT:    cmovlel %edx, %eax
+; CHECK-NEXT:    retq
+; RV32-LABEL: ustest_f16const_i32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a0, 523264
+; RV32-NEXT:    fmv.w.x fa5, a0
+; RV32-NEXT:    fcvt.w.s a0, fa5, rtz
+; RV32-NEXT:    srai a1, a0, 31
+; RV32-NEXT:    and a0, a1, a0
+; RV32-NEXT:    sgtz a1, a0
+; RV32-NEXT:    neg a1, a1
+; RV32-NEXT:    and a0, a1, a0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: ustest_f16const_i32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    lui a0, 523264
+; RV64-NEXT:    fmv.w.x fa5, a0
+; RV64-NEXT:    fcvt.l.s a0, fa5, rtz
+; RV64-NEXT:    srai a1, a0, 63
+; RV64-NEXT:    and a0, a1, a0
+; RV64-NEXT:    sgtz a1, a0
+; RV64-NEXT:    neg a1, a1
+; RV64-NEXT:    and a0, a1, a0
+; RV64-NEXT:    ret
+  %1 = fptosi half 0xH7E00 to i32
+  %2 = call i32 @llvm.smin.i32(i32 0, i32 %1)
+  %3 = call i32 @llvm.smax.i32(i32 %2, i32 0)
+  ret i32 %3
+}
+
 declare i32 @llvm.smin.i32(i32, i32)
 declare i32 @llvm.smax.i32(i32, i32)
 declare i32 @llvm.umin.i32(i32, i32)
diff --git a/llvm/test/CodeGen/X86/fpclamptosat.ll b/llvm/test/CodeGen/X86/fpclamptosat.ll
index 3f5ec7b530fe0..65492e772cebf 100644
--- a/llvm/test/CodeGen/X86/fpclamptosat.ll
+++ b/llvm/test/CodeGen/X86/fpclamptosat.ll
@@ -1170,6 +1170,22 @@ entry:
   ret i64 %conv6
 }
 
+define i32 @ustest_f16const_i32() {
+; CHECK-LABEL: ustest_f16const_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ecx
+; CHECK-NEXT:    movl %ecx, %eax
+; CHECK-NEXT:    sarl $31, %eax
+; CHECK-NEXT:    xorl %edx, %edx
+; CHECK-NEXT:    andl %ecx, %eax
+; CHECK-NEXT:    cmovlel %edx, %eax
+; CHECK-NEXT:    retq
+  %1 = fptosi half 0xH7E00 to i32
+  %2 = call i32 @llvm.smin.i32(i32 0, i32 %1)
+  %3 = call i32 @llvm.smax.i32(i32 %2, i32 0)
+  ret i32 %3
+}
+
 declare i32 @llvm.smin.i32(i32, i32)
 declare i32 @llvm.smax.i32(i32, i32)
 declare i32 @llvm.umin.i32(i32, i32)

From f6db9a0106ba4c76215185a7f7c6587bb3bf8e97 Mon Sep 17 00:00:00 2001
From: ningxinr <ningxinr at live.cn>
Date: Mon, 29 Sep 2025 13:30:10 -0700
Subject: [PATCH 2/3] Addressed feedback and fixed typos

---
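Editorial note (not part of the patch): this commit replaces the flooring from the first commit with an early rejection of the degenerate clamp. Requiring MinC != 0 up front means MinCPlus1 is at least 2, so exactLogBase2() is at least 1 and a zero-width saturation type can no longer be requested. The test is also rewritten to take the half value as a function argument (ustest_f16i32_nsat), extended to more targets and to vector variants, and the misspelled utesth_* helpers are renamed to utest_*. Below is a standalone C++ sketch of the revised predicate; the isUnsignedSatClamp name and the scalar modelling are illustrative only, not LLVM code.

  #include <cstdint>
  #include <iostream>

  // Sketch of the revised guard: a clamp smax(smin(x, MinC), MaxC) with
  // MaxC == 0 only counts as an unsigned saturate when MinC + 1 is a power
  // of two AND MinC itself is nonzero, so the computed width is always >= 1.
  static bool isUnsignedSatClamp(uint64_t MinC, uint64_t MaxC, int &BW) {
    uint64_t MinCPlus1 = MinC + 1;
    bool Pow2 = MinCPlus1 != 0 && (MinCPlus1 & (MinCPlus1 - 1)) == 0;
    if (MaxC != 0 || MinC == 0 || !Pow2)
      return false;                      // MinC == 0 is now rejected outright
    BW = 0;
    while (MinCPlus1 >>= 1)
      ++BW;                              // log2(MinC + 1), guaranteed >= 1 here
    return true;
  }

  int main() {
    int BW = 0;
    std::cout << isUnsignedSatClamp(0, 0, BW) << '\n'; // 0: the issue #160611 case
    if (isUnsignedSatClamp(255, 0, BW))
      std::cout << "BW = " << BW << '\n';              // BW = 8
    return 0;
  }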
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |   4 +-
 llvm/test/CodeGen/AArch64/fpclamptosat.ll     |  53 +++--
 llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll | 101 ++++++---
 llvm/test/CodeGen/ARM/fpclamptosat.ll         |  46 +++-
 llvm/test/CodeGen/ARM/fpclamptosat_vec.ll     | 107 ++++++++--
 llvm/test/CodeGen/RISCV/fpclamptosat.ll       |  61 +++---
 .../CodeGen/RISCV/rvv/fpclamptosat_vec.ll     | 202 ++++++++++++++++--
 llvm/test/CodeGen/WebAssembly/fpclamptosat.ll |  89 +++++---
 .../CodeGen/WebAssembly/fpclamptosat_vec.ll   |  78 +++++--
 llvm/test/CodeGen/X86/fpclamptosat.ll         |  43 ++--
 llvm/test/CodeGen/X86/fpclamptosat_vec.ll     | 105 ++++++---
 11 files changed, 673 insertions(+), 216 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index c8c7882895ca6..4178aca2db828 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6046,8 +6046,8 @@ static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2,
     return N02;
   }
 
-  if (MaxC == 0 && MinCPlus1.isPowerOf2()) {
-    BW = std::max(MinCPlus1.exactLogBase2(), 1);
+  if (MaxC == 0 && MinC != 0 && MinCPlus1.isPowerOf2()) {
+    BW = MinCPlus1.exactLogBase2();
     Unsigned = true;
     return N02;
   }
diff --git a/llvm/test/CodeGen/AArch64/fpclamptosat.ll b/llvm/test/CodeGen/AArch64/fpclamptosat.ll
index 8b1f4e92fd339..24be9234515e8 100644
--- a/llvm/test/CodeGen/AArch64/fpclamptosat.ll
+++ b/llvm/test/CodeGen/AArch64/fpclamptosat.ll
@@ -111,14 +111,14 @@ entry:
   ret i32 %conv6
 }
 
-define i32 @utesth_f16i32(half %x) {
-; CHECK-CVT-LABEL: utesth_f16i32:
+define i32 @utest_f16i32(half %x) {
+; CHECK-CVT-LABEL: utest_f16i32:
 ; CHECK-CVT:       // %bb.0: // %entry
 ; CHECK-CVT-NEXT:    fcvt s0, h0
 ; CHECK-CVT-NEXT:    fcvtzu w0, s0
 ; CHECK-CVT-NEXT:    ret
 ;
-; CHECK-FP16-LABEL: utesth_f16i32:
+; CHECK-FP16-LABEL: utest_f16i32:
 ; CHECK-FP16:       // %bb.0: // %entry
 ; CHECK-FP16-NEXT:    fcvtzu w0, h0
 ; CHECK-FP16-NEXT:    ret
@@ -298,8 +298,8 @@ entry:
   ret i16 %conv6
 }
 
-define i16 @utesth_f16i16(half %x) {
-; CHECK-CVT-LABEL: utesth_f16i16:
+define i16 @utest_f16i16(half %x) {
+; CHECK-CVT-LABEL: utest_f16i16:
 ; CHECK-CVT:       // %bb.0: // %entry
 ; CHECK-CVT-NEXT:    fcvt s0, h0
 ; CHECK-CVT-NEXT:    mov w9, #65535 // =0xffff
@@ -308,7 +308,7 @@ define i16 @utesth_f16i16(half %x) {
 ; CHECK-CVT-NEXT:    csel w0, w8, w9, lo
 ; CHECK-CVT-NEXT:    ret
 ;
-; CHECK-FP16-LABEL: utesth_f16i16:
+; CHECK-FP16-LABEL: utest_f16i16:
 ; CHECK-FP16:       // %bb.0: // %entry
 ; CHECK-FP16-NEXT:    fcvtzu w8, h0
 ; CHECK-FP16-NEXT:    mov w9, #65535 // =0xffff
@@ -493,8 +493,8 @@ entry:
   ret i64 %conv6
 }
 
-define i64 @utesth_f16i64(half %x) {
-; CHECK-LABEL: utesth_f16i64:
+define i64 @utest_f16i64(half %x) {
+; CHECK-LABEL: utest_f16i64:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
@@ -636,14 +636,14 @@ entry:
   ret i32 %conv6
 }
 
-define i32 @utesth_f16i32_mm(half %x) {
-; CHECK-CVT-LABEL: utesth_f16i32_mm:
+define i32 @utest_f16i32_mm(half %x) {
+; CHECK-CVT-LABEL: utest_f16i32_mm:
 ; CHECK-CVT:       // %bb.0: // %entry
 ; CHECK-CVT-NEXT:    fcvt s0, h0
 ; CHECK-CVT-NEXT:    fcvtzu w0, s0
 ; CHECK-CVT-NEXT:    ret
 ;
-; CHECK-FP16-LABEL: utesth_f16i32_mm:
+; CHECK-FP16-LABEL: utest_f16i32_mm:
 ; CHECK-FP16:       // %bb.0: // %entry
 ; CHECK-FP16-NEXT:    fcvtzu w0, h0
 ; CHECK-FP16-NEXT:    ret
@@ -808,8 +808,8 @@ entry:
   ret i16 %conv6
 }
 
-define i16 @utesth_f16i16_mm(half %x) {
-; CHECK-CVT-LABEL: utesth_f16i16_mm:
+define i16 @utest_f16i16_mm(half %x) {
+; CHECK-CVT-LABEL: utest_f16i16_mm:
 ; CHECK-CVT:       // %bb.0: // %entry
 ; CHECK-CVT-NEXT:    fcvt s0, h0
 ; CHECK-CVT-NEXT:    mov w9, #65535 // =0xffff
@@ -818,7 +818,7 @@ define i16 @utesth_f16i16_mm(half %x) {
 ; CHECK-CVT-NEXT:    csel w0, w8, w9, lo
 ; CHECK-CVT-NEXT:    ret
 ;
-; CHECK-FP16-LABEL: utesth_f16i16_mm:
+; CHECK-FP16-LABEL: utest_f16i16_mm:
 ; CHECK-FP16:       // %bb.0: // %entry
 ; CHECK-FP16-NEXT:    fcvtzu w8, h0
 ; CHECK-FP16-NEXT:    mov w9, #65535 // =0xffff
@@ -986,8 +986,8 @@ entry:
   ret i64 %conv6
 }
 
-define i64 @utesth_f16i64_mm(half %x) {
-; CHECK-LABEL: utesth_f16i64_mm:
+define i64 @utest_f16i64_mm(half %x) {
+; CHECK-LABEL: utest_f16i64_mm:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
@@ -1026,28 +1026,27 @@ entry:
   ret i64 %conv6
 }
 
-define i32 @ustest_f16const_i32() {
-; CHECK-CVT-LABEL: ustest_f16const_i32:
+; i32 non saturate
+
+define i32 @ustest_f16i32_nsat(half %x) {
+; CHECK-CVT-LABEL: ustest_f16i32_nsat:
 ; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    mov w8, #2143289344 // =0x7fc00000
-; CHECK-CVT-NEXT:    fmov s0, w8
+; CHECK-CVT-NEXT:    fcvt s0, h0
 ; CHECK-CVT-NEXT:    fcvtzs w8, s0
 ; CHECK-CVT-NEXT:    and w8, w8, w8, asr #31
 ; CHECK-CVT-NEXT:    bic w0, w8, w8, asr #31
 ; CHECK-CVT-NEXT:    ret
 ;
-; CHECK-FP16-LABEL: ustest_f16const_i32:
+; CHECK-FP16-LABEL: ustest_f16i32_nsat:
 ; CHECK-FP16:       // %bb.0:
-; CHECK-FP16-NEXT:    mov w8, #32256 // =0x7e00
-; CHECK-FP16-NEXT:    fmov h0, w8
 ; CHECK-FP16-NEXT:    fcvtzs w8, h0
 ; CHECK-FP16-NEXT:    and w8, w8, w8, asr #31
 ; CHECK-FP16-NEXT:    bic w0, w8, w8, asr #31
 ; CHECK-FP16-NEXT:    ret
-  %1 = fptosi half 0xH7E00 to i32
-  %2 = call i32 @llvm.smin.i32(i32 0, i32 %1)
-  %3 = call i32 @llvm.smax.i32(i32 %2, i32 0)
-  ret i32 %3
+  %conv = fptosi half %x to i32
+  %spec.store.select = call i32 @llvm.smin.i32(i32 0, i32 %conv)
+  %spec.store.select7 = call i32 @llvm.smax.i32(i32 %spec.store.select, i32 0)
+  ret i32 %spec.store.select7
 }
 
 declare i32 @llvm.smin.i32(i32, i32)
diff --git a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
index b09a86723bb56..637c02875b84e 100644
--- a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
@@ -321,20 +321,20 @@ entry:
   ret <4 x i32> %conv6
 }
 
-define <4 x i32> @utesth_f16i32(<4 x half> %x) {
-; CHECK-CVT-SD-LABEL: utesth_f16i32:
+define <4 x i32> @utest_f16i32(<4 x half> %x) {
+; CHECK-CVT-SD-LABEL: utest_f16i32:
 ; CHECK-CVT-SD:       // %bb.0: // %entry
 ; CHECK-CVT-SD-NEXT:    fcvtl v0.4s, v0.4h
 ; CHECK-CVT-SD-NEXT:    fcvtzu v0.4s, v0.4s
 ; CHECK-CVT-SD-NEXT:    ret
 ;
-; CHECK-FP16-SD-LABEL: utesth_f16i32:
+; CHECK-FP16-SD-LABEL: utest_f16i32:
 ; CHECK-FP16-SD:       // %bb.0: // %entry
 ; CHECK-FP16-SD-NEXT:    fcvtl v0.4s, v0.4h
 ; CHECK-FP16-SD-NEXT:    fcvtzu v0.4s, v0.4s
 ; CHECK-FP16-SD-NEXT:    ret
 ;
-; CHECK-CVT-GI-LABEL: utesth_f16i32:
+; CHECK-CVT-GI-LABEL: utest_f16i32:
 ; CHECK-CVT-GI:       // %bb.0: // %entry
 ; CHECK-CVT-GI-NEXT:    fcvtl v0.4s, v0.4h
 ; CHECK-CVT-GI-NEXT:    movi v1.2d, #0x000000ffffffff
@@ -349,7 +349,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) {
 ; CHECK-CVT-GI-NEXT:    uzp1 v0.4s, v2.4s, v0.4s
 ; CHECK-CVT-GI-NEXT:    ret
 ;
-; CHECK-FP16-GI-LABEL: utesth_f16i32:
+; CHECK-FP16-GI-LABEL: utest_f16i32:
 ; CHECK-FP16-GI:       // %bb.0: // %entry
 ; CHECK-FP16-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-FP16-GI-NEXT:    mov h2, v0.h[1]
@@ -614,8 +614,8 @@ entry:
   ret <8 x i16> %conv6
 }
 
-define <8 x i16> @utesth_f16i16(<8 x half> %x) {
-; CHECK-CVT-LABEL: utesth_f16i16:
+define <8 x i16> @utest_f16i16(<8 x half> %x) {
+; CHECK-CVT-LABEL: utest_f16i16:
 ; CHECK-CVT:       // %bb.0: // %entry
 ; CHECK-CVT-NEXT:    fcvtl v1.4s, v0.4h
 ; CHECK-CVT-NEXT:    fcvtl2 v0.4s, v0.8h
@@ -625,12 +625,12 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) {
 ; CHECK-CVT-NEXT:    uqxtn2 v0.8h, v2.4s
 ; CHECK-CVT-NEXT:    ret
 ;
-; CHECK-FP16-SD-LABEL: utesth_f16i16:
+; CHECK-FP16-SD-LABEL: utest_f16i16:
 ; CHECK-FP16-SD:       // %bb.0: // %entry
 ; CHECK-FP16-SD-NEXT:    fcvtzu v0.8h, v0.8h
 ; CHECK-FP16-SD-NEXT:    ret
 ;
-; CHECK-FP16-GI-LABEL: utesth_f16i16:
+; CHECK-FP16-GI-LABEL: utest_f16i16:
 ; CHECK-FP16-GI:       // %bb.0: // %entry
 ; CHECK-FP16-GI-NEXT:    fcvtl v1.4s, v0.4h
 ; CHECK-FP16-GI-NEXT:    fcvtl2 v0.4s, v0.8h
@@ -1746,8 +1746,8 @@ entry:
   ret <2 x i64> %conv6
 }
 
-define <2 x i64> @utesth_f16i64(<2 x half> %x) {
-; CHECK-CVT-SD-LABEL: utesth_f16i64:
+define <2 x i64> @utest_f16i64(<2 x half> %x) {
+; CHECK-CVT-SD-LABEL: utest_f16i64:
 ; CHECK-CVT-SD:       // %bb.0: // %entry
 ; CHECK-CVT-SD-NEXT:    sub sp, sp, #48
 ; CHECK-CVT-SD-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
@@ -1777,7 +1777,7 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) {
 ; CHECK-CVT-SD-NEXT:    add sp, sp, #48
 ; CHECK-CVT-SD-NEXT:    ret
 ;
-; CHECK-FP16-SD-LABEL: utesth_f16i64:
+; CHECK-FP16-SD-LABEL: utest_f16i64:
 ; CHECK-FP16-SD:       // %bb.0: // %entry
 ; CHECK-FP16-SD-NEXT:    sub sp, sp, #48
 ; CHECK-FP16-SD-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
@@ -1807,7 +1807,7 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) {
 ; CHECK-FP16-SD-NEXT:    add sp, sp, #48
 ; CHECK-FP16-SD-NEXT:    ret
 ;
-; CHECK-CVT-GI-LABEL: utesth_f16i64:
+; CHECK-CVT-GI-LABEL: utest_f16i64:
 ; CHECK-CVT-GI:       // %bb.0: // %entry
 ; CHECK-CVT-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-CVT-GI-NEXT:    mov h1, v0.h[1]
@@ -1819,7 +1819,7 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) {
 ; CHECK-CVT-GI-NEXT:    mov v0.d[1], x9
 ; CHECK-CVT-GI-NEXT:    ret
 ;
-; CHECK-FP16-GI-LABEL: utesth_f16i64:
+; CHECK-FP16-GI-LABEL: utest_f16i64:
 ; CHECK-FP16-GI:       // %bb.0: // %entry
 ; CHECK-FP16-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-FP16-GI-NEXT:    mov h1, v0.h[1]
@@ -2307,20 +2307,20 @@ entry:
   ret <4 x i32> %conv6
 }
 
-define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
-; CHECK-CVT-SD-LABEL: utesth_f16i32_mm:
+define <4 x i32> @utest_f16i32_mm(<4 x half> %x) {
+; CHECK-CVT-SD-LABEL: utest_f16i32_mm:
 ; CHECK-CVT-SD:       // %bb.0: // %entry
 ; CHECK-CVT-SD-NEXT:    fcvtl v0.4s, v0.4h
 ; CHECK-CVT-SD-NEXT:    fcvtzu v0.4s, v0.4s
 ; CHECK-CVT-SD-NEXT:    ret
 ;
-; CHECK-FP16-SD-LABEL: utesth_f16i32_mm:
+; CHECK-FP16-SD-LABEL: utest_f16i32_mm:
 ; CHECK-FP16-SD:       // %bb.0: // %entry
 ; CHECK-FP16-SD-NEXT:    fcvtl v0.4s, v0.4h
 ; CHECK-FP16-SD-NEXT:    fcvtzu v0.4s, v0.4s
 ; CHECK-FP16-SD-NEXT:    ret
 ;
-; CHECK-CVT-GI-LABEL: utesth_f16i32_mm:
+; CHECK-CVT-GI-LABEL: utest_f16i32_mm:
 ; CHECK-CVT-GI:       // %bb.0: // %entry
 ; CHECK-CVT-GI-NEXT:    fcvtl v0.4s, v0.4h
 ; CHECK-CVT-GI-NEXT:    movi v1.2d, #0x000000ffffffff
@@ -2335,7 +2335,7 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
 ; CHECK-CVT-GI-NEXT:    uzp1 v0.4s, v2.4s, v0.4s
 ; CHECK-CVT-GI-NEXT:    ret
 ;
-; CHECK-FP16-GI-LABEL: utesth_f16i32_mm:
+; CHECK-FP16-GI-LABEL: utest_f16i32_mm:
 ; CHECK-FP16-GI:       // %bb.0: // %entry
 ; CHECK-FP16-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-FP16-GI-NEXT:    mov h2, v0.h[1]
@@ -2585,8 +2585,8 @@ entry:
   ret <8 x i16> %conv6
 }
 
-define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
-; CHECK-CVT-LABEL: utesth_f16i16_mm:
+define <8 x i16> @utest_f16i16_mm(<8 x half> %x) {
+; CHECK-CVT-LABEL: utest_f16i16_mm:
 ; CHECK-CVT:       // %bb.0: // %entry
 ; CHECK-CVT-NEXT:    fcvtl v1.4s, v0.4h
 ; CHECK-CVT-NEXT:    fcvtl2 v0.4s, v0.8h
@@ -2596,12 +2596,12 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
 ; CHECK-CVT-NEXT:    uqxtn2 v0.8h, v2.4s
 ; CHECK-CVT-NEXT:    ret
 ;
-; CHECK-FP16-SD-LABEL: utesth_f16i16_mm:
+; CHECK-FP16-SD-LABEL: utest_f16i16_mm:
 ; CHECK-FP16-SD:       // %bb.0: // %entry
 ; CHECK-FP16-SD-NEXT:    fcvtzu v0.8h, v0.8h
 ; CHECK-FP16-SD-NEXT:    ret
 ;
-; CHECK-FP16-GI-LABEL: utesth_f16i16_mm:
+; CHECK-FP16-GI-LABEL: utest_f16i16_mm:
 ; CHECK-FP16-GI:       // %bb.0: // %entry
 ; CHECK-FP16-GI-NEXT:    fcvtl v1.4s, v0.4h
 ; CHECK-FP16-GI-NEXT:    fcvtl2 v0.4s, v0.8h
@@ -3694,8 +3694,8 @@ entry:
   ret <2 x i64> %conv6
 }
 
-define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
-; CHECK-CVT-SD-LABEL: utesth_f16i64_mm:
+define <2 x i64> @utest_f16i64_mm(<2 x half> %x) {
+; CHECK-CVT-SD-LABEL: utest_f16i64_mm:
 ; CHECK-CVT-SD:       // %bb.0: // %entry
 ; CHECK-CVT-SD-NEXT:    sub sp, sp, #48
 ; CHECK-CVT-SD-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
@@ -3725,7 +3725,7 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
 ; CHECK-CVT-SD-NEXT:    add sp, sp, #48
 ; CHECK-CVT-SD-NEXT:    ret
 ;
-; CHECK-FP16-SD-LABEL: utesth_f16i64_mm:
+; CHECK-FP16-SD-LABEL: utest_f16i64_mm:
 ; CHECK-FP16-SD:       // %bb.0: // %entry
 ; CHECK-FP16-SD-NEXT:    sub sp, sp, #48
 ; CHECK-FP16-SD-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
@@ -3755,7 +3755,7 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
 ; CHECK-FP16-SD-NEXT:    add sp, sp, #48
 ; CHECK-FP16-SD-NEXT:    ret
 ;
-; CHECK-CVT-GI-LABEL: utesth_f16i64_mm:
+; CHECK-CVT-GI-LABEL: utest_f16i64_mm:
 ; CHECK-CVT-GI:       // %bb.0: // %entry
 ; CHECK-CVT-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-CVT-GI-NEXT:    mov h1, v0.h[1]
@@ -3767,7 +3767,7 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
 ; CHECK-CVT-GI-NEXT:    mov v0.d[1], x9
 ; CHECK-CVT-GI-NEXT:    ret
 ;
-; CHECK-FP16-GI-LABEL: utesth_f16i64_mm:
+; CHECK-FP16-GI-LABEL: utest_f16i64_mm:
 ; CHECK-FP16-GI:       // %bb.0: // %entry
 ; CHECK-FP16-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-FP16-GI-NEXT:    mov h1, v0.h[1]
@@ -3941,6 +3941,51 @@ entry:
   ret <2 x i64> %conv6
 }
 
+; i32 non saturate
+
+define <4 x i32> @ustest_f16i32_nsat(<4 x half> %x) {
+; CHECK-CVT-SD-LABEL: ustest_f16i32_nsat:
+; CHECK-CVT-SD:       // %bb.0: // %entry
+; CHECK-CVT-SD-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-CVT-SD-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-CVT-SD-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-CVT-SD-NEXT:    smin v0.4s, v0.4s, v1.4s
+; CHECK-CVT-SD-NEXT:    smax v0.4s, v0.4s, v1.4s
+; CHECK-CVT-SD-NEXT:    ret
+;
+; CHECK-FP16-SD-LABEL: ustest_f16i32_nsat:
+; CHECK-FP16-SD:       // %bb.0: // %entry
+; CHECK-FP16-SD-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-FP16-SD-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-FP16-SD-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-FP16-SD-NEXT:    smin v0.4s, v0.4s, v1.4s
+; CHECK-FP16-SD-NEXT:    smax v0.4s, v0.4s, v1.4s
+; CHECK-FP16-SD-NEXT:    ret
+;
+; CHECK-CVT-GI-LABEL: ustest_f16i32_nsat:
+; CHECK-CVT-GI:       // %bb.0: // %entry
+; CHECK-CVT-GI-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-CVT-GI-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-CVT-GI-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-CVT-GI-NEXT:    smin v0.4s, v1.4s, v0.4s
+; CHECK-CVT-GI-NEXT:    smax v0.4s, v0.4s, v1.4s
+; CHECK-CVT-GI-NEXT:    ret
+;
+; CHECK-FP16-GI-LABEL: ustest_f16i32_nsat:
+; CHECK-FP16-GI:       // %bb.0: // %entry
+; CHECK-FP16-GI-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-FP16-GI-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-FP16-GI-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-FP16-GI-NEXT:    smin v0.4s, v1.4s, v0.4s
+; CHECK-FP16-GI-NEXT:    smax v0.4s, v0.4s, v1.4s
+; CHECK-FP16-GI-NEXT:    ret
+entry:
+  %conv = fptosi <4 x half> %x to <4 x i32>
+  %spec.store.select = call <4 x i32> @llvm.smin.v4i32(<4 x i32> zeroinitializer, <4 x i32> %conv)
+  %spec.store.select7 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %spec.store.select, <4 x i32> zeroinitializer)
+  ret <4 x i32> %spec.store.select7
+}
+
 declare <2 x i32> @llvm.smin.v2i32(<2 x i32>, <2 x i32>)
 declare <2 x i32> @llvm.smax.v2i32(<2 x i32>, <2 x i32>)
 declare <2 x i32> @llvm.umin.v2i32(<2 x i32>, <2 x i32>)
diff --git a/llvm/test/CodeGen/ARM/fpclamptosat.ll b/llvm/test/CodeGen/ARM/fpclamptosat.ll
index 8ab56b228d2a7..37f230abb7a14 100644
--- a/llvm/test/CodeGen/ARM/fpclamptosat.ll
+++ b/llvm/test/CodeGen/ARM/fpclamptosat.ll
@@ -383,8 +383,8 @@ entry:
   ret i32 %conv6
 }
 
-define i32 @utesth_f16i32(half %x) {
-; SOFT-LABEL: utesth_f16i32:
+define i32 @utest_f16i32(half %x) {
+; SOFT-LABEL: utest_f16i32:
 ; SOFT:       @ %bb.0: @ %entry
 ; SOFT-NEXT:    .save {r7, lr}
 ; SOFT-NEXT:    push {r7, lr}
@@ -400,7 +400,7 @@ define i32 @utesth_f16i32(half %x) {
 ; SOFT-NEXT:  .LBB7_2: @ %entry
 ; SOFT-NEXT:    pop {r7, pc}
 ;
-; VFP2-LABEL: utesth_f16i32:
+; VFP2-LABEL: utest_f16i32:
 ; VFP2:       @ %bb.0: @ %entry
 ; VFP2-NEXT:    .save {r7, lr}
 ; VFP2-NEXT:    push {r7, lr}
@@ -411,7 +411,7 @@ define i32 @utesth_f16i32(half %x) {
 ; VFP2-NEXT:    vmov r0, s0
 ; VFP2-NEXT:    pop {r7, pc}
 ;
-; FULL-LABEL: utesth_f16i32:
+; FULL-LABEL: utest_f16i32:
 ; FULL:       @ %bb.0: @ %entry
 ; FULL-NEXT:    vcvt.u32.f16 s0, s0
 ; FULL-NEXT:    vmov r0, s0
@@ -3985,6 +3985,44 @@ entry:
   ret i32 %spec.store.select7
 }
 
+define i32 @ustest_f16i32_nsat(half %x) {
+; SOFT-LABEL: ustest_f16i32_nsat:
+; SOFT:       @ %bb.0:
+; SOFT-NEXT:    .save {r7, lr}
+; SOFT-NEXT:    push {r7, lr}
+; SOFT-NEXT:    uxth r0, r0
+; SOFT-NEXT:    bl __aeabi_h2f
+; SOFT-NEXT:    bl __aeabi_f2iz
+; SOFT-NEXT:    asrs r1, r0, #31
+; SOFT-NEXT:    ands r0, r1
+; SOFT-NEXT:    asrs r1, r0, #31
+; SOFT-NEXT:    bics r0, r1
+; SOFT-NEXT:    pop {r7, pc}
+;
+; VFP2-LABEL: ustest_f16i32_nsat:
+; VFP2:       @ %bb.0:
+; VFP2-NEXT:    .save {r7, lr}
+; VFP2-NEXT:    push {r7, lr}
+; VFP2-NEXT:    vmov r0, s0
+; VFP2-NEXT:    bl __aeabi_h2f
+; VFP2-NEXT:    vmov s0, r0
+; VFP2-NEXT:    vcvt.s32.f32 s0, s0
+; VFP2-NEXT:    vmov r0, s0
+; VFP2-NEXT:    usat r0, #0, r0
+; VFP2-NEXT:    pop {r7, pc}
+;
+; FULL-LABEL: ustest_f16i32_nsat:
+; FULL:       @ %bb.0:
+; FULL-NEXT:    vcvt.s32.f16 s0, s0
+; FULL-NEXT:    vmov r0, s0
+; FULL-NEXT:    usat r0, #0, r0
+; FULL-NEXT:    bx lr
+  %conv = fptosi half %x to i32
+  %spec.store.select = call i32 @llvm.smin.i32(i32 0, i32 %conv)
+  %spec.store.select7 = call i32 @llvm.smax.i32(i32 %spec.store.select, i32 0)
+  ret i32 %spec.store.select7
+}
+
 
 
 declare i32 @llvm.smin.i32(i32, i32)
diff --git a/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll b/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll
index 96f009a4da02d..ba31b353ee1f6 100644
--- a/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll
@@ -748,8 +748,8 @@ entry:
   ret <4 x i32> %conv6
 }
 
-define <4 x i32> @utesth_f16i32(<4 x half> %x) {
-; CHECK-NEON-LABEL: utesth_f16i32:
+define <4 x i32> @utest_f16i32(<4 x half> %x) {
+; CHECK-NEON-LABEL: utest_f16i32:
 ; CHECK-NEON:       @ %bb.0: @ %entry
 ; CHECK-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
 ; CHECK-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
@@ -821,7 +821,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) {
 ; CHECK-NEON-NEXT:    vpop {d12, d13}
 ; CHECK-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
 ;
-; CHECK-FP16-LABEL: utesth_f16i32:
+; CHECK-FP16-LABEL: utest_f16i32:
 ; CHECK-FP16:       @ %bb.0: @ %entry
 ; CHECK-FP16-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
 ; CHECK-FP16-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
@@ -1366,8 +1366,8 @@ entry:
   ret <8 x i16> %conv6
 }
 
-define <8 x i16> @utesth_f16i16(<8 x half> %x) {
-; CHECK-NEON-LABEL: utesth_f16i16:
+define <8 x i16> @utest_f16i16(<8 x half> %x) {
+; CHECK-NEON-LABEL: utest_f16i16:
 ; CHECK-NEON:       @ %bb.0: @ %entry
 ; CHECK-NEON-NEXT:    .save {r4, r5, r6, r7, r11, lr}
 ; CHECK-NEON-NEXT:    push {r4, r5, r6, r7, r11, lr}
@@ -1441,7 +1441,7 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) {
 ; CHECK-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14}
 ; CHECK-NEON-NEXT:    pop {r4, r5, r6, r7, r11, pc}
 ;
-; CHECK-FP16-LABEL: utesth_f16i16:
+; CHECK-FP16-LABEL: utest_f16i16:
 ; CHECK-FP16:       @ %bb.0: @ %entry
 ; CHECK-FP16-NEXT:    vmovx.f16 s4, s0
 ; CHECK-FP16-NEXT:    vcvt.u32.f16 s12, s0
@@ -2109,8 +2109,8 @@ entry:
   ret <2 x i64> %conv6
 }
 
-define <2 x i64> @utesth_f16i64(<2 x half> %x) {
-; CHECK-NEON-LABEL: utesth_f16i64:
+define <2 x i64> @utest_f16i64(<2 x half> %x) {
+; CHECK-NEON-LABEL: utest_f16i64:
 ; CHECK-NEON:       @ %bb.0: @ %entry
 ; CHECK-NEON-NEXT:    .save {r4, r5, r6, lr}
 ; CHECK-NEON-NEXT:    push {r4, r5, r6, lr}
@@ -2148,7 +2148,7 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) {
 ; CHECK-NEON-NEXT:    vpop {d8}
 ; CHECK-NEON-NEXT:    pop {r4, r5, r6, pc}
 ;
-; CHECK-FP16-LABEL: utesth_f16i64:
+; CHECK-FP16-LABEL: utest_f16i64:
 ; CHECK-FP16:       @ %bb.0: @ %entry
 ; CHECK-FP16-NEXT:    .save {r4, r5, r6, lr}
 ; CHECK-FP16-NEXT:    push {r4, r5, r6, lr}
@@ -2835,8 +2835,8 @@ entry:
   ret <4 x i32> %conv6
 }
 
-define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
-; CHECK-NEON-LABEL: utesth_f16i32_mm:
+define <4 x i32> @utest_f16i32_mm(<4 x half> %x) {
+; CHECK-NEON-LABEL: utest_f16i32_mm:
 ; CHECK-NEON:       @ %bb.0: @ %entry
 ; CHECK-NEON-NEXT:    .save {r4, r5, r6, r7, r11, lr}
 ; CHECK-NEON-NEXT:    push {r4, r5, r6, r7, r11, lr}
@@ -2881,7 +2881,7 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
 ; CHECK-NEON-NEXT:    vpop {d8, d9, d10, d11}
 ; CHECK-NEON-NEXT:    pop {r4, r5, r6, r7, r11, pc}
 ;
-; CHECK-FP16-LABEL: utesth_f16i32_mm:
+; CHECK-FP16-LABEL: utest_f16i32_mm:
 ; CHECK-FP16:       @ %bb.0: @ %entry
 ; CHECK-FP16-NEXT:    .save {r4, r5, r6, lr}
 ; CHECK-FP16-NEXT:    push {r4, r5, r6, lr}
@@ -3344,8 +3344,8 @@ entry:
   ret <8 x i16> %conv6
 }
 
-define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
-; CHECK-NEON-LABEL: utesth_f16i16_mm:
+define <8 x i16> @utest_f16i16_mm(<8 x half> %x) {
+; CHECK-NEON-LABEL: utest_f16i16_mm:
 ; CHECK-NEON:       @ %bb.0: @ %entry
 ; CHECK-NEON-NEXT:    .save {r4, r5, r6, r7, r11, lr}
 ; CHECK-NEON-NEXT:    push {r4, r5, r6, r7, r11, lr}
@@ -3419,7 +3419,7 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
 ; CHECK-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14}
 ; CHECK-NEON-NEXT:    pop {r4, r5, r6, r7, r11, pc}
 ;
-; CHECK-FP16-LABEL: utesth_f16i16_mm:
+; CHECK-FP16-LABEL: utest_f16i16_mm:
 ; CHECK-FP16:       @ %bb.0: @ %entry
 ; CHECK-FP16-NEXT:    vmovx.f16 s4, s0
 ; CHECK-FP16-NEXT:    vcvt.u32.f16 s12, s0
@@ -4044,8 +4044,8 @@ entry:
   ret <2 x i64> %conv6
 }
 
-define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
-; CHECK-NEON-LABEL: utesth_f16i64_mm:
+define <2 x i64> @utest_f16i64_mm(<2 x half> %x) {
+; CHECK-NEON-LABEL: utest_f16i64_mm:
 ; CHECK-NEON:       @ %bb.0: @ %entry
 ; CHECK-NEON-NEXT:    .save {r4, r5, r6, lr}
 ; CHECK-NEON-NEXT:    push {r4, r5, r6, lr}
@@ -4083,7 +4083,7 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
 ; CHECK-NEON-NEXT:    vpop {d8}
 ; CHECK-NEON-NEXT:    pop {r4, r5, r6, pc}
 ;
-; CHECK-FP16-LABEL: utesth_f16i64_mm:
+; CHECK-FP16-LABEL: utest_f16i64_mm:
 ; CHECK-FP16:       @ %bb.0: @ %entry
 ; CHECK-FP16-NEXT:    .save {r4, r5, r6, lr}
 ; CHECK-FP16-NEXT:    push {r4, r5, r6, lr}
@@ -4215,6 +4215,77 @@ entry:
   ret <2 x i64> %conv6
 }
 
+; i32 non saturate
+
+define <4 x i32> @ustest_f16i32_nsat(<4 x half> %x) {
+; CHECK-NEON-LABEL: ustest_f16i32_nsat:
+; CHECK-NEON:       @ %bb.0: @ %entry
+; CHECK-NEON-NEXT:    .save {r4, lr}
+; CHECK-NEON-NEXT:    push {r4, lr}
+; CHECK-NEON-NEXT:    .vsave {d8, d9, d10, d11}
+; CHECK-NEON-NEXT:    vpush {d8, d9, d10, d11}
+; CHECK-NEON-NEXT:    vmov r0, s0
+; CHECK-NEON-NEXT:    vmov.f32 s16, s3
+; CHECK-NEON-NEXT:    vmov.f32 s18, s2
+; CHECK-NEON-NEXT:    vmov.f32 s20, s1
+; CHECK-NEON-NEXT:    bl __aeabi_h2f
+; CHECK-NEON-NEXT:    mov r4, r0
+; CHECK-NEON-NEXT:    vmov r0, s16
+; CHECK-NEON-NEXT:    bl __aeabi_h2f
+; CHECK-NEON-NEXT:    vmov s16, r0
+; CHECK-NEON-NEXT:    vmov r0, s18
+; CHECK-NEON-NEXT:    bl __aeabi_h2f
+; CHECK-NEON-NEXT:    vmov s0, r0
+; CHECK-NEON-NEXT:    vmov r1, s20
+; CHECK-NEON-NEXT:    vcvt.s32.f32 s0, s0
+; CHECK-NEON-NEXT:    vmov s18, r4
+; CHECK-NEON-NEXT:    vmov r0, s0
+; CHECK-NEON-NEXT:    vmov.32 d11[0], r0
+; CHECK-NEON-NEXT:    mov r0, r1
+; CHECK-NEON-NEXT:    bl __aeabi_h2f
+; CHECK-NEON-NEXT:    vcvt.s32.f32 s2, s18
+; CHECK-NEON-NEXT:    vmov s0, r0
+; CHECK-NEON-NEXT:    vcvt.s32.f32 s4, s16
+; CHECK-NEON-NEXT:    vcvt.s32.f32 s0, s0
+; CHECK-NEON-NEXT:    vmov.i32 q8, #0x0
+; CHECK-NEON-NEXT:    vmov r0, s2
+; CHECK-NEON-NEXT:    vmov.32 d10[0], r0
+; CHECK-NEON-NEXT:    vmov r0, s4
+; CHECK-NEON-NEXT:    vmov.32 d11[1], r0
+; CHECK-NEON-NEXT:    vmov r0, s0
+; CHECK-NEON-NEXT:    vmov.32 d10[1], r0
+; CHECK-NEON-NEXT:    vmin.s32 q9, q5, q8
+; CHECK-NEON-NEXT:    vmax.s32 q0, q9, q8
+; CHECK-NEON-NEXT:    vpop {d8, d9, d10, d11}
+; CHECK-NEON-NEXT:    pop {r4, pc}
+;
+; CHECK-FP16-LABEL: ustest_f16i32_nsat:
+; CHECK-FP16:       @ %bb.0: @ %entry
+; CHECK-FP16-NEXT:    vmovx.f16 s2, s0
+; CHECK-FP16-NEXT:    vcvt.s32.f16 s6, s0
+; CHECK-FP16-NEXT:    vcvt.s32.f16 s0, s1
+; CHECK-FP16-NEXT:    vmovx.f16 s4, s1
+; CHECK-FP16-NEXT:    vmov r0, s0
+; CHECK-FP16-NEXT:    vcvt.s32.f16 s4, s4
+; CHECK-FP16-NEXT:    vcvt.s32.f16 s2, s2
+; CHECK-FP16-NEXT:    vmov.i32 q9, #0x0
+; CHECK-FP16-NEXT:    vmov.32 d17[0], r0
+; CHECK-FP16-NEXT:    vmov r0, s6
+; CHECK-FP16-NEXT:    vmov.32 d16[0], r0
+; CHECK-FP16-NEXT:    vmov r0, s4
+; CHECK-FP16-NEXT:    vmov.32 d17[1], r0
+; CHECK-FP16-NEXT:    vmov r0, s2
+; CHECK-FP16-NEXT:    vmov.32 d16[1], r0
+; CHECK-FP16-NEXT:    vmin.s32 q8, q8, q9
+; CHECK-FP16-NEXT:    vmax.s32 q0, q8, q9
+; CHECK-FP16-NEXT:    bx lr
+entry:
+  %conv = fptosi <4 x half> %x to <4 x i32>
+  %spec.store.select = call <4 x i32> @llvm.smin.v4i32(<4 x i32> zeroinitializer, <4 x i32> %conv)
+  %spec.store.select7 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %spec.store.select, <4 x i32> zeroinitializer)
+  ret <4 x i32> %spec.store.select7
+}
+
 declare <2 x i32> @llvm.smin.v2i32(<2 x i32>, <2 x i32>)
 declare <2 x i32> @llvm.smax.v2i32(<2 x i32>, <2 x i32>)
 declare <2 x i32> @llvm.umin.v2i32(<2 x i32>, <2 x i32>)
diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat.ll b/llvm/test/CodeGen/RISCV/fpclamptosat.ll
index 7bc58c43beb78..cb2555731e133 100644
--- a/llvm/test/CodeGen/RISCV/fpclamptosat.ll
+++ b/llvm/test/CodeGen/RISCV/fpclamptosat.ll
@@ -436,8 +436,8 @@ entry:
   ret i32 %conv6
 }
 
-define i32 @utesth_f16i32(half %x) {
-; RV32-LABEL: utesth_f16i32:
+define i32 @utest_f16i32(half %x) {
+; RV32-LABEL: utest_f16i32:
 ; RV32:       # %bb.0: # %entry
 ; RV32-NEXT:    addi sp, sp, -16
 ; RV32-NEXT:    .cfi_def_cfa_offset 16
@@ -456,7 +456,7 @@ define i32 @utesth_f16i32(half %x) {
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
 ;
-; RV64-LABEL: utesth_f16i32:
+; RV64-LABEL: utest_f16i32:
 ; RV64:       # %bb.0: # %entry
 ; RV64-NEXT:    addi sp, sp, -16
 ; RV64-NEXT:    .cfi_def_cfa_offset 16
@@ -974,8 +974,8 @@ entry:
   ret i16 %conv6
 }
 
-define i16 @utesth_f16i16(half %x) {
-; RV32-LABEL: utesth_f16i16:
+define i16 @utest_f16i16(half %x) {
+; RV32-LABEL: utest_f16i16:
 ; RV32:       # %bb.0: # %entry
 ; RV32-NEXT:    addi sp, sp, -16
 ; RV32-NEXT:    .cfi_def_cfa_offset 16
@@ -995,7 +995,7 @@ define i16 @utesth_f16i16(half %x) {
 ; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
 ;
-; RV64-LABEL: utesth_f16i16:
+; RV64-LABEL: utest_f16i16:
 ; RV64:       # %bb.0: # %entry
 ; RV64-NEXT:    addi sp, sp, -16
 ; RV64-NEXT:    .cfi_def_cfa_offset 16
@@ -3829,43 +3829,48 @@ entry:
   ret i64 %conv6
 }
 
-define i32 @ustest_f16const_i32() {
-; CHECK-LABEL: ustest_f16const_i32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ecx
-; CHECK-NEXT:    movl %ecx, %eax
-; CHECK-NEXT:    sarl $31, %eax
-; CHECK-NEXT:    xorl %edx, %edx
-; CHECK-NEXT:    andl %ecx, %eax
-; CHECK-NEXT:    cmovlel %edx, %eax
-; CHECK-NEXT:    retq
-; RV32-LABEL: ustest_f16const_i32:
+define i32 @ustest_f16i32_nsat(half %x) {
+; RV32-LABEL: ustest_f16i32_nsat:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, 523264
-; RV32-NEXT:    fmv.w.x fa5, a0
-; RV32-NEXT:    fcvt.w.s a0, fa5, rtz
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    call __extendhfsf2
+; RV32-NEXT:    fcvt.w.s a0, fa0, rtz
 ; RV32-NEXT:    srai a1, a0, 31
 ; RV32-NEXT:    and a0, a1, a0
 ; RV32-NEXT:    sgtz a1, a0
 ; RV32-NEXT:    neg a1, a1
 ; RV32-NEXT:    and a0, a1, a0
+; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT:    .cfi_restore ra
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
 ;
-; RV64-LABEL: ustest_f16const_i32:
+; RV64-LABEL: ustest_f16i32_nsat:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, 523264
-; RV64-NEXT:    fmv.w.x fa5, a0
-; RV64-NEXT:    fcvt.l.s a0, fa5, rtz
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT:    .cfi_offset ra, -8
+; RV64-NEXT:    call __extendhfsf2
+; RV64-NEXT:    fcvt.l.s a0, fa0, rtz
 ; RV64-NEXT:    srai a1, a0, 63
 ; RV64-NEXT:    and a0, a1, a0
 ; RV64-NEXT:    sgtz a1, a0
 ; RV64-NEXT:    neg a1, a1
 ; RV64-NEXT:    and a0, a1, a0
+; RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT:    .cfi_restore ra
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    .cfi_def_cfa_offset 0
 ; RV64-NEXT:    ret
-  %1 = fptosi half 0xH7E00 to i32
-  %2 = call i32 @llvm.smin.i32(i32 0, i32 %1)
-  %3 = call i32 @llvm.smax.i32(i32 %2, i32 0)
-  ret i32 %3
+  %conv = fptosi half %x to i32
+  %spec.store.select = call i32 @llvm.smin.i32(i32 0, i32 %conv)
+  %spec.store.select7 = call i32 @llvm.smax.i32(i32 %spec.store.select, i32 0)
+  ret i32 %spec.store.select7
 }
 
 declare i32 @llvm.smin.i32(i32, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
index aba9d37bfaa04..f597762521006 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
@@ -519,8 +519,8 @@ entry:
   ret <4 x i32> %conv6
 }
 
-define <4 x i32> @utesth_f16i32(<4 x half> %x) {
-; CHECK-NOV-LABEL: utesth_f16i32:
+define <4 x i32> @utest_f16i32(<4 x half> %x) {
+; CHECK-NOV-LABEL: utest_f16i32:
 ; CHECK-NOV:       # %bb.0: # %entry
 ; CHECK-NOV-NEXT:    addi sp, sp, -64
 ; CHECK-NOV-NEXT:    .cfi_def_cfa_offset 64
@@ -610,7 +610,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) {
 ; CHECK-NOV-NEXT:    bgeu a3, a1, .LBB7_4
 ; CHECK-NOV-NEXT:    j .LBB7_5
 ;
-; CHECK-V-LABEL: utesth_f16i32:
+; CHECK-V-LABEL: utest_f16i32:
 ; CHECK-V:       # %bb.0: # %entry
 ; CHECK-V-NEXT:    addi sp, sp, -48
 ; CHECK-V-NEXT:    .cfi_def_cfa_offset 48
@@ -1594,8 +1594,8 @@ entry:
   ret <8 x i16> %conv6
 }
 
-define <8 x i16> @utesth_f16i16(<8 x half> %x) {
-; CHECK-NOV-LABEL: utesth_f16i16:
+define <8 x i16> @utest_f16i16(<8 x half> %x) {
+; CHECK-NOV-LABEL: utest_f16i16:
 ; CHECK-NOV:       # %bb.0: # %entry
 ; CHECK-NOV-NEXT:    addi sp, sp, -128
 ; CHECK-NOV-NEXT:    .cfi_def_cfa_offset 128
@@ -1765,7 +1765,7 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) {
 ; CHECK-NOV-NEXT:    bgeu a7, a3, .LBB16_8
 ; CHECK-NOV-NEXT:    j .LBB16_9
 ;
-; CHECK-V-LABEL: utesth_f16i16:
+; CHECK-V-LABEL: utest_f16i16:
 ; CHECK-V:       # %bb.0: # %entry
 ; CHECK-V-NEXT:    addi sp, sp, -80
 ; CHECK-V-NEXT:    .cfi_def_cfa_offset 80
@@ -3332,8 +3332,8 @@ entry:
   ret <2 x i64> %conv6
 }
 
-define <2 x i64> @utesth_f16i64(<2 x half> %x) {
-; CHECK-NOV-LABEL: utesth_f16i64:
+define <2 x i64> @utest_f16i64(<2 x half> %x) {
+; CHECK-NOV-LABEL: utest_f16i64:
 ; CHECK-NOV:       # %bb.0: # %entry
 ; CHECK-NOV-NEXT:    addi sp, sp, -32
 ; CHECK-NOV-NEXT:    .cfi_def_cfa_offset 32
@@ -3373,7 +3373,7 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) {
 ; CHECK-NOV-NEXT:    .cfi_def_cfa_offset 0
 ; CHECK-NOV-NEXT:    ret
 ;
-; CHECK-V-LABEL: utesth_f16i64:
+; CHECK-V-LABEL: utest_f16i64:
 ; CHECK-V:       # %bb.0: # %entry
 ; CHECK-V-NEXT:    addi sp, sp, -32
 ; CHECK-V-NEXT:    .cfi_def_cfa_offset 32
@@ -4074,8 +4074,8 @@ entry:
   ret <4 x i32> %conv6
 }
 
-define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
-; CHECK-NOV-LABEL: utesth_f16i32_mm:
+define <4 x i32> @utest_f16i32_mm(<4 x half> %x) {
+; CHECK-NOV-LABEL: utest_f16i32_mm:
 ; CHECK-NOV:       # %bb.0: # %entry
 ; CHECK-NOV-NEXT:    addi sp, sp, -64
 ; CHECK-NOV-NEXT:    .cfi_def_cfa_offset 64
@@ -4165,7 +4165,7 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
 ; CHECK-NOV-NEXT:    bgeu a3, a1, .LBB34_4
 ; CHECK-NOV-NEXT:    j .LBB34_5
 ;
-; CHECK-V-LABEL: utesth_f16i32_mm:
+; CHECK-V-LABEL: utest_f16i32_mm:
 ; CHECK-V:       # %bb.0: # %entry
 ; CHECK-V-NEXT:    addi sp, sp, -48
 ; CHECK-V-NEXT:    .cfi_def_cfa_offset 48
@@ -5134,8 +5134,8 @@ entry:
   ret <8 x i16> %conv6
 }
 
-define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
-; CHECK-NOV-LABEL: utesth_f16i16_mm:
+define <8 x i16> @utest_f16i16_mm(<8 x half> %x) {
+; CHECK-NOV-LABEL: utest_f16i16_mm:
 ; CHECK-NOV:       # %bb.0: # %entry
 ; CHECK-NOV-NEXT:    addi sp, sp, -128
 ; CHECK-NOV-NEXT:    .cfi_def_cfa_offset 128
@@ -5305,7 +5305,7 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
 ; CHECK-NOV-NEXT:    bgeu a7, a3, .LBB43_8
 ; CHECK-NOV-NEXT:    j .LBB43_9
 ;
-; CHECK-V-LABEL: utesth_f16i16_mm:
+; CHECK-V-LABEL: utest_f16i16_mm:
 ; CHECK-V:       # %bb.0: # %entry
 ; CHECK-V-NEXT:    addi sp, sp, -80
 ; CHECK-V-NEXT:    .cfi_def_cfa_offset 80
@@ -6837,8 +6837,8 @@ entry:
   ret <2 x i64> %conv6
 }
 
-define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
-; CHECK-NOV-LABEL: utesth_f16i64_mm:
+define <2 x i64> @utest_f16i64_mm(<2 x half> %x) {
+; CHECK-NOV-LABEL: utest_f16i64_mm:
 ; CHECK-NOV:       # %bb.0: # %entry
 ; CHECK-NOV-NEXT:    addi sp, sp, -32
 ; CHECK-NOV-NEXT:    .cfi_def_cfa_offset 32
@@ -6877,7 +6877,7 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
 ; CHECK-NOV-NEXT:    .cfi_def_cfa_offset 0
 ; CHECK-NOV-NEXT:    ret
 ;
-; CHECK-V-LABEL: utesth_f16i64_mm:
+; CHECK-V-LABEL: utest_f16i64_mm:
 ; CHECK-V:       # %bb.0: # %entry
 ; CHECK-V-NEXT:    addi sp, sp, -32
 ; CHECK-V-NEXT:    .cfi_def_cfa_offset 32
@@ -7048,6 +7048,172 @@ entry:
   ret <2 x i64> %conv6
 }
 
+; i32 non saturate
+
+define <4 x i32> @ustest_f16i32_nsat(<4 x half> %x) {
+; CHECK-NOV-LABEL: ustest_f16i32_nsat:
+; CHECK-NOV:       # %bb.0: # %entry
+; CHECK-NOV-NEXT:    addi sp, sp, -64
+; CHECK-NOV-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NOV-NEXT:    sd ra, 56(sp) # 8-byte Folded Spill
+; CHECK-NOV-NEXT:    sd s0, 48(sp) # 8-byte Folded Spill
+; CHECK-NOV-NEXT:    sd s1, 40(sp) # 8-byte Folded Spill
+; CHECK-NOV-NEXT:    sd s2, 32(sp) # 8-byte Folded Spill
+; CHECK-NOV-NEXT:    sd s3, 24(sp) # 8-byte Folded Spill
+; CHECK-NOV-NEXT:    fsd fs0, 16(sp) # 8-byte Folded Spill
+; CHECK-NOV-NEXT:    fsd fs1, 8(sp) # 8-byte Folded Spill
+; CHECK-NOV-NEXT:    .cfi_offset ra, -8
+; CHECK-NOV-NEXT:    .cfi_offset s0, -16
+; CHECK-NOV-NEXT:    .cfi_offset s1, -24
+; CHECK-NOV-NEXT:    .cfi_offset s2, -32
+; CHECK-NOV-NEXT:    .cfi_offset s3, -40
+; CHECK-NOV-NEXT:    .cfi_offset fs0, -48
+; CHECK-NOV-NEXT:    .cfi_offset fs1, -56
+; CHECK-NOV-NEXT:    lhu s1, 0(a1)
+; CHECK-NOV-NEXT:    lhu s2, 8(a1)
+; CHECK-NOV-NEXT:    lhu a2, 16(a1)
+; CHECK-NOV-NEXT:    lhu s3, 24(a1)
+; CHECK-NOV-NEXT:    mv s0, a0
+; CHECK-NOV-NEXT:    fmv.w.x fa0, a2
+; CHECK-NOV-NEXT:    call __extendhfsf2
+; CHECK-NOV-NEXT:    fmv.s fs0, fa0
+; CHECK-NOV-NEXT:    fmv.w.x fa0, s2
+; CHECK-NOV-NEXT:    call __extendhfsf2
+; CHECK-NOV-NEXT:    fmv.s fs1, fa0
+; CHECK-NOV-NEXT:    fmv.w.x fa0, s1
+; CHECK-NOV-NEXT:    call __extendhfsf2
+; CHECK-NOV-NEXT:    fcvt.l.s s1, fa0, rtz
+; CHECK-NOV-NEXT:    fcvt.l.s s2, fs1, rtz
+; CHECK-NOV-NEXT:    fmv.w.x fa0, s3
+; CHECK-NOV-NEXT:    fcvt.l.s s3, fs0, rtz
+; CHECK-NOV-NEXT:    call __extendhfsf2
+; CHECK-NOV-NEXT:    fcvt.l.s a0, fa0, rtz
+; CHECK-NOV-NEXT:    srai a1, s3, 63
+; CHECK-NOV-NEXT:    and a1, a1, s3
+; CHECK-NOV-NEXT:    srai a2, s2, 63
+; CHECK-NOV-NEXT:    and a2, a2, s2
+; CHECK-NOV-NEXT:    srai a3, s1, 63
+; CHECK-NOV-NEXT:    and a3, a3, s1
+; CHECK-NOV-NEXT:    srai a4, a0, 63
+; CHECK-NOV-NEXT:    and a0, a4, a0
+; CHECK-NOV-NEXT:    sgtz a4, a3
+; CHECK-NOV-NEXT:    neg a4, a4
+; CHECK-NOV-NEXT:    and a3, a4, a3
+; CHECK-NOV-NEXT:    sgtz a4, a2
+; CHECK-NOV-NEXT:    neg a4, a4
+; CHECK-NOV-NEXT:    and a2, a4, a2
+; CHECK-NOV-NEXT:    sgtz a4, a1
+; CHECK-NOV-NEXT:    neg a4, a4
+; CHECK-NOV-NEXT:    and a1, a4, a1
+; CHECK-NOV-NEXT:    sgtz a4, a0
+; CHECK-NOV-NEXT:    neg a4, a4
+; CHECK-NOV-NEXT:    and a0, a4, a0
+; CHECK-NOV-NEXT:    sw a3, 0(s0)
+; CHECK-NOV-NEXT:    sw a2, 4(s0)
+; CHECK-NOV-NEXT:    sw a1, 8(s0)
+; CHECK-NOV-NEXT:    sw a0, 12(s0)
+; CHECK-NOV-NEXT:    ld ra, 56(sp) # 8-byte Folded Reload
+; CHECK-NOV-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
+; CHECK-NOV-NEXT:    ld s1, 40(sp) # 8-byte Folded Reload
+; CHECK-NOV-NEXT:    ld s2, 32(sp) # 8-byte Folded Reload
+; CHECK-NOV-NEXT:    ld s3, 24(sp) # 8-byte Folded Reload
+; CHECK-NOV-NEXT:    fld fs0, 16(sp) # 8-byte Folded Reload
+; CHECK-NOV-NEXT:    fld fs1, 8(sp) # 8-byte Folded Reload
+; CHECK-NOV-NEXT:    .cfi_restore ra
+; CHECK-NOV-NEXT:    .cfi_restore s0
+; CHECK-NOV-NEXT:    .cfi_restore s1
+; CHECK-NOV-NEXT:    .cfi_restore s2
+; CHECK-NOV-NEXT:    .cfi_restore s3
+; CHECK-NOV-NEXT:    .cfi_restore fs0
+; CHECK-NOV-NEXT:    .cfi_restore fs1
+; CHECK-NOV-NEXT:    addi sp, sp, 64
+; CHECK-NOV-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-NOV-NEXT:    ret
+;
+; CHECK-V-LABEL: ustest_f16i32_nsat:
+; CHECK-V:       # %bb.0: # %entry
+; CHECK-V-NEXT:    addi sp, sp, -48
+; CHECK-V-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-V-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT:    .cfi_offset ra, -8
+; CHECK-V-NEXT:    .cfi_offset s0, -16
+; CHECK-V-NEXT:    .cfi_offset s1, -24
+; CHECK-V-NEXT:    .cfi_offset s2, -32
+; CHECK-V-NEXT:    csrr a1, vlenb
+; CHECK-V-NEXT:    slli a1, a1, 1
+; CHECK-V-NEXT:    sub sp, sp, a1
+; CHECK-V-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb
+; CHECK-V-NEXT:    lhu s0, 0(a0)
+; CHECK-V-NEXT:    lhu s1, 8(a0)
+; CHECK-V-NEXT:    lhu s2, 16(a0)
+; CHECK-V-NEXT:    lhu a0, 24(a0)
+; CHECK-V-NEXT:    fmv.w.x fa0, a0
+; CHECK-V-NEXT:    call __extendhfsf2
+; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
+; CHECK-V-NEXT:    fmv.w.x fa0, s2
+; CHECK-V-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-V-NEXT:    vmv.s.x v8, a0
+; CHECK-V-NEXT:    addi a0, sp, 16
+; CHECK-V-NEXT:    vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
+; CHECK-V-NEXT:    call __extendhfsf2
+; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
+; CHECK-V-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-V-NEXT:    vmv.s.x v8, a0
+; CHECK-V-NEXT:    addi a0, sp, 16
+; CHECK-V-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; CHECK-V-NEXT:    vslideup.vi v8, v9, 1
+; CHECK-V-NEXT:    csrr a0, vlenb
+; CHECK-V-NEXT:    add a0, sp, a0
+; CHECK-V-NEXT:    addi a0, a0, 16
+; CHECK-V-NEXT:    vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
+; CHECK-V-NEXT:    fmv.w.x fa0, s1
+; CHECK-V-NEXT:    call __extendhfsf2
+; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
+; CHECK-V-NEXT:    fmv.w.x fa0, s0
+; CHECK-V-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-V-NEXT:    vmv.s.x v8, a0
+; CHECK-V-NEXT:    addi a0, sp, 16
+; CHECK-V-NEXT:    vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
+; CHECK-V-NEXT:    call __extendhfsf2
+; CHECK-V-NEXT:    fcvt.l.s a0, fa0, rtz
+; CHECK-V-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-V-NEXT:    vmv.s.x v8, a0
+; CHECK-V-NEXT:    addi a0, sp, 16
+; CHECK-V-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; CHECK-V-NEXT:    vslideup.vi v8, v9, 1
+; CHECK-V-NEXT:    csrr a0, vlenb
+; CHECK-V-NEXT:    add a0, sp, a0
+; CHECK-V-NEXT:    addi a0, a0, 16
+; CHECK-V-NEXT:    vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; CHECK-V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-V-NEXT:    vslideup.vi v8, v9, 2
+; CHECK-V-NEXT:    vmin.vx v8, v8, zero
+; CHECK-V-NEXT:    vmax.vx v8, v8, zero
+; CHECK-V-NEXT:    csrr a0, vlenb
+; CHECK-V-NEXT:    slli a0, a0, 1
+; CHECK-V-NEXT:    add sp, sp, a0
+; CHECK-V-NEXT:    .cfi_def_cfa sp, 48
+; CHECK-V-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT:    .cfi_restore ra
+; CHECK-V-NEXT:    .cfi_restore s0
+; CHECK-V-NEXT:    .cfi_restore s1
+; CHECK-V-NEXT:    .cfi_restore s2
+; CHECK-V-NEXT:    addi sp, sp, 48
+; CHECK-V-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-V-NEXT:    ret
+entry:
+  %conv = fptosi <4 x half> %x to <4 x i32>
+  %spec.store.select = call <4 x i32> @llvm.smin.v4i32(<4 x i32> zeroinitializer, <4 x i32> %conv)
+  %spec.store.select7 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %spec.store.select, <4 x i32> zeroinitializer)
+  ret <4 x i32> %spec.store.select7
+}
+
 declare <2 x i32> @llvm.smin.v2i32(<2 x i32>, <2 x i32>)
 declare <2 x i32> @llvm.smax.v2i32(<2 x i32>, <2 x i32>)
 declare <2 x i32> @llvm.umin.v2i32(<2 x i32>, <2 x i32>)
diff --git a/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll b/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll
index 137994ceac132..59f3edc7168be 100644
--- a/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll
+++ b/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll
@@ -136,9 +136,9 @@ entry:
   ret i32 %conv6
 }
 
-define i32 @utesth_f16i32(half %x) {
-; CHECK-LABEL: utesth_f16i32:
-; CHECK:         .functype utesth_f16i32 (f32) -> (i32)
+define i32 @utest_f16i32(half %x) {
+; CHECK-LABEL: utest_f16i32:
+; CHECK:         .functype utest_f16i32 (f32) -> (i32)
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    local.get 0
 ; CHECK-NEXT:    call __truncsfhf2
@@ -153,9 +153,9 @@ entry:
   ret i32 %conv6
 }
 
-define i32 @utesth_f16i32_cse(half %x) {
-; CHECK-LABEL: utesth_f16i32_cse:
-; CHECK:         .functype utesth_f16i32_cse (f32) -> (i32)
+define i32 @utest_f16i32_cse(half %x) {
+; CHECK-LABEL: utest_f16i32_cse:
+; CHECK:         .functype utest_f16i32_cse (f32) -> (i32)
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    local.get 0
 ; CHECK-NEXT:    call __truncsfhf2
@@ -403,9 +403,9 @@ entry:
   ret i16 %conv6
 }
 
-define i16 @utesth_f16i16(half %x) {
-; CHECK-LABEL: utesth_f16i16:
-; CHECK:         .functype utesth_f16i16 (f32) -> (i32)
+define i16 @utest_f16i16(half %x) {
+; CHECK-LABEL: utest_f16i16:
+; CHECK:         .functype utest_f16i16 (f32) -> (i32)
 ; CHECK-NEXT:    .local i32
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    local.get 0
@@ -427,9 +427,9 @@ entry:
   ret i16 %conv6
 }
 
-define i16 @utesth_f16i16_cse(half %x) {
-; CHECK-LABEL: utesth_f16i16_cse:
-; CHECK:         .functype utesth_f16i16_cse (f32) -> (i32)
+define i16 @utest_f16i16_cse(half %x) {
+; CHECK-LABEL: utest_f16i16_cse:
+; CHECK:         .functype utest_f16i16_cse (f32) -> (i32)
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    local.get 0
 ; CHECK-NEXT:    call __truncsfhf2
@@ -880,9 +880,9 @@ entry:
   ret i64 %conv6
 }
 
-define i64 @utesth_f16i64(half %x) {
-; CHECK-LABEL: utesth_f16i64:
-; CHECK:         .functype utesth_f16i64 (f32) -> (i64)
+define i64 @utest_f16i64(half %x) {
+; CHECK-LABEL: utest_f16i64:
+; CHECK:         .functype utest_f16i64 (f32) -> (i64)
 ; CHECK-NEXT:    .local i32, i64, i64
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    global.get __stack_pointer
@@ -919,9 +919,9 @@ entry:
   ret i64 %conv6
 }
 
-define i64 @utesth_f16i64_cse(half %x) {
-; CHECK-LABEL: utesth_f16i64_cse:
-; CHECK:         .functype utesth_f16i64_cse (f32) -> (i64)
+define i64 @utest_f16i64_cse(half %x) {
+; CHECK-LABEL: utest_f16i64_cse:
+; CHECK:         .functype utest_f16i64_cse (f32) -> (i64)
 ; CHECK-NEXT:    .local i32, i64
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    global.get __stack_pointer
@@ -1118,9 +1118,9 @@ entry:
   ret i32 %conv6
 }
 
-define i32 @utesth_f16i32_mm(half %x) {
-; CHECK-LABEL: utesth_f16i32_mm:
-; CHECK:         .functype utesth_f16i32_mm (f32) -> (i32)
+define i32 @utest_f16i32_mm(half %x) {
+; CHECK-LABEL: utest_f16i32_mm:
+; CHECK:         .functype utest_f16i32_mm (f32) -> (i32)
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    local.get 0
 ; CHECK-NEXT:    call __truncsfhf2
@@ -1353,9 +1353,9 @@ entry:
   ret i16 %conv6
 }
 
-define i16 @utesth_f16i16_mm(half %x) {
-; CHECK-LABEL: utesth_f16i16_mm:
-; CHECK:         .functype utesth_f16i16_mm (f32) -> (i32)
+define i16 @utest_f16i16_mm(half %x) {
+; CHECK-LABEL: utest_f16i16_mm:
+; CHECK:         .functype utest_f16i16_mm (f32) -> (i32)
 ; CHECK-NEXT:    .local i32
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    local.get 0
@@ -1637,9 +1637,9 @@ entry:
   ret i64 %conv6
 }
 
-define i64 @utesth_f16i64_mm(half %x) {
-; CHECK-LABEL: utesth_f16i64_mm:
-; CHECK:         .functype utesth_f16i64_mm (f32) -> (i64)
+define i64 @utest_f16i64_mm(half %x) {
+; CHECK-LABEL: utest_f16i64_mm:
+; CHECK:         .functype utest_f16i64_mm (f32) -> (i64)
 ; CHECK-NEXT:    .local i32, i64, i64
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    global.get __stack_pointer
@@ -1724,9 +1724,9 @@ entry:
   ret i64 %conv6
 }
 
-define i64 @utesth_f16i64_mm_cse(half %x) {
-; CHECK-LABEL: utesth_f16i64_mm_cse:
-; CHECK:         .functype utesth_f16i64_mm_cse (f32) -> (i64)
+define i64 @utest_f16i64_mm_cse(half %x) {
+; CHECK-LABEL: utest_f16i64_mm_cse:
+; CHECK:         .functype utest_f16i64_mm_cse (f32) -> (i64)
 ; CHECK-NEXT:    .local i32, i64
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    global.get __stack_pointer
@@ -1754,6 +1754,35 @@ entry:
   ret i64 %conv6
 }
 
+; i32 non saturate
+
+define i32 @ustest_f16i32_nsat(half %x) {
+; CHECK-LABEL: ustest_f16i32_nsat:
+; CHECK:         .functype ustest_f16i32_nsat (f32) -> (i32)
+; CHECK-NEXT:    .local i32
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    call __truncsfhf2
+; CHECK-NEXT:    call __extendhfsf2
+; CHECK-NEXT:    i32.trunc_sat_f32_s
+; CHECK-NEXT:    local.tee 1
+; CHECK-NEXT:    i32.const 31
+; CHECK-NEXT:    i32.shr_s
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    i32.and
+; CHECK-NEXT:    local.tee 1
+; CHECK-NEXT:    i32.const 0
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    i32.const 0
+; CHECK-NEXT:    i32.gt_s
+; CHECK-NEXT:    i32.select
+; CHECK-NEXT:    # fallthrough-return
+  %conv = fptosi half %x to i32
+  %spec.store.select = call i32 @llvm.smin.i32(i32 0, i32 %conv)
+  %spec.store.select7 = call i32 @llvm.smax.i32(i32 %spec.store.select, i32 0)
+  ret i32 %spec.store.select7
+}
+
 declare i32 @llvm.smin.i32(i32, i32)
 declare i32 @llvm.smax.i32(i32, i32)
 declare i32 @llvm.umin.i32(i32, i32)
diff --git a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
index 7190e162eb010..52f57dc7d1a3e 100644
--- a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
@@ -209,9 +209,9 @@ entry:
   ret <4 x i32> %conv6
 }
 
-define <4 x i32> @utesth_f16i32(<4 x half> %x) {
-; CHECK-LABEL: utesth_f16i32:
-; CHECK:         .functype utesth_f16i32 (f32, f32, f32, f32) -> (v128)
+define <4 x i32> @utest_f16i32(<4 x half> %x) {
+; CHECK-LABEL: utest_f16i32:
+; CHECK:         .functype utest_f16i32 (f32, f32, f32, f32) -> (v128)
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    local.get 1
 ; CHECK-NEXT:    call __truncsfhf2
@@ -513,9 +513,9 @@ entry:
   ret <8 x i16> %conv6
 }
 
-define <8 x i16> @utesth_f16i16(<8 x half> %x) {
-; CHECK-LABEL: utesth_f16i16:
-; CHECK:         .functype utesth_f16i16 (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128)
+define <8 x i16> @utest_f16i16(<8 x half> %x) {
+; CHECK-LABEL: utest_f16i16:
+; CHECK:         .functype utest_f16i16 (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128)
 ; CHECK-NEXT:    .local v128
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    local.get 5
@@ -1295,9 +1295,9 @@ entry:
   ret <2 x i64> %conv6
 }
 
-define <2 x i64> @utesth_f16i64(<2 x half> %x) {
-; CHECK-LABEL: utesth_f16i64:
-; CHECK:         .functype utesth_f16i64 (f32, f32) -> (v128)
+define <2 x i64> @utest_f16i64(<2 x half> %x) {
+; CHECK-LABEL: utest_f16i64:
+; CHECK:         .functype utest_f16i64 (f32, f32) -> (v128)
 ; CHECK-NEXT:    .local i32, i64, i64, i64, i64
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    global.get __stack_pointer
@@ -1649,9 +1649,9 @@ entry:
   ret <4 x i32> %conv6
 }
 
-define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
-; CHECK-LABEL: utesth_f16i32_mm:
-; CHECK:         .functype utesth_f16i32_mm (f32, f32, f32, f32) -> (v128)
+define <4 x i32> @utest_f16i32_mm(<4 x half> %x) {
+; CHECK-LABEL: utest_f16i32_mm:
+; CHECK:         .functype utest_f16i32_mm (f32, f32, f32, f32) -> (v128)
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    local.get 1
 ; CHECK-NEXT:    call __truncsfhf2
@@ -1938,9 +1938,9 @@ entry:
   ret <8 x i16> %conv6
 }
 
-define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
-; CHECK-LABEL: utesth_f16i16_mm:
-; CHECK:         .functype utesth_f16i16_mm (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128)
+define <8 x i16> @utest_f16i16_mm(<8 x half> %x) {
+; CHECK-LABEL: utest_f16i16_mm:
+; CHECK:         .functype utest_f16i16_mm (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128)
 ; CHECK-NEXT:    .local v128
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    local.get 5
@@ -2673,9 +2673,9 @@ entry:
   ret <2 x i64> %conv6
 }
 
-define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
-; CHECK-LABEL: utesth_f16i64_mm:
-; CHECK:         .functype utesth_f16i64_mm (f32, f32) -> (v128)
+define <2 x i64> @utest_f16i64_mm(<2 x half> %x) {
+; CHECK-LABEL: utest_f16i64_mm:
+; CHECK:         .functype utest_f16i64_mm (f32, f32) -> (v128)
 ; CHECK-NEXT:    .local i32, i64, i64, i64, i64
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    global.get __stack_pointer
@@ -2810,6 +2810,48 @@ entry:
   ret <2 x i64> %conv6
 }
 
+; i32 non saturate
+
+define <4 x i32> @ustest_f16i32_nsat(<4 x half> %x) {
+; CHECK-LABEL: ustest_f16i32_nsat:
+; CHECK:         .functype ustest_f16i32_nsat (f32, f32, f32, f32) -> (v128)
+; CHECK-NEXT:    .local v128
+; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    call __truncsfhf2
+; CHECK-NEXT:    call __extendhfsf2
+; CHECK-NEXT:    local.set 1
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    call __truncsfhf2
+; CHECK-NEXT:    call __extendhfsf2
+; CHECK-NEXT:    i32.trunc_sat_f32_s
+; CHECK-NEXT:    i32x4.splat
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    i32.trunc_sat_f32_s
+; CHECK-NEXT:    i32x4.replace_lane 1
+; CHECK-NEXT:    local.get 2
+; CHECK-NEXT:    call __truncsfhf2
+; CHECK-NEXT:    call __extendhfsf2
+; CHECK-NEXT:    i32.trunc_sat_f32_s
+; CHECK-NEXT:    i32x4.replace_lane 2
+; CHECK-NEXT:    local.get 3
+; CHECK-NEXT:    call __truncsfhf2
+; CHECK-NEXT:    call __extendhfsf2
+; CHECK-NEXT:    i32.trunc_sat_f32_s
+; CHECK-NEXT:    i32x4.replace_lane 3
+; CHECK-NEXT:    v128.const 0, 0, 0, 0
+; CHECK-NEXT:    local.tee 4
+; CHECK-NEXT:    i32x4.min_s
+; CHECK-NEXT:    local.get 4
+; CHECK-NEXT:    i32x4.max_s
+; CHECK-NEXT:    # fallthrough-return
+entry:
+  %conv = fptosi <4 x half> %x to <4 x i32>
+  %spec.store.select = call <4 x i32> @llvm.smin.v4i32(<4 x i32> zeroinitializer, <4 x i32> %conv)
+  %spec.store.select7 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %spec.store.select, <4 x i32> zeroinitializer)
+  ret <4 x i32> %spec.store.select7
+}
+
 declare <2 x i32> @llvm.smin.v2i32(<2 x i32>, <2 x i32>)
 declare <2 x i32> @llvm.smax.v2i32(<2 x i32>, <2 x i32>)
 declare <2 x i32> @llvm.umin.v2i32(<2 x i32>, <2 x i32>)
diff --git a/llvm/test/CodeGen/X86/fpclamptosat.ll b/llvm/test/CodeGen/X86/fpclamptosat.ll
index 65492e772cebf..67483be7a3fde 100644
--- a/llvm/test/CodeGen/X86/fpclamptosat.ll
+++ b/llvm/test/CodeGen/X86/fpclamptosat.ll
@@ -161,8 +161,8 @@ entry:
   ret i32 %conv6
 }
 
-define i32 @utesth_f16i32(half %x) nounwind {
-; CHECK-LABEL: utesth_f16i32:
+define i32 @utest_f16i32(half %x) nounwind {
+; CHECK-LABEL: utest_f16i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pushq %rax
 ; CHECK-NEXT:    callq __extendhfsf2@PLT
@@ -360,8 +360,8 @@ entry:
   ret i16 %conv6
 }
 
-define i16 @utesth_f16i16(half %x) nounwind {
-; CHECK-LABEL: utesth_f16i16:
+define i16 @utest_f16i16(half %x) nounwind {
+; CHECK-LABEL: utest_f16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pushq %rax
 ; CHECK-NEXT:    callq __extendhfsf2@PLT
@@ -566,8 +566,8 @@ entry:
   ret i64 %conv6
 }
 
-define i64 @utesth_f16i64(half %x) nounwind {
-; CHECK-LABEL: utesth_f16i64:
+define i64 @utest_f16i64(half %x) nounwind {
+; CHECK-LABEL: utest_f16i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pushq %rax
 ; CHECK-NEXT:    callq __fixunshfti@PLT
@@ -762,8 +762,8 @@ entry:
   ret i32 %conv6
 }
 
-define i32 @utesth_f16i32_mm(half %x) nounwind {
-; CHECK-LABEL: utesth_f16i32_mm:
+define i32 @utest_f16i32_mm(half %x) nounwind {
+; CHECK-LABEL: utest_f16i32_mm:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pushq %rax
 ; CHECK-NEXT:    callq __extendhfsf2@PLT
@@ -946,8 +946,8 @@ entry:
   ret i16 %conv6
 }
 
-define i16 @utesth_f16i16_mm(half %x) nounwind {
-; CHECK-LABEL: utesth_f16i16_mm:
+define i16 @utest_f16i16_mm(half %x) nounwind {
+; CHECK-LABEL: utest_f16i16_mm:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pushq %rax
 ; CHECK-NEXT:    callq __extendhfsf2@PLT
@@ -1131,8 +1131,8 @@ entry:
   ret i64 %conv6
 }
 
-define i64 @utesth_f16i64_mm(half %x) nounwind {
-; CHECK-LABEL: utesth_f16i64_mm:
+define i64 @utest_f16i64_mm(half %x) nounwind {
+; CHECK-LABEL: utest_f16i64_mm:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pushq %rax
 ; CHECK-NEXT:    callq __fixunshfti@PLT
@@ -1170,20 +1170,25 @@ entry:
   ret i64 %conv6
 }
 
-define i32 @ustest_f16const_i32() {
-; CHECK-LABEL: ustest_f16const_i32:
+; i32 non saturate
+
+define i32 @ustest_f16i32_nsat(half %x) nounwind {
+; CHECK-LABEL: ustest_f16i32_nsat:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ecx
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    callq __extendhfsf2@PLT
+; CHECK-NEXT:    cvttss2si %xmm0, %ecx
 ; CHECK-NEXT:    movl %ecx, %eax
 ; CHECK-NEXT:    sarl $31, %eax
 ; CHECK-NEXT:    xorl %edx, %edx
 ; CHECK-NEXT:    andl %ecx, %eax
 ; CHECK-NEXT:    cmovlel %edx, %eax
+; CHECK-NEXT:    popq %rcx
 ; CHECK-NEXT:    retq
-  %1 = fptosi half 0xH7E00 to i32
-  %2 = call i32 @llvm.smin.i32(i32 0, i32 %1)
-  %3 = call i32 @llvm.smax.i32(i32 %2, i32 0)
-  ret i32 %3
+  %conv = fptosi half %x to i32
+  %spec.store.select = call i32 @llvm.smin.i32(i32 0, i32 %conv)
+  %spec.store.select7 = call i32 @llvm.smax.i32(i32 %spec.store.select, i32 0)
+  ret i32 %spec.store.select7
 }
 
 declare i32 @llvm.smin.i32(i32, i32)
diff --git a/llvm/test/CodeGen/X86/fpclamptosat_vec.ll b/llvm/test/CodeGen/X86/fpclamptosat_vec.ll
index 1a2cfd69650b8..991ce33e58b4c 100644
--- a/llvm/test/CodeGen/X86/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/X86/fpclamptosat_vec.ll
@@ -747,8 +747,8 @@ entry:
   ret <4 x i32> %conv6
 }
 
-define <4 x i32> @utesth_f16i32(<4 x half> %x) nounwind {
-; SSE-LABEL: utesth_f16i32:
+define <4 x i32> @utest_f16i32(<4 x half> %x) nounwind {
+; SSE-LABEL: utest_f16i32:
 ; SSE:       # %bb.0: # %entry
 ; SSE-NEXT:    subq $72, %rsp
 ; SSE-NEXT:    movaps %xmm0, %xmm1
@@ -835,7 +835,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) nounwind {
 ; SSE-NEXT:    addq $72, %rsp
 ; SSE-NEXT:    retq
 ;
-; AVX2-LABEL: utesth_f16i32:
+; AVX2-LABEL: utest_f16i32:
 ; AVX2:       # %bb.0: # %entry
 ; AVX2-NEXT:    vpsrlq $48, %xmm0, %xmm1
 ; AVX2-NEXT:    vcvtph2ps %xmm1, %xmm2
@@ -893,7 +893,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) nounwind {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: utesth_f16i32:
+; AVX512-LABEL: utest_f16i32:
 ; AVX512:       # %bb.0: # %entry
 ; AVX512-NEXT:    vcvtph2ps %xmm0, %xmm0
 ; AVX512-NEXT:    vcvttps2uqq %ymm0, %zmm0
@@ -1338,8 +1338,8 @@ entry:
   ret <8 x i16> %conv6
 }
 
-define <8 x i16> @utesth_f16i16(<8 x half> %x) nounwind {
-; SSE-LABEL: utesth_f16i16:
+define <8 x i16> @utest_f16i16(<8 x half> %x) nounwind {
+; SSE-LABEL: utest_f16i16:
 ; SSE:       # %bb.0: # %entry
 ; SSE-NEXT:    subq $72, %rsp
 ; SSE-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
@@ -1436,7 +1436,7 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) nounwind {
 ; SSE-NEXT:    addq $72, %rsp
 ; SSE-NEXT:    retq
 ;
-; AVX2-LABEL: utesth_f16i16:
+; AVX2-LABEL: utest_f16i16:
 ; AVX2:       # %bb.0: # %entry
 ; AVX2-NEXT:    vcvtph2ps %xmm0, %ymm0
 ; AVX2-NEXT:    vbroadcastss {{.*#+}} ymm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
@@ -1453,7 +1453,7 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) nounwind {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: utesth_f16i16:
+; AVX512-LABEL: utest_f16i16:
 ; AVX512:       # %bb.0: # %entry
 ; AVX512-NEXT:    vcvtph2ps %xmm0, %ymm0
 ; AVX512-NEXT:    vcvttps2udq %ymm0, %ymm0
@@ -2456,8 +2456,8 @@ entry:
   ret <2 x i64> %conv6
 }
 
-define <2 x i64> @utesth_f16i64(<2 x half> %x) nounwind {
-; SSE-LABEL: utesth_f16i64:
+define <2 x i64> @utest_f16i64(<2 x half> %x) nounwind {
+; SSE-LABEL: utest_f16i64:
 ; SSE:       # %bb.0: # %entry
 ; SSE-NEXT:    pushq %r14
 ; SSE-NEXT:    pushq %rbx
@@ -2483,7 +2483,7 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) nounwind {
 ; SSE-NEXT:    popq %r14
 ; SSE-NEXT:    retq
 ;
-; AVX2-LABEL: utesth_f16i64:
+; AVX2-LABEL: utest_f16i64:
 ; AVX2:       # %bb.0: # %entry
 ; AVX2-NEXT:    pushq %r14
 ; AVX2-NEXT:    pushq %rbx
@@ -2508,7 +2508,7 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) nounwind {
 ; AVX2-NEXT:    popq %r14
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: utesth_f16i64:
+; AVX512-LABEL: utest_f16i64:
 ; AVX512:       # %bb.0: # %entry
 ; AVX512-NEXT:    pushq %r14
 ; AVX512-NEXT:    pushq %rbx
@@ -3359,8 +3359,8 @@ entry:
   ret <4 x i32> %conv6
 }
 
-define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) nounwind {
-; SSE-LABEL: utesth_f16i32_mm:
+define <4 x i32> @utest_f16i32_mm(<4 x half> %x) nounwind {
+; SSE-LABEL: utest_f16i32_mm:
 ; SSE:       # %bb.0: # %entry
 ; SSE-NEXT:    subq $72, %rsp
 ; SSE-NEXT:    movaps %xmm0, %xmm1
@@ -3447,7 +3447,7 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) nounwind {
 ; SSE-NEXT:    addq $72, %rsp
 ; SSE-NEXT:    retq
 ;
-; AVX2-LABEL: utesth_f16i32_mm:
+; AVX2-LABEL: utest_f16i32_mm:
 ; AVX2:       # %bb.0: # %entry
 ; AVX2-NEXT:    vpsrlq $48, %xmm0, %xmm1
 ; AVX2-NEXT:    vcvtph2ps %xmm1, %xmm2
@@ -3505,7 +3505,7 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) nounwind {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: utesth_f16i32_mm:
+; AVX512-LABEL: utest_f16i32_mm:
 ; AVX512:       # %bb.0: # %entry
 ; AVX512-NEXT:    vcvtph2ps %xmm0, %xmm0
 ; AVX512-NEXT:    vcvttps2uqq %ymm0, %zmm0
@@ -3935,8 +3935,8 @@ entry:
   ret <8 x i16> %conv6
 }
 
-define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) nounwind {
-; SSE-LABEL: utesth_f16i16_mm:
+define <8 x i16> @utest_f16i16_mm(<8 x half> %x) nounwind {
+; SSE-LABEL: utest_f16i16_mm:
 ; SSE:       # %bb.0: # %entry
 ; SSE-NEXT:    subq $72, %rsp
 ; SSE-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
@@ -4033,7 +4033,7 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) nounwind {
 ; SSE-NEXT:    addq $72, %rsp
 ; SSE-NEXT:    retq
 ;
-; AVX2-LABEL: utesth_f16i16_mm:
+; AVX2-LABEL: utest_f16i16_mm:
 ; AVX2:       # %bb.0: # %entry
 ; AVX2-NEXT:    vcvtph2ps %xmm0, %ymm0
 ; AVX2-NEXT:    vbroadcastss {{.*#+}} ymm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
@@ -4050,7 +4050,7 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) nounwind {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: utesth_f16i16_mm:
+; AVX512-LABEL: utest_f16i16_mm:
 ; AVX512:       # %bb.0: # %entry
 ; AVX512-NEXT:    vcvtph2ps %xmm0, %ymm0
 ; AVX512-NEXT:    vcvttps2udq %ymm0, %ymm0
@@ -4820,8 +4820,8 @@ entry:
   ret <2 x i64> %conv6
 }
 
-define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) nounwind {
-; SSE-LABEL: utesth_f16i64_mm:
+define <2 x i64> @utest_f16i64_mm(<2 x half> %x) nounwind {
+; SSE-LABEL: utest_f16i64_mm:
 ; SSE:       # %bb.0: # %entry
 ; SSE-NEXT:    pushq %r14
 ; SSE-NEXT:    pushq %rbx
@@ -4847,7 +4847,7 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) nounwind {
 ; SSE-NEXT:    popq %r14
 ; SSE-NEXT:    retq
 ;
-; AVX2-LABEL: utesth_f16i64_mm:
+; AVX2-LABEL: utest_f16i64_mm:
 ; AVX2:       # %bb.0: # %entry
 ; AVX2-NEXT:    pushq %r14
 ; AVX2-NEXT:    pushq %rbx
@@ -4872,7 +4872,7 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) nounwind {
 ; AVX2-NEXT:    popq %r14
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: utesth_f16i64_mm:
+; AVX512-LABEL: utest_f16i64_mm:
 ; AVX512:       # %bb.0: # %entry
 ; AVX512-NEXT:    pushq %r14
 ; AVX512-NEXT:    pushq %rbx
@@ -4974,6 +4974,63 @@ entry:
   ret <2 x i64> %conv6
 }
 
+; i32 non saturate
+
+define <4 x i32> @ustest_f16i32_nsat(<4 x half> %x) nounwind {
+; SSE-LABEL: ustest_f16i32_nsat:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    subq $72, %rsp
+; SSE-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT:    movdqa %xmm0, %xmm1
+; SSE-NEXT:    psrld $16, %xmm1
+; SSE-NEXT:    movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT:    movdqa %xmm0, %xmm1
+; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1]
+; SSE-NEXT:    movaps %xmm1, (%rsp) # 16-byte Spill
+; SSE-NEXT:    psrlq $48, %xmm0
+; SSE-NEXT:    callq __extendhfsf2@PLT
+; SSE-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
+; SSE-NEXT:    callq __extendhfsf2@PLT
+; SSE-NEXT:    unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; SSE-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; SSE-NEXT:    cvttps2dq %xmm0, %xmm0
+; SSE-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
+; SSE-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT:    callq __extendhfsf2@PLT
+; SSE-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT:    callq __extendhfsf2@PLT
+; SSE-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE-NEXT:    cvttps2dq %xmm1, %xmm0
+; SSE-NEXT:    punpcklqdq (%rsp), %xmm0 # 16-byte Folded Reload
+; SSE-NEXT:    # xmm0 = xmm0[0],mem[0]
+; SSE-NEXT:    pxor %xmm1, %xmm1
+; SSE-NEXT:    pxor %xmm2, %xmm2
+; SSE-NEXT:    pcmpgtd %xmm0, %xmm2
+; SSE-NEXT:    pand %xmm0, %xmm2
+; SSE-NEXT:    movdqa %xmm2, %xmm0
+; SSE-NEXT:    pcmpgtd %xmm1, %xmm0
+; SSE-NEXT:    pand %xmm2, %xmm0
+; SSE-NEXT:    addq $72, %rsp
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: ustest_f16i32_nsat:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
+; AVX-NEXT:    vcvttps2dq %xmm0, %xmm0
+; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
+entry:
+  %conv = fptosi <4 x half> %x to <4 x i32>
+  %spec.store.select = call <4 x i32> @llvm.smin.v4i32(<4 x i32> zeroinitializer, <4 x i32> %conv)
+  %spec.store.select7 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %spec.store.select, <4 x i32> zeroinitializer)
+  ret <4 x i32> %spec.store.select7
+}
+
 declare <2 x i32> @llvm.smin.v2i32(<2 x i32>, <2 x i32>)
 declare <2 x i32> @llvm.smax.v2i32(<2 x i32>, <2 x i32>)
 declare <2 x i32> @llvm.umin.v2i32(<2 x i32>, <2 x i32>)

>From baf5cdeb8a0a8f95c19dcf2e546ee6bac4f437c1 Mon Sep 17 00:00:00 2001
From: ningxinr <ningxinr at live.cn>
Date: Mon, 29 Sep 2025 13:36:49 -0700
Subject: [PATCH 3/3] Add the non-saturate comment to the ARM and RISC-V tests

---
 llvm/test/CodeGen/ARM/fpclamptosat.ll   | 2 ++
 llvm/test/CodeGen/RISCV/fpclamptosat.ll | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/llvm/test/CodeGen/ARM/fpclamptosat.ll b/llvm/test/CodeGen/ARM/fpclamptosat.ll
index 37f230abb7a14..a6f0a03fc7e5b 100644
--- a/llvm/test/CodeGen/ARM/fpclamptosat.ll
+++ b/llvm/test/CodeGen/ARM/fpclamptosat.ll
@@ -3985,6 +3985,8 @@ entry:
   ret i32 %spec.store.select7
 }
 
+; i32 non saturate
+
 define i32 @ustest_f16i32_nsat(half %x) {
 ; SOFT-LABEL: ustest_f16i32_nsat:
 ; SOFT:       @ %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat.ll b/llvm/test/CodeGen/RISCV/fpclamptosat.ll
index cb2555731e133..a0d1ecce74e04 100644
--- a/llvm/test/CodeGen/RISCV/fpclamptosat.ll
+++ b/llvm/test/CodeGen/RISCV/fpclamptosat.ll
@@ -3829,6 +3829,8 @@ entry:
   ret i64 %conv6
 }
 
+; i32 non saturate
+
 define i32 @ustest_f16i32_nsat(half %x) {
 ; RV32-LABEL: ustest_f16i32_nsat:
 ; RV32:       # %bb.0:
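
For readers skimming the new tests: every ustest_*_nsat function clamps with
smin against 0 and then smax against 0, so the clamp range collapses to the
single value 0 and the result is 0 for every input. Below is a minimal scalar
sketch of that pattern in LLVM IR; the function name is illustrative only and
is not part of the patch:

  ; smin(0, %v) is never positive, and smax(.., 0) then raises it back to 0,
  ; so this function returns 0 regardless of %v.
  declare i32 @llvm.smin.i32(i32, i32)
  declare i32 @llvm.smax.i32(i32, i32)

  define i32 @nsat_scalar_model(i32 %v) {
    %lo = call i32 @llvm.smin.i32(i32 0, i32 %v)
    %clamped = call i32 @llvm.smax.i32(i32 %lo, i32 0)
    ret i32 %clamped
  }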


