[llvm] 410d276 - [DAG] Add tests for fptosi.sat for various architectures. NFC
David Green via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 29 13:57:19 PST 2021
Author: David Green
Date: 2021-11-29T21:57:13Z
New Revision: 410d276400a9ee2440387d372db6b0f112853cc0
URL: https://github.com/llvm/llvm-project/commit/410d276400a9ee2440387d372db6b0f112853cc0
DIFF: https://github.com/llvm/llvm-project/commit/410d276400a9ee2440387d372db6b0f112853cc0.diff
LOG: [DAG] Add tests for fptosi.sat for various architectures. NFC
Added:
llvm/test/CodeGen/AArch64/fpclamptosat.ll
llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
llvm/test/CodeGen/ARM/fpclamptosat_vec.ll
llvm/test/CodeGen/RISCV/fpclamptosat.ll
llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll
llvm/test/CodeGen/WebAssembly/fpclamptosat.ll
llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
llvm/test/CodeGen/X86/fpclamptosat.ll
llvm/test/CodeGen/X86/fpclamptosat_vec.ll
Modified:
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AArch64/fpclamptosat.ll b/llvm/test/CodeGen/AArch64/fpclamptosat.ll
new file mode 100644
index 0000000000000..6d6c10b770998
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/fpclamptosat.ll
@@ -0,0 +1,1301 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK,CHECK-CVT
+; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
+
+; i32 saturate
+
+define i32 @stest_f64i32(double %x) {
+; CHECK-LABEL: stest_f64i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs x8, d0
+; CHECK-NEXT: mov w9, #2147483647
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: csel x8, x8, x9, lt
+; CHECK-NEXT: mov x9, #-2147483648
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: mov w9, #-2147483648
+; CHECK-NEXT: csel w0, w8, w9, gt
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi double %x to i64
+ %0 = icmp slt i64 %conv, 2147483647
+ %spec.store.select = select i1 %0, i64 %conv, i64 2147483647
+ %1 = icmp sgt i64 %spec.store.select, -2147483648
+ %spec.store.select7 = select i1 %1, i64 %spec.store.select, i64 -2147483648
+ %conv6 = trunc i64 %spec.store.select7 to i32
+ ret i32 %conv6
+}
+
+define i32 @utest_f64i32(double %x) {
+; CHECK-LABEL: utest_f64i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzu x8, d0
+; CHECK-NEXT: mov w9, #-1
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: csinv w0, w8, wzr, lo
+; CHECK-NEXT: ret
+entry:
+ %conv = fptoui double %x to i64
+ %0 = icmp ult i64 %conv, 4294967295
+ %spec.store.select = select i1 %0, i64 %conv, i64 4294967295
+ %conv6 = trunc i64 %spec.store.select to i32
+ ret i32 %conv6
+}
+
+define i32 @ustest_f64i32(double %x) {
+; CHECK-LABEL: ustest_f64i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs x8, d0
+; CHECK-NEXT: mov w9, #-1
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: csel x8, x8, x9, lt
+; CHECK-NEXT: cmp x8, #0
+; CHECK-NEXT: csel w0, w8, wzr, gt
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi double %x to i64
+ %0 = icmp slt i64 %conv, 4294967295
+ %spec.store.select = select i1 %0, i64 %conv, i64 4294967295
+ %1 = icmp sgt i64 %spec.store.select, 0
+ %spec.store.select7 = select i1 %1, i64 %spec.store.select, i64 0
+ %conv6 = trunc i64 %spec.store.select7 to i32
+ ret i32 %conv6
+}
+
+define i32 @stest_f32i32(float %x) {
+; CHECK-LABEL: stest_f32i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs x8, s0
+; CHECK-NEXT: mov w9, #2147483647
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: csel x8, x8, x9, lt
+; CHECK-NEXT: mov x9, #-2147483648
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: mov w9, #-2147483648
+; CHECK-NEXT: csel w0, w8, w9, gt
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi float %x to i64
+ %0 = icmp slt i64 %conv, 2147483647
+ %spec.store.select = select i1 %0, i64 %conv, i64 2147483647
+ %1 = icmp sgt i64 %spec.store.select, -2147483648
+ %spec.store.select7 = select i1 %1, i64 %spec.store.select, i64 -2147483648
+ %conv6 = trunc i64 %spec.store.select7 to i32
+ ret i32 %conv6
+}
+
+define i32 @utest_f32i32(float %x) {
+; CHECK-LABEL: utest_f32i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzu x8, s0
+; CHECK-NEXT: mov w9, #-1
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: csinv w0, w8, wzr, lo
+; CHECK-NEXT: ret
+entry:
+ %conv = fptoui float %x to i64
+ %0 = icmp ult i64 %conv, 4294967295
+ %spec.store.select = select i1 %0, i64 %conv, i64 4294967295
+ %conv6 = trunc i64 %spec.store.select to i32
+ ret i32 %conv6
+}
+
+define i32 @ustest_f32i32(float %x) {
+; CHECK-LABEL: ustest_f32i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs x8, s0
+; CHECK-NEXT: mov w9, #-1
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: csel x8, x8, x9, lt
+; CHECK-NEXT: cmp x8, #0
+; CHECK-NEXT: csel w0, w8, wzr, gt
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi float %x to i64
+ %0 = icmp slt i64 %conv, 4294967295
+ %spec.store.select = select i1 %0, i64 %conv, i64 4294967295
+ %1 = icmp sgt i64 %spec.store.select, 0
+ %spec.store.select7 = select i1 %1, i64 %spec.store.select, i64 0
+ %conv6 = trunc i64 %spec.store.select7 to i32
+ ret i32 %conv6
+}
+
+define i32 @stest_f16i32(half %x) {
+; CHECK-CVT-LABEL: stest_f16i32:
+; CHECK-CVT: // %bb.0: // %entry
+; CHECK-CVT-NEXT: fcvt s0, h0
+; CHECK-CVT-NEXT: mov w9, #2147483647
+; CHECK-CVT-NEXT: fcvtzs x8, s0
+; CHECK-CVT-NEXT: cmp x8, x9
+; CHECK-CVT-NEXT: csel x8, x8, x9, lt
+; CHECK-CVT-NEXT: mov x9, #-2147483648
+; CHECK-CVT-NEXT: cmp x8, x9
+; CHECK-CVT-NEXT: mov w9, #-2147483648
+; CHECK-CVT-NEXT: csel w0, w8, w9, gt
+; CHECK-CVT-NEXT: ret
+;
+; CHECK-FP16-LABEL: stest_f16i32:
+; CHECK-FP16: // %bb.0: // %entry
+; CHECK-FP16-NEXT: fcvtzs x8, h0
+; CHECK-FP16-NEXT: mov w9, #2147483647
+; CHECK-FP16-NEXT: cmp x8, x9
+; CHECK-FP16-NEXT: csel x8, x8, x9, lt
+; CHECK-FP16-NEXT: mov x9, #-2147483648
+; CHECK-FP16-NEXT: cmp x8, x9
+; CHECK-FP16-NEXT: mov w9, #-2147483648
+; CHECK-FP16-NEXT: csel w0, w8, w9, gt
+; CHECK-FP16-NEXT: ret
+entry:
+ %conv = fptosi half %x to i64
+ %0 = icmp slt i64 %conv, 2147483647
+ %spec.store.select = select i1 %0, i64 %conv, i64 2147483647
+ %1 = icmp sgt i64 %spec.store.select, -2147483648
+ %spec.store.select7 = select i1 %1, i64 %spec.store.select, i64 -2147483648
+ %conv6 = trunc i64 %spec.store.select7 to i32
+ ret i32 %conv6
+}
+
+define i32 @utesth_f16i32(half %x) {
+; CHECK-CVT-LABEL: utesth_f16i32:
+; CHECK-CVT: // %bb.0: // %entry
+; CHECK-CVT-NEXT: fcvt s0, h0
+; CHECK-CVT-NEXT: mov w9, #-1
+; CHECK-CVT-NEXT: fcvtzu x8, s0
+; CHECK-CVT-NEXT: cmp x8, x9
+; CHECK-CVT-NEXT: csinv w0, w8, wzr, lo
+; CHECK-CVT-NEXT: ret
+;
+; CHECK-FP16-LABEL: utesth_f16i32:
+; CHECK-FP16: // %bb.0: // %entry
+; CHECK-FP16-NEXT: fcvtzu x8, h0
+; CHECK-FP16-NEXT: mov w9, #-1
+; CHECK-FP16-NEXT: cmp x8, x9
+; CHECK-FP16-NEXT: csinv w0, w8, wzr, lo
+; CHECK-FP16-NEXT: ret
+entry:
+ %conv = fptoui half %x to i64
+ %0 = icmp ult i64 %conv, 4294967295
+ %spec.store.select = select i1 %0, i64 %conv, i64 4294967295
+ %conv6 = trunc i64 %spec.store.select to i32
+ ret i32 %conv6
+}
+
+define i32 @ustest_f16i32(half %x) {
+; CHECK-CVT-LABEL: ustest_f16i32:
+; CHECK-CVT: // %bb.0: // %entry
+; CHECK-CVT-NEXT: fcvt s0, h0
+; CHECK-CVT-NEXT: mov w9, #-1
+; CHECK-CVT-NEXT: fcvtzs x8, s0
+; CHECK-CVT-NEXT: cmp x8, x9
+; CHECK-CVT-NEXT: csel x8, x8, x9, lt
+; CHECK-CVT-NEXT: cmp x8, #0
+; CHECK-CVT-NEXT: csel w0, w8, wzr, gt
+; CHECK-CVT-NEXT: ret
+;
+; CHECK-FP16-LABEL: ustest_f16i32:
+; CHECK-FP16: // %bb.0: // %entry
+; CHECK-FP16-NEXT: fcvtzs x8, h0
+; CHECK-FP16-NEXT: mov w9, #-1
+; CHECK-FP16-NEXT: cmp x8, x9
+; CHECK-FP16-NEXT: csel x8, x8, x9, lt
+; CHECK-FP16-NEXT: cmp x8, #0
+; CHECK-FP16-NEXT: csel w0, w8, wzr, gt
+; CHECK-FP16-NEXT: ret
+entry:
+ %conv = fptosi half %x to i64
+ %0 = icmp slt i64 %conv, 4294967295
+ %spec.store.select = select i1 %0, i64 %conv, i64 4294967295
+ %1 = icmp sgt i64 %spec.store.select, 0
+ %spec.store.select7 = select i1 %1, i64 %spec.store.select, i64 0
+ %conv6 = trunc i64 %spec.store.select7 to i32
+ ret i32 %conv6
+}
+
+; i16 saturate
+
+define i16 @stest_f64i16(double %x) {
+; CHECK-LABEL: stest_f64i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs w8, d0
+; CHECK-NEXT: mov w9, #32767
+; CHECK-NEXT: cmp w8, w9
+; CHECK-NEXT: csel w8, w8, w9, lt
+; CHECK-NEXT: mov w9, #-32768
+; CHECK-NEXT: cmn w8, #8, lsl #12 // =32768
+; CHECK-NEXT: csel w0, w8, w9, gt
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi double %x to i32
+ %0 = icmp slt i32 %conv, 32767
+ %spec.store.select = select i1 %0, i32 %conv, i32 32767
+ %1 = icmp sgt i32 %spec.store.select, -32768
+ %spec.store.select7 = select i1 %1, i32 %spec.store.select, i32 -32768
+ %conv6 = trunc i32 %spec.store.select7 to i16
+ ret i16 %conv6
+}
+
+define i16 @utest_f64i16(double %x) {
+; CHECK-LABEL: utest_f64i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzu w8, d0
+; CHECK-NEXT: mov w9, #65535
+; CHECK-NEXT: cmp w8, w9
+; CHECK-NEXT: csel w0, w8, w9, lo
+; CHECK-NEXT: ret
+entry:
+ %conv = fptoui double %x to i32
+ %0 = icmp ult i32 %conv, 65535
+ %spec.store.select = select i1 %0, i32 %conv, i32 65535
+ %conv6 = trunc i32 %spec.store.select to i16
+ ret i16 %conv6
+}
+
+define i16 @ustest_f64i16(double %x) {
+; CHECK-LABEL: ustest_f64i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs w8, d0
+; CHECK-NEXT: mov w9, #65535
+; CHECK-NEXT: cmp w8, w9
+; CHECK-NEXT: csel w8, w8, w9, lt
+; CHECK-NEXT: bic w0, w8, w8, asr #31
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi double %x to i32
+ %0 = icmp slt i32 %conv, 65535
+ %spec.store.select = select i1 %0, i32 %conv, i32 65535
+ %1 = icmp sgt i32 %spec.store.select, 0
+ %spec.store.select7 = select i1 %1, i32 %spec.store.select, i32 0
+ %conv6 = trunc i32 %spec.store.select7 to i16
+ ret i16 %conv6
+}
+
+define i16 @stest_f32i16(float %x) {
+; CHECK-LABEL: stest_f32i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs w8, s0
+; CHECK-NEXT: mov w9, #32767
+; CHECK-NEXT: cmp w8, w9
+; CHECK-NEXT: csel w8, w8, w9, lt
+; CHECK-NEXT: mov w9, #-32768
+; CHECK-NEXT: cmn w8, #8, lsl #12 // =32768
+; CHECK-NEXT: csel w0, w8, w9, gt
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi float %x to i32
+ %0 = icmp slt i32 %conv, 32767
+ %spec.store.select = select i1 %0, i32 %conv, i32 32767
+ %1 = icmp sgt i32 %spec.store.select, -32768
+ %spec.store.select7 = select i1 %1, i32 %spec.store.select, i32 -32768
+ %conv6 = trunc i32 %spec.store.select7 to i16
+ ret i16 %conv6
+}
+
+define i16 @utest_f32i16(float %x) {
+; CHECK-LABEL: utest_f32i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzu w8, s0
+; CHECK-NEXT: mov w9, #65535
+; CHECK-NEXT: cmp w8, w9
+; CHECK-NEXT: csel w0, w8, w9, lo
+; CHECK-NEXT: ret
+entry:
+ %conv = fptoui float %x to i32
+ %0 = icmp ult i32 %conv, 65535
+ %spec.store.select = select i1 %0, i32 %conv, i32 65535
+ %conv6 = trunc i32 %spec.store.select to i16
+ ret i16 %conv6
+}
+
+define i16 @ustest_f32i16(float %x) {
+; CHECK-LABEL: ustest_f32i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs w8, s0
+; CHECK-NEXT: mov w9, #65535
+; CHECK-NEXT: cmp w8, w9
+; CHECK-NEXT: csel w8, w8, w9, lt
+; CHECK-NEXT: bic w0, w8, w8, asr #31
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi float %x to i32
+ %0 = icmp slt i32 %conv, 65535
+ %spec.store.select = select i1 %0, i32 %conv, i32 65535
+ %1 = icmp sgt i32 %spec.store.select, 0
+ %spec.store.select7 = select i1 %1, i32 %spec.store.select, i32 0
+ %conv6 = trunc i32 %spec.store.select7 to i16
+ ret i16 %conv6
+}
+
+define i16 @stest_f16i16(half %x) {
+; CHECK-CVT-LABEL: stest_f16i16:
+; CHECK-CVT: // %bb.0: // %entry
+; CHECK-CVT-NEXT: fcvt s0, h0
+; CHECK-CVT-NEXT: mov w9, #32767
+; CHECK-CVT-NEXT: fcvtzs w8, s0
+; CHECK-CVT-NEXT: cmp w8, w9
+; CHECK-CVT-NEXT: csel w8, w8, w9, lt
+; CHECK-CVT-NEXT: mov w9, #-32768
+; CHECK-CVT-NEXT: cmn w8, #8, lsl #12 // =32768
+; CHECK-CVT-NEXT: csel w0, w8, w9, gt
+; CHECK-CVT-NEXT: ret
+;
+; CHECK-FP16-LABEL: stest_f16i16:
+; CHECK-FP16: // %bb.0: // %entry
+; CHECK-FP16-NEXT: fcvtzs w8, h0
+; CHECK-FP16-NEXT: mov w9, #32767
+; CHECK-FP16-NEXT: cmp w8, w9
+; CHECK-FP16-NEXT: csel w8, w8, w9, lt
+; CHECK-FP16-NEXT: mov w9, #-32768
+; CHECK-FP16-NEXT: cmn w8, #8, lsl #12 // =32768
+; CHECK-FP16-NEXT: csel w0, w8, w9, gt
+; CHECK-FP16-NEXT: ret
+entry:
+ %conv = fptosi half %x to i32
+ %0 = icmp slt i32 %conv, 32767
+ %spec.store.select = select i1 %0, i32 %conv, i32 32767
+ %1 = icmp sgt i32 %spec.store.select, -32768
+ %spec.store.select7 = select i1 %1, i32 %spec.store.select, i32 -32768
+ %conv6 = trunc i32 %spec.store.select7 to i16
+ ret i16 %conv6
+}
+
+define i16 @utesth_f16i16(half %x) {
+; CHECK-CVT-LABEL: utesth_f16i16:
+; CHECK-CVT: // %bb.0: // %entry
+; CHECK-CVT-NEXT: fcvt s0, h0
+; CHECK-CVT-NEXT: mov w9, #65535
+; CHECK-CVT-NEXT: fcvtzu w8, s0
+; CHECK-CVT-NEXT: cmp w8, w9
+; CHECK-CVT-NEXT: csel w0, w8, w9, lo
+; CHECK-CVT-NEXT: ret
+;
+; CHECK-FP16-LABEL: utesth_f16i16:
+; CHECK-FP16: // %bb.0: // %entry
+; CHECK-FP16-NEXT: fcvtzu w8, h0
+; CHECK-FP16-NEXT: mov w9, #65535
+; CHECK-FP16-NEXT: cmp w8, w9
+; CHECK-FP16-NEXT: csel w0, w8, w9, lo
+; CHECK-FP16-NEXT: ret
+entry:
+ %conv = fptoui half %x to i32
+ %0 = icmp ult i32 %conv, 65535
+ %spec.store.select = select i1 %0, i32 %conv, i32 65535
+ %conv6 = trunc i32 %spec.store.select to i16
+ ret i16 %conv6
+}
+
+define i16 @ustest_f16i16(half %x) {
+; CHECK-CVT-LABEL: ustest_f16i16:
+; CHECK-CVT: // %bb.0: // %entry
+; CHECK-CVT-NEXT: fcvt s0, h0
+; CHECK-CVT-NEXT: mov w9, #65535
+; CHECK-CVT-NEXT: fcvtzs w8, s0
+; CHECK-CVT-NEXT: cmp w8, w9
+; CHECK-CVT-NEXT: csel w8, w8, w9, lt
+; CHECK-CVT-NEXT: bic w0, w8, w8, asr #31
+; CHECK-CVT-NEXT: ret
+;
+; CHECK-FP16-LABEL: ustest_f16i16:
+; CHECK-FP16: // %bb.0: // %entry
+; CHECK-FP16-NEXT: fcvtzs w8, h0
+; CHECK-FP16-NEXT: mov w9, #65535
+; CHECK-FP16-NEXT: cmp w8, w9
+; CHECK-FP16-NEXT: csel w8, w8, w9, lt
+; CHECK-FP16-NEXT: bic w0, w8, w8, asr #31
+; CHECK-FP16-NEXT: ret
+entry:
+ %conv = fptosi half %x to i32
+ %0 = icmp slt i32 %conv, 65535
+ %spec.store.select = select i1 %0, i32 %conv, i32 65535
+ %1 = icmp sgt i32 %spec.store.select, 0
+ %spec.store.select7 = select i1 %1, i32 %spec.store.select, i32 0
+ %conv6 = trunc i32 %spec.store.select7 to i16
+ ret i16 %conv6
+}
+
+; i64 saturate
+
+define i64 @stest_f64i64(double %x) {
+; CHECK-LABEL: stest_f64i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: bl __fixdfti
+; CHECK-NEXT: mov x8, #9223372036854775807
+; CHECK-NEXT: cmp x0, x8
+; CHECK-NEXT: cset w9, lo
+; CHECK-NEXT: cmp x1, #0
+; CHECK-NEXT: cset w10, lt
+; CHECK-NEXT: csel w9, w9, w10, eq
+; CHECK-NEXT: mov x10, #-9223372036854775808
+; CHECK-NEXT: cmp w9, #0
+; CHECK-NEXT: csel x9, x1, xzr, ne
+; CHECK-NEXT: csel x8, x0, x8, ne
+; CHECK-NEXT: cmp x9, #0
+; CHECK-NEXT: cset w11, ge
+; CHECK-NEXT: cmp x8, x10
+; CHECK-NEXT: cset w12, hi
+; CHECK-NEXT: cmn x9, #1
+; CHECK-NEXT: csel w9, w12, w11, eq
+; CHECK-NEXT: cmp w9, #0
+; CHECK-NEXT: csel x0, x8, x10, ne
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi double %x to i128
+ %0 = icmp slt i128 %conv, 9223372036854775807
+ %spec.store.select = select i1 %0, i128 %conv, i128 9223372036854775807
+ %1 = icmp sgt i128 %spec.store.select, -9223372036854775808
+ %spec.store.select7 = select i1 %1, i128 %spec.store.select, i128 -9223372036854775808
+ %conv6 = trunc i128 %spec.store.select7 to i64
+ ret i64 %conv6
+}
+
+define i64 @utest_f64i64(double %x) {
+; CHECK-LABEL: utest_f64i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: bl __fixunsdfti
+; CHECK-NEXT: cmp x1, #0
+; CHECK-NEXT: csel x0, x0, xzr, eq
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+entry:
+ %conv = fptoui double %x to i128
+ %0 = icmp ult i128 %conv, 18446744073709551616
+ %spec.store.select = select i1 %0, i128 %conv, i128 18446744073709551616
+ %conv6 = trunc i128 %spec.store.select to i64
+ ret i64 %conv6
+}
+
+define i64 @ustest_f64i64(double %x) {
+; CHECK-LABEL: ustest_f64i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: bl __fixdfti
+; CHECK-NEXT: cmp x1, #1
+; CHECK-NEXT: csel x8, x0, xzr, lt
+; CHECK-NEXT: csinc x9, x1, xzr, lt
+; CHECK-NEXT: cmp x8, #0
+; CHECK-NEXT: cset w10, ne
+; CHECK-NEXT: cmp x9, #0
+; CHECK-NEXT: cset w9, gt
+; CHECK-NEXT: csel w9, w10, w9, eq
+; CHECK-NEXT: cmp w9, #0
+; CHECK-NEXT: csel x0, x8, xzr, ne
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi double %x to i128
+ %0 = icmp slt i128 %conv, 18446744073709551616
+ %spec.store.select = select i1 %0, i128 %conv, i128 18446744073709551616
+ %1 = icmp sgt i128 %spec.store.select, 0
+ %spec.store.select7 = select i1 %1, i128 %spec.store.select, i128 0
+ %conv6 = trunc i128 %spec.store.select7 to i64
+ ret i64 %conv6
+}
+
+define i64 @stest_f32i64(float %x) {
+; CHECK-LABEL: stest_f32i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: bl __fixsfti
+; CHECK-NEXT: mov x8, #9223372036854775807
+; CHECK-NEXT: cmp x0, x8
+; CHECK-NEXT: cset w9, lo
+; CHECK-NEXT: cmp x1, #0
+; CHECK-NEXT: cset w10, lt
+; CHECK-NEXT: csel w9, w9, w10, eq
+; CHECK-NEXT: mov x10, #-9223372036854775808
+; CHECK-NEXT: cmp w9, #0
+; CHECK-NEXT: csel x9, x1, xzr, ne
+; CHECK-NEXT: csel x8, x0, x8, ne
+; CHECK-NEXT: cmp x9, #0
+; CHECK-NEXT: cset w11, ge
+; CHECK-NEXT: cmp x8, x10
+; CHECK-NEXT: cset w12, hi
+; CHECK-NEXT: cmn x9, #1
+; CHECK-NEXT: csel w9, w12, w11, eq
+; CHECK-NEXT: cmp w9, #0
+; CHECK-NEXT: csel x0, x8, x10, ne
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi float %x to i128
+ %0 = icmp slt i128 %conv, 9223372036854775807
+ %spec.store.select = select i1 %0, i128 %conv, i128 9223372036854775807
+ %1 = icmp sgt i128 %spec.store.select, -9223372036854775808
+ %spec.store.select7 = select i1 %1, i128 %spec.store.select, i128 -9223372036854775808
+ %conv6 = trunc i128 %spec.store.select7 to i64
+ ret i64 %conv6
+}
+
+define i64 @utest_f32i64(float %x) {
+; CHECK-LABEL: utest_f32i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: bl __fixunssfti
+; CHECK-NEXT: cmp x1, #0
+; CHECK-NEXT: csel x0, x0, xzr, eq
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+entry:
+ %conv = fptoui float %x to i128
+ %0 = icmp ult i128 %conv, 18446744073709551616
+ %spec.store.select = select i1 %0, i128 %conv, i128 18446744073709551616
+ %conv6 = trunc i128 %spec.store.select to i64
+ ret i64 %conv6
+}
+
+define i64 @ustest_f32i64(float %x) {
+; CHECK-LABEL: ustest_f32i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: bl __fixsfti
+; CHECK-NEXT: cmp x1, #1
+; CHECK-NEXT: csel x8, x0, xzr, lt
+; CHECK-NEXT: csinc x9, x1, xzr, lt
+; CHECK-NEXT: cmp x8, #0
+; CHECK-NEXT: cset w10, ne
+; CHECK-NEXT: cmp x9, #0
+; CHECK-NEXT: cset w9, gt
+; CHECK-NEXT: csel w9, w10, w9, eq
+; CHECK-NEXT: cmp w9, #0
+; CHECK-NEXT: csel x0, x8, xzr, ne
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi float %x to i128
+ %0 = icmp slt i128 %conv, 18446744073709551616
+ %spec.store.select = select i1 %0, i128 %conv, i128 18446744073709551616
+ %1 = icmp sgt i128 %spec.store.select, 0
+ %spec.store.select7 = select i1 %1, i128 %spec.store.select, i128 0
+ %conv6 = trunc i128 %spec.store.select7 to i64
+ ret i64 %conv6
+}
+
+define i64 @stest_f16i64(half %x) {
+; CHECK-LABEL: stest_f16i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: bl __fixhfti
+; CHECK-NEXT: mov x8, #9223372036854775807
+; CHECK-NEXT: cmp x0, x8
+; CHECK-NEXT: cset w9, lo
+; CHECK-NEXT: cmp x1, #0
+; CHECK-NEXT: cset w10, lt
+; CHECK-NEXT: csel w9, w9, w10, eq
+; CHECK-NEXT: mov x10, #-9223372036854775808
+; CHECK-NEXT: cmp w9, #0
+; CHECK-NEXT: csel x9, x1, xzr, ne
+; CHECK-NEXT: csel x8, x0, x8, ne
+; CHECK-NEXT: cmp x9, #0
+; CHECK-NEXT: cset w11, ge
+; CHECK-NEXT: cmp x8, x10
+; CHECK-NEXT: cset w12, hi
+; CHECK-NEXT: cmn x9, #1
+; CHECK-NEXT: csel w9, w12, w11, eq
+; CHECK-NEXT: cmp w9, #0
+; CHECK-NEXT: csel x0, x8, x10, ne
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi half %x to i128
+ %0 = icmp slt i128 %conv, 9223372036854775807
+ %spec.store.select = select i1 %0, i128 %conv, i128 9223372036854775807
+ %1 = icmp sgt i128 %spec.store.select, -9223372036854775808
+ %spec.store.select7 = select i1 %1, i128 %spec.store.select, i128 -9223372036854775808
+ %conv6 = trunc i128 %spec.store.select7 to i64
+ ret i64 %conv6
+}
+
+define i64 @utesth_f16i64(half %x) {
+; CHECK-LABEL: utesth_f16i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: bl __fixunshfti
+; CHECK-NEXT: cmp x1, #0
+; CHECK-NEXT: csel x0, x0, xzr, eq
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+entry:
+ %conv = fptoui half %x to i128
+ %0 = icmp ult i128 %conv, 18446744073709551616
+ %spec.store.select = select i1 %0, i128 %conv, i128 18446744073709551616
+ %conv6 = trunc i128 %spec.store.select to i64
+ ret i64 %conv6
+}
+
+define i64 @ustest_f16i64(half %x) {
+; CHECK-LABEL: ustest_f16i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: bl __fixhfti
+; CHECK-NEXT: cmp x1, #1
+; CHECK-NEXT: csel x8, x0, xzr, lt
+; CHECK-NEXT: csinc x9, x1, xzr, lt
+; CHECK-NEXT: cmp x8, #0
+; CHECK-NEXT: cset w10, ne
+; CHECK-NEXT: cmp x9, #0
+; CHECK-NEXT: cset w9, gt
+; CHECK-NEXT: csel w9, w10, w9, eq
+; CHECK-NEXT: cmp w9, #0
+; CHECK-NEXT: csel x0, x8, xzr, ne
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi half %x to i128
+ %0 = icmp slt i128 %conv, 18446744073709551616
+ %spec.store.select = select i1 %0, i128 %conv, i128 18446744073709551616
+ %1 = icmp sgt i128 %spec.store.select, 0
+ %spec.store.select7 = select i1 %1, i128 %spec.store.select, i128 0
+ %conv6 = trunc i128 %spec.store.select7 to i64
+ ret i64 %conv6
+}
+
+
+
+; i32 saturate
+
+define i32 @stest_f64i32_mm(double %x) {
+; CHECK-LABEL: stest_f64i32_mm:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs x8, d0
+; CHECK-NEXT: mov w9, #2147483647
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: csel x8, x8, x9, lt
+; CHECK-NEXT: mov x9, #-2147483648
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: csel x0, x8, x9, gt
+; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi double %x to i64
+ %spec.store.select = call i64 @llvm.smin.i64(i64 %conv, i64 2147483647)
+ %spec.store.select7 = call i64 @llvm.smax.i64(i64 %spec.store.select, i64 -2147483648)
+ %conv6 = trunc i64 %spec.store.select7 to i32
+ ret i32 %conv6
+}
+
+define i32 @utest_f64i32_mm(double %x) {
+; CHECK-LABEL: utest_f64i32_mm:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzu x8, d0
+; CHECK-NEXT: mov w9, #-1
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: csel x0, x8, x9, lo
+; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT: ret
+entry:
+ %conv = fptoui double %x to i64
+ %spec.store.select = call i64 @llvm.umin.i64(i64 %conv, i64 4294967295)
+ %conv6 = trunc i64 %spec.store.select to i32
+ ret i32 %conv6
+}
+
+define i32 @ustest_f64i32_mm(double %x) {
+; CHECK-LABEL: ustest_f64i32_mm:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs x8, d0
+; CHECK-NEXT: mov w9, #-1
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: csel x8, x8, x9, lt
+; CHECK-NEXT: cmp x8, #0
+; CHECK-NEXT: csel x0, x8, xzr, gt
+; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi double %x to i64
+ %spec.store.select = call i64 @llvm.smin.i64(i64 %conv, i64 4294967295)
+ %spec.store.select7 = call i64 @llvm.smax.i64(i64 %spec.store.select, i64 0)
+ %conv6 = trunc i64 %spec.store.select7 to i32
+ ret i32 %conv6
+}
+
+define i32 @stest_f32i32_mm(float %x) {
+; CHECK-LABEL: stest_f32i32_mm:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs x8, s0
+; CHECK-NEXT: mov w9, #2147483647
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: csel x8, x8, x9, lt
+; CHECK-NEXT: mov x9, #-2147483648
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: csel x0, x8, x9, gt
+; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi float %x to i64
+ %spec.store.select = call i64 @llvm.smin.i64(i64 %conv, i64 2147483647)
+ %spec.store.select7 = call i64 @llvm.smax.i64(i64 %spec.store.select, i64 -2147483648)
+ %conv6 = trunc i64 %spec.store.select7 to i32
+ ret i32 %conv6
+}
+
+define i32 @utest_f32i32_mm(float %x) {
+; CHECK-LABEL: utest_f32i32_mm:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzu x8, s0
+; CHECK-NEXT: mov w9, #-1
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: csel x0, x8, x9, lo
+; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT: ret
+entry:
+ %conv = fptoui float %x to i64
+ %spec.store.select = call i64 @llvm.umin.i64(i64 %conv, i64 4294967295)
+ %conv6 = trunc i64 %spec.store.select to i32
+ ret i32 %conv6
+}
+
+define i32 @ustest_f32i32_mm(float %x) {
+; CHECK-LABEL: ustest_f32i32_mm:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs x8, s0
+; CHECK-NEXT: mov w9, #-1
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: csel x8, x8, x9, lt
+; CHECK-NEXT: cmp x8, #0
+; CHECK-NEXT: csel x0, x8, xzr, gt
+; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi float %x to i64
+ %spec.store.select = call i64 @llvm.smin.i64(i64 %conv, i64 4294967295)
+ %spec.store.select7 = call i64 @llvm.smax.i64(i64 %spec.store.select, i64 0)
+ %conv6 = trunc i64 %spec.store.select7 to i32
+ ret i32 %conv6
+}
+
+define i32 @stest_f16i32_mm(half %x) {
+; CHECK-CVT-LABEL: stest_f16i32_mm:
+; CHECK-CVT: // %bb.0: // %entry
+; CHECK-CVT-NEXT: fcvt s0, h0
+; CHECK-CVT-NEXT: mov w9, #2147483647
+; CHECK-CVT-NEXT: fcvtzs x8, s0
+; CHECK-CVT-NEXT: cmp x8, x9
+; CHECK-CVT-NEXT: csel x8, x8, x9, lt
+; CHECK-CVT-NEXT: mov x9, #-2147483648
+; CHECK-CVT-NEXT: cmp x8, x9
+; CHECK-CVT-NEXT: csel x0, x8, x9, gt
+; CHECK-CVT-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-CVT-NEXT: ret
+;
+; CHECK-FP16-LABEL: stest_f16i32_mm:
+; CHECK-FP16: // %bb.0: // %entry
+; CHECK-FP16-NEXT: fcvtzs x8, h0
+; CHECK-FP16-NEXT: mov w9, #2147483647
+; CHECK-FP16-NEXT: cmp x8, x9
+; CHECK-FP16-NEXT: csel x8, x8, x9, lt
+; CHECK-FP16-NEXT: mov x9, #-2147483648
+; CHECK-FP16-NEXT: cmp x8, x9
+; CHECK-FP16-NEXT: csel x0, x8, x9, gt
+; CHECK-FP16-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-FP16-NEXT: ret
+entry:
+ %conv = fptosi half %x to i64
+ %spec.store.select = call i64 @llvm.smin.i64(i64 %conv, i64 2147483647)
+ %spec.store.select7 = call i64 @llvm.smax.i64(i64 %spec.store.select, i64 -2147483648)
+ %conv6 = trunc i64 %spec.store.select7 to i32
+ ret i32 %conv6
+}
+
+define i32 @utesth_f16i32_mm(half %x) {
+; CHECK-CVT-LABEL: utesth_f16i32_mm:
+; CHECK-CVT: // %bb.0: // %entry
+; CHECK-CVT-NEXT: fcvt s0, h0
+; CHECK-CVT-NEXT: mov w9, #-1
+; CHECK-CVT-NEXT: fcvtzu x8, s0
+; CHECK-CVT-NEXT: cmp x8, x9
+; CHECK-CVT-NEXT: csel x0, x8, x9, lo
+; CHECK-CVT-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-CVT-NEXT: ret
+;
+; CHECK-FP16-LABEL: utesth_f16i32_mm:
+; CHECK-FP16: // %bb.0: // %entry
+; CHECK-FP16-NEXT: fcvtzu x8, h0
+; CHECK-FP16-NEXT: mov w9, #-1
+; CHECK-FP16-NEXT: cmp x8, x9
+; CHECK-FP16-NEXT: csel x0, x8, x9, lo
+; CHECK-FP16-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-FP16-NEXT: ret
+entry:
+ %conv = fptoui half %x to i64
+ %spec.store.select = call i64 @llvm.umin.i64(i64 %conv, i64 4294967295)
+ %conv6 = trunc i64 %spec.store.select to i32
+ ret i32 %conv6
+}
+
+define i32 @ustest_f16i32_mm(half %x) {
+; CHECK-CVT-LABEL: ustest_f16i32_mm:
+; CHECK-CVT: // %bb.0: // %entry
+; CHECK-CVT-NEXT: fcvt s0, h0
+; CHECK-CVT-NEXT: mov w9, #-1
+; CHECK-CVT-NEXT: fcvtzs x8, s0
+; CHECK-CVT-NEXT: cmp x8, x9
+; CHECK-CVT-NEXT: csel x8, x8, x9, lt
+; CHECK-CVT-NEXT: cmp x8, #0
+; CHECK-CVT-NEXT: csel x0, x8, xzr, gt
+; CHECK-CVT-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-CVT-NEXT: ret
+;
+; CHECK-FP16-LABEL: ustest_f16i32_mm:
+; CHECK-FP16: // %bb.0: // %entry
+; CHECK-FP16-NEXT: fcvtzs x8, h0
+; CHECK-FP16-NEXT: mov w9, #-1
+; CHECK-FP16-NEXT: cmp x8, x9
+; CHECK-FP16-NEXT: csel x8, x8, x9, lt
+; CHECK-FP16-NEXT: cmp x8, #0
+; CHECK-FP16-NEXT: csel x0, x8, xzr, gt
+; CHECK-FP16-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-FP16-NEXT: ret
+entry:
+ %conv = fptosi half %x to i64
+ %spec.store.select = call i64 @llvm.smin.i64(i64 %conv, i64 4294967295)
+ %spec.store.select7 = call i64 @llvm.smax.i64(i64 %spec.store.select, i64 0)
+ %conv6 = trunc i64 %spec.store.select7 to i32
+ ret i32 %conv6
+}
+
+; i16 saturate
+
+define i16 @stest_f64i16_mm(double %x) {
+; CHECK-LABEL: stest_f64i16_mm:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs w8, d0
+; CHECK-NEXT: mov w9, #32767
+; CHECK-NEXT: cmp w8, w9
+; CHECK-NEXT: csel w8, w8, w9, lt
+; CHECK-NEXT: mov w9, #-32768
+; CHECK-NEXT: cmn w8, #8, lsl #12 // =32768
+; CHECK-NEXT: csel w0, w8, w9, gt
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi double %x to i32
+ %spec.store.select = call i32 @llvm.smin.i32(i32 %conv, i32 32767)
+ %spec.store.select7 = call i32 @llvm.smax.i32(i32 %spec.store.select, i32 -32768)
+ %conv6 = trunc i32 %spec.store.select7 to i16
+ ret i16 %conv6
+}
+
+define i16 @utest_f64i16_mm(double %x) {
+; CHECK-LABEL: utest_f64i16_mm:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzu w8, d0
+; CHECK-NEXT: mov w9, #65535
+; CHECK-NEXT: cmp w8, w9
+; CHECK-NEXT: csel w0, w8, w9, lo
+; CHECK-NEXT: ret
+entry:
+ %conv = fptoui double %x to i32
+ %spec.store.select = call i32 @llvm.umin.i32(i32 %conv, i32 65535)
+ %conv6 = trunc i32 %spec.store.select to i16
+ ret i16 %conv6
+}
+
+define i16 @ustest_f64i16_mm(double %x) {
+; CHECK-LABEL: ustest_f64i16_mm:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs w8, d0
+; CHECK-NEXT: mov w9, #65535
+; CHECK-NEXT: cmp w8, w9
+; CHECK-NEXT: csel w8, w8, w9, lt
+; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: csel w0, w8, wzr, gt
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi double %x to i32
+ %spec.store.select = call i32 @llvm.smin.i32(i32 %conv, i32 65535)
+ %spec.store.select7 = call i32 @llvm.smax.i32(i32 %spec.store.select, i32 0)
+ %conv6 = trunc i32 %spec.store.select7 to i16
+ ret i16 %conv6
+}
+
+define i16 @stest_f32i16_mm(float %x) {
+; CHECK-LABEL: stest_f32i16_mm:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs w8, s0
+; CHECK-NEXT: mov w9, #32767
+; CHECK-NEXT: cmp w8, w9
+; CHECK-NEXT: csel w8, w8, w9, lt
+; CHECK-NEXT: mov w9, #-32768
+; CHECK-NEXT: cmn w8, #8, lsl #12 // =32768
+; CHECK-NEXT: csel w0, w8, w9, gt
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi float %x to i32
+ %spec.store.select = call i32 @llvm.smin.i32(i32 %conv, i32 32767)
+ %spec.store.select7 = call i32 @llvm.smax.i32(i32 %spec.store.select, i32 -32768)
+ %conv6 = trunc i32 %spec.store.select7 to i16
+ ret i16 %conv6
+}
+
+define i16 @utest_f32i16_mm(float %x) {
+; CHECK-LABEL: utest_f32i16_mm:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzu w8, s0
+; CHECK-NEXT: mov w9, #65535
+; CHECK-NEXT: cmp w8, w9
+; CHECK-NEXT: csel w0, w8, w9, lo
+; CHECK-NEXT: ret
+entry:
+ %conv = fptoui float %x to i32
+ %spec.store.select = call i32 @llvm.umin.i32(i32 %conv, i32 65535)
+ %conv6 = trunc i32 %spec.store.select to i16
+ ret i16 %conv6
+}
+
+define i16 @ustest_f32i16_mm(float %x) {
+; CHECK-LABEL: ustest_f32i16_mm:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs w8, s0
+; CHECK-NEXT: mov w9, #65535
+; CHECK-NEXT: cmp w8, w9
+; CHECK-NEXT: csel w8, w8, w9, lt
+; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: csel w0, w8, wzr, gt
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi float %x to i32
+ %spec.store.select = call i32 @llvm.smin.i32(i32 %conv, i32 65535)
+ %spec.store.select7 = call i32 @llvm.smax.i32(i32 %spec.store.select, i32 0)
+ %conv6 = trunc i32 %spec.store.select7 to i16
+ ret i16 %conv6
+}
+
+define i16 @stest_f16i16_mm(half %x) {
+; CHECK-CVT-LABEL: stest_f16i16_mm:
+; CHECK-CVT: // %bb.0: // %entry
+; CHECK-CVT-NEXT: fcvt s0, h0
+; CHECK-CVT-NEXT: mov w9, #32767
+; CHECK-CVT-NEXT: fcvtzs w8, s0
+; CHECK-CVT-NEXT: cmp w8, w9
+; CHECK-CVT-NEXT: csel w8, w8, w9, lt
+; CHECK-CVT-NEXT: mov w9, #-32768
+; CHECK-CVT-NEXT: cmn w8, #8, lsl #12 // =32768
+; CHECK-CVT-NEXT: csel w0, w8, w9, gt
+; CHECK-CVT-NEXT: ret
+;
+; CHECK-FP16-LABEL: stest_f16i16_mm:
+; CHECK-FP16: // %bb.0: // %entry
+; CHECK-FP16-NEXT: fcvtzs w8, h0
+; CHECK-FP16-NEXT: mov w9, #32767
+; CHECK-FP16-NEXT: cmp w8, w9
+; CHECK-FP16-NEXT: csel w8, w8, w9, lt
+; CHECK-FP16-NEXT: mov w9, #-32768
+; CHECK-FP16-NEXT: cmn w8, #8, lsl #12 // =32768
+; CHECK-FP16-NEXT: csel w0, w8, w9, gt
+; CHECK-FP16-NEXT: ret
+entry:
+ %conv = fptosi half %x to i32
+ %spec.store.select = call i32 @llvm.smin.i32(i32 %conv, i32 32767)
+ %spec.store.select7 = call i32 @llvm.smax.i32(i32 %spec.store.select, i32 -32768)
+ %conv6 = trunc i32 %spec.store.select7 to i16
+ ret i16 %conv6
+}
+
+define i16 @utesth_f16i16_mm(half %x) {
+; CHECK-CVT-LABEL: utesth_f16i16_mm:
+; CHECK-CVT: // %bb.0: // %entry
+; CHECK-CVT-NEXT: fcvt s0, h0
+; CHECK-CVT-NEXT: mov w9, #65535
+; CHECK-CVT-NEXT: fcvtzu w8, s0
+; CHECK-CVT-NEXT: cmp w8, w9
+; CHECK-CVT-NEXT: csel w0, w8, w9, lo
+; CHECK-CVT-NEXT: ret
+;
+; CHECK-FP16-LABEL: utesth_f16i16_mm:
+; CHECK-FP16: // %bb.0: // %entry
+; CHECK-FP16-NEXT: fcvtzu w8, h0
+; CHECK-FP16-NEXT: mov w9, #65535
+; CHECK-FP16-NEXT: cmp w8, w9
+; CHECK-FP16-NEXT: csel w0, w8, w9, lo
+; CHECK-FP16-NEXT: ret
+entry:
+ %conv = fptoui half %x to i32
+ %spec.store.select = call i32 @llvm.umin.i32(i32 %conv, i32 65535)
+ %conv6 = trunc i32 %spec.store.select to i16
+ ret i16 %conv6
+}
+
+define i16 @ustest_f16i16_mm(half %x) {
+; CHECK-CVT-LABEL: ustest_f16i16_mm:
+; CHECK-CVT: // %bb.0: // %entry
+; CHECK-CVT-NEXT: fcvt s0, h0
+; CHECK-CVT-NEXT: mov w9, #65535
+; CHECK-CVT-NEXT: fcvtzs w8, s0
+; CHECK-CVT-NEXT: cmp w8, w9
+; CHECK-CVT-NEXT: csel w8, w8, w9, lt
+; CHECK-CVT-NEXT: cmp w8, #0
+; CHECK-CVT-NEXT: csel w0, w8, wzr, gt
+; CHECK-CVT-NEXT: ret
+;
+; CHECK-FP16-LABEL: ustest_f16i16_mm:
+; CHECK-FP16: // %bb.0: // %entry
+; CHECK-FP16-NEXT: fcvtzs w8, h0
+; CHECK-FP16-NEXT: mov w9, #65535
+; CHECK-FP16-NEXT: cmp w8, w9
+; CHECK-FP16-NEXT: csel w8, w8, w9, lt
+; CHECK-FP16-NEXT: cmp w8, #0
+; CHECK-FP16-NEXT: csel w0, w8, wzr, gt
+; CHECK-FP16-NEXT: ret
+entry:
+ %conv = fptosi half %x to i32
+ %spec.store.select = call i32 @llvm.smin.i32(i32 %conv, i32 65535)
+ %spec.store.select7 = call i32 @llvm.smax.i32(i32 %spec.store.select, i32 0)
+ %conv6 = trunc i32 %spec.store.select7 to i16
+ ret i16 %conv6
+}
+
+; i64 saturate
+
+define i64 @stest_f64i64_mm(double %x) {
+; CHECK-LABEL: stest_f64i64_mm:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: bl __fixdfti
+; CHECK-NEXT: mov x8, #9223372036854775807
+; CHECK-NEXT: cmp x0, x8
+; CHECK-NEXT: csel x9, x0, x8, lo
+; CHECK-NEXT: cmp x1, #0
+; CHECK-NEXT: csel x8, x0, x8, lt
+; CHECK-NEXT: csel x10, x1, xzr, lt
+; CHECK-NEXT: csel x8, x9, x8, eq
+; CHECK-NEXT: cmp x10, #0
+; CHECK-NEXT: mov x9, #-9223372036854775808
+; CHECK-NEXT: csel x11, x8, x9, ge
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: csel x8, x8, x9, hi
+; CHECK-NEXT: cmn x10, #1
+; CHECK-NEXT: csel x0, x8, x11, eq
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi double %x to i128
+ %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 9223372036854775807)
+ %spec.store.select7 = call i128 @llvm.smax.i128(i128 %spec.store.select, i128 -9223372036854775808)
+ %conv6 = trunc i128 %spec.store.select7 to i64
+ ret i64 %conv6
+}
+
+define i64 @utest_f64i64_mm(double %x) {
+; CHECK-LABEL: utest_f64i64_mm:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: bl __fixunsdfti
+; CHECK-NEXT: cmp x1, #0
+; CHECK-NEXT: csel x8, x0, xzr, eq
+; CHECK-NEXT: cmp x1, #1
+; CHECK-NEXT: csel x0, xzr, x8, eq
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+entry:
+ %conv = fptoui double %x to i128
+ %spec.store.select = call i128 @llvm.umin.i128(i128 %conv, i128 18446744073709551616)
+ %conv6 = trunc i128 %spec.store.select to i64
+ ret i64 %conv6
+}
+
+define i64 @ustest_f64i64_mm(double %x) {
+; CHECK-LABEL: ustest_f64i64_mm:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: bl __fixdfti
+; CHECK-NEXT: cmp x1, #1
+; CHECK-NEXT: csel x8, x0, xzr, lt
+; CHECK-NEXT: csinc x9, x1, xzr, lt
+; CHECK-NEXT: csel x8, xzr, x8, eq
+; CHECK-NEXT: cmp x9, #0
+; CHECK-NEXT: csel x9, x8, xzr, gt
+; CHECK-NEXT: csel x0, x8, x9, eq
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi double %x to i128
+ %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 18446744073709551616)
+ %spec.store.select7 = call i128 @llvm.smax.i128(i128 %spec.store.select, i128 0)
+ %conv6 = trunc i128 %spec.store.select7 to i64
+ ret i64 %conv6
+}
+
+define i64 @stest_f32i64_mm(float %x) {
+; CHECK-LABEL: stest_f32i64_mm:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: bl __fixsfti
+; CHECK-NEXT: mov x8, #9223372036854775807
+; CHECK-NEXT: cmp x0, x8
+; CHECK-NEXT: csel x9, x0, x8, lo
+; CHECK-NEXT: cmp x1, #0
+; CHECK-NEXT: csel x8, x0, x8, lt
+; CHECK-NEXT: csel x10, x1, xzr, lt
+; CHECK-NEXT: csel x8, x9, x8, eq
+; CHECK-NEXT: cmp x10, #0
+; CHECK-NEXT: mov x9, #-9223372036854775808
+; CHECK-NEXT: csel x11, x8, x9, ge
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: csel x8, x8, x9, hi
+; CHECK-NEXT: cmn x10, #1
+; CHECK-NEXT: csel x0, x8, x11, eq
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi float %x to i128
+ %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 9223372036854775807)
+ %spec.store.select7 = call i128 @llvm.smax.i128(i128 %spec.store.select, i128 -9223372036854775808)
+ %conv6 = trunc i128 %spec.store.select7 to i64
+ ret i64 %conv6
+}
+
+define i64 @utest_f32i64_mm(float %x) {
+; CHECK-LABEL: utest_f32i64_mm:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: bl __fixunssfti
+; CHECK-NEXT: cmp x1, #0
+; CHECK-NEXT: csel x8, x0, xzr, eq
+; CHECK-NEXT: cmp x1, #1
+; CHECK-NEXT: csel x0, xzr, x8, eq
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+entry:
+ %conv = fptoui float %x to i128
+ %spec.store.select = call i128 @llvm.umin.i128(i128 %conv, i128 18446744073709551616)
+ %conv6 = trunc i128 %spec.store.select to i64
+ ret i64 %conv6
+}
+
+define i64 @ustest_f32i64_mm(float %x) {
+; CHECK-LABEL: ustest_f32i64_mm:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: bl __fixsfti
+; CHECK-NEXT: cmp x1, #1
+; CHECK-NEXT: csel x8, x0, xzr, lt
+; CHECK-NEXT: csinc x9, x1, xzr, lt
+; CHECK-NEXT: csel x8, xzr, x8, eq
+; CHECK-NEXT: cmp x9, #0
+; CHECK-NEXT: csel x9, x8, xzr, gt
+; CHECK-NEXT: csel x0, x8, x9, eq
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi float %x to i128
+ %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 18446744073709551616)
+ %spec.store.select7 = call i128 @llvm.smax.i128(i128 %spec.store.select, i128 0)
+ %conv6 = trunc i128 %spec.store.select7 to i64
+ ret i64 %conv6
+}
+
+define i64 @stest_f16i64_mm(half %x) {
+; CHECK-LABEL: stest_f16i64_mm:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: bl __fixhfti
+; CHECK-NEXT: mov x8, #9223372036854775807
+; CHECK-NEXT: cmp x0, x8
+; CHECK-NEXT: csel x9, x0, x8, lo
+; CHECK-NEXT: cmp x1, #0
+; CHECK-NEXT: csel x8, x0, x8, lt
+; CHECK-NEXT: csel x10, x1, xzr, lt
+; CHECK-NEXT: csel x8, x9, x8, eq
+; CHECK-NEXT: cmp x10, #0
+; CHECK-NEXT: mov x9, #-9223372036854775808
+; CHECK-NEXT: csel x11, x8, x9, ge
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: csel x8, x8, x9, hi
+; CHECK-NEXT: cmn x10, #1
+; CHECK-NEXT: csel x0, x8, x11, eq
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi half %x to i128
+ %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 9223372036854775807)
+ %spec.store.select7 = call i128 @llvm.smax.i128(i128 %spec.store.select, i128 -9223372036854775808)
+ %conv6 = trunc i128 %spec.store.select7 to i64
+ ret i64 %conv6
+}
+
+define i64 @utesth_f16i64_mm(half %x) {
+; CHECK-LABEL: utesth_f16i64_mm:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: bl __fixunshfti
+; CHECK-NEXT: cmp x1, #0
+; CHECK-NEXT: csel x8, x0, xzr, eq
+; CHECK-NEXT: cmp x1, #1
+; CHECK-NEXT: csel x0, xzr, x8, eq
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+entry:
+ %conv = fptoui half %x to i128
+ %spec.store.select = call i128 @llvm.umin.i128(i128 %conv, i128 18446744073709551616)
+ %conv6 = trunc i128 %spec.store.select to i64
+ ret i64 %conv6
+}
+
+define i64 @ustest_f16i64_mm(half %x) {
+; CHECK-LABEL: ustest_f16i64_mm:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: bl __fixhfti
+; CHECK-NEXT: cmp x1, #1
+; CHECK-NEXT: csel x8, x0, xzr, lt
+; CHECK-NEXT: csinc x9, x1, xzr, lt
+; CHECK-NEXT: csel x8, xzr, x8, eq
+; CHECK-NEXT: cmp x9, #0
+; CHECK-NEXT: csel x9, x8, xzr, gt
+; CHECK-NEXT: csel x0, x8, x9, eq
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi half %x to i128
+ %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 18446744073709551616)
+ %spec.store.select7 = call i128 @llvm.smax.i128(i128 %spec.store.select, i128 0)
+ %conv6 = trunc i128 %spec.store.select7 to i64
+ ret i64 %conv6
+}
+
+declare i32 @llvm.smin.i32(i32, i32)
+declare i32 @llvm.smax.i32(i32, i32)
+declare i32 @llvm.umin.i32(i32, i32)
+declare i64 @llvm.smin.i64(i64, i64)
+declare i64 @llvm.smax.i64(i64, i64)
+declare i64 @llvm.umin.i64(i64, i64)
+declare i128 @llvm.smin.i128(i128, i128)
+declare i128 @llvm.smax.i128(i128, i128)
+declare i128 @llvm.umin.i128(i128, i128)
diff --git a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
new file mode 100644
index 0000000000000..c9c190d30b1c2
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
@@ -0,0 +1,1997 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK,CHECK-CVT
+; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
+
+; i32 saturate
+
+define <2 x i32> @stest_f64i32(<2 x double> %x) {
+; CHECK-LABEL: stest_f64i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #2147483647
+; CHECK-NEXT: fcvtzs v0.2d, v0.2d
+; CHECK-NEXT: dup v1.2d, x8
+; CHECK-NEXT: mov x8, #-2147483648
+; CHECK-NEXT: cmgt v2.2d, v1.2d, v0.2d
+; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
+; CHECK-NEXT: dup v1.2d, x8
+; CHECK-NEXT: cmgt v2.2d, v0.2d, v1.2d
+; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
+; CHECK-NEXT: xtn v0.2s, v0.2d
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi <2 x double> %x to <2 x i64>
+ %0 = icmp slt <2 x i64> %conv, <i64 2147483647, i64 2147483647>
+ %spec.store.select = select <2 x i1> %0, <2 x i64> %conv, <2 x i64> <i64 2147483647, i64 2147483647>
+ %1 = icmp sgt <2 x i64> %spec.store.select, <i64 -2147483648, i64 -2147483648>
+ %spec.store.select7 = select <2 x i1> %1, <2 x i64> %spec.store.select, <2 x i64> <i64 -2147483648, i64 -2147483648>
+ %conv6 = trunc <2 x i64> %spec.store.select7 to <2 x i32>
+ ret <2 x i32> %conv6
+}
+
+define <2 x i32> @utest_f64i32(<2 x double> %x) {
+; CHECK-LABEL: utest_f64i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movi v1.2d, #0x000000ffffffff
+; CHECK-NEXT: fcvtzu v0.2d, v0.2d
+; CHECK-NEXT: cmhi v1.2d, v1.2d, v0.2d
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: orn v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: xtn v0.2s, v0.2d
+; CHECK-NEXT: ret
+entry:
+ %conv = fptoui <2 x double> %x to <2 x i64>
+ %0 = icmp ult <2 x i64> %conv, <i64 4294967295, i64 4294967295>
+ %spec.store.select = select <2 x i1> %0, <2 x i64> %conv, <2 x i64> <i64 4294967295, i64 4294967295>
+ %conv6 = trunc <2 x i64> %spec.store.select to <2 x i32>
+ ret <2 x i32> %conv6
+}
+
+define <2 x i32> @ustest_f64i32(<2 x double> %x) {
+; CHECK-LABEL: ustest_f64i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movi v1.2d, #0x000000ffffffff
+; CHECK-NEXT: fcvtzs v0.2d, v0.2d
+; CHECK-NEXT: cmgt v2.2d, v1.2d, v0.2d
+; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
+; CHECK-NEXT: cmgt v1.2d, v0.2d, #0
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: xtn v0.2s, v0.2d
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi <2 x double> %x to <2 x i64>
+ %0 = icmp slt <2 x i64> %conv, <i64 4294967295, i64 4294967295>
+ %spec.store.select = select <2 x i1> %0, <2 x i64> %conv, <2 x i64> <i64 4294967295, i64 4294967295>
+ %1 = icmp sgt <2 x i64> %spec.store.select, zeroinitializer
+ %spec.store.select7 = select <2 x i1> %1, <2 x i64> %spec.store.select, <2 x i64> zeroinitializer
+ %conv6 = trunc <2 x i64> %spec.store.select7 to <2 x i32>
+ ret <2 x i32> %conv6
+}
+
+define <4 x i32> @stest_f32i32(<4 x float> %x) {
+; CHECK-LABEL: stest_f32i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtl v1.2d, v0.2s
+; CHECK-NEXT: fcvtl2 v0.2d, v0.4s
+; CHECK-NEXT: mov w8, #2147483647
+; CHECK-NEXT: fcvtzs v1.2d, v1.2d
+; CHECK-NEXT: fcvtzs v0.2d, v0.2d
+; CHECK-NEXT: dup v2.2d, x8
+; CHECK-NEXT: mov x8, #-2147483648
+; CHECK-NEXT: cmgt v3.2d, v2.2d, v1.2d
+; CHECK-NEXT: cmgt v4.2d, v2.2d, v0.2d
+; CHECK-NEXT: bif v1.16b, v2.16b, v3.16b
+; CHECK-NEXT: bif v0.16b, v2.16b, v4.16b
+; CHECK-NEXT: dup v2.2d, x8
+; CHECK-NEXT: cmgt v3.2d, v1.2d, v2.2d
+; CHECK-NEXT: cmgt v4.2d, v0.2d, v2.2d
+; CHECK-NEXT: bif v1.16b, v2.16b, v3.16b
+; CHECK-NEXT: bit v2.16b, v0.16b, v4.16b
+; CHECK-NEXT: xtn v0.2s, v1.2d
+; CHECK-NEXT: xtn2 v0.4s, v2.2d
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi <4 x float> %x to <4 x i64>
+ %0 = icmp slt <4 x i64> %conv, <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>
+ %spec.store.select = select <4 x i1> %0, <4 x i64> %conv, <4 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>
+ %1 = icmp sgt <4 x i64> %spec.store.select, <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648>
+ %spec.store.select7 = select <4 x i1> %1, <4 x i64> %spec.store.select, <4 x i64> <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648>
+ %conv6 = trunc <4 x i64> %spec.store.select7 to <4 x i32>
+ ret <4 x i32> %conv6
+}
+
+define <4 x i32> @utest_f32i32(<4 x float> %x) {
+; CHECK-LABEL: utest_f32i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtl v2.2d, v0.2s
+; CHECK-NEXT: fcvtl2 v0.2d, v0.4s
+; CHECK-NEXT: movi v1.2d, #0x000000ffffffff
+; CHECK-NEXT: fcvtzu v2.2d, v2.2d
+; CHECK-NEXT: fcvtzu v0.2d, v0.2d
+; CHECK-NEXT: cmhi v3.2d, v1.2d, v2.2d
+; CHECK-NEXT: cmhi v1.2d, v1.2d, v0.2d
+; CHECK-NEXT: and v2.16b, v2.16b, v3.16b
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: orn v2.16b, v2.16b, v3.16b
+; CHECK-NEXT: orn v1.16b, v0.16b, v1.16b
+; CHECK-NEXT: xtn v0.2s, v2.2d
+; CHECK-NEXT: xtn2 v0.4s, v1.2d
+; CHECK-NEXT: ret
+entry:
+ %conv = fptoui <4 x float> %x to <4 x i64>
+ %0 = icmp ult <4 x i64> %conv, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
+ %spec.store.select = select <4 x i1> %0, <4 x i64> %conv, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
+ %conv6 = trunc <4 x i64> %spec.store.select to <4 x i32>
+ ret <4 x i32> %conv6
+}
+
+define <4 x i32> @ustest_f32i32(<4 x float> %x) {
+; CHECK-LABEL: ustest_f32i32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtl v2.2d, v0.2s
+; CHECK-NEXT: fcvtl2 v0.2d, v0.4s
+; CHECK-NEXT: movi v1.2d, #0x000000ffffffff
+; CHECK-NEXT: fcvtzs v2.2d, v2.2d
+; CHECK-NEXT: fcvtzs v0.2d, v0.2d
+; CHECK-NEXT: cmgt v3.2d, v1.2d, v2.2d
+; CHECK-NEXT: cmgt v4.2d, v1.2d, v0.2d
+; CHECK-NEXT: bif v2.16b, v1.16b, v3.16b
+; CHECK-NEXT: bif v0.16b, v1.16b, v4.16b
+; CHECK-NEXT: cmgt v1.2d, v2.2d, #0
+; CHECK-NEXT: cmgt v3.2d, v0.2d, #0
+; CHECK-NEXT: and v1.16b, v2.16b, v1.16b
+; CHECK-NEXT: and v2.16b, v0.16b, v3.16b
+; CHECK-NEXT: xtn v0.2s, v1.2d
+; CHECK-NEXT: xtn2 v0.4s, v2.2d
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi <4 x float> %x to <4 x i64>
+ %0 = icmp slt <4 x i64> %conv, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
+ %spec.store.select = select <4 x i1> %0, <4 x i64> %conv, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
+ %1 = icmp sgt <4 x i64> %spec.store.select, zeroinitializer
+ %spec.store.select7 = select <4 x i1> %1, <4 x i64> %spec.store.select, <4 x i64> zeroinitializer
+ %conv6 = trunc <4 x i64> %spec.store.select7 to <4 x i32>
+ ret <4 x i32> %conv6
+}
+
+define <4 x i32> @stest_f16i32(<4 x half> %x) {
+; CHECK-CVT-LABEL: stest_f16i32:
+; CHECK-CVT: // %bb.0: // %entry
+; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-CVT-NEXT: mov h1, v0.h[2]
+; CHECK-CVT-NEXT: mov h2, v0.h[1]
+; CHECK-CVT-NEXT: fcvt s3, h0
+; CHECK-CVT-NEXT: mov h0, v0.h[3]
+; CHECK-CVT-NEXT: mov w8, #2147483647
+; CHECK-CVT-NEXT: fcvt s1, h1
+; CHECK-CVT-NEXT: fcvt s2, h2
+; CHECK-CVT-NEXT: fcvtzs x9, s3
+; CHECK-CVT-NEXT: fcvt s0, h0
+; CHECK-CVT-NEXT: fcvtzs x10, s1
+; CHECK-CVT-NEXT: fmov d1, x9
+; CHECK-CVT-NEXT: fcvtzs x9, s2
+; CHECK-CVT-NEXT: fmov d2, x10
+; CHECK-CVT-NEXT: fcvtzs x10, s0
+; CHECK-CVT-NEXT: mov v1.d[1], x9
+; CHECK-CVT-NEXT: dup v0.2d, x8
+; CHECK-CVT-NEXT: mov x8, #-2147483648
+; CHECK-CVT-NEXT: mov v2.d[1], x10
+; CHECK-CVT-NEXT: cmgt v3.2d, v0.2d, v1.2d
+; CHECK-CVT-NEXT: cmgt v4.2d, v0.2d, v2.2d
+; CHECK-CVT-NEXT: bif v1.16b, v0.16b, v3.16b
+; CHECK-CVT-NEXT: bit v0.16b, v2.16b, v4.16b
+; CHECK-CVT-NEXT: dup v2.2d, x8
+; CHECK-CVT-NEXT: cmgt v3.2d, v1.2d, v2.2d
+; CHECK-CVT-NEXT: cmgt v4.2d, v0.2d, v2.2d
+; CHECK-CVT-NEXT: bif v1.16b, v2.16b, v3.16b
+; CHECK-CVT-NEXT: bit v2.16b, v0.16b, v4.16b
+; CHECK-CVT-NEXT: xtn v0.2s, v1.2d
+; CHECK-CVT-NEXT: xtn2 v0.4s, v2.2d
+; CHECK-CVT-NEXT: ret
+;
+; CHECK-FP16-LABEL: stest_f16i32:
+; CHECK-FP16: // %bb.0: // %entry
+; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-FP16-NEXT: mov h1, v0.h[2]
+; CHECK-FP16-NEXT: mov h2, v0.h[1]
+; CHECK-FP16-NEXT: fcvtzs x9, h0
+; CHECK-FP16-NEXT: mov h0, v0.h[3]
+; CHECK-FP16-NEXT: mov w8, #2147483647
+; CHECK-FP16-NEXT: fcvtzs x10, h1
+; CHECK-FP16-NEXT: fmov d1, x9
+; CHECK-FP16-NEXT: fcvtzs x9, h2
+; CHECK-FP16-NEXT: fmov d2, x10
+; CHECK-FP16-NEXT: fcvtzs x10, h0
+; CHECK-FP16-NEXT: mov v1.d[1], x9
+; CHECK-FP16-NEXT: dup v0.2d, x8
+; CHECK-FP16-NEXT: mov x8, #-2147483648
+; CHECK-FP16-NEXT: mov v2.d[1], x10
+; CHECK-FP16-NEXT: cmgt v3.2d, v0.2d, v1.2d
+; CHECK-FP16-NEXT: cmgt v4.2d, v0.2d, v2.2d
+; CHECK-FP16-NEXT: bif v1.16b, v0.16b, v3.16b
+; CHECK-FP16-NEXT: bit v0.16b, v2.16b, v4.16b
+; CHECK-FP16-NEXT: dup v2.2d, x8
+; CHECK-FP16-NEXT: cmgt v3.2d, v1.2d, v2.2d
+; CHECK-FP16-NEXT: cmgt v4.2d, v0.2d, v2.2d
+; CHECK-FP16-NEXT: bif v1.16b, v2.16b, v3.16b
+; CHECK-FP16-NEXT: bit v2.16b, v0.16b, v4.16b
+; CHECK-FP16-NEXT: xtn v0.2s, v1.2d
+; CHECK-FP16-NEXT: xtn2 v0.4s, v2.2d
+; CHECK-FP16-NEXT: ret
+entry:
+ %conv = fptosi <4 x half> %x to <4 x i64>
+ %0 = icmp slt <4 x i64> %conv, <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>
+ %spec.store.select = select <4 x i1> %0, <4 x i64> %conv, <4 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>
+ %1 = icmp sgt <4 x i64> %spec.store.select, <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648>
+ %spec.store.select7 = select <4 x i1> %1, <4 x i64> %spec.store.select, <4 x i64> <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648>
+ %conv6 = trunc <4 x i64> %spec.store.select7 to <4 x i32>
+ ret <4 x i32> %conv6
+}
+
+define <4 x i32> @utesth_f16i32(<4 x half> %x) {
+; CHECK-CVT-LABEL: utesth_f16i32:
+; CHECK-CVT: // %bb.0: // %entry
+; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-CVT-NEXT: mov h2, v0.h[2]
+; CHECK-CVT-NEXT: mov h3, v0.h[1]
+; CHECK-CVT-NEXT: fcvt s4, h0
+; CHECK-CVT-NEXT: mov h0, v0.h[3]
+; CHECK-CVT-NEXT: movi v1.2d, #0x000000ffffffff
+; CHECK-CVT-NEXT: fcvt s2, h2
+; CHECK-CVT-NEXT: fcvt s3, h3
+; CHECK-CVT-NEXT: fcvtzu x8, s4
+; CHECK-CVT-NEXT: fcvt s0, h0
+; CHECK-CVT-NEXT: fcvtzu x9, s2
+; CHECK-CVT-NEXT: fmov d2, x8
+; CHECK-CVT-NEXT: fcvtzu x8, s3
+; CHECK-CVT-NEXT: fmov d3, x9
+; CHECK-CVT-NEXT: fcvtzu x9, s0
+; CHECK-CVT-NEXT: mov v2.d[1], x8
+; CHECK-CVT-NEXT: mov v3.d[1], x9
+; CHECK-CVT-NEXT: cmhi v0.2d, v1.2d, v2.2d
+; CHECK-CVT-NEXT: cmhi v1.2d, v1.2d, v3.2d
+; CHECK-CVT-NEXT: and v2.16b, v2.16b, v0.16b
+; CHECK-CVT-NEXT: and v3.16b, v3.16b, v1.16b
+; CHECK-CVT-NEXT: orn v0.16b, v2.16b, v0.16b
+; CHECK-CVT-NEXT: orn v1.16b, v3.16b, v1.16b
+; CHECK-CVT-NEXT: xtn v0.2s, v0.2d
+; CHECK-CVT-NEXT: xtn2 v0.4s, v1.2d
+; CHECK-CVT-NEXT: ret
+;
+; CHECK-FP16-LABEL: utesth_f16i32:
+; CHECK-FP16: // %bb.0: // %entry
+; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-FP16-NEXT: mov h2, v0.h[2]
+; CHECK-FP16-NEXT: mov h3, v0.h[1]
+; CHECK-FP16-NEXT: fcvtzu x8, h0
+; CHECK-FP16-NEXT: mov h0, v0.h[3]
+; CHECK-FP16-NEXT: movi v1.2d, #0x000000ffffffff
+; CHECK-FP16-NEXT: fcvtzu x9, h2
+; CHECK-FP16-NEXT: fmov d2, x8
+; CHECK-FP16-NEXT: fcvtzu x8, h3
+; CHECK-FP16-NEXT: fmov d3, x9
+; CHECK-FP16-NEXT: fcvtzu x9, h0
+; CHECK-FP16-NEXT: mov v2.d[1], x8
+; CHECK-FP16-NEXT: mov v3.d[1], x9
+; CHECK-FP16-NEXT: cmhi v0.2d, v1.2d, v2.2d
+; CHECK-FP16-NEXT: cmhi v1.2d, v1.2d, v3.2d
+; CHECK-FP16-NEXT: and v2.16b, v2.16b, v0.16b
+; CHECK-FP16-NEXT: and v3.16b, v3.16b, v1.16b
+; CHECK-FP16-NEXT: orn v0.16b, v2.16b, v0.16b
+; CHECK-FP16-NEXT: orn v1.16b, v3.16b, v1.16b
+; CHECK-FP16-NEXT: xtn v0.2s, v0.2d
+; CHECK-FP16-NEXT: xtn2 v0.4s, v1.2d
+; CHECK-FP16-NEXT: ret
+entry:
+ %conv = fptoui <4 x half> %x to <4 x i64>
+ %0 = icmp ult <4 x i64> %conv, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
+ %spec.store.select = select <4 x i1> %0, <4 x i64> %conv, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
+ %conv6 = trunc <4 x i64> %spec.store.select to <4 x i32>
+ ret <4 x i32> %conv6
+}
+
+define <4 x i32> @ustest_f16i32(<4 x half> %x) {
+; CHECK-CVT-LABEL: ustest_f16i32:
+; CHECK-CVT: // %bb.0: // %entry
+; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-CVT-NEXT: mov h2, v0.h[2]
+; CHECK-CVT-NEXT: mov h3, v0.h[1]
+; CHECK-CVT-NEXT: fcvt s4, h0
+; CHECK-CVT-NEXT: mov h0, v0.h[3]
+; CHECK-CVT-NEXT: movi v1.2d, #0x000000ffffffff
+; CHECK-CVT-NEXT: fcvt s2, h2
+; CHECK-CVT-NEXT: fcvt s3, h3
+; CHECK-CVT-NEXT: fcvtzs x8, s4
+; CHECK-CVT-NEXT: fcvt s0, h0
+; CHECK-CVT-NEXT: fcvtzs x9, s2
+; CHECK-CVT-NEXT: fmov d2, x8
+; CHECK-CVT-NEXT: fcvtzs x8, s3
+; CHECK-CVT-NEXT: fmov d3, x9
+; CHECK-CVT-NEXT: fcvtzs x9, s0
+; CHECK-CVT-NEXT: mov v2.d[1], x8
+; CHECK-CVT-NEXT: mov v3.d[1], x9
+; CHECK-CVT-NEXT: cmgt v0.2d, v1.2d, v2.2d
+; CHECK-CVT-NEXT: cmgt v4.2d, v1.2d, v3.2d
+; CHECK-CVT-NEXT: bsl v0.16b, v2.16b, v1.16b
+; CHECK-CVT-NEXT: bit v1.16b, v3.16b, v4.16b
+; CHECK-CVT-NEXT: cmgt v2.2d, v0.2d, #0
+; CHECK-CVT-NEXT: cmgt v3.2d, v1.2d, #0
+; CHECK-CVT-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-CVT-NEXT: and v1.16b, v1.16b, v3.16b
+; CHECK-CVT-NEXT: xtn v0.2s, v0.2d
+; CHECK-CVT-NEXT: xtn2 v0.4s, v1.2d
+; CHECK-CVT-NEXT: ret
+;
+; CHECK-FP16-LABEL: ustest_f16i32:
+; CHECK-FP16: // %bb.0: // %entry
+; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-FP16-NEXT: mov h2, v0.h[2]
+; CHECK-FP16-NEXT: mov h3, v0.h[1]
+; CHECK-FP16-NEXT: fcvtzs x8, h0
+; CHECK-FP16-NEXT: mov h0, v0.h[3]
+; CHECK-FP16-NEXT: movi v1.2d, #0x000000ffffffff
+; CHECK-FP16-NEXT: fcvtzs x9, h2
+; CHECK-FP16-NEXT: fmov d2, x8
+; CHECK-FP16-NEXT: fcvtzs x8, h3
+; CHECK-FP16-NEXT: fmov d3, x9
+; CHECK-FP16-NEXT: fcvtzs x9, h0
+; CHECK-FP16-NEXT: mov v2.d[1], x8
+; CHECK-FP16-NEXT: mov v3.d[1], x9
+; CHECK-FP16-NEXT: cmgt v0.2d, v1.2d, v2.2d
+; CHECK-FP16-NEXT: cmgt v4.2d, v1.2d, v3.2d
+; CHECK-FP16-NEXT: bsl v0.16b, v2.16b, v1.16b
+; CHECK-FP16-NEXT: bit v1.16b, v3.16b, v4.16b
+; CHECK-FP16-NEXT: cmgt v2.2d, v0.2d, #0
+; CHECK-FP16-NEXT: cmgt v3.2d, v1.2d, #0
+; CHECK-FP16-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-FP16-NEXT: and v1.16b, v1.16b, v3.16b
+; CHECK-FP16-NEXT: xtn v0.2s, v0.2d
+; CHECK-FP16-NEXT: xtn2 v0.4s, v1.2d
+; CHECK-FP16-NEXT: ret
+entry:
+ %conv = fptosi <4 x half> %x to <4 x i64>
+ %0 = icmp slt <4 x i64> %conv, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
+ %spec.store.select = select <4 x i1> %0, <4 x i64> %conv, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
+ %1 = icmp sgt <4 x i64> %spec.store.select, zeroinitializer
+ %spec.store.select7 = select <4 x i1> %1, <4 x i64> %spec.store.select, <4 x i64> zeroinitializer
+ %conv6 = trunc <4 x i64> %spec.store.select7 to <4 x i32>
+ ret <4 x i32> %conv6
+}
+
+; i16 saturate
+
+define <2 x i16> @stest_f64i16(<2 x double> %x) {
+; CHECK-LABEL: stest_f64i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs v0.2d, v0.2d
+; CHECK-NEXT: movi v1.2s, #127, msl #8
+; CHECK-NEXT: xtn v0.2s, v0.2d
+; CHECK-NEXT: smin v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: mvni v1.2s, #127, msl #8
+; CHECK-NEXT: smax v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi <2 x double> %x to <2 x i32>
+ %0 = icmp slt <2 x i32> %conv, <i32 32767, i32 32767>
+ %spec.store.select = select <2 x i1> %0, <2 x i32> %conv, <2 x i32> <i32 32767, i32 32767>
+ %1 = icmp sgt <2 x i32> %spec.store.select, <i32 -32768, i32 -32768>
+ %spec.store.select7 = select <2 x i1> %1, <2 x i32> %spec.store.select, <2 x i32> <i32 -32768, i32 -32768>
+ %conv6 = trunc <2 x i32> %spec.store.select7 to <2 x i16>
+ ret <2 x i16> %conv6
+}
+
+define <2 x i16> @utest_f64i16(<2 x double> %x) {
+; CHECK-LABEL: utest_f64i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzu v0.2d, v0.2d
+; CHECK-NEXT: movi d1, #0x00ffff0000ffff
+; CHECK-NEXT: xtn v0.2s, v0.2d
+; CHECK-NEXT: umin v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: ret
+entry:
+ %conv = fptoui <2 x double> %x to <2 x i32>
+ %0 = icmp ult <2 x i32> %conv, <i32 65535, i32 65535>
+ %spec.store.select = select <2 x i1> %0, <2 x i32> %conv, <2 x i32> <i32 65535, i32 65535>
+ %conv6 = trunc <2 x i32> %spec.store.select to <2 x i16>
+ ret <2 x i16> %conv6
+}
+
+define <2 x i16> @ustest_f64i16(<2 x double> %x) {
+; CHECK-LABEL: ustest_f64i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs v0.2d, v0.2d
+; CHECK-NEXT: movi d1, #0x00ffff0000ffff
+; CHECK-NEXT: movi v2.2d, #0000000000000000
+; CHECK-NEXT: xtn v0.2s, v0.2d
+; CHECK-NEXT: smin v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: smax v0.2s, v0.2s, v2.2s
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi <2 x double> %x to <2 x i32>
+ %0 = icmp slt <2 x i32> %conv, <i32 65535, i32 65535>
+ %spec.store.select = select <2 x i1> %0, <2 x i32> %conv, <2 x i32> <i32 65535, i32 65535>
+ %1 = icmp sgt <2 x i32> %spec.store.select, zeroinitializer
+ %spec.store.select7 = select <2 x i1> %1, <2 x i32> %spec.store.select, <2 x i32> zeroinitializer
+ %conv6 = trunc <2 x i32> %spec.store.select7 to <2 x i16>
+ ret <2 x i16> %conv6
+}
+
+define <4 x i16> @stest_f32i16(<4 x float> %x) {
+; CHECK-LABEL: stest_f32i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NEXT: sqxtn v0.4h, v0.4s
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi <4 x float> %x to <4 x i32>
+ %0 = icmp slt <4 x i32> %conv, <i32 32767, i32 32767, i32 32767, i32 32767>
+ %spec.store.select = select <4 x i1> %0, <4 x i32> %conv, <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767>
+ %1 = icmp sgt <4 x i32> %spec.store.select, <i32 -32768, i32 -32768, i32 -32768, i32 -32768>
+ %spec.store.select7 = select <4 x i1> %1, <4 x i32> %spec.store.select, <4 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768>
+ %conv6 = trunc <4 x i32> %spec.store.select7 to <4 x i16>
+ ret <4 x i16> %conv6
+}
+
+define <4 x i16> @utest_f32i16(<4 x float> %x) {
+; CHECK-LABEL: utest_f32i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-NEXT: uqxtn v0.4h, v0.4s
+; CHECK-NEXT: ret
+entry:
+ %conv = fptoui <4 x float> %x to <4 x i32>
+ %0 = icmp ult <4 x i32> %conv, <i32 65535, i32 65535, i32 65535, i32 65535>
+ %spec.store.select = select <4 x i1> %0, <4 x i32> %conv, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>
+ %conv6 = trunc <4 x i32> %spec.store.select to <4 x i16>
+ ret <4 x i16> %conv6
+}
+
+define <4 x i16> @ustest_f32i16(<4 x float> %x) {
+; CHECK-LABEL: ustest_f32i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff
+; CHECK-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: movi v1.2d, #0000000000000000
+; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: xtn v0.4h, v0.4s
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi <4 x float> %x to <4 x i32>
+ %0 = icmp slt <4 x i32> %conv, <i32 65535, i32 65535, i32 65535, i32 65535>
+ %spec.store.select = select <4 x i1> %0, <4 x i32> %conv, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>
+ %1 = icmp sgt <4 x i32> %spec.store.select, zeroinitializer
+ %spec.store.select7 = select <4 x i1> %1, <4 x i32> %spec.store.select, <4 x i32> zeroinitializer
+ %conv6 = trunc <4 x i32> %spec.store.select7 to <4 x i16>
+ ret <4 x i16> %conv6
+}
+
+; stest: v8f16 -> v8i16 via signed conversion clamped to the signed-i16
+; range [-32768, 32767].
+define <8 x i16> @stest_f16i16(<8 x half> %x) {
+; CHECK-LABEL: stest_f16i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtl2 v2.4s, v0.8h
+; CHECK-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NEXT: movi v1.4s, #127, msl #8
+; CHECK-NEXT: mvni v3.4s, #127, msl #8
+; CHECK-NEXT: fcvtzs v2.4s, v2.4s
+; CHECK-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s
+; CHECK-NEXT: sqxtn v0.4h, v0.4s
+; CHECK-NEXT: smax v1.4s, v1.4s, v3.4s
+; CHECK-NEXT: xtn2 v0.8h, v1.4s
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi <8 x half> %x to <8 x i32>
+ %0 = icmp slt <8 x i32> %conv, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
+ %spec.store.select = select <8 x i1> %0, <8 x i32> %conv, <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
+ %1 = icmp sgt <8 x i32> %spec.store.select, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
+ %spec.store.select7 = select <8 x i1> %1, <8 x i32> %spec.store.select, <8 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
+ %conv6 = trunc <8 x i32> %spec.store.select7 to <8 x i16>
+ ret <8 x i16> %conv6
+}
+
+; utest: v8f16 -> v8i16 via unsigned conversion clamped at 65535
+; (unsigned-i16 max; single ult compare+select).
+define <8 x i16> @utesth_f16i16(<8 x half> %x) {
+; CHECK-LABEL: utesth_f16i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtl2 v2.4s, v0.8h
+; CHECK-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff
+; CHECK-NEXT: fcvtzu v2.4s, v2.4s
+; CHECK-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s
+; CHECK-NEXT: uqxtn v0.4h, v0.4s
+; CHECK-NEXT: xtn2 v0.8h, v1.4s
+; CHECK-NEXT: ret
+entry:
+ %conv = fptoui <8 x half> %x to <8 x i32>
+ %0 = icmp ult <8 x i32> %conv, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
+ %spec.store.select = select <8 x i1> %0, <8 x i32> %conv, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
+ %conv6 = trunc <8 x i32> %spec.store.select to <8 x i16>
+ ret <8 x i16> %conv6
+}
+
+; ustest: v8f16 -> v8i16 via signed conversion clamped to [0, 65535].
+define <8 x i16> @ustest_f16i16(<8 x half> %x) {
+; CHECK-LABEL: ustest_f16i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtl v2.4s, v0.4h
+; CHECK-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff
+; CHECK-NEXT: movi v3.2d, #0000000000000000
+; CHECK-NEXT: fcvtzs v2.4s, v2.4s
+; CHECK-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NEXT: smin v2.4s, v2.4s, v1.4s
+; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: smax v1.4s, v2.4s, v3.4s
+; CHECK-NEXT: smax v2.4s, v0.4s, v3.4s
+; CHECK-NEXT: xtn v0.4h, v1.4s
+; CHECK-NEXT: xtn2 v0.8h, v2.4s
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi <8 x half> %x to <8 x i32>
+ %0 = icmp slt <8 x i32> %conv, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
+ %spec.store.select = select <8 x i1> %0, <8 x i32> %conv, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
+ %1 = icmp sgt <8 x i32> %spec.store.select, zeroinitializer
+ %spec.store.select7 = select <8 x i1> %1, <8 x i32> %spec.store.select, <8 x i32> zeroinitializer
+ %conv6 = trunc <8 x i32> %spec.store.select7 to <8 x i16>
+ ret <8 x i16> %conv6
+}
+
+; i64 saturate
+
+; stest: v2f64 -> v2i64 via i128 (each lane converted with the __fixdfti
+; libcall) clamped to the signed-i64 range [INT64_MIN, INT64_MAX].
+define <2 x i64> @stest_f64i64(<2 x double> %x) {
+; CHECK-LABEL: stest_f64i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub sp, sp, #48
+; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w20, -16
+; CHECK-NEXT: .cfi_offset w30, -32
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: bl __fixdfti
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: mov x20, x1
+; CHECK-NEXT: mov d0, v0.d[1]
+; CHECK-NEXT: bl __fixdfti
+; CHECK-NEXT: mov x8, #9223372036854775807
+; CHECK-NEXT: mov x12, #-9223372036854775808
+; CHECK-NEXT: cmp x0, x8
+; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: cset w9, lo
+; CHECK-NEXT: cmp x1, #0
+; CHECK-NEXT: cset w10, lt
+; CHECK-NEXT: csel w9, w9, w10, eq
+; CHECK-NEXT: cmp x19, x8
+; CHECK-NEXT: cset w10, lo
+; CHECK-NEXT: cmp x20, #0
+; CHECK-NEXT: cset w11, lt
+; CHECK-NEXT: csel w10, w10, w11, eq
+; CHECK-NEXT: cmp w10, #0
+; CHECK-NEXT: csel x10, x19, x8, ne
+; CHECK-NEXT: csel x11, x20, xzr, ne
+; CHECK-NEXT: cmp w9, #0
+; CHECK-NEXT: csel x9, x1, xzr, ne
+; CHECK-NEXT: csel x8, x0, x8, ne
+; CHECK-NEXT: cmp x9, #0
+; CHECK-NEXT: cset w13, ge
+; CHECK-NEXT: cmp x8, x12
+; CHECK-NEXT: cset w14, hi
+; CHECK-NEXT: cmn x9, #1
+; CHECK-NEXT: csel w9, w14, w13, eq
+; CHECK-NEXT: cmp x11, #0
+; CHECK-NEXT: cset w13, ge
+; CHECK-NEXT: cmp x10, x12
+; CHECK-NEXT: cset w14, hi
+; CHECK-NEXT: cmn x11, #1
+; CHECK-NEXT: csel w11, w14, w13, eq
+; CHECK-NEXT: cmp w11, #0
+; CHECK-NEXT: csel x10, x10, x12, ne
+; CHECK-NEXT: cmp w9, #0
+; CHECK-NEXT: csel x8, x8, x12, ne
+; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: fmov d0, x10
+; CHECK-NEXT: fmov d1, x8
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: add sp, sp, #48
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi <2 x double> %x to <2 x i128>
+ %0 = icmp slt <2 x i128> %conv, <i128 9223372036854775807, i128 9223372036854775807>
+ %spec.store.select = select <2 x i1> %0, <2 x i128> %conv, <2 x i128> <i128 9223372036854775807, i128 9223372036854775807>
+ %1 = icmp sgt <2 x i128> %spec.store.select, <i128 -9223372036854775808, i128 -9223372036854775808>
+ %spec.store.select7 = select <2 x i1> %1, <2 x i128> %spec.store.select, <2 x i128> <i128 -9223372036854775808, i128 -9223372036854775808>
+ %conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+; utest: v2f64 -> v2i64 via unsigned i128 (__fixunsdfti libcall per lane)
+; clamped at 2^64 before truncating to i64.
+define <2 x i64> @utest_f64i64(<2 x double> %x) {
+; CHECK-LABEL: utest_f64i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub sp, sp, #48
+; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w20, -16
+; CHECK-NEXT: .cfi_offset w30, -32
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: mov d0, v0.d[1]
+; CHECK-NEXT: bl __fixunsdfti
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: mov x20, x1
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: bl __fixunsdfti
+; CHECK-NEXT: cmp x1, #0
+; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: csel x8, x0, xzr, eq
+; CHECK-NEXT: cmp x20, #0
+; CHECK-NEXT: csel x9, x19, xzr, eq
+; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: fmov d1, x9
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: add sp, sp, #48
+; CHECK-NEXT: ret
+entry:
+ %conv = fptoui <2 x double> %x to <2 x i128>
+ %0 = icmp ult <2 x i128> %conv, <i128 18446744073709551616, i128 18446744073709551616>
+ %spec.store.select = select <2 x i1> %0, <2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>
+ %conv6 = trunc <2 x i128> %spec.store.select to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+; ustest: v2f64 -> v2i64 via signed i128 (__fixdfti libcall per lane)
+; clamped to [0, 2^64] before truncating to i64.
+define <2 x i64> @ustest_f64i64(<2 x double> %x) {
+; CHECK-LABEL: ustest_f64i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub sp, sp, #48
+; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w20, -16
+; CHECK-NEXT: .cfi_offset w30, -32
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: mov d0, v0.d[1]
+; CHECK-NEXT: bl __fixdfti
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: mov x20, x1
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: bl __fixdfti
+; CHECK-NEXT: cmp x1, #1
+; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: csel x8, x0, xzr, lt
+; CHECK-NEXT: csinc x9, x1, xzr, lt
+; CHECK-NEXT: cmp x20, #1
+; CHECK-NEXT: csel x10, x19, xzr, lt
+; CHECK-NEXT: csinc x11, x20, xzr, lt
+; CHECK-NEXT: cmp x10, #0
+; CHECK-NEXT: cset w12, ne
+; CHECK-NEXT: cmp x11, #0
+; CHECK-NEXT: cset w11, gt
+; CHECK-NEXT: csel w11, w12, w11, eq
+; CHECK-NEXT: cmp x8, #0
+; CHECK-NEXT: cset w12, ne
+; CHECK-NEXT: cmp x9, #0
+; CHECK-NEXT: cset w9, gt
+; CHECK-NEXT: csel w9, w12, w9, eq
+; CHECK-NEXT: cmp w9, #0
+; CHECK-NEXT: csel x8, x8, xzr, ne
+; CHECK-NEXT: cmp w11, #0
+; CHECK-NEXT: csel x9, x10, xzr, ne
+; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: fmov d1, x9
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: add sp, sp, #48
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi <2 x double> %x to <2 x i128>
+ %0 = icmp slt <2 x i128> %conv, <i128 18446744073709551616, i128 18446744073709551616>
+ %spec.store.select = select <2 x i1> %0, <2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>
+ %1 = icmp sgt <2 x i128> %spec.store.select, zeroinitializer
+ %spec.store.select7 = select <2 x i1> %1, <2 x i128> %spec.store.select, <2 x i128> zeroinitializer
+ %conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+; stest: v2f32 -> v2i64 via i128 (__fixsfti libcall per lane) clamped to
+; the signed-i64 range [INT64_MIN, INT64_MAX].
+define <2 x i64> @stest_f32i64(<2 x float> %x) {
+; CHECK-LABEL: stest_f32i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub sp, sp, #48
+; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w20, -16
+; CHECK-NEXT: .cfi_offset w30, -32
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
+; CHECK-NEXT: bl __fixsfti
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: mov x20, x1
+; CHECK-NEXT: mov s0, v0.s[1]
+; CHECK-NEXT: bl __fixsfti
+; CHECK-NEXT: mov x8, #9223372036854775807
+; CHECK-NEXT: mov x12, #-9223372036854775808
+; CHECK-NEXT: cmp x0, x8
+; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: cset w9, lo
+; CHECK-NEXT: cmp x1, #0
+; CHECK-NEXT: cset w10, lt
+; CHECK-NEXT: csel w9, w9, w10, eq
+; CHECK-NEXT: cmp x19, x8
+; CHECK-NEXT: cset w10, lo
+; CHECK-NEXT: cmp x20, #0
+; CHECK-NEXT: cset w11, lt
+; CHECK-NEXT: csel w10, w10, w11, eq
+; CHECK-NEXT: cmp w10, #0
+; CHECK-NEXT: csel x10, x19, x8, ne
+; CHECK-NEXT: csel x11, x20, xzr, ne
+; CHECK-NEXT: cmp w9, #0
+; CHECK-NEXT: csel x9, x1, xzr, ne
+; CHECK-NEXT: csel x8, x0, x8, ne
+; CHECK-NEXT: cmp x9, #0
+; CHECK-NEXT: cset w13, ge
+; CHECK-NEXT: cmp x8, x12
+; CHECK-NEXT: cset w14, hi
+; CHECK-NEXT: cmn x9, #1
+; CHECK-NEXT: csel w9, w14, w13, eq
+; CHECK-NEXT: cmp x11, #0
+; CHECK-NEXT: cset w13, ge
+; CHECK-NEXT: cmp x10, x12
+; CHECK-NEXT: cset w14, hi
+; CHECK-NEXT: cmn x11, #1
+; CHECK-NEXT: csel w11, w14, w13, eq
+; CHECK-NEXT: cmp w11, #0
+; CHECK-NEXT: csel x10, x10, x12, ne
+; CHECK-NEXT: cmp w9, #0
+; CHECK-NEXT: csel x8, x8, x12, ne
+; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: fmov d0, x10
+; CHECK-NEXT: fmov d1, x8
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: add sp, sp, #48
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi <2 x float> %x to <2 x i128>
+ %0 = icmp slt <2 x i128> %conv, <i128 9223372036854775807, i128 9223372036854775807>
+ %spec.store.select = select <2 x i1> %0, <2 x i128> %conv, <2 x i128> <i128 9223372036854775807, i128 9223372036854775807>
+ %1 = icmp sgt <2 x i128> %spec.store.select, <i128 -9223372036854775808, i128 -9223372036854775808>
+ %spec.store.select7 = select <2 x i1> %1, <2 x i128> %spec.store.select, <2 x i128> <i128 -9223372036854775808, i128 -9223372036854775808>
+ %conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+; utest: v2f32 -> v2i64 via unsigned i128 (__fixunssfti libcall per lane)
+; clamped at 2^64.
+define <2 x i64> @utest_f32i64(<2 x float> %x) {
+; CHECK-LABEL: utest_f32i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub sp, sp, #48
+; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w20, -16
+; CHECK-NEXT: .cfi_offset w30, -32
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: mov s0, v0.s[1]
+; CHECK-NEXT: bl __fixunssfti
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: mov x20, x1
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
+; CHECK-NEXT: bl __fixunssfti
+; CHECK-NEXT: cmp x1, #0
+; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: csel x8, x0, xzr, eq
+; CHECK-NEXT: cmp x20, #0
+; CHECK-NEXT: csel x9, x19, xzr, eq
+; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: fmov d1, x9
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: add sp, sp, #48
+; CHECK-NEXT: ret
+entry:
+ %conv = fptoui <2 x float> %x to <2 x i128>
+ %0 = icmp ult <2 x i128> %conv, <i128 18446744073709551616, i128 18446744073709551616>
+ %spec.store.select = select <2 x i1> %0, <2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>
+ %conv6 = trunc <2 x i128> %spec.store.select to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+; ustest: v2f32 -> v2i64 via signed i128 (__fixsfti libcall per lane)
+; clamped to [0, 2^64].
+define <2 x i64> @ustest_f32i64(<2 x float> %x) {
+; CHECK-LABEL: ustest_f32i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub sp, sp, #48
+; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w20, -16
+; CHECK-NEXT: .cfi_offset w30, -32
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: mov s0, v0.s[1]
+; CHECK-NEXT: bl __fixsfti
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: mov x20, x1
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
+; CHECK-NEXT: bl __fixsfti
+; CHECK-NEXT: cmp x1, #1
+; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: csinc x8, x1, xzr, lt
+; CHECK-NEXT: csel x9, x0, xzr, lt
+; CHECK-NEXT: cmp x20, #1
+; CHECK-NEXT: csel x10, x19, xzr, lt
+; CHECK-NEXT: csinc x11, x20, xzr, lt
+; CHECK-NEXT: cmp x10, #0
+; CHECK-NEXT: cset w12, ne
+; CHECK-NEXT: cmp x11, #0
+; CHECK-NEXT: cset w11, gt
+; CHECK-NEXT: csel w11, w12, w11, eq
+; CHECK-NEXT: cmp x9, #0
+; CHECK-NEXT: cset w12, ne
+; CHECK-NEXT: cmp x8, #0
+; CHECK-NEXT: cset w8, gt
+; CHECK-NEXT: csel w8, w12, w8, eq
+; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: csel x8, x9, xzr, ne
+; CHECK-NEXT: cmp w11, #0
+; CHECK-NEXT: csel x9, x10, xzr, ne
+; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: fmov d1, x9
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: add sp, sp, #48
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi <2 x float> %x to <2 x i128>
+ %0 = icmp slt <2 x i128> %conv, <i128 18446744073709551616, i128 18446744073709551616>
+ %spec.store.select = select <2 x i1> %0, <2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>
+ %1 = icmp sgt <2 x i128> %spec.store.select, zeroinitializer
+ %spec.store.select7 = select <2 x i1> %1, <2 x i128> %spec.store.select, <2 x i128> zeroinitializer
+ %conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+; stest: v2f16 -> v2i64 via i128 (__fixhfti libcall per lane) clamped to
+; the signed-i64 range [INT64_MIN, INT64_MAX].
+define <2 x i64> @stest_f16i64(<2 x half> %x) {
+; CHECK-LABEL: stest_f16i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub sp, sp, #48
+; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w20, -16
+; CHECK-NEXT: .cfi_offset w30, -32
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $h0 killed $h0 killed $q0
+; CHECK-NEXT: bl __fixhfti
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: mov x20, x1
+; CHECK-NEXT: mov h0, v0.h[1]
+; CHECK-NEXT: bl __fixhfti
+; CHECK-NEXT: mov x8, #9223372036854775807
+; CHECK-NEXT: mov x12, #-9223372036854775808
+; CHECK-NEXT: cmp x0, x8
+; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: cset w9, lo
+; CHECK-NEXT: cmp x1, #0
+; CHECK-NEXT: cset w10, lt
+; CHECK-NEXT: csel w9, w9, w10, eq
+; CHECK-NEXT: cmp x19, x8
+; CHECK-NEXT: cset w10, lo
+; CHECK-NEXT: cmp x20, #0
+; CHECK-NEXT: cset w11, lt
+; CHECK-NEXT: csel w10, w10, w11, eq
+; CHECK-NEXT: cmp w10, #0
+; CHECK-NEXT: csel x10, x19, x8, ne
+; CHECK-NEXT: csel x11, x20, xzr, ne
+; CHECK-NEXT: cmp w9, #0
+; CHECK-NEXT: csel x9, x1, xzr, ne
+; CHECK-NEXT: csel x8, x0, x8, ne
+; CHECK-NEXT: cmp x9, #0
+; CHECK-NEXT: cset w13, ge
+; CHECK-NEXT: cmp x8, x12
+; CHECK-NEXT: cset w14, hi
+; CHECK-NEXT: cmn x9, #1
+; CHECK-NEXT: csel w9, w14, w13, eq
+; CHECK-NEXT: cmp x11, #0
+; CHECK-NEXT: cset w13, ge
+; CHECK-NEXT: cmp x10, x12
+; CHECK-NEXT: cset w14, hi
+; CHECK-NEXT: cmn x11, #1
+; CHECK-NEXT: csel w11, w14, w13, eq
+; CHECK-NEXT: cmp w11, #0
+; CHECK-NEXT: csel x10, x10, x12, ne
+; CHECK-NEXT: cmp w9, #0
+; CHECK-NEXT: csel x8, x8, x12, ne
+; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: fmov d0, x10
+; CHECK-NEXT: fmov d1, x8
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: add sp, sp, #48
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi <2 x half> %x to <2 x i128>
+ %0 = icmp slt <2 x i128> %conv, <i128 9223372036854775807, i128 9223372036854775807>
+ %spec.store.select = select <2 x i1> %0, <2 x i128> %conv, <2 x i128> <i128 9223372036854775807, i128 9223372036854775807>
+ %1 = icmp sgt <2 x i128> %spec.store.select, <i128 -9223372036854775808, i128 -9223372036854775808>
+ %spec.store.select7 = select <2 x i1> %1, <2 x i128> %spec.store.select, <2 x i128> <i128 -9223372036854775808, i128 -9223372036854775808>
+ %conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+; utest: v2f16 -> v2i64 via unsigned i128 (__fixunshfti libcall per lane)
+; clamped at 2^64.
+define <2 x i64> @utesth_f16i64(<2 x half> %x) {
+; CHECK-LABEL: utesth_f16i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub sp, sp, #48
+; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w20, -16
+; CHECK-NEXT: .cfi_offset w30, -32
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: mov h0, v0.h[1]
+; CHECK-NEXT: bl __fixunshfti
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: mov x20, x1
+; CHECK-NEXT: // kill: def $h0 killed $h0 killed $q0
+; CHECK-NEXT: bl __fixunshfti
+; CHECK-NEXT: cmp x1, #0
+; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: csel x8, x0, xzr, eq
+; CHECK-NEXT: cmp x20, #0
+; CHECK-NEXT: csel x9, x19, xzr, eq
+; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: fmov d1, x9
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: add sp, sp, #48
+; CHECK-NEXT: ret
+entry:
+ %conv = fptoui <2 x half> %x to <2 x i128>
+ %0 = icmp ult <2 x i128> %conv, <i128 18446744073709551616, i128 18446744073709551616>
+ %spec.store.select = select <2 x i1> %0, <2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>
+ %conv6 = trunc <2 x i128> %spec.store.select to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+; ustest: v2f16 -> v2i64 via signed i128 (__fixhfti libcall per lane)
+; clamped to [0, 2^64].
+define <2 x i64> @ustest_f16i64(<2 x half> %x) {
+; CHECK-LABEL: ustest_f16i64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub sp, sp, #48
+; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w20, -16
+; CHECK-NEXT: .cfi_offset w30, -32
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: mov h0, v0.h[1]
+; CHECK-NEXT: bl __fixhfti
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: mov x20, x1
+; CHECK-NEXT: // kill: def $h0 killed $h0 killed $q0
+; CHECK-NEXT: bl __fixhfti
+; CHECK-NEXT: cmp x1, #1
+; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: csinc x8, x1, xzr, lt
+; CHECK-NEXT: csel x9, x0, xzr, lt
+; CHECK-NEXT: cmp x20, #1
+; CHECK-NEXT: csel x10, x19, xzr, lt
+; CHECK-NEXT: csinc x11, x20, xzr, lt
+; CHECK-NEXT: cmp x10, #0
+; CHECK-NEXT: cset w12, ne
+; CHECK-NEXT: cmp x11, #0
+; CHECK-NEXT: cset w11, gt
+; CHECK-NEXT: csel w11, w12, w11, eq
+; CHECK-NEXT: cmp x9, #0
+; CHECK-NEXT: cset w12, ne
+; CHECK-NEXT: cmp x8, #0
+; CHECK-NEXT: cset w8, gt
+; CHECK-NEXT: csel w8, w12, w8, eq
+; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: csel x8, x9, xzr, ne
+; CHECK-NEXT: cmp w11, #0
+; CHECK-NEXT: csel x9, x10, xzr, ne
+; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: fmov d1, x9
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: add sp, sp, #48
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi <2 x half> %x to <2 x i128>
+ %0 = icmp slt <2 x i128> %conv, <i128 18446744073709551616, i128 18446744073709551616>
+ %spec.store.select = select <2 x i1> %0, <2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>
+ %1 = icmp sgt <2 x i128> %spec.store.select, zeroinitializer
+ %spec.store.select7 = select <2 x i1> %1, <2 x i128> %spec.store.select, <2 x i128> zeroinitializer
+ %conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+
+
+; i32 saturate — same clamping patterns as above, but expressed with the
+; llvm.smin/llvm.smax/llvm.umin intrinsics (the _mm variants) instead of
+; icmp+select.
+
+; As stest_f64i32 but clamped with llvm.smin/llvm.smax intrinsics
+; (signed-i32 range [-2147483648, 2147483647]).
+define <2 x i32> @stest_f64i32_mm(<2 x double> %x) {
+; CHECK-LABEL: stest_f64i32_mm:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #2147483647
+; CHECK-NEXT: fcvtzs v0.2d, v0.2d
+; CHECK-NEXT: dup v1.2d, x8
+; CHECK-NEXT: mov x8, #-2147483648
+; CHECK-NEXT: cmgt v2.2d, v1.2d, v0.2d
+; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
+; CHECK-NEXT: dup v1.2d, x8
+; CHECK-NEXT: cmgt v2.2d, v0.2d, v1.2d
+; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
+; CHECK-NEXT: xtn v0.2s, v0.2d
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi <2 x double> %x to <2 x i64>
+ %spec.store.select = call <2 x i64> @llvm.smin.v2i64(<2 x i64> %conv, <2 x i64> <i64 2147483647, i64 2147483647>)
+ %spec.store.select7 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> %spec.store.select, <2 x i64> <i64 -2147483648, i64 -2147483648>)
+ %conv6 = trunc <2 x i64> %spec.store.select7 to <2 x i32>
+ ret <2 x i32> %conv6
+}
+
+; As utest_f64i32 but clamped with the llvm.umin intrinsic (cap at
+; 4294967295, the unsigned-i32 max).
+define <2 x i32> @utest_f64i32_mm(<2 x double> %x) {
+; CHECK-LABEL: utest_f64i32_mm:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movi v1.2d, #0x000000ffffffff
+; CHECK-NEXT: fcvtzu v0.2d, v0.2d
+; CHECK-NEXT: cmhi v1.2d, v1.2d, v0.2d
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: orn v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: xtn v0.2s, v0.2d
+; CHECK-NEXT: ret
+entry:
+ %conv = fptoui <2 x double> %x to <2 x i64>
+ %spec.store.select = call <2 x i64> @llvm.umin.v2i64(<2 x i64> %conv, <2 x i64> <i64 4294967295, i64 4294967295>)
+ %conv6 = trunc <2 x i64> %spec.store.select to <2 x i32>
+ ret <2 x i32> %conv6
+}
+
+; As ustest_f64i32 but clamped with llvm.smin/llvm.smax intrinsics to
+; [0, 4294967295] after a signed conversion.
+define <2 x i32> @ustest_f64i32_mm(<2 x double> %x) {
+; CHECK-LABEL: ustest_f64i32_mm:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movi v1.2d, #0x000000ffffffff
+; CHECK-NEXT: fcvtzs v0.2d, v0.2d
+; CHECK-NEXT: cmgt v2.2d, v1.2d, v0.2d
+; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
+; CHECK-NEXT: cmgt v1.2d, v0.2d, #0
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: xtn v0.2s, v0.2d
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi <2 x double> %x to <2 x i64>
+ %spec.store.select = call <2 x i64> @llvm.smin.v2i64(<2 x i64> %conv, <2 x i64> <i64 4294967295, i64 4294967295>)
+ %spec.store.select7 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> %spec.store.select, <2 x i64> zeroinitializer)
+ %conv6 = trunc <2 x i64> %spec.store.select7 to <2 x i32>
+ ret <2 x i32> %conv6
+}
+
+; As stest_f32i32 but clamped with llvm.smin/llvm.smax intrinsics
+; (v4f32 -> v4i64 -> clamp to signed-i32 range -> v4i32).
+define <4 x i32> @stest_f32i32_mm(<4 x float> %x) {
+; CHECK-LABEL: stest_f32i32_mm:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtl v1.2d, v0.2s
+; CHECK-NEXT: fcvtl2 v0.2d, v0.4s
+; CHECK-NEXT: mov w8, #2147483647
+; CHECK-NEXT: fcvtzs v1.2d, v1.2d
+; CHECK-NEXT: fcvtzs v0.2d, v0.2d
+; CHECK-NEXT: dup v2.2d, x8
+; CHECK-NEXT: mov x8, #-2147483648
+; CHECK-NEXT: cmgt v3.2d, v2.2d, v1.2d
+; CHECK-NEXT: cmgt v4.2d, v2.2d, v0.2d
+; CHECK-NEXT: bif v1.16b, v2.16b, v3.16b
+; CHECK-NEXT: bif v0.16b, v2.16b, v4.16b
+; CHECK-NEXT: dup v2.2d, x8
+; CHECK-NEXT: cmgt v3.2d, v1.2d, v2.2d
+; CHECK-NEXT: cmgt v4.2d, v0.2d, v2.2d
+; CHECK-NEXT: bif v1.16b, v2.16b, v3.16b
+; CHECK-NEXT: bit v2.16b, v0.16b, v4.16b
+; CHECK-NEXT: xtn v0.2s, v1.2d
+; CHECK-NEXT: xtn2 v0.4s, v2.2d
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi <4 x float> %x to <4 x i64>
+ %spec.store.select = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>)
+ %spec.store.select7 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> %spec.store.select, <4 x i64> <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648>)
+ %conv6 = trunc <4 x i64> %spec.store.select7 to <4 x i32>
+ ret <4 x i32> %conv6
+}
+
+; As utest_f32i32 but clamped with the llvm.umin intrinsic (cap at
+; 4294967295 after an unsigned v4f32 -> v4i64 conversion).
+define <4 x i32> @utest_f32i32_mm(<4 x float> %x) {
+; CHECK-LABEL: utest_f32i32_mm:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtl v2.2d, v0.2s
+; CHECK-NEXT: fcvtl2 v0.2d, v0.4s
+; CHECK-NEXT: movi v1.2d, #0x000000ffffffff
+; CHECK-NEXT: fcvtzu v2.2d, v2.2d
+; CHECK-NEXT: fcvtzu v0.2d, v0.2d
+; CHECK-NEXT: cmhi v3.2d, v1.2d, v2.2d
+; CHECK-NEXT: cmhi v1.2d, v1.2d, v0.2d
+; CHECK-NEXT: and v2.16b, v2.16b, v3.16b
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: orn v2.16b, v2.16b, v3.16b
+; CHECK-NEXT: orn v1.16b, v0.16b, v1.16b
+; CHECK-NEXT: xtn v0.2s, v2.2d
+; CHECK-NEXT: xtn2 v0.4s, v1.2d
+; CHECK-NEXT: ret
+entry:
+ %conv = fptoui <4 x float> %x to <4 x i64>
+ %spec.store.select = call <4 x i64> @llvm.umin.v4i64(<4 x i64> %conv, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
+ %conv6 = trunc <4 x i64> %spec.store.select to <4 x i32>
+ ret <4 x i32> %conv6
+}
+
+; As ustest_f32i32 but clamped with llvm.smin/llvm.smax intrinsics to
+; [0, 4294967295] after a signed conversion.
+define <4 x i32> @ustest_f32i32_mm(<4 x float> %x) {
+; CHECK-LABEL: ustest_f32i32_mm:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtl v2.2d, v0.2s
+; CHECK-NEXT: fcvtl2 v0.2d, v0.4s
+; CHECK-NEXT: movi v1.2d, #0x000000ffffffff
+; CHECK-NEXT: fcvtzs v2.2d, v2.2d
+; CHECK-NEXT: fcvtzs v0.2d, v0.2d
+; CHECK-NEXT: cmgt v3.2d, v1.2d, v2.2d
+; CHECK-NEXT: cmgt v4.2d, v1.2d, v0.2d
+; CHECK-NEXT: bif v2.16b, v1.16b, v3.16b
+; CHECK-NEXT: bif v0.16b, v1.16b, v4.16b
+; CHECK-NEXT: cmgt v1.2d, v2.2d, #0
+; CHECK-NEXT: cmgt v3.2d, v0.2d, #0
+; CHECK-NEXT: and v1.16b, v2.16b, v1.16b
+; CHECK-NEXT: and v2.16b, v0.16b, v3.16b
+; CHECK-NEXT: xtn v0.2s, v1.2d
+; CHECK-NEXT: xtn2 v0.4s, v2.2d
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi <4 x float> %x to <4 x i64>
+ %spec.store.select = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
+ %spec.store.select7 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> %spec.store.select, <4 x i64> zeroinitializer)
+ %conv6 = trunc <4 x i64> %spec.store.select7 to <4 x i32>
+ ret <4 x i32> %conv6
+}
+
+; As stest_f16i32 but clamped with llvm.smin/llvm.smax intrinsics; checked
+; separately for the base AArch64 lowering (CHECK-CVT, converts f16 -> f32
+; first) and +fullfp16 (CHECK-FP16, direct fcvtzs from h registers).
+define <4 x i32> @stest_f16i32_mm(<4 x half> %x) {
+; CHECK-CVT-LABEL: stest_f16i32_mm:
+; CHECK-CVT: // %bb.0: // %entry
+; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-CVT-NEXT: mov h1, v0.h[2]
+; CHECK-CVT-NEXT: mov h2, v0.h[1]
+; CHECK-CVT-NEXT: fcvt s3, h0
+; CHECK-CVT-NEXT: mov h0, v0.h[3]
+; CHECK-CVT-NEXT: mov w8, #2147483647
+; CHECK-CVT-NEXT: fcvt s1, h1
+; CHECK-CVT-NEXT: fcvt s2, h2
+; CHECK-CVT-NEXT: fcvtzs x9, s3
+; CHECK-CVT-NEXT: fcvt s0, h0
+; CHECK-CVT-NEXT: fcvtzs x10, s1
+; CHECK-CVT-NEXT: fmov d1, x9
+; CHECK-CVT-NEXT: fcvtzs x9, s2
+; CHECK-CVT-NEXT: fmov d2, x10
+; CHECK-CVT-NEXT: fcvtzs x10, s0
+; CHECK-CVT-NEXT: mov v1.d[1], x9
+; CHECK-CVT-NEXT: dup v0.2d, x8
+; CHECK-CVT-NEXT: mov x8, #-2147483648
+; CHECK-CVT-NEXT: mov v2.d[1], x10
+; CHECK-CVT-NEXT: cmgt v3.2d, v0.2d, v1.2d
+; CHECK-CVT-NEXT: cmgt v4.2d, v0.2d, v2.2d
+; CHECK-CVT-NEXT: bif v1.16b, v0.16b, v3.16b
+; CHECK-CVT-NEXT: bit v0.16b, v2.16b, v4.16b
+; CHECK-CVT-NEXT: dup v2.2d, x8
+; CHECK-CVT-NEXT: cmgt v3.2d, v1.2d, v2.2d
+; CHECK-CVT-NEXT: cmgt v4.2d, v0.2d, v2.2d
+; CHECK-CVT-NEXT: bif v1.16b, v2.16b, v3.16b
+; CHECK-CVT-NEXT: bit v2.16b, v0.16b, v4.16b
+; CHECK-CVT-NEXT: xtn v0.2s, v1.2d
+; CHECK-CVT-NEXT: xtn2 v0.4s, v2.2d
+; CHECK-CVT-NEXT: ret
+;
+; CHECK-FP16-LABEL: stest_f16i32_mm:
+; CHECK-FP16: // %bb.0: // %entry
+; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-FP16-NEXT: mov h1, v0.h[2]
+; CHECK-FP16-NEXT: mov h2, v0.h[1]
+; CHECK-FP16-NEXT: fcvtzs x9, h0
+; CHECK-FP16-NEXT: mov h0, v0.h[3]
+; CHECK-FP16-NEXT: mov w8, #2147483647
+; CHECK-FP16-NEXT: fcvtzs x10, h1
+; CHECK-FP16-NEXT: fmov d1, x9
+; CHECK-FP16-NEXT: fcvtzs x9, h2
+; CHECK-FP16-NEXT: fmov d2, x10
+; CHECK-FP16-NEXT: fcvtzs x10, h0
+; CHECK-FP16-NEXT: mov v1.d[1], x9
+; CHECK-FP16-NEXT: dup v0.2d, x8
+; CHECK-FP16-NEXT: mov x8, #-2147483648
+; CHECK-FP16-NEXT: mov v2.d[1], x10
+; CHECK-FP16-NEXT: cmgt v3.2d, v0.2d, v1.2d
+; CHECK-FP16-NEXT: cmgt v4.2d, v0.2d, v2.2d
+; CHECK-FP16-NEXT: bif v1.16b, v0.16b, v3.16b
+; CHECK-FP16-NEXT: bit v0.16b, v2.16b, v4.16b
+; CHECK-FP16-NEXT: dup v2.2d, x8
+; CHECK-FP16-NEXT: cmgt v3.2d, v1.2d, v2.2d
+; CHECK-FP16-NEXT: cmgt v4.2d, v0.2d, v2.2d
+; CHECK-FP16-NEXT: bif v1.16b, v2.16b, v3.16b
+; CHECK-FP16-NEXT: bit v2.16b, v0.16b, v4.16b
+; CHECK-FP16-NEXT: xtn v0.2s, v1.2d
+; CHECK-FP16-NEXT: xtn2 v0.4s, v2.2d
+; CHECK-FP16-NEXT: ret
+entry:
+ %conv = fptosi <4 x half> %x to <4 x i64>
+ %spec.store.select = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>)
+ %spec.store.select7 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> %spec.store.select, <4 x i64> <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648>)
+ %conv6 = trunc <4 x i64> %spec.store.select7 to <4 x i32>
+ ret <4 x i32> %conv6
+}
+
+; As utesth_f16i32 but clamped with the llvm.umin intrinsic; checked for
+; both the base lowering (CHECK-CVT) and +fullfp16 (CHECK-FP16).
+define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
+; CHECK-CVT-LABEL: utesth_f16i32_mm:
+; CHECK-CVT: // %bb.0: // %entry
+; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-CVT-NEXT: mov h2, v0.h[2]
+; CHECK-CVT-NEXT: mov h3, v0.h[1]
+; CHECK-CVT-NEXT: fcvt s4, h0
+; CHECK-CVT-NEXT: mov h0, v0.h[3]
+; CHECK-CVT-NEXT: movi v1.2d, #0x000000ffffffff
+; CHECK-CVT-NEXT: fcvt s2, h2
+; CHECK-CVT-NEXT: fcvt s3, h3
+; CHECK-CVT-NEXT: fcvtzu x8, s4
+; CHECK-CVT-NEXT: fcvt s0, h0
+; CHECK-CVT-NEXT: fcvtzu x9, s2
+; CHECK-CVT-NEXT: fmov d2, x8
+; CHECK-CVT-NEXT: fcvtzu x8, s3
+; CHECK-CVT-NEXT: fmov d3, x9
+; CHECK-CVT-NEXT: fcvtzu x9, s0
+; CHECK-CVT-NEXT: mov v2.d[1], x8
+; CHECK-CVT-NEXT: mov v3.d[1], x9
+; CHECK-CVT-NEXT: cmhi v0.2d, v1.2d, v2.2d
+; CHECK-CVT-NEXT: cmhi v1.2d, v1.2d, v3.2d
+; CHECK-CVT-NEXT: and v2.16b, v2.16b, v0.16b
+; CHECK-CVT-NEXT: and v3.16b, v3.16b, v1.16b
+; CHECK-CVT-NEXT: orn v0.16b, v2.16b, v0.16b
+; CHECK-CVT-NEXT: orn v1.16b, v3.16b, v1.16b
+; CHECK-CVT-NEXT: xtn v0.2s, v0.2d
+; CHECK-CVT-NEXT: xtn2 v0.4s, v1.2d
+; CHECK-CVT-NEXT: ret
+;
+; CHECK-FP16-LABEL: utesth_f16i32_mm:
+; CHECK-FP16: // %bb.0: // %entry
+; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-FP16-NEXT: mov h2, v0.h[2]
+; CHECK-FP16-NEXT: mov h3, v0.h[1]
+; CHECK-FP16-NEXT: fcvtzu x8, h0
+; CHECK-FP16-NEXT: mov h0, v0.h[3]
+; CHECK-FP16-NEXT: movi v1.2d, #0x000000ffffffff
+; CHECK-FP16-NEXT: fcvtzu x9, h2
+; CHECK-FP16-NEXT: fmov d2, x8
+; CHECK-FP16-NEXT: fcvtzu x8, h3
+; CHECK-FP16-NEXT: fmov d3, x9
+; CHECK-FP16-NEXT: fcvtzu x9, h0
+; CHECK-FP16-NEXT: mov v2.d[1], x8
+; CHECK-FP16-NEXT: mov v3.d[1], x9
+; CHECK-FP16-NEXT: cmhi v0.2d, v1.2d, v2.2d
+; CHECK-FP16-NEXT: cmhi v1.2d, v1.2d, v3.2d
+; CHECK-FP16-NEXT: and v2.16b, v2.16b, v0.16b
+; CHECK-FP16-NEXT: and v3.16b, v3.16b, v1.16b
+; CHECK-FP16-NEXT: orn v0.16b, v2.16b, v0.16b
+; CHECK-FP16-NEXT: orn v1.16b, v3.16b, v1.16b
+; CHECK-FP16-NEXT: xtn v0.2s, v0.2d
+; CHECK-FP16-NEXT: xtn2 v0.4s, v1.2d
+; CHECK-FP16-NEXT: ret
+entry:
+ %conv = fptoui <4 x half> %x to <4 x i64>
+ %spec.store.select = call <4 x i64> @llvm.umin.v4i64(<4 x i64> %conv, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
+ %conv6 = trunc <4 x i64> %spec.store.select to <4 x i32>
+ ret <4 x i32> %conv6
+}
+
+define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) {
+; CHECK-CVT-LABEL: ustest_f16i32_mm:
+; CHECK-CVT: // %bb.0: // %entry
+; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-CVT-NEXT: mov h2, v0.h[2]
+; CHECK-CVT-NEXT: mov h3, v0.h[1]
+; CHECK-CVT-NEXT: fcvt s4, h0
+; CHECK-CVT-NEXT: mov h0, v0.h[3]
+; CHECK-CVT-NEXT: movi v1.2d, #0x000000ffffffff
+; CHECK-CVT-NEXT: fcvt s2, h2
+; CHECK-CVT-NEXT: fcvt s3, h3
+; CHECK-CVT-NEXT: fcvtzs x8, s4
+; CHECK-CVT-NEXT: fcvt s0, h0
+; CHECK-CVT-NEXT: fcvtzs x9, s2
+; CHECK-CVT-NEXT: fmov d2, x8
+; CHECK-CVT-NEXT: fcvtzs x8, s3
+; CHECK-CVT-NEXT: fmov d3, x9
+; CHECK-CVT-NEXT: fcvtzs x9, s0
+; CHECK-CVT-NEXT: mov v2.d[1], x8
+; CHECK-CVT-NEXT: mov v3.d[1], x9
+; CHECK-CVT-NEXT: cmgt v0.2d, v1.2d, v2.2d
+; CHECK-CVT-NEXT: cmgt v4.2d, v1.2d, v3.2d
+; CHECK-CVT-NEXT: bsl v0.16b, v2.16b, v1.16b
+; CHECK-CVT-NEXT: bit v1.16b, v3.16b, v4.16b
+; CHECK-CVT-NEXT: cmgt v2.2d, v0.2d, #0
+; CHECK-CVT-NEXT: cmgt v3.2d, v1.2d, #0
+; CHECK-CVT-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-CVT-NEXT: and v1.16b, v1.16b, v3.16b
+; CHECK-CVT-NEXT: xtn v0.2s, v0.2d
+; CHECK-CVT-NEXT: xtn2 v0.4s, v1.2d
+; CHECK-CVT-NEXT: ret
+;
+; CHECK-FP16-LABEL: ustest_f16i32_mm:
+; CHECK-FP16: // %bb.0: // %entry
+; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-FP16-NEXT: mov h2, v0.h[2]
+; CHECK-FP16-NEXT: mov h3, v0.h[1]
+; CHECK-FP16-NEXT: fcvtzs x8, h0
+; CHECK-FP16-NEXT: mov h0, v0.h[3]
+; CHECK-FP16-NEXT: movi v1.2d, #0x000000ffffffff
+; CHECK-FP16-NEXT: fcvtzs x9, h2
+; CHECK-FP16-NEXT: fmov d2, x8
+; CHECK-FP16-NEXT: fcvtzs x8, h3
+; CHECK-FP16-NEXT: fmov d3, x9
+; CHECK-FP16-NEXT: fcvtzs x9, h0
+; CHECK-FP16-NEXT: mov v2.d[1], x8
+; CHECK-FP16-NEXT: mov v3.d[1], x9
+; CHECK-FP16-NEXT: cmgt v0.2d, v1.2d, v2.2d
+; CHECK-FP16-NEXT: cmgt v4.2d, v1.2d, v3.2d
+; CHECK-FP16-NEXT: bsl v0.16b, v2.16b, v1.16b
+; CHECK-FP16-NEXT: bit v1.16b, v3.16b, v4.16b
+; CHECK-FP16-NEXT: cmgt v2.2d, v0.2d, #0
+; CHECK-FP16-NEXT: cmgt v3.2d, v1.2d, #0
+; CHECK-FP16-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-FP16-NEXT: and v1.16b, v1.16b, v3.16b
+; CHECK-FP16-NEXT: xtn v0.2s, v0.2d
+; CHECK-FP16-NEXT: xtn2 v0.4s, v1.2d
+; CHECK-FP16-NEXT: ret
+entry:
+ %conv = fptosi <4 x half> %x to <4 x i64>
+ %spec.store.select = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
+ %spec.store.select7 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> %spec.store.select, <4 x i64> zeroinitializer)
+ %conv6 = trunc <4 x i64> %spec.store.select7 to <4 x i32>
+ ret <4 x i32> %conv6
+}
+
+; i16 saturate
+
+define <2 x i16> @stest_f64i16_mm(<2 x double> %x) {
+; CHECK-LABEL: stest_f64i16_mm:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs v0.2d, v0.2d
+; CHECK-NEXT: movi v1.2s, #127, msl #8
+; CHECK-NEXT: xtn v0.2s, v0.2d
+; CHECK-NEXT: smin v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: mvni v1.2s, #127, msl #8
+; CHECK-NEXT: smax v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi <2 x double> %x to <2 x i32>
+ %spec.store.select = call <2 x i32> @llvm.smin.v2i32(<2 x i32> %conv, <2 x i32> <i32 32767, i32 32767>)
+ %spec.store.select7 = call <2 x i32> @llvm.smax.v2i32(<2 x i32> %spec.store.select, <2 x i32> <i32 -32768, i32 -32768>)
+ %conv6 = trunc <2 x i32> %spec.store.select7 to <2 x i16>
+ ret <2 x i16> %conv6
+}
+
+define <2 x i16> @utest_f64i16_mm(<2 x double> %x) {
+; CHECK-LABEL: utest_f64i16_mm:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzu v0.2d, v0.2d
+; CHECK-NEXT: movi d1, #0x00ffff0000ffff
+; CHECK-NEXT: xtn v0.2s, v0.2d
+; CHECK-NEXT: umin v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: ret
+entry:
+ %conv = fptoui <2 x double> %x to <2 x i32>
+ %spec.store.select = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %conv, <2 x i32> <i32 65535, i32 65535>)
+ %conv6 = trunc <2 x i32> %spec.store.select to <2 x i16>
+ ret <2 x i16> %conv6
+}
+
+define <2 x i16> @ustest_f64i16_mm(<2 x double> %x) {
+; CHECK-LABEL: ustest_f64i16_mm:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs v0.2d, v0.2d
+; CHECK-NEXT: movi d1, #0x00ffff0000ffff
+; CHECK-NEXT: movi v2.2d, #0000000000000000
+; CHECK-NEXT: xtn v0.2s, v0.2d
+; CHECK-NEXT: smin v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: smax v0.2s, v0.2s, v2.2s
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi <2 x double> %x to <2 x i32>
+ %spec.store.select = call <2 x i32> @llvm.smin.v2i32(<2 x i32> %conv, <2 x i32> <i32 65535, i32 65535>)
+ %spec.store.select7 = call <2 x i32> @llvm.smax.v2i32(<2 x i32> %spec.store.select, <2 x i32> zeroinitializer)
+ %conv6 = trunc <2 x i32> %spec.store.select7 to <2 x i16>
+ ret <2 x i16> %conv6
+}
+
+define <4 x i16> @stest_f32i16_mm(<4 x float> %x) {
+; CHECK-LABEL: stest_f32i16_mm:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NEXT: sqxtn v0.4h, v0.4s
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi <4 x float> %x to <4 x i32>
+ %spec.store.select = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %conv, <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767>)
+ %spec.store.select7 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %spec.store.select, <4 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768>)
+ %conv6 = trunc <4 x i32> %spec.store.select7 to <4 x i16>
+ ret <4 x i16> %conv6
+}
+
+define <4 x i16> @utest_f32i16_mm(<4 x float> %x) {
+; CHECK-LABEL: utest_f32i16_mm:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-NEXT: uqxtn v0.4h, v0.4s
+; CHECK-NEXT: ret
+entry:
+ %conv = fptoui <4 x float> %x to <4 x i32>
+ %spec.store.select = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %conv, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>)
+ %conv6 = trunc <4 x i32> %spec.store.select to <4 x i16>
+ ret <4 x i16> %conv6
+}
+
+define <4 x i16> @ustest_f32i16_mm(<4 x float> %x) {
+; CHECK-LABEL: ustest_f32i16_mm:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff
+; CHECK-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: movi v1.2d, #0000000000000000
+; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: xtn v0.4h, v0.4s
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi <4 x float> %x to <4 x i32>
+ %spec.store.select = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %conv, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>)
+ %spec.store.select7 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %spec.store.select, <4 x i32> zeroinitializer)
+ %conv6 = trunc <4 x i32> %spec.store.select7 to <4 x i16>
+ ret <4 x i16> %conv6
+}
+
+define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
+; CHECK-LABEL: stest_f16i16_mm:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtl2 v2.4s, v0.8h
+; CHECK-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NEXT: movi v1.4s, #127, msl #8
+; CHECK-NEXT: mvni v3.4s, #127, msl #8
+; CHECK-NEXT: fcvtzs v2.4s, v2.4s
+; CHECK-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s
+; CHECK-NEXT: sqxtn v0.4h, v0.4s
+; CHECK-NEXT: smax v1.4s, v1.4s, v3.4s
+; CHECK-NEXT: xtn2 v0.8h, v1.4s
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi <8 x half> %x to <8 x i32>
+ %spec.store.select = call <8 x i32> @llvm.smin.v8i32(<8 x i32> %conv, <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>)
+ %spec.store.select7 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> %spec.store.select, <8 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>)
+ %conv6 = trunc <8 x i32> %spec.store.select7 to <8 x i16>
+ ret <8 x i16> %conv6
+}
+
+define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
+; CHECK-LABEL: utesth_f16i16_mm:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtl2 v2.4s, v0.8h
+; CHECK-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff
+; CHECK-NEXT: fcvtzu v2.4s, v2.4s
+; CHECK-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s
+; CHECK-NEXT: uqxtn v0.4h, v0.4s
+; CHECK-NEXT: xtn2 v0.8h, v1.4s
+; CHECK-NEXT: ret
+entry:
+ %conv = fptoui <8 x half> %x to <8 x i32>
+ %spec.store.select = call <8 x i32> @llvm.umin.v8i32(<8 x i32> %conv, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>)
+ %conv6 = trunc <8 x i32> %spec.store.select to <8 x i16>
+ ret <8 x i16> %conv6
+}
+
+define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
+; CHECK-LABEL: ustest_f16i16_mm:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtl v2.4s, v0.4h
+; CHECK-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff
+; CHECK-NEXT: movi v3.2d, #0000000000000000
+; CHECK-NEXT: fcvtzs v2.4s, v2.4s
+; CHECK-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NEXT: smin v2.4s, v2.4s, v1.4s
+; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: smax v1.4s, v2.4s, v3.4s
+; CHECK-NEXT: smax v2.4s, v0.4s, v3.4s
+; CHECK-NEXT: xtn v0.4h, v1.4s
+; CHECK-NEXT: xtn2 v0.8h, v2.4s
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi <8 x half> %x to <8 x i32>
+ %spec.store.select = call <8 x i32> @llvm.smin.v8i32(<8 x i32> %conv, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>)
+ %spec.store.select7 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> %spec.store.select, <8 x i32> zeroinitializer)
+ %conv6 = trunc <8 x i32> %spec.store.select7 to <8 x i16>
+ ret <8 x i16> %conv6
+}
+
+; i64 saturate
+
+define <2 x i64> @stest_f64i64_mm(<2 x double> %x) {
+; CHECK-LABEL: stest_f64i64_mm:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub sp, sp, #48
+; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w20, -16
+; CHECK-NEXT: .cfi_offset w30, -32
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: bl __fixdfti
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: mov x20, x1
+; CHECK-NEXT: mov d0, v0.d[1]
+; CHECK-NEXT: bl __fixdfti
+; CHECK-NEXT: mov x8, #9223372036854775807
+; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: cmp x0, x8
+; CHECK-NEXT: csel x9, x0, x8, lo
+; CHECK-NEXT: cmp x1, #0
+; CHECK-NEXT: csel x10, x0, x8, lt
+; CHECK-NEXT: csel x11, x1, xzr, lt
+; CHECK-NEXT: csel x9, x9, x10, eq
+; CHECK-NEXT: cmp x19, x8
+; CHECK-NEXT: csel x10, x19, x8, lo
+; CHECK-NEXT: cmp x20, #0
+; CHECK-NEXT: csel x8, x19, x8, lt
+; CHECK-NEXT: csel x12, x20, xzr, lt
+; CHECK-NEXT: csel x8, x10, x8, eq
+; CHECK-NEXT: cmp x12, #0
+; CHECK-NEXT: mov x10, #-9223372036854775808
+; CHECK-NEXT: csel x13, x8, x10, ge
+; CHECK-NEXT: cmp x8, x10
+; CHECK-NEXT: csel x8, x8, x10, hi
+; CHECK-NEXT: cmn x12, #1
+; CHECK-NEXT: csel x8, x8, x13, eq
+; CHECK-NEXT: cmp x11, #0
+; CHECK-NEXT: csel x12, x9, x10, ge
+; CHECK-NEXT: cmp x9, x10
+; CHECK-NEXT: csel x9, x9, x10, hi
+; CHECK-NEXT: cmn x11, #1
+; CHECK-NEXT: csel x9, x9, x12, eq
+; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: fmov d1, x9
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: add sp, sp, #48
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi <2 x double> %x to <2 x i128>
+ %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 9223372036854775807, i128 9223372036854775807>)
+ %spec.store.select7 = call <2 x i128> @llvm.smax.v2i128(<2 x i128> %spec.store.select, <2 x i128> <i128 -9223372036854775808, i128 -9223372036854775808>)
+ %conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+define <2 x i64> @utest_f64i64_mm(<2 x double> %x) {
+; CHECK-LABEL: utest_f64i64_mm:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub sp, sp, #48
+; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w20, -16
+; CHECK-NEXT: .cfi_offset w30, -32
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: mov d0, v0.d[1]
+; CHECK-NEXT: bl __fixunsdfti
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: mov x20, x1
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: bl __fixunsdfti
+; CHECK-NEXT: cmp x1, #0
+; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: csel x8, x0, xzr, eq
+; CHECK-NEXT: cmp x1, #1
+; CHECK-NEXT: csel x8, xzr, x8, eq
+; CHECK-NEXT: cmp x20, #0
+; CHECK-NEXT: csel x9, x19, xzr, eq
+; CHECK-NEXT: cmp x20, #1
+; CHECK-NEXT: csel x9, xzr, x9, eq
+; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: fmov d1, x9
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: add sp, sp, #48
+; CHECK-NEXT: ret
+entry:
+ %conv = fptoui <2 x double> %x to <2 x i128>
+ %spec.store.select = call <2 x i128> @llvm.umin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)
+ %conv6 = trunc <2 x i128> %spec.store.select to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) {
+; CHECK-LABEL: ustest_f64i64_mm:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub sp, sp, #48
+; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w20, -16
+; CHECK-NEXT: .cfi_offset w30, -32
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: bl __fixdfti
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: mov x20, x1
+; CHECK-NEXT: mov d0, v0.d[1]
+; CHECK-NEXT: bl __fixdfti
+; CHECK-NEXT: cmp x1, #1
+; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: csel x8, x0, xzr, lt
+; CHECK-NEXT: csinc x9, x1, xzr, lt
+; CHECK-NEXT: csel x8, xzr, x8, eq
+; CHECK-NEXT: cmp x20, #1
+; CHECK-NEXT: csel x10, x19, xzr, lt
+; CHECK-NEXT: csinc x11, x20, xzr, lt
+; CHECK-NEXT: csel x10, xzr, x10, eq
+; CHECK-NEXT: cmp x11, #0
+; CHECK-NEXT: csel x11, x10, xzr, gt
+; CHECK-NEXT: csel x10, x10, x11, eq
+; CHECK-NEXT: cmp x9, #0
+; CHECK-NEXT: csel x9, x8, xzr, gt
+; CHECK-NEXT: csel x8, x8, x9, eq
+; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: fmov d0, x10
+; CHECK-NEXT: fmov d1, x8
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: add sp, sp, #48
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi <2 x double> %x to <2 x i128>
+ %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)
+ %spec.store.select7 = call <2 x i128> @llvm.smax.v2i128(<2 x i128> %spec.store.select, <2 x i128> zeroinitializer)
+ %conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+define <2 x i64> @stest_f32i64_mm(<2 x float> %x) {
+; CHECK-LABEL: stest_f32i64_mm:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub sp, sp, #48
+; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w20, -16
+; CHECK-NEXT: .cfi_offset w30, -32
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
+; CHECK-NEXT: bl __fixsfti
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: mov x20, x1
+; CHECK-NEXT: mov s0, v0.s[1]
+; CHECK-NEXT: bl __fixsfti
+; CHECK-NEXT: mov x8, #9223372036854775807
+; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: cmp x0, x8
+; CHECK-NEXT: csel x9, x0, x8, lo
+; CHECK-NEXT: cmp x1, #0
+; CHECK-NEXT: csel x10, x0, x8, lt
+; CHECK-NEXT: csel x9, x9, x10, eq
+; CHECK-NEXT: csel x10, x1, xzr, lt
+; CHECK-NEXT: cmp x19, x8
+; CHECK-NEXT: csel x11, x19, x8, lo
+; CHECK-NEXT: cmp x20, #0
+; CHECK-NEXT: csel x8, x19, x8, lt
+; CHECK-NEXT: csel x12, x20, xzr, lt
+; CHECK-NEXT: csel x8, x11, x8, eq
+; CHECK-NEXT: mov x11, #-9223372036854775808
+; CHECK-NEXT: cmp x8, x11
+; CHECK-NEXT: csel x13, x8, x11, hi
+; CHECK-NEXT: cmp x12, #0
+; CHECK-NEXT: csel x8, x8, x11, ge
+; CHECK-NEXT: cmn x12, #1
+; CHECK-NEXT: csel x8, x13, x8, eq
+; CHECK-NEXT: cmp x9, x11
+; CHECK-NEXT: csel x12, x9, x11, hi
+; CHECK-NEXT: cmp x10, #0
+; CHECK-NEXT: csel x9, x9, x11, ge
+; CHECK-NEXT: cmn x10, #1
+; CHECK-NEXT: csel x9, x12, x9, eq
+; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: fmov d1, x9
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: add sp, sp, #48
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi <2 x float> %x to <2 x i128>
+ %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 9223372036854775807, i128 9223372036854775807>)
+ %spec.store.select7 = call <2 x i128> @llvm.smax.v2i128(<2 x i128> %spec.store.select, <2 x i128> <i128 -9223372036854775808, i128 -9223372036854775808>)
+ %conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+define <2 x i64> @utest_f32i64_mm(<2 x float> %x) {
+; CHECK-LABEL: utest_f32i64_mm:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub sp, sp, #48
+; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w20, -16
+; CHECK-NEXT: .cfi_offset w30, -32
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: mov s0, v0.s[1]
+; CHECK-NEXT: bl __fixunssfti
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: mov x20, x1
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
+; CHECK-NEXT: bl __fixunssfti
+; CHECK-NEXT: cmp x1, #0
+; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: csel x8, x0, xzr, eq
+; CHECK-NEXT: cmp x1, #1
+; CHECK-NEXT: csel x8, xzr, x8, eq
+; CHECK-NEXT: cmp x20, #0
+; CHECK-NEXT: csel x9, x19, xzr, eq
+; CHECK-NEXT: cmp x20, #1
+; CHECK-NEXT: csel x9, xzr, x9, eq
+; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: fmov d1, x9
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: add sp, sp, #48
+; CHECK-NEXT: ret
+entry:
+ %conv = fptoui <2 x float> %x to <2 x i128>
+ %spec.store.select = call <2 x i128> @llvm.umin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)
+ %conv6 = trunc <2 x i128> %spec.store.select to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) {
+; CHECK-LABEL: ustest_f32i64_mm:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub sp, sp, #48
+; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w20, -16
+; CHECK-NEXT: .cfi_offset w30, -32
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
+; CHECK-NEXT: bl __fixsfti
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: mov x20, x1
+; CHECK-NEXT: mov s0, v0.s[1]
+; CHECK-NEXT: bl __fixsfti
+; CHECK-NEXT: cmp x1, #1
+; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: csel x8, x0, xzr, lt
+; CHECK-NEXT: csinc x9, x1, xzr, lt
+; CHECK-NEXT: csel x8, xzr, x8, eq
+; CHECK-NEXT: cmp x20, #1
+; CHECK-NEXT: csel x10, x19, xzr, lt
+; CHECK-NEXT: csinc x11, x20, xzr, lt
+; CHECK-NEXT: csel x10, xzr, x10, eq
+; CHECK-NEXT: cmp x11, #0
+; CHECK-NEXT: csel x11, x10, xzr, gt
+; CHECK-NEXT: csel x10, x10, x11, eq
+; CHECK-NEXT: cmp x9, #0
+; CHECK-NEXT: csel x9, x8, xzr, gt
+; CHECK-NEXT: csel x8, x8, x9, eq
+; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: fmov d0, x10
+; CHECK-NEXT: fmov d1, x8
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: add sp, sp, #48
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi <2 x float> %x to <2 x i128>
+ %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)
+ %spec.store.select7 = call <2 x i128> @llvm.smax.v2i128(<2 x i128> %spec.store.select, <2 x i128> zeroinitializer)
+ %conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+define <2 x i64> @stest_f16i64_mm(<2 x half> %x) {
+; CHECK-LABEL: stest_f16i64_mm:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub sp, sp, #48
+; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w20, -16
+; CHECK-NEXT: .cfi_offset w30, -32
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $h0 killed $h0 killed $q0
+; CHECK-NEXT: bl __fixhfti
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: mov x20, x1
+; CHECK-NEXT: mov h0, v0.h[1]
+; CHECK-NEXT: bl __fixhfti
+; CHECK-NEXT: mov x8, #9223372036854775807
+; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: cmp x0, x8
+; CHECK-NEXT: csel x9, x0, x8, lo
+; CHECK-NEXT: cmp x1, #0
+; CHECK-NEXT: csel x10, x0, x8, lt
+; CHECK-NEXT: csel x9, x9, x10, eq
+; CHECK-NEXT: csel x10, x1, xzr, lt
+; CHECK-NEXT: cmp x19, x8
+; CHECK-NEXT: csel x11, x19, x8, lo
+; CHECK-NEXT: cmp x20, #0
+; CHECK-NEXT: csel x8, x19, x8, lt
+; CHECK-NEXT: csel x12, x20, xzr, lt
+; CHECK-NEXT: csel x8, x11, x8, eq
+; CHECK-NEXT: mov x11, #-9223372036854775808
+; CHECK-NEXT: cmp x8, x11
+; CHECK-NEXT: csel x13, x8, x11, hi
+; CHECK-NEXT: cmp x12, #0
+; CHECK-NEXT: csel x8, x8, x11, ge
+; CHECK-NEXT: cmn x12, #1
+; CHECK-NEXT: csel x8, x13, x8, eq
+; CHECK-NEXT: cmp x9, x11
+; CHECK-NEXT: csel x12, x9, x11, hi
+; CHECK-NEXT: cmp x10, #0
+; CHECK-NEXT: csel x9, x9, x11, ge
+; CHECK-NEXT: cmn x10, #1
+; CHECK-NEXT: csel x9, x12, x9, eq
+; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: fmov d1, x9
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: add sp, sp, #48
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi <2 x half> %x to <2 x i128>
+ %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 9223372036854775807, i128 9223372036854775807>)
+ %spec.store.select7 = call <2 x i128> @llvm.smax.v2i128(<2 x i128> %spec.store.select, <2 x i128> <i128 -9223372036854775808, i128 -9223372036854775808>)
+ %conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
+; CHECK-LABEL: utesth_f16i64_mm:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub sp, sp, #48
+; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w20, -16
+; CHECK-NEXT: .cfi_offset w30, -32
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: mov h0, v0.h[1]
+; CHECK-NEXT: bl __fixunshfti
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: mov x20, x1
+; CHECK-NEXT: // kill: def $h0 killed $h0 killed $q0
+; CHECK-NEXT: bl __fixunshfti
+; CHECK-NEXT: cmp x1, #0
+; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: csel x8, x0, xzr, eq
+; CHECK-NEXT: cmp x1, #1
+; CHECK-NEXT: csel x8, xzr, x8, eq
+; CHECK-NEXT: cmp x20, #0
+; CHECK-NEXT: csel x9, x19, xzr, eq
+; CHECK-NEXT: cmp x20, #1
+; CHECK-NEXT: csel x9, xzr, x9, eq
+; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: fmov d1, x9
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: add sp, sp, #48
+; CHECK-NEXT: ret
+entry:
+ %conv = fptoui <2 x half> %x to <2 x i128>
+ %spec.store.select = call <2 x i128> @llvm.umin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)
+ %conv6 = trunc <2 x i128> %spec.store.select to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) {
+; CHECK-LABEL: ustest_f16i64_mm:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub sp, sp, #48
+; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w20, -16
+; CHECK-NEXT: .cfi_offset w30, -32
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $h0 killed $h0 killed $q0
+; CHECK-NEXT: bl __fixhfti
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: mov x20, x1
+; CHECK-NEXT: mov h0, v0.h[1]
+; CHECK-NEXT: bl __fixhfti
+; CHECK-NEXT: cmp x1, #1
+; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: csel x8, x0, xzr, lt
+; CHECK-NEXT: csinc x9, x1, xzr, lt
+; CHECK-NEXT: csel x8, xzr, x8, eq
+; CHECK-NEXT: cmp x20, #1
+; CHECK-NEXT: csel x10, x19, xzr, lt
+; CHECK-NEXT: csinc x11, x20, xzr, lt
+; CHECK-NEXT: csel x10, xzr, x10, eq
+; CHECK-NEXT: cmp x11, #0
+; CHECK-NEXT: csel x11, x10, xzr, gt
+; CHECK-NEXT: csel x10, x10, x11, eq
+; CHECK-NEXT: cmp x9, #0
+; CHECK-NEXT: csel x9, x8, xzr, gt
+; CHECK-NEXT: csel x8, x8, x9, eq
+; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: fmov d0, x10
+; CHECK-NEXT: fmov d1, x8
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: add sp, sp, #48
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi <2 x half> %x to <2 x i128>
+ %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)
+ %spec.store.select7 = call <2 x i128> @llvm.smax.v2i128(<2 x i128> %spec.store.select, <2 x i128> zeroinitializer)
+ %conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+declare <2 x i32> @llvm.smin.v2i32(<2 x i32>, <2 x i32>)
+declare <2 x i32> @llvm.smax.v2i32(<2 x i32>, <2 x i32>)
+declare <2 x i32> @llvm.umin.v2i32(<2 x i32>, <2 x i32>)
+declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.umin.v4i32(<4 x i32>, <4 x i32>)
+declare <8 x i32> @llvm.smin.v8i32(<8 x i32>, <8 x i32>)
+declare <8 x i32> @llvm.smax.v8i32(<8 x i32>, <8 x i32>)
+declare <8 x i32> @llvm.umin.v8i32(<8 x i32>, <8 x i32>)
+declare <2 x i64> @llvm.smin.v2i64(<2 x i64>, <2 x i64>)
+declare <2 x i64> @llvm.smax.v2i64(<2 x i64>, <2 x i64>)
+declare <2 x i64> @llvm.umin.v2i64(<2 x i64>, <2 x i64>)
+declare <4 x i64> @llvm.smin.v4i64(<4 x i64>, <4 x i64>)
+declare <4 x i64> @llvm.smax.v4i64(<4 x i64>, <4 x i64>)
+declare <4 x i64> @llvm.umin.v4i64(<4 x i64>, <4 x i64>)
+declare <2 x i128> @llvm.smin.v2i128(<2 x i128>, <2 x i128>)
+declare <2 x i128> @llvm.smax.v2i128(<2 x i128>, <2 x i128>)
+declare <2 x i128> @llvm.umin.v2i128(<2 x i128>, <2 x i128>)
diff --git a/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll b/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll
new file mode 100644
index 0000000000000..74dd9fe154aa4
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll
@@ -0,0 +1,5088 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=armv7a-none-eabihf -mattr=+neon | FileCheck %s --check-prefixes=CHECK,CHECK-NEON
+; RUN: llc < %s -mtriple=armv8a-none-eabihf -mattr=+neon,+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
+
+; i32 saturate
+
+define <2 x i32> @stest_f64i32(<2 x double> %x) {
+; CHECK-LABEL: stest_f64i32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r4, r5, r11, lr}
+; CHECK-NEXT: push {r4, r5, r11, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vorr q4, q0, q0
+; CHECK-NEXT: vmov r0, r1, d8
+; CHECK-NEXT: bl __aeabi_d2lz
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: mov r5, r1
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: adr r2, .LCPI0_0
+; CHECK-NEXT: vld1.64 {d8, d9}, [r2:128]
+; CHECK-NEXT: vmov.32 d10[0], r4
+; CHECK-NEXT: bl __aeabi_d2lz
+; CHECK-NEXT: mvn r3, #-2147483648
+; CHECK-NEXT: subs r4, r4, r3
+; CHECK-NEXT: sbcs r4, r5, #0
+; CHECK-NEXT: vmov.32 d11[0], r0
+; CHECK-NEXT: mov r4, #0
+; CHECK-NEXT: mov r2, #0
+; CHECK-NEXT: movwlt r4, #1
+; CHECK-NEXT: subs r0, r0, r3
+; CHECK-NEXT: sbcs r0, r1, #0
+; CHECK-NEXT: vmov.32 d11[1], r1
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: vmov.i32 q10, #0x80000000
+; CHECK-NEXT: movwlt r0, #1
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mvnne r0, #0
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: vmov.32 d10[1], r5
+; CHECK-NEXT: mvnne r4, #0
+; CHECK-NEXT: vdup.32 d17, r0
+; CHECK-NEXT: vdup.32 d16, r4
+; CHECK-NEXT: mvn r4, #0
+; CHECK-NEXT: vbsl q8, q5, q4
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r3, r5, d17
+; CHECK-NEXT: rsbs r0, r0, #-2147483648
+; CHECK-NEXT: sbcs r0, r4, r1
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: movwlt r0, #1
+; CHECK-NEXT: rsbs r1, r3, #-2147483648
+; CHECK-NEXT: sbcs r1, r4, r5
+; CHECK-NEXT: movwlt r2, #1
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: mvnne r2, #0
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: vdup.32 d19, r2
+; CHECK-NEXT: mvnne r0, #0
+; CHECK-NEXT: vdup.32 d18, r0
+; CHECK-NEXT: vbif q8, q10, q9
+; CHECK-NEXT: vmovn.i64 d0, q8
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEXT: pop {r4, r5, r11, pc}
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI0_0:
+; CHECK-NEXT: .long 2147483647 @ 0x7fffffff
+; CHECK-NEXT: .long 0 @ 0x0
+; CHECK-NEXT: .long 2147483647 @ 0x7fffffff
+; CHECK-NEXT: .long 0 @ 0x0
+entry:
+ %conv = fptosi <2 x double> %x to <2 x i64>
+ %0 = icmp slt <2 x i64> %conv, <i64 2147483647, i64 2147483647>
+ %spec.store.select = select <2 x i1> %0, <2 x i64> %conv, <2 x i64> <i64 2147483647, i64 2147483647>
+ %1 = icmp sgt <2 x i64> %spec.store.select, <i64 -2147483648, i64 -2147483648>
+ %spec.store.select7 = select <2 x i1> %1, <2 x i64> %spec.store.select, <2 x i64> <i64 -2147483648, i64 -2147483648>
+ %conv6 = trunc <2 x i64> %spec.store.select7 to <2 x i32>
+ ret <2 x i32> %conv6
+}
+
+define <2 x i32> @utest_f64i32(<2 x double> %x) {
+; CHECK-LABEL: utest_f64i32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r4, r5, r11, lr}
+; CHECK-NEXT: push {r4, r5, r11, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vorr q4, q0, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: bl __aeabi_d2ulz
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: mov r5, r1
+; CHECK-NEXT: vmov r0, r1, d8
+; CHECK-NEXT: vmov.32 d9[0], r4
+; CHECK-NEXT: bl __aeabi_d2ulz
+; CHECK-NEXT: mvn r3, #0
+; CHECK-NEXT: vmov.32 d8[0], r0
+; CHECK-NEXT: subs r0, r0, r3
+; CHECK-NEXT: mov r2, #0
+; CHECK-NEXT: sbcs r0, r1, #0
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: movwlo r0, #1
+; CHECK-NEXT: subs r1, r4, r3
+; CHECK-NEXT: sbcs r1, r5, #0
+; CHECK-NEXT: movwlo r2, #1
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: mvnne r2, #0
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: vdup.32 d17, r2
+; CHECK-NEXT: mvnne r0, #0
+; CHECK-NEXT: vdup.32 d16, r0
+; CHECK-NEXT: vand q9, q4, q8
+; CHECK-NEXT: vorn q8, q9, q8
+; CHECK-NEXT: vmovn.i64 d0, q8
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r4, r5, r11, pc}
+entry:
+ %conv = fptoui <2 x double> %x to <2 x i64>
+ %0 = icmp ult <2 x i64> %conv, <i64 4294967295, i64 4294967295>
+ %spec.store.select = select <2 x i1> %0, <2 x i64> %conv, <2 x i64> <i64 4294967295, i64 4294967295>
+ %conv6 = trunc <2 x i64> %spec.store.select to <2 x i32>
+ ret <2 x i32> %conv6
+}
+
+define <2 x i32> @ustest_f64i32(<2 x double> %x) {
+; CHECK-LABEL: ustest_f64i32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r4, r5, r11, lr}
+; CHECK-NEXT: push {r4, r5, r11, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vorr q4, q0, q0
+; CHECK-NEXT: vmov r0, r1, d8
+; CHECK-NEXT: bl __aeabi_d2lz
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: mov r5, r1
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: vmov.32 d8[0], r4
+; CHECK-NEXT: bl __aeabi_d2lz
+; CHECK-NEXT: mvn r3, #0
+; CHECK-NEXT: subs r4, r4, r3
+; CHECK-NEXT: sbcs r4, r5, #0
+; CHECK-NEXT: vmov.32 d9[0], r0
+; CHECK-NEXT: mov r4, #0
+; CHECK-NEXT: vmov.i64 q9, #0xffffffff
+; CHECK-NEXT: movwlt r4, #1
+; CHECK-NEXT: subs r0, r0, r3
+; CHECK-NEXT: sbcs r0, r1, #0
+; CHECK-NEXT: vmov.32 d9[1], r1
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: mov r2, #0
+; CHECK-NEXT: movwlt r0, #1
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mvnne r0, #0
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: vmov.32 d8[1], r5
+; CHECK-NEXT: mvnne r4, #0
+; CHECK-NEXT: vdup.32 d17, r0
+; CHECK-NEXT: vdup.32 d16, r4
+; CHECK-NEXT: vbsl q8, q4, q9
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r3, r5, d17
+; CHECK-NEXT: rsbs r0, r0, #0
+; CHECK-NEXT: rscs r0, r1, #0
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: movwlt r0, #1
+; CHECK-NEXT: rsbs r1, r3, #0
+; CHECK-NEXT: rscs r1, r5, #0
+; CHECK-NEXT: movwlt r2, #1
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: mvnne r2, #0
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: vmov.32 d19[0], r2
+; CHECK-NEXT: mvnne r0, #0
+; CHECK-NEXT: vmov.32 d18[0], r0
+; CHECK-NEXT: vand q8, q8, q9
+; CHECK-NEXT: vmovn.i64 d0, q8
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r4, r5, r11, pc}
+entry:
+ %conv = fptosi <2 x double> %x to <2 x i64>
+ %0 = icmp slt <2 x i64> %conv, <i64 4294967295, i64 4294967295>
+ %spec.store.select = select <2 x i1> %0, <2 x i64> %conv, <2 x i64> <i64 4294967295, i64 4294967295>
+ %1 = icmp sgt <2 x i64> %spec.store.select, zeroinitializer
+ %spec.store.select7 = select <2 x i1> %1, <2 x i64> %spec.store.select, <2 x i64> zeroinitializer
+ %conv6 = trunc <2 x i64> %spec.store.select7 to <2 x i32>
+ ret <2 x i32> %conv6
+}
+
+define <4 x i32> @stest_f32i32(<4 x float> %x) {
+; CHECK-LABEL: stest_f32i32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT: .pad #4
+; CHECK-NEXT: sub sp, sp, #4
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vorr q4, q0, q0
+; CHECK-NEXT: vmov r0, s16
+; CHECK-NEXT: bl __aeabi_f2lz
+; CHECK-NEXT: mov r7, r0
+; CHECK-NEXT: vmov r0, s18
+; CHECK-NEXT: mov r8, r1
+; CHECK-NEXT: vmov r6, s17
+; CHECK-NEXT: vmov r10, s19
+; CHECK-NEXT: vmov.32 d8[0], r7
+; CHECK-NEXT: bl __aeabi_f2lz
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: vmov.32 d10[0], r0
+; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: mov r4, r1
+; CHECK-NEXT: bl __aeabi_f2lz
+; CHECK-NEXT: mov r9, r0
+; CHECK-NEXT: vmov.32 d9[0], r0
+; CHECK-NEXT: mov r0, r10
+; CHECK-NEXT: mov r11, r1
+; CHECK-NEXT: bl __aeabi_f2lz
+; CHECK-NEXT: mvn r6, #-2147483648
+; CHECK-NEXT: subs r3, r7, r6
+; CHECK-NEXT: sbcs r3, r8, #0
+; CHECK-NEXT: vmov.32 d11[0], r0
+; CHECK-NEXT: mov r3, #0
+; CHECK-NEXT: adr r2, .LCPI3_0
+; CHECK-NEXT: movwlt r3, #1
+; CHECK-NEXT: subs r7, r5, r6
+; CHECK-NEXT: sbcs r7, r4, #0
+; CHECK-NEXT: vmov.32 d11[1], r1
+; CHECK-NEXT: mov r7, #0
+; CHECK-NEXT: movwlt r7, #1
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: mvnne r7, #0
+; CHECK-NEXT: subs r0, r0, r6
+; CHECK-NEXT: sbcs r0, r1, #0
+; CHECK-NEXT: vld1.64 {d18, d19}, [r2:128]
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: mov r2, #0
+; CHECK-NEXT: movwlt r0, #1
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mvnne r0, #0
+; CHECK-NEXT: vmov.32 d10[1], r4
+; CHECK-NEXT: vdup.32 d17, r0
+; CHECK-NEXT: subs r0, r9, r6
+; CHECK-NEXT: sbcs r0, r11, #0
+; CHECK-NEXT: vdup.32 d16, r7
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: vbsl q8, q5, q9
+; CHECK-NEXT: movwlt r0, #1
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: vmov.32 d9[1], r11
+; CHECK-NEXT: mvnne r0, #0
+; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: mvn r6, #0
+; CHECK-NEXT: vdup.32 d21, r0
+; CHECK-NEXT: mvnne r3, #0
+; CHECK-NEXT: vmov.32 d8[1], r8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vdup.32 d20, r3
+; CHECK-NEXT: vbit q9, q4, q10
+; CHECK-NEXT: adr r5, .LCPI3_1
+; CHECK-NEXT: vld1.64 {d20, d21}, [r5:128]
+; CHECK-NEXT: vmov r5, r4, d17
+; CHECK-NEXT: vmov r3, r7, d18
+; CHECK-NEXT: rsbs r0, r0, #-2147483648
+; CHECK-NEXT: sbcs r0, r6, r1
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: movwlt r0, #1
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mvnne r0, #0
+; CHECK-NEXT: rsbs r1, r3, #-2147483648
+; CHECK-NEXT: vmov r1, r3, d19
+; CHECK-NEXT: sbcs r7, r6, r7
+; CHECK-NEXT: mov r7, #0
+; CHECK-NEXT: movwlt r7, #1
+; CHECK-NEXT: rsbs r5, r5, #-2147483648
+; CHECK-NEXT: sbcs r5, r6, r4
+; CHECK-NEXT: mov r5, #0
+; CHECK-NEXT: movwlt r5, #1
+; CHECK-NEXT: rsbs r1, r1, #-2147483648
+; CHECK-NEXT: sbcs r1, r6, r3
+; CHECK-NEXT: movwlt r2, #1
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: mvnne r2, #0
+; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: mvnne r5, #0
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: vdup.32 d25, r5
+; CHECK-NEXT: mvnne r7, #0
+; CHECK-NEXT: vdup.32 d23, r2
+; CHECK-NEXT: vdup.32 d24, r0
+; CHECK-NEXT: vbif q8, q10, q12
+; CHECK-NEXT: vdup.32 d22, r7
+; CHECK-NEXT: vbif q9, q10, q11
+; CHECK-NEXT: vmovn.i64 d1, q8
+; CHECK-NEXT: vmovn.i64 d0, q9
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEXT: add sp, sp, #4
+; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI3_0:
+; CHECK-NEXT: .long 2147483647 @ 0x7fffffff
+; CHECK-NEXT: .long 0 @ 0x0
+; CHECK-NEXT: .long 2147483647 @ 0x7fffffff
+; CHECK-NEXT: .long 0 @ 0x0
+; CHECK-NEXT: .LCPI3_1:
+; CHECK-NEXT: .long 2147483648 @ 0x80000000
+; CHECK-NEXT: .long 4294967295 @ 0xffffffff
+; CHECK-NEXT: .long 2147483648 @ 0x80000000
+; CHECK-NEXT: .long 4294967295 @ 0xffffffff
+entry:
+ %conv = fptosi <4 x float> %x to <4 x i64>
+ %0 = icmp slt <4 x i64> %conv, <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>
+ %spec.store.select = select <4 x i1> %0, <4 x i64> %conv, <4 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>
+ %1 = icmp sgt <4 x i64> %spec.store.select, <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648>
+ %spec.store.select7 = select <4 x i1> %1, <4 x i64> %spec.store.select, <4 x i64> <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648>
+ %conv6 = trunc <4 x i64> %spec.store.select7 to <4 x i32>
+ ret <4 x i32> %conv6
+}
+
+define <4 x i32> @utest_f32i32(<4 x float> %x) {
+; CHECK-LABEL: utest_f32i32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vorr q4, q0, q0
+; CHECK-NEXT: vmov r0, s17
+; CHECK-NEXT: bl __aeabi_f2ulz
+; CHECK-NEXT: mov r9, r0
+; CHECK-NEXT: vmov r0, s16
+; CHECK-NEXT: mov r8, r1
+; CHECK-NEXT: vmov r6, s19
+; CHECK-NEXT: vmov r7, s18
+; CHECK-NEXT: vmov.32 d9[0], r9
+; CHECK-NEXT: bl __aeabi_f2ulz
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: vmov.32 d8[0], r0
+; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: mov r4, r1
+; CHECK-NEXT: bl __aeabi_f2ulz
+; CHECK-NEXT: mov r6, r0
+; CHECK-NEXT: vmov.32 d11[0], r0
+; CHECK-NEXT: mov r0, r7
+; CHECK-NEXT: mov r10, r1
+; CHECK-NEXT: bl __aeabi_f2ulz
+; CHECK-NEXT: mvn r7, #0
+; CHECK-NEXT: subs r2, r5, r7
+; CHECK-NEXT: sbcs r2, r4, #0
+; CHECK-NEXT: vmov.32 d10[0], r0
+; CHECK-NEXT: mov r2, #0
+; CHECK-NEXT: mov r3, #0
+; CHECK-NEXT: movwlo r2, #1
+; CHECK-NEXT: subs r0, r0, r7
+; CHECK-NEXT: sbcs r0, r1, #0
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: movwlo r0, #1
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mvnne r0, #0
+; CHECK-NEXT: subs r1, r6, r7
+; CHECK-NEXT: sbcs r1, r10, #0
+; CHECK-NEXT: mov r1, #0
+; CHECK-NEXT: movwlo r1, #1
+; CHECK-NEXT: subs r7, r9, r7
+; CHECK-NEXT: sbcs r7, r8, #0
+; CHECK-NEXT: movwlo r3, #1
+; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: mvnne r3, #0
+; CHECK-NEXT: cmp r1, #0
+; CHECK-NEXT: mvnne r1, #0
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: vdup.32 d19, r1
+; CHECK-NEXT: mvnne r2, #0
+; CHECK-NEXT: vdup.32 d17, r3
+; CHECK-NEXT: vdup.32 d18, r0
+; CHECK-NEXT: vand q10, q5, q9
+; CHECK-NEXT: vdup.32 d16, r2
+; CHECK-NEXT: vand q11, q4, q8
+; CHECK-NEXT: vorn q9, q10, q9
+; CHECK-NEXT: vorn q8, q11, q8
+; CHECK-NEXT: vmovn.i64 d1, q9
+; CHECK-NEXT: vmovn.i64 d0, q8
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
+entry:
+ %conv = fptoui <4 x float> %x to <4 x i64>
+ %0 = icmp ult <4 x i64> %conv, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
+ %spec.store.select = select <4 x i1> %0, <4 x i64> %conv, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
+ %conv6 = trunc <4 x i64> %spec.store.select to <4 x i32>
+ ret <4 x i32> %conv6
+}
+
+define <4 x i32> @ustest_f32i32(<4 x float> %x) {
+; CHECK-LABEL: ustest_f32i32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
+; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13}
+; CHECK-NEXT: vorr q4, q0, q0
+; CHECK-NEXT: vmov r0, s17
+; CHECK-NEXT: bl __aeabi_f2lz
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: vmov r0, s16
+; CHECK-NEXT: mov r6, r1
+; CHECK-NEXT: bl __aeabi_f2lz
+; CHECK-NEXT: mov r2, r0
+; CHECK-NEXT: vmov r0, s18
+; CHECK-NEXT: vmov.32 d16[0], r2
+; CHECK-NEXT: mvn r4, #0
+; CHECK-NEXT: subs r2, r2, r4
+; CHECK-NEXT: vmov r8, s19
+; CHECK-NEXT: sbcs r2, r1, #0
+; CHECK-NEXT: vmov.32 d17[0], r5
+; CHECK-NEXT: mov r2, #0
+; CHECK-NEXT: vmov.i64 q5, #0xffffffff
+; CHECK-NEXT: movwlt r2, #1
+; CHECK-NEXT: subs r3, r5, r4
+; CHECK-NEXT: sbcs r3, r6, #0
+; CHECK-NEXT: vmov.32 d17[1], r6
+; CHECK-NEXT: mov r3, #0
+; CHECK-NEXT: mov r7, #0
+; CHECK-NEXT: movwlt r3, #1
+; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: mvnne r3, #0
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: vdup.32 d19, r3
+; CHECK-NEXT: mvnne r2, #0
+; CHECK-NEXT: vdup.32 d18, r2
+; CHECK-NEXT: vmov.32 d16[1], r1
+; CHECK-NEXT: vorr q4, q9, q9
+; CHECK-NEXT: vbsl q4, q8, q5
+; CHECK-NEXT: vmov r10, r9, d8
+; CHECK-NEXT: bl __aeabi_f2lz
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: vmov.32 d12[0], r0
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r6, r1
+; CHECK-NEXT: bl __aeabi_f2lz
+; CHECK-NEXT: subs r2, r5, r4
+; CHECK-NEXT: vmov.32 d13[0], r0
+; CHECK-NEXT: sbcs r2, r6, #0
+; CHECK-NEXT: mov r2, #0
+; CHECK-NEXT: movwlt r2, #1
+; CHECK-NEXT: subs r0, r0, r4
+; CHECK-NEXT: sbcs r0, r1, #0
+; CHECK-NEXT: vmov.32 d13[1], r1
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: vmov r5, r4, d9
+; CHECK-NEXT: movwlt r0, #1
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mvnne r0, #0
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: vmov.32 d12[1], r6
+; CHECK-NEXT: mvnne r2, #0
+; CHECK-NEXT: vdup.32 d17, r0
+; CHECK-NEXT: rsbs r0, r10, #0
+; CHECK-NEXT: vdup.32 d16, r2
+; CHECK-NEXT: rscs r0, r9, #0
+; CHECK-NEXT: vbsl q8, q6, q5
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: movwlt r0, #1
+; CHECK-NEXT: vmov r1, r2, d16
+; CHECK-NEXT: vmov r3, r6, d17
+; CHECK-NEXT: rsbs r1, r1, #0
+; CHECK-NEXT: rscs r1, r2, #0
+; CHECK-NEXT: mov r1, #0
+; CHECK-NEXT: movwlt r1, #1
+; CHECK-NEXT: rsbs r2, r3, #0
+; CHECK-NEXT: rscs r2, r6, #0
+; CHECK-NEXT: mov r2, #0
+; CHECK-NEXT: movwlt r2, #1
+; CHECK-NEXT: rsbs r3, r5, #0
+; CHECK-NEXT: rscs r3, r4, #0
+; CHECK-NEXT: movwlt r7, #1
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: mvnne r7, #0
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: mvnne r2, #0
+; CHECK-NEXT: cmp r1, #0
+; CHECK-NEXT: mvnne r1, #0
+; CHECK-NEXT: vmov.32 d21[0], r2
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: vmov.32 d20[0], r1
+; CHECK-NEXT: mvnne r0, #0
+; CHECK-NEXT: vmov.32 d19[0], r7
+; CHECK-NEXT: vand q8, q8, q10
+; CHECK-NEXT: vmov.32 d18[0], r0
+; CHECK-NEXT: vmovn.i64 d1, q8
+; CHECK-NEXT: vand q9, q4, q9
+; CHECK-NEXT: vmovn.i64 d0, q9
+; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13}
+; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
+entry:
+ %conv = fptosi <4 x float> %x to <4 x i64>
+ %0 = icmp slt <4 x i64> %conv, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
+ %spec.store.select = select <4 x i1> %0, <4 x i64> %conv, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
+ %1 = icmp sgt <4 x i64> %spec.store.select, zeroinitializer
+ %spec.store.select7 = select <4 x i1> %1, <4 x i64> %spec.store.select, <4 x i64> zeroinitializer
+ %conv6 = trunc <4 x i64> %spec.store.select7 to <4 x i32>
+ ret <4 x i32> %conv6
+}
+
+define <4 x i32> @stest_f16i32(<4 x half> %x) {
+; CHECK-NEON-LABEL: stest_f16i32:
+; CHECK-NEON: @ %bb.0: @ %entry
+; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEON-NEXT: .pad #4
+; CHECK-NEON-NEXT: sub sp, sp, #4
+; CHECK-NEON-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEON-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEON-NEXT: vmov r0, s0
+; CHECK-NEON-NEXT: vmov.f32 s16, s3
+; CHECK-NEON-NEXT: vmov.f32 s18, s2
+; CHECK-NEON-NEXT: vmov.f32 s20, s1
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: bl __aeabi_f2lz
+; CHECK-NEON-NEXT: mov r9, r0
+; CHECK-NEON-NEXT: vmov r0, s18
+; CHECK-NEON-NEXT: vmov r10, s16
+; CHECK-NEON-NEXT: mov r8, r1
+; CHECK-NEON-NEXT: vmov r6, s20
+; CHECK-NEON-NEXT: vmov.32 d8[0], r9
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: bl __aeabi_f2lz
+; CHECK-NEON-NEXT: mov r5, r0
+; CHECK-NEON-NEXT: vmov.32 d10[0], r0
+; CHECK-NEON-NEXT: mov r0, r6
+; CHECK-NEON-NEXT: mov r4, r1
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: bl __aeabi_f2lz
+; CHECK-NEON-NEXT: mov r11, r0
+; CHECK-NEON-NEXT: vmov.32 d9[0], r0
+; CHECK-NEON-NEXT: mov r0, r10
+; CHECK-NEON-NEXT: mov r7, r1
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: bl __aeabi_f2lz
+; CHECK-NEON-NEXT: mvn r6, #-2147483648
+; CHECK-NEON-NEXT: subs r3, r9, r6
+; CHECK-NEON-NEXT: sbcs r3, r8, #0
+; CHECK-NEON-NEXT: vmov.32 d11[0], r0
+; CHECK-NEON-NEXT: mov r3, #0
+; CHECK-NEON-NEXT: adr r2, .LCPI6_0
+; CHECK-NEON-NEXT: movwlt r3, #1
+; CHECK-NEON-NEXT: subs r5, r5, r6
+; CHECK-NEON-NEXT: sbcs r5, r4, #0
+; CHECK-NEON-NEXT: vmov.32 d11[1], r1
+; CHECK-NEON-NEXT: mov r5, #0
+; CHECK-NEON-NEXT: movwlt r5, #1
+; CHECK-NEON-NEXT: cmp r5, #0
+; CHECK-NEON-NEXT: mvnne r5, #0
+; CHECK-NEON-NEXT: subs r0, r0, r6
+; CHECK-NEON-NEXT: sbcs r0, r1, #0
+; CHECK-NEON-NEXT: vld1.64 {d18, d19}, [r2:128]
+; CHECK-NEON-NEXT: mov r0, #0
+; CHECK-NEON-NEXT: mov r2, #0
+; CHECK-NEON-NEXT: movwlt r0, #1
+; CHECK-NEON-NEXT: cmp r0, #0
+; CHECK-NEON-NEXT: mvnne r0, #0
+; CHECK-NEON-NEXT: vmov.32 d10[1], r4
+; CHECK-NEON-NEXT: vdup.32 d17, r0
+; CHECK-NEON-NEXT: subs r0, r11, r6
+; CHECK-NEON-NEXT: sbcs r0, r7, #0
+; CHECK-NEON-NEXT: vdup.32 d16, r5
+; CHECK-NEON-NEXT: mov r0, #0
+; CHECK-NEON-NEXT: vbsl q8, q5, q9
+; CHECK-NEON-NEXT: movwlt r0, #1
+; CHECK-NEON-NEXT: cmp r0, #0
+; CHECK-NEON-NEXT: vmov.32 d9[1], r7
+; CHECK-NEON-NEXT: mvnne r0, #0
+; CHECK-NEON-NEXT: cmp r3, #0
+; CHECK-NEON-NEXT: mvn r6, #0
+; CHECK-NEON-NEXT: vdup.32 d21, r0
+; CHECK-NEON-NEXT: mvnne r3, #0
+; CHECK-NEON-NEXT: vmov.32 d8[1], r8
+; CHECK-NEON-NEXT: vmov r0, r1, d16
+; CHECK-NEON-NEXT: vdup.32 d20, r3
+; CHECK-NEON-NEXT: vbit q9, q4, q10
+; CHECK-NEON-NEXT: adr r5, .LCPI6_1
+; CHECK-NEON-NEXT: vld1.64 {d20, d21}, [r5:128]
+; CHECK-NEON-NEXT: vmov r5, r4, d17
+; CHECK-NEON-NEXT: vmov r3, r7, d18
+; CHECK-NEON-NEXT: rsbs r0, r0, #-2147483648
+; CHECK-NEON-NEXT: sbcs r0, r6, r1
+; CHECK-NEON-NEXT: mov r0, #0
+; CHECK-NEON-NEXT: movwlt r0, #1
+; CHECK-NEON-NEXT: cmp r0, #0
+; CHECK-NEON-NEXT: mvnne r0, #0
+; CHECK-NEON-NEXT: rsbs r1, r3, #-2147483648
+; CHECK-NEON-NEXT: vmov r1, r3, d19
+; CHECK-NEON-NEXT: sbcs r7, r6, r7
+; CHECK-NEON-NEXT: mov r7, #0
+; CHECK-NEON-NEXT: movwlt r7, #1
+; CHECK-NEON-NEXT: rsbs r5, r5, #-2147483648
+; CHECK-NEON-NEXT: sbcs r5, r6, r4
+; CHECK-NEON-NEXT: mov r5, #0
+; CHECK-NEON-NEXT: movwlt r5, #1
+; CHECK-NEON-NEXT: rsbs r1, r1, #-2147483648
+; CHECK-NEON-NEXT: sbcs r1, r6, r3
+; CHECK-NEON-NEXT: movwlt r2, #1
+; CHECK-NEON-NEXT: cmp r2, #0
+; CHECK-NEON-NEXT: mvnne r2, #0
+; CHECK-NEON-NEXT: cmp r5, #0
+; CHECK-NEON-NEXT: mvnne r5, #0
+; CHECK-NEON-NEXT: cmp r7, #0
+; CHECK-NEON-NEXT: vdup.32 d25, r5
+; CHECK-NEON-NEXT: mvnne r7, #0
+; CHECK-NEON-NEXT: vdup.32 d23, r2
+; CHECK-NEON-NEXT: vdup.32 d24, r0
+; CHECK-NEON-NEXT: vbif q8, q10, q12
+; CHECK-NEON-NEXT: vdup.32 d22, r7
+; CHECK-NEON-NEXT: vbif q9, q10, q11
+; CHECK-NEON-NEXT: vmovn.i64 d1, q8
+; CHECK-NEON-NEXT: vmovn.i64 d0, q9
+; CHECK-NEON-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEON-NEXT: add sp, sp, #4
+; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+; CHECK-NEON-NEXT: .p2align 4
+; CHECK-NEON-NEXT: @ %bb.1:
+; CHECK-NEON-NEXT: .LCPI6_0:
+; CHECK-NEON-NEXT: .long 2147483647 @ 0x7fffffff
+; CHECK-NEON-NEXT: .long 0 @ 0x0
+; CHECK-NEON-NEXT: .long 2147483647 @ 0x7fffffff
+; CHECK-NEON-NEXT: .long 0 @ 0x0
+; CHECK-NEON-NEXT: .LCPI6_1:
+; CHECK-NEON-NEXT: .long 2147483648 @ 0x80000000
+; CHECK-NEON-NEXT: .long 4294967295 @ 0xffffffff
+; CHECK-NEON-NEXT: .long 2147483648 @ 0x80000000
+; CHECK-NEON-NEXT: .long 4294967295 @ 0xffffffff
+;
+; CHECK-FP16-LABEL: stest_f16i32:
+; CHECK-FP16: @ %bb.0: @ %entry
+; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
+; CHECK-FP16-NEXT: .vsave {d10, d11, d12, d13}
+; CHECK-FP16-NEXT: vpush {d10, d11, d12, d13}
+; CHECK-FP16-NEXT: .vsave {d8}
+; CHECK-FP16-NEXT: vpush {d8}
+; CHECK-FP16-NEXT: vmov.u16 r0, d0[0]
+; CHECK-FP16-NEXT: vorr d8, d0, d0
+; CHECK-FP16-NEXT: vmov.u16 r6, d0[1]
+; CHECK-FP16-NEXT: vmov s0, r0
+; CHECK-FP16-NEXT: bl __fixhfdi
+; CHECK-FP16-NEXT: mov r4, r0
+; CHECK-FP16-NEXT: vmov.u16 r0, d8[2]
+; CHECK-FP16-NEXT: mov r8, r1
+; CHECK-FP16-NEXT: vmov.32 d10[0], r4
+; CHECK-FP16-NEXT: vmov s0, r0
+; CHECK-FP16-NEXT: bl __fixhfdi
+; CHECK-FP16-NEXT: vmov s0, r6
+; CHECK-FP16-NEXT: mov r5, r0
+; CHECK-FP16-NEXT: mov r7, r1
+; CHECK-FP16-NEXT: vmov.32 d12[0], r0
+; CHECK-FP16-NEXT: bl __fixhfdi
+; CHECK-FP16-NEXT: mov r9, r0
+; CHECK-FP16-NEXT: vmov.u16 r0, d8[3]
+; CHECK-FP16-NEXT: mov r10, r1
+; CHECK-FP16-NEXT: vmov.32 d11[0], r9
+; CHECK-FP16-NEXT: vmov s0, r0
+; CHECK-FP16-NEXT: bl __fixhfdi
+; CHECK-FP16-NEXT: mvn r6, #-2147483648
+; CHECK-FP16-NEXT: subs r3, r4, r6
+; CHECK-FP16-NEXT: sbcs r3, r8, #0
+; CHECK-FP16-NEXT: vmov.32 d13[0], r0
+; CHECK-FP16-NEXT: mov r3, #0
+; CHECK-FP16-NEXT: adr r2, .LCPI6_0
+; CHECK-FP16-NEXT: movwlt r3, #1
+; CHECK-FP16-NEXT: subs r5, r5, r6
+; CHECK-FP16-NEXT: sbcs r5, r7, #0
+; CHECK-FP16-NEXT: vmov.32 d13[1], r1
+; CHECK-FP16-NEXT: mov r5, #0
+; CHECK-FP16-NEXT: movwlt r5, #1
+; CHECK-FP16-NEXT: cmp r5, #0
+; CHECK-FP16-NEXT: mvnne r5, #0
+; CHECK-FP16-NEXT: subs r0, r0, r6
+; CHECK-FP16-NEXT: sbcs r0, r1, #0
+; CHECK-FP16-NEXT: vld1.64 {d18, d19}, [r2:128]
+; CHECK-FP16-NEXT: mov r0, #0
+; CHECK-FP16-NEXT: mov r2, #0
+; CHECK-FP16-NEXT: movwlt r0, #1
+; CHECK-FP16-NEXT: cmp r0, #0
+; CHECK-FP16-NEXT: mvnne r0, #0
+; CHECK-FP16-NEXT: vmov.32 d12[1], r7
+; CHECK-FP16-NEXT: vdup.32 d17, r0
+; CHECK-FP16-NEXT: subs r0, r9, r6
+; CHECK-FP16-NEXT: sbcs r0, r10, #0
+; CHECK-FP16-NEXT: vdup.32 d16, r5
+; CHECK-FP16-NEXT: mov r0, #0
+; CHECK-FP16-NEXT: vbsl q8, q6, q9
+; CHECK-FP16-NEXT: movwlt r0, #1
+; CHECK-FP16-NEXT: cmp r0, #0
+; CHECK-FP16-NEXT: vmov.32 d11[1], r10
+; CHECK-FP16-NEXT: mvnne r0, #0
+; CHECK-FP16-NEXT: cmp r3, #0
+; CHECK-FP16-NEXT: mvn r6, #0
+; CHECK-FP16-NEXT: vdup.32 d21, r0
+; CHECK-FP16-NEXT: mvnne r3, #0
+; CHECK-FP16-NEXT: vmov.32 d10[1], r8
+; CHECK-FP16-NEXT: vmov r0, r1, d16
+; CHECK-FP16-NEXT: vdup.32 d20, r3
+; CHECK-FP16-NEXT: vbit q9, q5, q10
+; CHECK-FP16-NEXT: adr r5, .LCPI6_1
+; CHECK-FP16-NEXT: vld1.64 {d20, d21}, [r5:128]
+; CHECK-FP16-NEXT: vmov r5, r4, d17
+; CHECK-FP16-NEXT: vmov r3, r7, d18
+; CHECK-FP16-NEXT: rsbs r0, r0, #-2147483648
+; CHECK-FP16-NEXT: sbcs r0, r6, r1
+; CHECK-FP16-NEXT: mov r0, #0
+; CHECK-FP16-NEXT: movwlt r0, #1
+; CHECK-FP16-NEXT: cmp r0, #0
+; CHECK-FP16-NEXT: mvnne r0, #0
+; CHECK-FP16-NEXT: rsbs r1, r3, #-2147483648
+; CHECK-FP16-NEXT: vmov r1, r3, d19
+; CHECK-FP16-NEXT: sbcs r7, r6, r7
+; CHECK-FP16-NEXT: mov r7, #0
+; CHECK-FP16-NEXT: movwlt r7, #1
+; CHECK-FP16-NEXT: rsbs r5, r5, #-2147483648
+; CHECK-FP16-NEXT: sbcs r5, r6, r4
+; CHECK-FP16-NEXT: mov r5, #0
+; CHECK-FP16-NEXT: movwlt r5, #1
+; CHECK-FP16-NEXT: rsbs r1, r1, #-2147483648
+; CHECK-FP16-NEXT: sbcs r1, r6, r3
+; CHECK-FP16-NEXT: movwlt r2, #1
+; CHECK-FP16-NEXT: cmp r2, #0
+; CHECK-FP16-NEXT: mvnne r2, #0
+; CHECK-FP16-NEXT: cmp r5, #0
+; CHECK-FP16-NEXT: mvnne r5, #0
+; CHECK-FP16-NEXT: cmp r7, #0
+; CHECK-FP16-NEXT: vdup.32 d25, r5
+; CHECK-FP16-NEXT: mvnne r7, #0
+; CHECK-FP16-NEXT: vdup.32 d23, r2
+; CHECK-FP16-NEXT: vdup.32 d24, r0
+; CHECK-FP16-NEXT: vbif q8, q10, q12
+; CHECK-FP16-NEXT: vdup.32 d22, r7
+; CHECK-FP16-NEXT: vbif q9, q10, q11
+; CHECK-FP16-NEXT: vmovn.i64 d1, q8
+; CHECK-FP16-NEXT: vmovn.i64 d0, q9
+; CHECK-FP16-NEXT: vpop {d8}
+; CHECK-FP16-NEXT: vpop {d10, d11, d12, d13}
+; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
+; CHECK-FP16-NEXT: .p2align 4
+; CHECK-FP16-NEXT: @ %bb.1:
+; CHECK-FP16-NEXT: .LCPI6_0:
+; CHECK-FP16-NEXT: .long 2147483647 @ 0x7fffffff
+; CHECK-FP16-NEXT: .long 0 @ 0x0
+; CHECK-FP16-NEXT: .long 2147483647 @ 0x7fffffff
+; CHECK-FP16-NEXT: .long 0 @ 0x0
+; CHECK-FP16-NEXT: .LCPI6_1:
+; CHECK-FP16-NEXT: .long 2147483648 @ 0x80000000
+; CHECK-FP16-NEXT: .long 4294967295 @ 0xffffffff
+; CHECK-FP16-NEXT: .long 2147483648 @ 0x80000000
+; CHECK-FP16-NEXT: .long 4294967295 @ 0xffffffff
+entry:
+ %conv = fptosi <4 x half> %x to <4 x i64>
+ %0 = icmp slt <4 x i64> %conv, <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>
+ %spec.store.select = select <4 x i1> %0, <4 x i64> %conv, <4 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>
+ %1 = icmp sgt <4 x i64> %spec.store.select, <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648>
+ %spec.store.select7 = select <4 x i1> %1, <4 x i64> %spec.store.select, <4 x i64> <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648>
+ %conv6 = trunc <4 x i64> %spec.store.select7 to <4 x i32>
+ ret <4 x i32> %conv6
+}
+
+define <4 x i32> @utesth_f16i32(<4 x half> %x) {
+; CHECK-NEON-LABEL: utesth_f16i32:
+; CHECK-NEON: @ %bb.0: @ %entry
+; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
+; CHECK-NEON-NEXT: .vsave {d12, d13}
+; CHECK-NEON-NEXT: vpush {d12, d13}
+; CHECK-NEON-NEXT: .vsave {d8, d9, d10}
+; CHECK-NEON-NEXT: vpush {d8, d9, d10}
+; CHECK-NEON-NEXT: vmov r0, s3
+; CHECK-NEON-NEXT: vmov.f32 s16, s2
+; CHECK-NEON-NEXT: vmov.f32 s18, s1
+; CHECK-NEON-NEXT: vmov.f32 s20, s0
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: bl __aeabi_f2ulz
+; CHECK-NEON-NEXT: mov r10, r0
+; CHECK-NEON-NEXT: vmov r0, s18
+; CHECK-NEON-NEXT: mov r8, r1
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: bl __aeabi_f2ulz
+; CHECK-NEON-NEXT: mov r6, r0
+; CHECK-NEON-NEXT: vmov.32 d13[0], r0
+; CHECK-NEON-NEXT: vmov r0, s20
+; CHECK-NEON-NEXT: mov r9, r1
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: bl __aeabi_f2ulz
+; CHECK-NEON-NEXT: mov r5, r0
+; CHECK-NEON-NEXT: vmov.32 d12[0], r0
+; CHECK-NEON-NEXT: vmov r0, s16
+; CHECK-NEON-NEXT: mov r7, r1
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: vmov.32 d9[0], r10
+; CHECK-NEON-NEXT: bl __aeabi_f2ulz
+; CHECK-NEON-NEXT: mvn r4, #0
+; CHECK-NEON-NEXT: subs r2, r5, r4
+; CHECK-NEON-NEXT: sbcs r2, r7, #0
+; CHECK-NEON-NEXT: vmov.32 d8[0], r0
+; CHECK-NEON-NEXT: mov r2, #0
+; CHECK-NEON-NEXT: mov r3, #0
+; CHECK-NEON-NEXT: movwlo r2, #1
+; CHECK-NEON-NEXT: subs r0, r0, r4
+; CHECK-NEON-NEXT: sbcs r0, r1, #0
+; CHECK-NEON-NEXT: mov r0, #0
+; CHECK-NEON-NEXT: movwlo r0, #1
+; CHECK-NEON-NEXT: cmp r0, #0
+; CHECK-NEON-NEXT: mvnne r0, #0
+; CHECK-NEON-NEXT: subs r1, r10, r4
+; CHECK-NEON-NEXT: sbcs r1, r8, #0
+; CHECK-NEON-NEXT: mov r1, #0
+; CHECK-NEON-NEXT: movwlo r1, #1
+; CHECK-NEON-NEXT: subs r7, r6, r4
+; CHECK-NEON-NEXT: sbcs r7, r9, #0
+; CHECK-NEON-NEXT: movwlo r3, #1
+; CHECK-NEON-NEXT: cmp r3, #0
+; CHECK-NEON-NEXT: mvnne r3, #0
+; CHECK-NEON-NEXT: cmp r1, #0
+; CHECK-NEON-NEXT: mvnne r1, #0
+; CHECK-NEON-NEXT: cmp r2, #0
+; CHECK-NEON-NEXT: vdup.32 d19, r1
+; CHECK-NEON-NEXT: mvnne r2, #0
+; CHECK-NEON-NEXT: vdup.32 d17, r3
+; CHECK-NEON-NEXT: vdup.32 d18, r0
+; CHECK-NEON-NEXT: vand q10, q4, q9
+; CHECK-NEON-NEXT: vdup.32 d16, r2
+; CHECK-NEON-NEXT: vand q11, q6, q8
+; CHECK-NEON-NEXT: vorn q9, q10, q9
+; CHECK-NEON-NEXT: vorn q8, q11, q8
+; CHECK-NEON-NEXT: vmovn.i64 d1, q9
+; CHECK-NEON-NEXT: vmovn.i64 d0, q8
+; CHECK-NEON-NEXT: vpop {d8, d9, d10}
+; CHECK-NEON-NEXT: vpop {d12, d13}
+; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
+;
+; CHECK-FP16-LABEL: utesth_f16i32:
+; CHECK-FP16: @ %bb.0: @ %entry
+; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
+; CHECK-FP16-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-FP16-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-FP16-NEXT: vmov.u16 r0, d0[1]
+; CHECK-FP16-NEXT: vorr d8, d0, d0
+; CHECK-FP16-NEXT: vmov.u16 r5, d0[3]
+; CHECK-FP16-NEXT: vmov s0, r0
+; CHECK-FP16-NEXT: bl __fixunshfdi
+; CHECK-FP16-NEXT: mov r10, r0
+; CHECK-FP16-NEXT: vmov.u16 r0, d8[0]
+; CHECK-FP16-NEXT: mov r8, r1
+; CHECK-FP16-NEXT: vmov.32 d11[0], r10
+; CHECK-FP16-NEXT: vmov s0, r0
+; CHECK-FP16-NEXT: bl __fixunshfdi
+; CHECK-FP16-NEXT: vmov s0, r5
+; CHECK-FP16-NEXT: mov r6, r0
+; CHECK-FP16-NEXT: mov r7, r1
+; CHECK-FP16-NEXT: vmov.32 d10[0], r0
+; CHECK-FP16-NEXT: bl __fixunshfdi
+; CHECK-FP16-NEXT: mov r5, r0
+; CHECK-FP16-NEXT: vmov.u16 r0, d8[2]
+; CHECK-FP16-NEXT: mov r9, r1
+; CHECK-FP16-NEXT: vmov.32 d9[0], r5
+; CHECK-FP16-NEXT: vmov s0, r0
+; CHECK-FP16-NEXT: bl __fixunshfdi
+; CHECK-FP16-NEXT: mvn r4, #0
+; CHECK-FP16-NEXT: subs r2, r6, r4
+; CHECK-FP16-NEXT: sbcs r2, r7, #0
+; CHECK-FP16-NEXT: vmov.32 d8[0], r0
+; CHECK-FP16-NEXT: mov r2, #0
+; CHECK-FP16-NEXT: mov r3, #0
+; CHECK-FP16-NEXT: movwlo r2, #1
+; CHECK-FP16-NEXT: subs r0, r0, r4
+; CHECK-FP16-NEXT: sbcs r0, r1, #0
+; CHECK-FP16-NEXT: mov r0, #0
+; CHECK-FP16-NEXT: movwlo r0, #1
+; CHECK-FP16-NEXT: cmp r0, #0
+; CHECK-FP16-NEXT: mvnne r0, #0
+; CHECK-FP16-NEXT: subs r1, r5, r4
+; CHECK-FP16-NEXT: sbcs r1, r9, #0
+; CHECK-FP16-NEXT: mov r1, #0
+; CHECK-FP16-NEXT: movwlo r1, #1
+; CHECK-FP16-NEXT: subs r7, r10, r4
+; CHECK-FP16-NEXT: sbcs r7, r8, #0
+; CHECK-FP16-NEXT: movwlo r3, #1
+; CHECK-FP16-NEXT: cmp r3, #0
+; CHECK-FP16-NEXT: mvnne r3, #0
+; CHECK-FP16-NEXT: cmp r1, #0
+; CHECK-FP16-NEXT: mvnne r1, #0
+; CHECK-FP16-NEXT: cmp r2, #0
+; CHECK-FP16-NEXT: vdup.32 d19, r1
+; CHECK-FP16-NEXT: mvnne r2, #0
+; CHECK-FP16-NEXT: vdup.32 d17, r3
+; CHECK-FP16-NEXT: vdup.32 d18, r0
+; CHECK-FP16-NEXT: vand q10, q4, q9
+; CHECK-FP16-NEXT: vdup.32 d16, r2
+; CHECK-FP16-NEXT: vand q11, q5, q8
+; CHECK-FP16-NEXT: vorn q9, q10, q9
+; CHECK-FP16-NEXT: vorn q8, q11, q8
+; CHECK-FP16-NEXT: vmovn.i64 d1, q9
+; CHECK-FP16-NEXT: vmovn.i64 d0, q8
+; CHECK-FP16-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
+entry:
+ %conv = fptoui <4 x half> %x to <4 x i64>
+ %0 = icmp ult <4 x i64> %conv, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
+ %spec.store.select = select <4 x i1> %0, <4 x i64> %conv, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
+ %conv6 = trunc <4 x i64> %spec.store.select to <4 x i32>
+ ret <4 x i32> %conv6
+}
+
+define <4 x i32> @ustest_f16i32(<4 x half> %x) {
+; CHECK-NEON-LABEL: ustest_f16i32:
+; CHECK-NEON: @ %bb.0: @ %entry
+; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
+; CHECK-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
+; CHECK-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13}
+; CHECK-NEON-NEXT: vmov r0, s1
+; CHECK-NEON-NEXT: vmov.f32 s16, s3
+; CHECK-NEON-NEXT: vmov.f32 s18, s2
+; CHECK-NEON-NEXT: vmov.f32 s20, s0
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: bl __aeabi_f2lz
+; CHECK-NEON-NEXT: mov r6, r0
+; CHECK-NEON-NEXT: vmov r0, s20
+; CHECK-NEON-NEXT: mov r7, r1
+; CHECK-NEON-NEXT: vmov r5, s18
+; CHECK-NEON-NEXT: vmov r8, s16
+; CHECK-NEON-NEXT: vmov.32 d9[0], r6
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: bl __aeabi_f2lz
+; CHECK-NEON-NEXT: vmov.32 d8[0], r0
+; CHECK-NEON-NEXT: mvn r9, #0
+; CHECK-NEON-NEXT: subs r0, r0, r9
+; CHECK-NEON-NEXT: mov r4, #0
+; CHECK-NEON-NEXT: sbcs r0, r1, #0
+; CHECK-NEON-NEXT: vmov.32 d9[1], r7
+; CHECK-NEON-NEXT: mov r0, #0
+; CHECK-NEON-NEXT: movwlt r0, #1
+; CHECK-NEON-NEXT: cmp r0, #0
+; CHECK-NEON-NEXT: vmov.32 d8[1], r1
+; CHECK-NEON-NEXT: mvnne r0, #0
+; CHECK-NEON-NEXT: subs r1, r6, r9
+; CHECK-NEON-NEXT: sbcs r1, r7, #0
+; CHECK-NEON-NEXT: mov r1, #0
+; CHECK-NEON-NEXT: movwlt r1, #1
+; CHECK-NEON-NEXT: cmp r1, #0
+; CHECK-NEON-NEXT: mvnne r1, #0
+; CHECK-NEON-NEXT: vdup.32 d13, r1
+; CHECK-NEON-NEXT: vdup.32 d12, r0
+; CHECK-NEON-NEXT: mov r0, r5
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: vmov.i64 q5, #0xffffffff
+; CHECK-NEON-NEXT: vbif q4, q5, q6
+; CHECK-NEON-NEXT: bl __aeabi_f2lz
+; CHECK-NEON-NEXT: mov r5, r0
+; CHECK-NEON-NEXT: vmov.32 d12[0], r0
+; CHECK-NEON-NEXT: mov r0, r8
+; CHECK-NEON-NEXT: mov r6, r1
+; CHECK-NEON-NEXT: vmov r7, r10, d8
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: bl __aeabi_f2lz
+; CHECK-NEON-NEXT: subs r2, r5, r9
+; CHECK-NEON-NEXT: vmov.32 d13[0], r0
+; CHECK-NEON-NEXT: sbcs r2, r6, #0
+; CHECK-NEON-NEXT: mov r2, #0
+; CHECK-NEON-NEXT: movwlt r2, #1
+; CHECK-NEON-NEXT: subs r0, r0, r9
+; CHECK-NEON-NEXT: sbcs r0, r1, #0
+; CHECK-NEON-NEXT: vmov.32 d13[1], r1
+; CHECK-NEON-NEXT: mov r0, #0
+; CHECK-NEON-NEXT: movwlt r0, #1
+; CHECK-NEON-NEXT: cmp r0, #0
+; CHECK-NEON-NEXT: mvnne r0, #0
+; CHECK-NEON-NEXT: cmp r2, #0
+; CHECK-NEON-NEXT: vmov.32 d12[1], r6
+; CHECK-NEON-NEXT: mvnne r2, #0
+; CHECK-NEON-NEXT: vdup.32 d17, r0
+; CHECK-NEON-NEXT: rsbs r0, r7, #0
+; CHECK-NEON-NEXT: vdup.32 d16, r2
+; CHECK-NEON-NEXT: vmov r7, r5, d9
+; CHECK-NEON-NEXT: vbsl q8, q6, q5
+; CHECK-NEON-NEXT: rscs r0, r10, #0
+; CHECK-NEON-NEXT: mov r0, #0
+; CHECK-NEON-NEXT: movwlt r0, #1
+; CHECK-NEON-NEXT: vmov r1, r2, d16
+; CHECK-NEON-NEXT: vmov r3, r6, d17
+; CHECK-NEON-NEXT: rsbs r1, r1, #0
+; CHECK-NEON-NEXT: rscs r1, r2, #0
+; CHECK-NEON-NEXT: mov r1, #0
+; CHECK-NEON-NEXT: movwlt r1, #1
+; CHECK-NEON-NEXT: rsbs r2, r3, #0
+; CHECK-NEON-NEXT: rscs r2, r6, #0
+; CHECK-NEON-NEXT: mov r2, #0
+; CHECK-NEON-NEXT: movwlt r2, #1
+; CHECK-NEON-NEXT: rsbs r3, r7, #0
+; CHECK-NEON-NEXT: rscs r3, r5, #0
+; CHECK-NEON-NEXT: movwlt r4, #1
+; CHECK-NEON-NEXT: cmp r4, #0
+; CHECK-NEON-NEXT: mvnne r4, #0
+; CHECK-NEON-NEXT: cmp r2, #0
+; CHECK-NEON-NEXT: mvnne r2, #0
+; CHECK-NEON-NEXT: cmp r1, #0
+; CHECK-NEON-NEXT: mvnne r1, #0
+; CHECK-NEON-NEXT: vmov.32 d21[0], r2
+; CHECK-NEON-NEXT: cmp r0, #0
+; CHECK-NEON-NEXT: vmov.32 d20[0], r1
+; CHECK-NEON-NEXT: mvnne r0, #0
+; CHECK-NEON-NEXT: vmov.32 d19[0], r4
+; CHECK-NEON-NEXT: vand q8, q8, q10
+; CHECK-NEON-NEXT: vmov.32 d18[0], r0
+; CHECK-NEON-NEXT: vmovn.i64 d1, q8
+; CHECK-NEON-NEXT: vand q9, q4, q9
+; CHECK-NEON-NEXT: vmovn.i64 d0, q9
+; CHECK-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13}
+; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
+;
+; CHECK-FP16-LABEL: ustest_f16i32:
+; CHECK-FP16: @ %bb.0: @ %entry
+; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-FP16-NEXT: .vsave {d10, d11, d12, d13, d14, d15}
+; CHECK-FP16-NEXT: vpush {d10, d11, d12, d13, d14, d15}
+; CHECK-FP16-NEXT: .vsave {d8}
+; CHECK-FP16-NEXT: vpush {d8}
+; CHECK-FP16-NEXT: vmov.u16 r0, d0[1]
+; CHECK-FP16-NEXT: vorr d8, d0, d0
+; CHECK-FP16-NEXT: vmov.u16 r8, d0[2]
+; CHECK-FP16-NEXT: vmov.u16 r9, d0[3]
+; CHECK-FP16-NEXT: vmov s0, r0
+; CHECK-FP16-NEXT: bl __fixhfdi
+; CHECK-FP16-NEXT: mov r4, r0
+; CHECK-FP16-NEXT: vmov.u16 r0, d8[0]
+; CHECK-FP16-NEXT: mov r5, r1
+; CHECK-FP16-NEXT: vmov.32 d11[0], r4
+; CHECK-FP16-NEXT: vmov s0, r0
+; CHECK-FP16-NEXT: bl __fixhfdi
+; CHECK-FP16-NEXT: vmov.32 d10[0], r0
+; CHECK-FP16-NEXT: mvn r7, #0
+; CHECK-FP16-NEXT: subs r0, r0, r7
+; CHECK-FP16-NEXT: vmov.i64 q6, #0xffffffff
+; CHECK-FP16-NEXT: sbcs r0, r1, #0
+; CHECK-FP16-NEXT: vmov.32 d11[1], r5
+; CHECK-FP16-NEXT: mov r0, #0
+; CHECK-FP16-NEXT: vmov s0, r8
+; CHECK-FP16-NEXT: movwlt r0, #1
+; CHECK-FP16-NEXT: cmp r0, #0
+; CHECK-FP16-NEXT: vmov.32 d10[1], r1
+; CHECK-FP16-NEXT: mvnne r0, #0
+; CHECK-FP16-NEXT: subs r1, r4, r7
+; CHECK-FP16-NEXT: mov r6, #0
+; CHECK-FP16-NEXT: sbcs r1, r5, #0
+; CHECK-FP16-NEXT: vmov s16, r9
+; CHECK-FP16-NEXT: mov r1, #0
+; CHECK-FP16-NEXT: movwlt r1, #1
+; CHECK-FP16-NEXT: cmp r1, #0
+; CHECK-FP16-NEXT: mvnne r1, #0
+; CHECK-FP16-NEXT: vdup.32 d17, r1
+; CHECK-FP16-NEXT: vdup.32 d16, r0
+; CHECK-FP16-NEXT: vbif q5, q6, q8
+; CHECK-FP16-NEXT: vmov r9, r8, d10
+; CHECK-FP16-NEXT: bl __fixhfdi
+; CHECK-FP16-NEXT: vmov.f32 s0, s16
+; CHECK-FP16-NEXT: mov r4, r0
+; CHECK-FP16-NEXT: mov r5, r1
+; CHECK-FP16-NEXT: vmov.32 d14[0], r0
+; CHECK-FP16-NEXT: bl __fixhfdi
+; CHECK-FP16-NEXT: subs r2, r4, r7
+; CHECK-FP16-NEXT: vmov.32 d15[0], r0
+; CHECK-FP16-NEXT: sbcs r2, r5, #0
+; CHECK-FP16-NEXT: mov r2, #0
+; CHECK-FP16-NEXT: movwlt r2, #1
+; CHECK-FP16-NEXT: subs r0, r0, r7
+; CHECK-FP16-NEXT: sbcs r0, r1, #0
+; CHECK-FP16-NEXT: vmov.32 d15[1], r1
+; CHECK-FP16-NEXT: mov r0, #0
+; CHECK-FP16-NEXT: movwlt r0, #1
+; CHECK-FP16-NEXT: cmp r0, #0
+; CHECK-FP16-NEXT: mvnne r0, #0
+; CHECK-FP16-NEXT: cmp r2, #0
+; CHECK-FP16-NEXT: vmov.32 d14[1], r5
+; CHECK-FP16-NEXT: mvnne r2, #0
+; CHECK-FP16-NEXT: vmov r5, r4, d11
+; CHECK-FP16-NEXT: vdup.32 d17, r0
+; CHECK-FP16-NEXT: rsbs r0, r9, #0
+; CHECK-FP16-NEXT: vdup.32 d16, r2
+; CHECK-FP16-NEXT: rscs r0, r8, #0
+; CHECK-FP16-NEXT: vbsl q8, q7, q6
+; CHECK-FP16-NEXT: mov r0, #0
+; CHECK-FP16-NEXT: movwlt r0, #1
+; CHECK-FP16-NEXT: vmov r1, r2, d16
+; CHECK-FP16-NEXT: vmov r3, r7, d17
+; CHECK-FP16-NEXT: rsbs r1, r1, #0
+; CHECK-FP16-NEXT: rscs r1, r2, #0
+; CHECK-FP16-NEXT: mov r1, #0
+; CHECK-FP16-NEXT: movwlt r1, #1
+; CHECK-FP16-NEXT: rsbs r2, r3, #0
+; CHECK-FP16-NEXT: rscs r2, r7, #0
+; CHECK-FP16-NEXT: mov r2, #0
+; CHECK-FP16-NEXT: movwlt r2, #1
+; CHECK-FP16-NEXT: rsbs r3, r5, #0
+; CHECK-FP16-NEXT: rscs r3, r4, #0
+; CHECK-FP16-NEXT: movwlt r6, #1
+; CHECK-FP16-NEXT: cmp r6, #0
+; CHECK-FP16-NEXT: mvnne r6, #0
+; CHECK-FP16-NEXT: cmp r2, #0
+; CHECK-FP16-NEXT: mvnne r2, #0
+; CHECK-FP16-NEXT: cmp r1, #0
+; CHECK-FP16-NEXT: mvnne r1, #0
+; CHECK-FP16-NEXT: vmov.32 d21[0], r2
+; CHECK-FP16-NEXT: cmp r0, #0
+; CHECK-FP16-NEXT: vmov.32 d20[0], r1
+; CHECK-FP16-NEXT: mvnne r0, #0
+; CHECK-FP16-NEXT: vmov.32 d19[0], r6
+; CHECK-FP16-NEXT: vand q8, q8, q10
+; CHECK-FP16-NEXT: vmov.32 d18[0], r0
+; CHECK-FP16-NEXT: vmovn.i64 d1, q8
+; CHECK-FP16-NEXT: vand q9, q5, q9
+; CHECK-FP16-NEXT: vmovn.i64 d0, q9
+; CHECK-FP16-NEXT: vpop {d8}
+; CHECK-FP16-NEXT: vpop {d10, d11, d12, d13, d14, d15}
+; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc}
+entry:
+ %conv = fptosi <4 x half> %x to <4 x i64>
+ %0 = icmp slt <4 x i64> %conv, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
+ %spec.store.select = select <4 x i1> %0, <4 x i64> %conv, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
+ %1 = icmp sgt <4 x i64> %spec.store.select, zeroinitializer
+ %spec.store.select7 = select <4 x i1> %1, <4 x i64> %spec.store.select, <4 x i64> zeroinitializer
+ %conv6 = trunc <4 x i64> %spec.store.select7 to <4 x i32>
+ ret <4 x i32> %conv6
+}
+
+; i16 saturate
+
+define <2 x i16> @stest_f64i16(<2 x double> %x) {
+; CHECK-LABEL: stest_f64i16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcvt.s32.f64 s4, d0
+; CHECK-NEXT: vmov r0, s4
+; CHECK-NEXT: vcvt.s32.f64 s0, d1
+; CHECK-NEXT: vmov.i32 d17, #0x7fff
+; CHECK-NEXT: vmvn.i32 d18, #0x7fff
+; CHECK-NEXT: vmov.32 d16[0], r0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: vmov.32 d16[1], r0
+; CHECK-NEXT: vmin.s32 d16, d16, d17
+; CHECK-NEXT: vmax.s32 d0, d16, d18
+; CHECK-NEXT: bx lr
+entry:
+ %conv = fptosi <2 x double> %x to <2 x i32>
+ %0 = icmp slt <2 x i32> %conv, <i32 32767, i32 32767>
+ %spec.store.select = select <2 x i1> %0, <2 x i32> %conv, <2 x i32> <i32 32767, i32 32767>
+ %1 = icmp sgt <2 x i32> %spec.store.select, <i32 -32768, i32 -32768>
+ %spec.store.select7 = select <2 x i1> %1, <2 x i32> %spec.store.select, <2 x i32> <i32 -32768, i32 -32768>
+ %conv6 = trunc <2 x i32> %spec.store.select7 to <2 x i16>
+ ret <2 x i16> %conv6
+}
+
+define <2 x i16> @utest_f64i16(<2 x double> %x) {
+; CHECK-LABEL: utest_f64i16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcvt.u32.f64 s4, d0
+; CHECK-NEXT: vmov r0, s4
+; CHECK-NEXT: vcvt.u32.f64 s0, d1
+; CHECK-NEXT: vmov.i32 d17, #0xffff
+; CHECK-NEXT: vmov.32 d16[0], r0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: vmov.32 d16[1], r0
+; CHECK-NEXT: vmin.u32 d0, d16, d17
+; CHECK-NEXT: bx lr
+entry:
+ %conv = fptoui <2 x double> %x to <2 x i32>
+ %0 = icmp ult <2 x i32> %conv, <i32 65535, i32 65535>
+ %spec.store.select = select <2 x i1> %0, <2 x i32> %conv, <2 x i32> <i32 65535, i32 65535>
+ %conv6 = trunc <2 x i32> %spec.store.select to <2 x i16>
+ ret <2 x i16> %conv6
+}
+
+define <2 x i16> @ustest_f64i16(<2 x double> %x) {
+; CHECK-LABEL: ustest_f64i16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcvt.s32.f64 s4, d0
+; CHECK-NEXT: vmov r0, s4
+; CHECK-NEXT: vcvt.s32.f64 s0, d1
+; CHECK-NEXT: vmov.i32 d17, #0xffff
+; CHECK-NEXT: vmov.i32 d18, #0x0
+; CHECK-NEXT: vmov.32 d16[0], r0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: vmov.32 d16[1], r0
+; CHECK-NEXT: vmin.s32 d16, d16, d17
+; CHECK-NEXT: vmax.s32 d0, d16, d18
+; CHECK-NEXT: bx lr
+entry:
+ %conv = fptosi <2 x double> %x to <2 x i32>
+ %0 = icmp slt <2 x i32> %conv, <i32 65535, i32 65535>
+ %spec.store.select = select <2 x i1> %0, <2 x i32> %conv, <2 x i32> <i32 65535, i32 65535>
+ %1 = icmp sgt <2 x i32> %spec.store.select, zeroinitializer
+ %spec.store.select7 = select <2 x i1> %1, <2 x i32> %spec.store.select, <2 x i32> zeroinitializer
+ %conv6 = trunc <2 x i32> %spec.store.select7 to <2 x i16>
+ ret <2 x i16> %conv6
+}
+
+define <4 x i16> @stest_f32i16(<4 x float> %x) {
+; CHECK-LABEL: stest_f32i16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcvt.s32.f32 q8, q0
+; CHECK-NEXT: vmov.i32 q9, #0x7fff
+; CHECK-NEXT: vmvn.i32 q10, #0x7fff
+; CHECK-NEXT: vmin.s32 q8, q8, q9
+; CHECK-NEXT: vmax.s32 q8, q8, q10
+; CHECK-NEXT: vmovn.i32 d0, q8
+; CHECK-NEXT: bx lr
+entry:
+ %conv = fptosi <4 x float> %x to <4 x i32>
+ %0 = icmp slt <4 x i32> %conv, <i32 32767, i32 32767, i32 32767, i32 32767>
+ %spec.store.select = select <4 x i1> %0, <4 x i32> %conv, <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767>
+ %1 = icmp sgt <4 x i32> %spec.store.select, <i32 -32768, i32 -32768, i32 -32768, i32 -32768>
+ %spec.store.select7 = select <4 x i1> %1, <4 x i32> %spec.store.select, <4 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768>
+ %conv6 = trunc <4 x i32> %spec.store.select7 to <4 x i16>
+ ret <4 x i16> %conv6
+}
+
+define <4 x i16> @utest_f32i16(<4 x float> %x) {
+; CHECK-LABEL: utest_f32i16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcvt.u32.f32 q8, q0
+; CHECK-NEXT: vmov.i32 q9, #0xffff
+; CHECK-NEXT: vmin.u32 q8, q8, q9
+; CHECK-NEXT: vmovn.i32 d0, q8
+; CHECK-NEXT: bx lr
+entry:
+ %conv = fptoui <4 x float> %x to <4 x i32>
+ %0 = icmp ult <4 x i32> %conv, <i32 65535, i32 65535, i32 65535, i32 65535>
+ %spec.store.select = select <4 x i1> %0, <4 x i32> %conv, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>
+ %conv6 = trunc <4 x i32> %spec.store.select to <4 x i16>
+ ret <4 x i16> %conv6
+}
+
+define <4 x i16> @ustest_f32i16(<4 x float> %x) {
+; CHECK-LABEL: ustest_f32i16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcvt.s32.f32 q8, q0
+; CHECK-NEXT: vmov.i32 q9, #0xffff
+; CHECK-NEXT: vmov.i32 q10, #0x0
+; CHECK-NEXT: vmin.s32 q8, q8, q9
+; CHECK-NEXT: vmax.s32 q8, q8, q10
+; CHECK-NEXT: vmovn.i32 d0, q8
+; CHECK-NEXT: bx lr
+entry:
+ %conv = fptosi <4 x float> %x to <4 x i32>
+ %0 = icmp slt <4 x i32> %conv, <i32 65535, i32 65535, i32 65535, i32 65535>
+ %spec.store.select = select <4 x i1> %0, <4 x i32> %conv, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>
+ %1 = icmp sgt <4 x i32> %spec.store.select, zeroinitializer
+ %spec.store.select7 = select <4 x i1> %1, <4 x i32> %spec.store.select, <4 x i32> zeroinitializer
+ %conv6 = trunc <4 x i32> %spec.store.select7 to <4 x i16>
+ ret <4 x i16> %conv6
+}
+
+define <8 x i16> @stest_f16i16(<8 x half> %x) {
+; CHECK-NEON-LABEL: stest_f16i16:
+; CHECK-NEON: @ %bb.0: @ %entry
+; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r11, lr}
+; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r11, lr}
+; CHECK-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; CHECK-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; CHECK-NEON-NEXT: vmov r0, s1
+; CHECK-NEON-NEXT: vmov.f32 s16, s7
+; CHECK-NEON-NEXT: vmov.f32 s18, s6
+; CHECK-NEON-NEXT: vmov.f32 s20, s5
+; CHECK-NEON-NEXT: vmov.f32 s22, s4
+; CHECK-NEON-NEXT: vmov.f32 s24, s3
+; CHECK-NEON-NEXT: vmov.f32 s26, s2
+; CHECK-NEON-NEXT: vmov.f32 s28, s0
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: mov r4, r0
+; CHECK-NEON-NEXT: vmov r0, s26
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: mov r5, r0
+; CHECK-NEON-NEXT: vmov r0, s22
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: mov r6, r0
+; CHECK-NEON-NEXT: vmov r0, s24
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: mov r7, r0
+; CHECK-NEON-NEXT: vmov r0, s18
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: vmov s0, r0
+; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s0
+; CHECK-NEON-NEXT: vmov r0, s0
+; CHECK-NEON-NEXT: vmov.32 d13[0], r0
+; CHECK-NEON-NEXT: vmov r0, s16
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: vmov s0, r0
+; CHECK-NEON-NEXT: vmov s22, r7
+; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s0
+; CHECK-NEON-NEXT: vmov s30, r6
+; CHECK-NEON-NEXT: vmov r0, s0
+; CHECK-NEON-NEXT: vmov.32 d13[1], r0
+; CHECK-NEON-NEXT: vmov r0, s28
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: vmov s0, r0
+; CHECK-NEON-NEXT: vmov r1, s20
+; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s0
+; CHECK-NEON-NEXT: vmov s2, r5
+; CHECK-NEON-NEXT: vcvt.s32.f32 s20, s2
+; CHECK-NEON-NEXT: vmov r0, s0
+; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s30
+; CHECK-NEON-NEXT: vmov.32 d8[0], r0
+; CHECK-NEON-NEXT: vmov r0, s0
+; CHECK-NEON-NEXT: vmov.32 d12[0], r0
+; CHECK-NEON-NEXT: mov r0, r1
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: vmov s0, r0
+; CHECK-NEON-NEXT: vmov r0, s20
+; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s0
+; CHECK-NEON-NEXT: vmov s2, r4
+; CHECK-NEON-NEXT: vmov.i32 q8, #0x7fff
+; CHECK-NEON-NEXT: vcvt.s32.f32 s2, s2
+; CHECK-NEON-NEXT: vmvn.i32 q9, #0x7fff
+; CHECK-NEON-NEXT: vmov.32 d9[0], r0
+; CHECK-NEON-NEXT: vmov r0, s0
+; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s22
+; CHECK-NEON-NEXT: vmov.32 d12[1], r0
+; CHECK-NEON-NEXT: vmov r0, s0
+; CHECK-NEON-NEXT: vmin.s32 q10, q6, q8
+; CHECK-NEON-NEXT: vmax.s32 q10, q10, q9
+; CHECK-NEON-NEXT: vmov.32 d9[1], r0
+; CHECK-NEON-NEXT: vmov r0, s2
+; CHECK-NEON-NEXT: vmovn.i32 d1, q10
+; CHECK-NEON-NEXT: vmov.32 d8[1], r0
+; CHECK-NEON-NEXT: vmin.s32 q8, q4, q8
+; CHECK-NEON-NEXT: vmax.s32 q8, q8, q9
+; CHECK-NEON-NEXT: vmovn.i32 d0, q8
+; CHECK-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r11, pc}
+;
+; CHECK-FP16-LABEL: stest_f16i16:
+; CHECK-FP16: @ %bb.0: @ %entry
+; CHECK-FP16-NEXT: vmovx.f16 s4, s0
+; CHECK-FP16-NEXT: vcvt.s32.f16 s12, s0
+; CHECK-FP16-NEXT: vcvt.s32.f16 s0, s3
+; CHECK-FP16-NEXT: vcvt.s32.f16 s5, s2
+; CHECK-FP16-NEXT: vmov r0, s0
+; CHECK-FP16-NEXT: vcvt.s32.f16 s14, s1
+; CHECK-FP16-NEXT: vmovx.f16 s10, s3
+; CHECK-FP16-NEXT: vmovx.f16 s8, s2
+; CHECK-FP16-NEXT: vcvt.s32.f16 s10, s10
+; CHECK-FP16-NEXT: vcvt.s32.f16 s8, s8
+; CHECK-FP16-NEXT: vmovx.f16 s6, s1
+; CHECK-FP16-NEXT: vcvt.s32.f16 s4, s4
+; CHECK-FP16-NEXT: vcvt.s32.f16 s6, s6
+; CHECK-FP16-NEXT: vmov.i32 q10, #0x7fff
+; CHECK-FP16-NEXT: vmvn.i32 q11, #0x7fff
+; CHECK-FP16-NEXT: vmov.32 d17[0], r0
+; CHECK-FP16-NEXT: vmov r0, s5
+; CHECK-FP16-NEXT: vmov.32 d16[0], r0
+; CHECK-FP16-NEXT: vmov r0, s14
+; CHECK-FP16-NEXT: vmov.32 d19[0], r0
+; CHECK-FP16-NEXT: vmov r0, s12
+; CHECK-FP16-NEXT: vmov.32 d18[0], r0
+; CHECK-FP16-NEXT: vmov r0, s10
+; CHECK-FP16-NEXT: vmov.32 d17[1], r0
+; CHECK-FP16-NEXT: vmov r0, s8
+; CHECK-FP16-NEXT: vmov.32 d16[1], r0
+; CHECK-FP16-NEXT: vmov r0, s6
+; CHECK-FP16-NEXT: vmin.s32 q8, q8, q10
+; CHECK-FP16-NEXT: vmax.s32 q8, q8, q11
+; CHECK-FP16-NEXT: vmovn.i32 d1, q8
+; CHECK-FP16-NEXT: vmov.32 d19[1], r0
+; CHECK-FP16-NEXT: vmov r0, s4
+; CHECK-FP16-NEXT: vmov.32 d18[1], r0
+; CHECK-FP16-NEXT: vmin.s32 q9, q9, q10
+; CHECK-FP16-NEXT: vmax.s32 q9, q9, q11
+; CHECK-FP16-NEXT: vmovn.i32 d0, q9
+; CHECK-FP16-NEXT: bx lr
+entry:
+ %conv = fptosi <8 x half> %x to <8 x i32>
+ %0 = icmp slt <8 x i32> %conv, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
+ %spec.store.select = select <8 x i1> %0, <8 x i32> %conv, <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
+ %1 = icmp sgt <8 x i32> %spec.store.select, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
+ %spec.store.select7 = select <8 x i1> %1, <8 x i32> %spec.store.select, <8 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
+ %conv6 = trunc <8 x i32> %spec.store.select7 to <8 x i16>
+ ret <8 x i16> %conv6
+}
+
+define <8 x i16> @utesth_f16i16(<8 x half> %x) {
+; CHECK-NEON-LABEL: utesth_f16i16:
+; CHECK-NEON: @ %bb.0: @ %entry
+; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r11, lr}
+; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r11, lr}
+; CHECK-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14}
+; CHECK-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14}
+; CHECK-NEON-NEXT: vmov r0, s1
+; CHECK-NEON-NEXT: vmov.f32 s16, s7
+; CHECK-NEON-NEXT: vmov.f32 s18, s6
+; CHECK-NEON-NEXT: vmov.f32 s20, s5
+; CHECK-NEON-NEXT: vmov.f32 s22, s4
+; CHECK-NEON-NEXT: vmov.f32 s24, s3
+; CHECK-NEON-NEXT: vmov.f32 s26, s2
+; CHECK-NEON-NEXT: vmov.f32 s28, s0
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: mov r4, r0
+; CHECK-NEON-NEXT: vmov r0, s26
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: mov r5, r0
+; CHECK-NEON-NEXT: vmov r0, s22
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: mov r6, r0
+; CHECK-NEON-NEXT: vmov r0, s24
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: mov r7, r0
+; CHECK-NEON-NEXT: vmov r0, s18
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: vmov s0, r0
+; CHECK-NEON-NEXT: vcvt.u32.f32 s0, s0
+; CHECK-NEON-NEXT: vmov r0, s0
+; CHECK-NEON-NEXT: vmov.32 d13[0], r0
+; CHECK-NEON-NEXT: vmov r0, s16
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: vmov s0, r0
+; CHECK-NEON-NEXT: vmov s16, r7
+; CHECK-NEON-NEXT: vcvt.u32.f32 s0, s0
+; CHECK-NEON-NEXT: vmov s18, r6
+; CHECK-NEON-NEXT: vmov r0, s0
+; CHECK-NEON-NEXT: vmov.32 d13[1], r0
+; CHECK-NEON-NEXT: vmov r0, s28
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: vmov s0, r0
+; CHECK-NEON-NEXT: vmov r1, s20
+; CHECK-NEON-NEXT: vcvt.u32.f32 s0, s0
+; CHECK-NEON-NEXT: vmov s2, r5
+; CHECK-NEON-NEXT: vmov r0, s0
+; CHECK-NEON-NEXT: vcvt.u32.f32 s0, s18
+; CHECK-NEON-NEXT: vcvt.u32.f32 s18, s2
+; CHECK-NEON-NEXT: vmov.32 d10[0], r0
+; CHECK-NEON-NEXT: vmov r0, s0
+; CHECK-NEON-NEXT: vmov.32 d12[0], r0
+; CHECK-NEON-NEXT: mov r0, r1
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: vmov s0, r0
+; CHECK-NEON-NEXT: vmov r0, s18
+; CHECK-NEON-NEXT: vcvt.u32.f32 s0, s0
+; CHECK-NEON-NEXT: vmov s2, r4
+; CHECK-NEON-NEXT: vmov.i32 q8, #0xffff
+; CHECK-NEON-NEXT: vcvt.u32.f32 s2, s2
+; CHECK-NEON-NEXT: vmov.32 d11[0], r0
+; CHECK-NEON-NEXT: vmov r0, s0
+; CHECK-NEON-NEXT: vcvt.u32.f32 s0, s16
+; CHECK-NEON-NEXT: vmov.32 d12[1], r0
+; CHECK-NEON-NEXT: vmov r0, s0
+; CHECK-NEON-NEXT: vmin.u32 q9, q6, q8
+; CHECK-NEON-NEXT: vmov.32 d11[1], r0
+; CHECK-NEON-NEXT: vmov r0, s2
+; CHECK-NEON-NEXT: vmovn.i32 d1, q9
+; CHECK-NEON-NEXT: vmov.32 d10[1], r0
+; CHECK-NEON-NEXT: vmin.u32 q8, q5, q8
+; CHECK-NEON-NEXT: vmovn.i32 d0, q8
+; CHECK-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14}
+; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r11, pc}
+;
+; CHECK-FP16-LABEL: utesth_f16i16:
+; CHECK-FP16: @ %bb.0: @ %entry
+; CHECK-FP16-NEXT: vmovx.f16 s4, s0
+; CHECK-FP16-NEXT: vcvt.u32.f16 s12, s0
+; CHECK-FP16-NEXT: vcvt.u32.f16 s0, s3
+; CHECK-FP16-NEXT: vcvt.u32.f16 s5, s2
+; CHECK-FP16-NEXT: vmov r0, s0
+; CHECK-FP16-NEXT: vcvt.u32.f16 s14, s1
+; CHECK-FP16-NEXT: vmovx.f16 s10, s3
+; CHECK-FP16-NEXT: vmovx.f16 s8, s2
+; CHECK-FP16-NEXT: vcvt.u32.f16 s10, s10
+; CHECK-FP16-NEXT: vcvt.u32.f16 s8, s8
+; CHECK-FP16-NEXT: vmovx.f16 s6, s1
+; CHECK-FP16-NEXT: vcvt.u32.f16 s4, s4
+; CHECK-FP16-NEXT: vcvt.u32.f16 s6, s6
+; CHECK-FP16-NEXT: vmov.i32 q10, #0xffff
+; CHECK-FP16-NEXT: vmov.32 d17[0], r0
+; CHECK-FP16-NEXT: vmov r0, s5
+; CHECK-FP16-NEXT: vmov.32 d16[0], r0
+; CHECK-FP16-NEXT: vmov r0, s14
+; CHECK-FP16-NEXT: vmov.32 d19[0], r0
+; CHECK-FP16-NEXT: vmov r0, s12
+; CHECK-FP16-NEXT: vmov.32 d18[0], r0
+; CHECK-FP16-NEXT: vmov r0, s10
+; CHECK-FP16-NEXT: vmov.32 d17[1], r0
+; CHECK-FP16-NEXT: vmov r0, s8
+; CHECK-FP16-NEXT: vmov.32 d16[1], r0
+; CHECK-FP16-NEXT: vmov r0, s6
+; CHECK-FP16-NEXT: vmin.u32 q8, q8, q10
+; CHECK-FP16-NEXT: vmovn.i32 d1, q8
+; CHECK-FP16-NEXT: vmov.32 d19[1], r0
+; CHECK-FP16-NEXT: vmov r0, s4
+; CHECK-FP16-NEXT: vmov.32 d18[1], r0
+; CHECK-FP16-NEXT: vmin.u32 q9, q9, q10
+; CHECK-FP16-NEXT: vmovn.i32 d0, q9
+; CHECK-FP16-NEXT: bx lr
+entry:
+ %conv = fptoui <8 x half> %x to <8 x i32>
+ %0 = icmp ult <8 x i32> %conv, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
+ %spec.store.select = select <8 x i1> %0, <8 x i32> %conv, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
+ %conv6 = trunc <8 x i32> %spec.store.select to <8 x i16>
+ ret <8 x i16> %conv6
+}
+
+define <8 x i16> @ustest_f16i16(<8 x half> %x) {
+; CHECK-NEON-LABEL: ustest_f16i16:
+; CHECK-NEON: @ %bb.0: @ %entry
+; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r11, lr}
+; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r11, lr}
+; CHECK-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; CHECK-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; CHECK-NEON-NEXT: vmov r0, s1
+; CHECK-NEON-NEXT: vmov.f32 s16, s7
+; CHECK-NEON-NEXT: vmov.f32 s18, s6
+; CHECK-NEON-NEXT: vmov.f32 s20, s5
+; CHECK-NEON-NEXT: vmov.f32 s22, s4
+; CHECK-NEON-NEXT: vmov.f32 s24, s3
+; CHECK-NEON-NEXT: vmov.f32 s26, s2
+; CHECK-NEON-NEXT: vmov.f32 s28, s0
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: mov r4, r0
+; CHECK-NEON-NEXT: vmov r0, s26
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: mov r5, r0
+; CHECK-NEON-NEXT: vmov r0, s22
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: mov r6, r0
+; CHECK-NEON-NEXT: vmov r0, s24
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: mov r7, r0
+; CHECK-NEON-NEXT: vmov r0, s18
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: vmov s0, r0
+; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s0
+; CHECK-NEON-NEXT: vmov r0, s0
+; CHECK-NEON-NEXT: vmov.32 d13[0], r0
+; CHECK-NEON-NEXT: vmov r0, s16
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: vmov s0, r0
+; CHECK-NEON-NEXT: vmov s22, r7
+; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s0
+; CHECK-NEON-NEXT: vmov s30, r6
+; CHECK-NEON-NEXT: vmov r0, s0
+; CHECK-NEON-NEXT: vmov.32 d13[1], r0
+; CHECK-NEON-NEXT: vmov r0, s28
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: vmov s0, r0
+; CHECK-NEON-NEXT: vmov r1, s20
+; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s0
+; CHECK-NEON-NEXT: vmov s2, r5
+; CHECK-NEON-NEXT: vcvt.s32.f32 s20, s2
+; CHECK-NEON-NEXT: vmov r0, s0
+; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s30
+; CHECK-NEON-NEXT: vmov.32 d8[0], r0
+; CHECK-NEON-NEXT: vmov r0, s0
+; CHECK-NEON-NEXT: vmov.32 d12[0], r0
+; CHECK-NEON-NEXT: mov r0, r1
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: vmov s0, r0
+; CHECK-NEON-NEXT: vmov r0, s20
+; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s0
+; CHECK-NEON-NEXT: vmov s2, r4
+; CHECK-NEON-NEXT: vmov.i32 q8, #0xffff
+; CHECK-NEON-NEXT: vcvt.s32.f32 s2, s2
+; CHECK-NEON-NEXT: vmov.i32 q9, #0x0
+; CHECK-NEON-NEXT: vmov.32 d9[0], r0
+; CHECK-NEON-NEXT: vmov r0, s0
+; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s22
+; CHECK-NEON-NEXT: vmov.32 d12[1], r0
+; CHECK-NEON-NEXT: vmov r0, s0
+; CHECK-NEON-NEXT: vmin.s32 q10, q6, q8
+; CHECK-NEON-NEXT: vmax.s32 q10, q10, q9
+; CHECK-NEON-NEXT: vmov.32 d9[1], r0
+; CHECK-NEON-NEXT: vmov r0, s2
+; CHECK-NEON-NEXT: vmovn.i32 d1, q10
+; CHECK-NEON-NEXT: vmov.32 d8[1], r0
+; CHECK-NEON-NEXT: vmin.s32 q8, q4, q8
+; CHECK-NEON-NEXT: vmax.s32 q8, q8, q9
+; CHECK-NEON-NEXT: vmovn.i32 d0, q8
+; CHECK-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r11, pc}
+;
+; CHECK-FP16-LABEL: ustest_f16i16:
+; CHECK-FP16: @ %bb.0: @ %entry
+; CHECK-FP16-NEXT: vmovx.f16 s4, s0
+; CHECK-FP16-NEXT: vcvt.s32.f16 s12, s0
+; CHECK-FP16-NEXT: vcvt.s32.f16 s0, s3
+; CHECK-FP16-NEXT: vcvt.s32.f16 s5, s2
+; CHECK-FP16-NEXT: vmov r0, s0
+; CHECK-FP16-NEXT: vcvt.s32.f16 s14, s1
+; CHECK-FP16-NEXT: vmovx.f16 s10, s3
+; CHECK-FP16-NEXT: vmovx.f16 s8, s2
+; CHECK-FP16-NEXT: vcvt.s32.f16 s10, s10
+; CHECK-FP16-NEXT: vcvt.s32.f16 s8, s8
+; CHECK-FP16-NEXT: vmovx.f16 s6, s1
+; CHECK-FP16-NEXT: vcvt.s32.f16 s4, s4
+; CHECK-FP16-NEXT: vcvt.s32.f16 s6, s6
+; CHECK-FP16-NEXT: vmov.i32 q10, #0xffff
+; CHECK-FP16-NEXT: vmov.i32 q11, #0x0
+; CHECK-FP16-NEXT: vmov.32 d17[0], r0
+; CHECK-FP16-NEXT: vmov r0, s5
+; CHECK-FP16-NEXT: vmov.32 d16[0], r0
+; CHECK-FP16-NEXT: vmov r0, s14
+; CHECK-FP16-NEXT: vmov.32 d19[0], r0
+; CHECK-FP16-NEXT: vmov r0, s12
+; CHECK-FP16-NEXT: vmov.32 d18[0], r0
+; CHECK-FP16-NEXT: vmov r0, s10
+; CHECK-FP16-NEXT: vmov.32 d17[1], r0
+; CHECK-FP16-NEXT: vmov r0, s8
+; CHECK-FP16-NEXT: vmov.32 d16[1], r0
+; CHECK-FP16-NEXT: vmov r0, s6
+; CHECK-FP16-NEXT: vmin.s32 q8, q8, q10
+; CHECK-FP16-NEXT: vmax.s32 q8, q8, q11
+; CHECK-FP16-NEXT: vmovn.i32 d1, q8
+; CHECK-FP16-NEXT: vmov.32 d19[1], r0
+; CHECK-FP16-NEXT: vmov r0, s4
+; CHECK-FP16-NEXT: vmov.32 d18[1], r0
+; CHECK-FP16-NEXT: vmin.s32 q9, q9, q10
+; CHECK-FP16-NEXT: vmax.s32 q9, q9, q11
+; CHECK-FP16-NEXT: vmovn.i32 d0, q9
+; CHECK-FP16-NEXT: bx lr
+entry:
+ %conv = fptosi <8 x half> %x to <8 x i32>
+ %0 = icmp slt <8 x i32> %conv, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
+ %spec.store.select = select <8 x i1> %0, <8 x i32> %conv, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
+ %1 = icmp sgt <8 x i32> %spec.store.select, zeroinitializer
+ %spec.store.select7 = select <8 x i1> %1, <8 x i32> %spec.store.select, <8 x i32> zeroinitializer
+ %conv6 = trunc <8 x i32> %spec.store.select7 to <8 x i16>
+ ret <8 x i16> %conv6
+}
+
+; i64 saturate
+
+define <2 x i64> @stest_f64i64(<2 x double> %x) {
+; CHECK-LABEL: stest_f64i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vorr q4, q0, q0
+; CHECK-NEXT: vorr d0, d9, d9
+; CHECK-NEXT: bl __fixdfti
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: mvn r8, #0
+; CHECK-NEXT: subs r0, r0, r8
+; CHECK-NEXT: mvn r6, #-2147483648
+; CHECK-NEXT: sbcs r0, r1, r6
+; CHECK-NEXT: mov r10, r1
+; CHECK-NEXT: sbcs r0, r2, #0
+; CHECK-NEXT: vorr d0, d8, d8
+; CHECK-NEXT: sbcs r0, r3, #0
+; CHECK-NEXT: mov r7, #0
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: mov r9, #0
+; CHECK-NEXT: movwlt r0, #1
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: moveq r3, r0
+; CHECK-NEXT: movne r0, r2
+; CHECK-NEXT: moveq r10, r6
+; CHECK-NEXT: moveq r5, r8
+; CHECK-NEXT: rsbs r1, r5, #0
+; CHECK-NEXT: rscs r1, r10, #-2147483648
+; CHECK-NEXT: sbcs r0, r8, r0
+; CHECK-NEXT: sbcs r0, r8, r3
+; CHECK-NEXT: movwlt r7, #1
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: moveq r5, r7
+; CHECK-NEXT: bl __fixdfti
+; CHECK-NEXT: subs r4, r0, r8
+; CHECK-NEXT: vmov.32 d1[0], r5
+; CHECK-NEXT: sbcs r4, r1, r6
+; CHECK-NEXT: sbcs r4, r2, #0
+; CHECK-NEXT: sbcs r4, r3, #0
+; CHECK-NEXT: mov r4, #0
+; CHECK-NEXT: movwlt r4, #1
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: moveq r3, r4
+; CHECK-NEXT: movne r6, r1
+; CHECK-NEXT: movne r4, r2
+; CHECK-NEXT: moveq r0, r8
+; CHECK-NEXT: rsbs r1, r0, #0
+; CHECK-NEXT: rscs r1, r6, #-2147483648
+; CHECK-NEXT: sbcs r1, r8, r4
+; CHECK-NEXT: sbcs r1, r8, r3
+; CHECK-NEXT: movwlt r9, #1
+; CHECK-NEXT: cmp r9, #0
+; CHECK-NEXT: moveq r0, r9
+; CHECK-NEXT: mov r1, #-2147483648
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: vmov.32 d0[0], r0
+; CHECK-NEXT: moveq r10, r1
+; CHECK-NEXT: cmp r9, #0
+; CHECK-NEXT: vmov.32 d1[1], r10
+; CHECK-NEXT: moveq r6, r1
+; CHECK-NEXT: vmov.32 d0[1], r6
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
+entry:
+ %conv = fptosi <2 x double> %x to <2 x i128>
+ %0 = icmp slt <2 x i128> %conv, <i128 9223372036854775807, i128 9223372036854775807>
+ %spec.store.select = select <2 x i1> %0, <2 x i128> %conv, <2 x i128> <i128 9223372036854775807, i128 9223372036854775807>
+ %1 = icmp sgt <2 x i128> %spec.store.select, <i128 -9223372036854775808, i128 -9223372036854775808>
+ %spec.store.select7 = select <2 x i1> %1, <2 x i128> %spec.store.select, <2 x i128> <i128 -9223372036854775808, i128 -9223372036854775808>
+ %conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+define <2 x i64> @utest_f64i64(<2 x double> %x) {
+; CHECK-LABEL: utest_f64i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vorr q4, q0, q0
+; CHECK-NEXT: vorr d0, d9, d9
+; CHECK-NEXT: bl __fixunsdfti
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: subs r0, r2, #1
+; CHECK-NEXT: vorr d0, d8, d8
+; CHECK-NEXT: sbcs r0, r3, #0
+; CHECK-NEXT: mov r7, #0
+; CHECK-NEXT: mov r4, r1
+; CHECK-NEXT: movwlo r7, #1
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: mov r6, #0
+; CHECK-NEXT: moveq r5, r7
+; CHECK-NEXT: bl __fixunsdfti
+; CHECK-NEXT: subs r2, r2, #1
+; CHECK-NEXT: vmov.32 d1[0], r5
+; CHECK-NEXT: sbcs r2, r3, #0
+; CHECK-NEXT: movwlo r6, #1
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: moveq r0, r6
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: movne r7, r4
+; CHECK-NEXT: vmov.32 d0[0], r0
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: vmov.32 d1[1], r7
+; CHECK-NEXT: movne r6, r1
+; CHECK-NEXT: vmov.32 d0[1], r6
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r4, r5, r6, r7, r11, pc}
+entry:
+ %conv = fptoui <2 x double> %x to <2 x i128>
+ %0 = icmp ult <2 x i128> %conv, <i128 18446744073709551616, i128 18446744073709551616>
+ %spec.store.select = select <2 x i1> %0, <2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>
+ %conv6 = trunc <2 x i128> %spec.store.select to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+define <2 x i64> @ustest_f64i64(<2 x double> %x) {
+; CHECK-LABEL: ustest_f64i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vorr q4, q0, q0
+; CHECK-NEXT: vorr d0, d9, d9
+; CHECK-NEXT: bl __fixdfti
+; CHECK-NEXT: mov r8, r1
+; CHECK-NEXT: subs r1, r2, #1
+; CHECK-NEXT: sbcs r1, r3, #0
+; CHECK-NEXT: mov r6, #0
+; CHECK-NEXT: movwlt r6, #1
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: mov r9, #1
+; CHECK-NEXT: moveq r3, r6
+; CHECK-NEXT: moveq r8, r6
+; CHECK-NEXT: moveq r2, r9
+; CHECK-NEXT: movne r6, r0
+; CHECK-NEXT: rsbs r0, r6, #0
+; CHECK-NEXT: rscs r0, r8, #0
+; CHECK-NEXT: vorr d0, d8, d8
+; CHECK-NEXT: rscs r0, r2, #0
+; CHECK-NEXT: mov r7, #0
+; CHECK-NEXT: rscs r0, r3, #0
+; CHECK-NEXT: mov r5, #0
+; CHECK-NEXT: movwlt r7, #1
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: moveq r6, r7
+; CHECK-NEXT: bl __fixdfti
+; CHECK-NEXT: subs r4, r2, #1
+; CHECK-NEXT: vmov.32 d1[0], r6
+; CHECK-NEXT: sbcs r4, r3, #0
+; CHECK-NEXT: mov r4, #0
+; CHECK-NEXT: movwlt r4, #1
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: movne r9, r2
+; CHECK-NEXT: moveq r3, r4
+; CHECK-NEXT: moveq r1, r4
+; CHECK-NEXT: movne r4, r0
+; CHECK-NEXT: rsbs r0, r4, #0
+; CHECK-NEXT: rscs r0, r1, #0
+; CHECK-NEXT: rscs r0, r9, #0
+; CHECK-NEXT: rscs r0, r3, #0
+; CHECK-NEXT: movwlt r5, #1
+; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: moveq r4, r5
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: movne r7, r8
+; CHECK-NEXT: vmov.32 d0[0], r4
+; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: vmov.32 d1[1], r7
+; CHECK-NEXT: movne r5, r1
+; CHECK-NEXT: vmov.32 d0[1], r5
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc}
+; fptosi <2 x double> to <2 x i128>, signed clamp below 2^64 and above 0, then trunc to <2 x i64>.
+entry:
+ %conv = fptosi <2 x double> %x to <2 x i128>
+ %0 = icmp slt <2 x i128> %conv, <i128 18446744073709551616, i128 18446744073709551616>
+ %spec.store.select = select <2 x i1> %0, <2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>
+ %1 = icmp sgt <2 x i128> %spec.store.select, zeroinitializer
+ %spec.store.select7 = select <2 x i1> %1, <2 x i128> %spec.store.select, <2 x i128> zeroinitializer
+ %conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+define <2 x i64> @stest_f32i64(<2 x float> %x) {
+; CHECK-LABEL: stest_f32i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
+; CHECK-NEXT: .vsave {d8}
+; CHECK-NEXT: vpush {d8}
+; CHECK-NEXT: vmov.f64 d8, d0
+; CHECK-NEXT: vmov.f32 s0, s17
+; CHECK-NEXT: bl __fixsfti
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: mvn r8, #0
+; CHECK-NEXT: subs r0, r0, r8
+; CHECK-NEXT: mvn r6, #-2147483648
+; CHECK-NEXT: sbcs r0, r1, r6
+; CHECK-NEXT: vmov.f32 s0, s16
+; CHECK-NEXT: sbcs r0, r2, #0
+; CHECK-NEXT: mov r10, r1
+; CHECK-NEXT: sbcs r0, r3, #0
+; CHECK-NEXT: mov r7, #0
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: mov r9, #0
+; CHECK-NEXT: movwlt r0, #1
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: moveq r3, r0
+; CHECK-NEXT: movne r0, r2
+; CHECK-NEXT: moveq r10, r6
+; CHECK-NEXT: moveq r5, r8
+; CHECK-NEXT: rsbs r1, r5, #0
+; CHECK-NEXT: rscs r1, r10, #-2147483648
+; CHECK-NEXT: sbcs r0, r8, r0
+; CHECK-NEXT: sbcs r0, r8, r3
+; CHECK-NEXT: movwlt r7, #1
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: moveq r5, r7
+; CHECK-NEXT: bl __fixsfti
+; CHECK-NEXT: subs r4, r0, r8
+; CHECK-NEXT: vmov.32 d1[0], r5
+; CHECK-NEXT: sbcs r4, r1, r6
+; CHECK-NEXT: sbcs r4, r2, #0
+; CHECK-NEXT: sbcs r4, r3, #0
+; CHECK-NEXT: mov r4, #0
+; CHECK-NEXT: movwlt r4, #1
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: moveq r3, r4
+; CHECK-NEXT: movne r6, r1
+; CHECK-NEXT: movne r4, r2
+; CHECK-NEXT: moveq r0, r8
+; CHECK-NEXT: rsbs r1, r0, #0
+; CHECK-NEXT: rscs r1, r6, #-2147483648
+; CHECK-NEXT: sbcs r1, r8, r4
+; CHECK-NEXT: sbcs r1, r8, r3
+; CHECK-NEXT: movwlt r9, #1
+; CHECK-NEXT: cmp r9, #0
+; CHECK-NEXT: moveq r0, r9
+; CHECK-NEXT: mov r1, #-2147483648
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: vmov.32 d0[0], r0
+; CHECK-NEXT: moveq r10, r1
+; CHECK-NEXT: cmp r9, #0
+; CHECK-NEXT: vmov.32 d1[1], r10
+; CHECK-NEXT: moveq r6, r1
+; CHECK-NEXT: vmov.32 d0[1], r6
+; CHECK-NEXT: vpop {d8}
+; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
+; fptosi <2 x float> to <2 x i128>, signed clamp to [INT64_MIN, INT64_MAX], then trunc to <2 x i64>.
+entry:
+ %conv = fptosi <2 x float> %x to <2 x i128>
+ %0 = icmp slt <2 x i128> %conv, <i128 9223372036854775807, i128 9223372036854775807>
+ %spec.store.select = select <2 x i1> %0, <2 x i128> %conv, <2 x i128> <i128 9223372036854775807, i128 9223372036854775807>
+ %1 = icmp sgt <2 x i128> %spec.store.select, <i128 -9223372036854775808, i128 -9223372036854775808>
+ %spec.store.select7 = select <2 x i1> %1, <2 x i128> %spec.store.select, <2 x i128> <i128 -9223372036854775808, i128 -9223372036854775808>
+ %conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+define <2 x i64> @utest_f32i64(<2 x float> %x) {
+; CHECK-LABEL: utest_f32i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr}
+; CHECK-NEXT: .vsave {d8}
+; CHECK-NEXT: vpush {d8}
+; CHECK-NEXT: vmov.f64 d8, d0
+; CHECK-NEXT: vmov.f32 s0, s17
+; CHECK-NEXT: bl __fixunssfti
+; CHECK-NEXT: vmov.f32 s0, s16
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: subs r0, r2, #1
+; CHECK-NEXT: mov r7, #0
+; CHECK-NEXT: sbcs r0, r3, #0
+; CHECK-NEXT: mov r4, r1
+; CHECK-NEXT: movwlo r7, #1
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: mov r6, #0
+; CHECK-NEXT: moveq r5, r7
+; CHECK-NEXT: bl __fixunssfti
+; CHECK-NEXT: subs r2, r2, #1
+; CHECK-NEXT: vmov.32 d1[0], r5
+; CHECK-NEXT: sbcs r2, r3, #0
+; CHECK-NEXT: movwlo r6, #1
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: moveq r0, r6
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: movne r7, r4
+; CHECK-NEXT: vmov.32 d0[0], r0
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: vmov.32 d1[1], r7
+; CHECK-NEXT: movne r6, r1
+; CHECK-NEXT: vmov.32 d0[1], r6
+; CHECK-NEXT: vpop {d8}
+; CHECK-NEXT: pop {r4, r5, r6, r7, r11, pc}
+; fptoui <2 x float> to <2 x i128>, unsigned clamp below 2^64, then trunc to <2 x i64>.
+entry:
+ %conv = fptoui <2 x float> %x to <2 x i128>
+ %0 = icmp ult <2 x i128> %conv, <i128 18446744073709551616, i128 18446744073709551616>
+ %spec.store.select = select <2 x i1> %0, <2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>
+ %conv6 = trunc <2 x i128> %spec.store.select to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+define <2 x i64> @ustest_f32i64(<2 x float> %x) {
+; CHECK-LABEL: ustest_f32i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-NEXT: .vsave {d8}
+; CHECK-NEXT: vpush {d8}
+; CHECK-NEXT: vmov.f64 d8, d0
+; CHECK-NEXT: vmov.f32 s0, s17
+; CHECK-NEXT: bl __fixsfti
+; CHECK-NEXT: mov r8, r1
+; CHECK-NEXT: subs r1, r2, #1
+; CHECK-NEXT: vmov.f32 s0, s16
+; CHECK-NEXT: sbcs r1, r3, #0
+; CHECK-NEXT: mov r6, #0
+; CHECK-NEXT: mov r9, #1
+; CHECK-NEXT: movwlt r6, #1
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: moveq r3, r6
+; CHECK-NEXT: moveq r8, r6
+; CHECK-NEXT: moveq r2, r9
+; CHECK-NEXT: movne r6, r0
+; CHECK-NEXT: rsbs r0, r6, #0
+; CHECK-NEXT: mov r7, #0
+; CHECK-NEXT: rscs r0, r8, #0
+; CHECK-NEXT: mov r5, #0
+; CHECK-NEXT: rscs r0, r2, #0
+; CHECK-NEXT: rscs r0, r3, #0
+; CHECK-NEXT: movwlt r7, #1
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: moveq r6, r7
+; CHECK-NEXT: bl __fixsfti
+; CHECK-NEXT: subs r4, r2, #1
+; CHECK-NEXT: vmov.32 d1[0], r6
+; CHECK-NEXT: sbcs r4, r3, #0
+; CHECK-NEXT: mov r4, #0
+; CHECK-NEXT: movwlt r4, #1
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: movne r9, r2
+; CHECK-NEXT: moveq r3, r4
+; CHECK-NEXT: moveq r1, r4
+; CHECK-NEXT: movne r4, r0
+; CHECK-NEXT: rsbs r0, r4, #0
+; CHECK-NEXT: rscs r0, r1, #0
+; CHECK-NEXT: rscs r0, r9, #0
+; CHECK-NEXT: rscs r0, r3, #0
+; CHECK-NEXT: movwlt r5, #1
+; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: moveq r4, r5
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: movne r7, r8
+; CHECK-NEXT: vmov.32 d0[0], r4
+; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: vmov.32 d1[1], r7
+; CHECK-NEXT: movne r5, r1
+; CHECK-NEXT: vmov.32 d0[1], r5
+; CHECK-NEXT: vpop {d8}
+; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc}
+; fptosi <2 x float> to <2 x i128>, signed clamp below 2^64 and above 0, then trunc to <2 x i64>.
+entry:
+ %conv = fptosi <2 x float> %x to <2 x i128>
+ %0 = icmp slt <2 x i128> %conv, <i128 18446744073709551616, i128 18446744073709551616>
+ %spec.store.select = select <2 x i1> %0, <2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>
+ %1 = icmp sgt <2 x i128> %spec.store.select, zeroinitializer
+ %spec.store.select7 = select <2 x i1> %1, <2 x i128> %spec.store.select, <2 x i128> zeroinitializer
+ %conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+define <2 x i64> @stest_f16i64(<2 x half> %x) {
+; CHECK-NEON-LABEL: stest_f16i64:
+; CHECK-NEON: @ %bb.0: @ %entry
+; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEON-NEXT: .pad #4
+; CHECK-NEON-NEXT: sub sp, sp, #4
+; CHECK-NEON-NEXT: .vsave {d8}
+; CHECK-NEON-NEXT: vpush {d8}
+; CHECK-NEON-NEXT: vmov r0, s0
+; CHECK-NEON-NEXT: vmov.f32 s16, s1
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: mov r8, r0
+; CHECK-NEON-NEXT: vmov r0, s16
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: vmov s0, r0
+; CHECK-NEON-NEXT: bl __fixsfti
+; CHECK-NEON-NEXT: mov r5, r0
+; CHECK-NEON-NEXT: mvn r9, #0
+; CHECK-NEON-NEXT: subs r0, r0, r9
+; CHECK-NEON-NEXT: mvn r7, #-2147483648
+; CHECK-NEON-NEXT: sbcs r0, r1, r7
+; CHECK-NEON-NEXT: mov r11, r1
+; CHECK-NEON-NEXT: sbcs r0, r2, #0
+; CHECK-NEON-NEXT: vmov s0, r8
+; CHECK-NEON-NEXT: sbcs r0, r3, #0
+; CHECK-NEON-NEXT: mov r6, #0
+; CHECK-NEON-NEXT: mov r0, #0
+; CHECK-NEON-NEXT: mov r10, #0
+; CHECK-NEON-NEXT: movwlt r0, #1
+; CHECK-NEON-NEXT: cmp r0, #0
+; CHECK-NEON-NEXT: moveq r3, r0
+; CHECK-NEON-NEXT: movne r0, r2
+; CHECK-NEON-NEXT: moveq r11, r7
+; CHECK-NEON-NEXT: moveq r5, r9
+; CHECK-NEON-NEXT: rsbs r1, r5, #0
+; CHECK-NEON-NEXT: rscs r1, r11, #-2147483648
+; CHECK-NEON-NEXT: sbcs r0, r9, r0
+; CHECK-NEON-NEXT: sbcs r0, r9, r3
+; CHECK-NEON-NEXT: movwlt r6, #1
+; CHECK-NEON-NEXT: cmp r6, #0
+; CHECK-NEON-NEXT: moveq r5, r6
+; CHECK-NEON-NEXT: bl __fixsfti
+; CHECK-NEON-NEXT: subs r4, r0, r9
+; CHECK-NEON-NEXT: vmov.32 d1[0], r5
+; CHECK-NEON-NEXT: sbcs r4, r1, r7
+; CHECK-NEON-NEXT: sbcs r4, r2, #0
+; CHECK-NEON-NEXT: sbcs r4, r3, #0
+; CHECK-NEON-NEXT: mov r4, #0
+; CHECK-NEON-NEXT: movwlt r4, #1
+; CHECK-NEON-NEXT: cmp r4, #0
+; CHECK-NEON-NEXT: moveq r3, r4
+; CHECK-NEON-NEXT: movne r7, r1
+; CHECK-NEON-NEXT: movne r4, r2
+; CHECK-NEON-NEXT: moveq r0, r9
+; CHECK-NEON-NEXT: rsbs r1, r0, #0
+; CHECK-NEON-NEXT: rscs r1, r7, #-2147483648
+; CHECK-NEON-NEXT: sbcs r1, r9, r4
+; CHECK-NEON-NEXT: sbcs r1, r9, r3
+; CHECK-NEON-NEXT: movwlt r10, #1
+; CHECK-NEON-NEXT: cmp r10, #0
+; CHECK-NEON-NEXT: moveq r0, r10
+; CHECK-NEON-NEXT: mov r1, #-2147483648
+; CHECK-NEON-NEXT: cmp r6, #0
+; CHECK-NEON-NEXT: vmov.32 d0[0], r0
+; CHECK-NEON-NEXT: moveq r11, r1
+; CHECK-NEON-NEXT: cmp r10, #0
+; CHECK-NEON-NEXT: vmov.32 d1[1], r11
+; CHECK-NEON-NEXT: moveq r7, r1
+; CHECK-NEON-NEXT: vmov.32 d0[1], r7
+; CHECK-NEON-NEXT: vpop {d8}
+; CHECK-NEON-NEXT: add sp, sp, #4
+; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; CHECK-FP16-LABEL: stest_f16i64:
+; CHECK-FP16: @ %bb.0: @ %entry
+; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
+; CHECK-FP16-NEXT: vmov.u16 r0, d0[1]
+; CHECK-FP16-NEXT: vmov.u16 r7, d0[0]
+; CHECK-FP16-NEXT: vmov s0, r0
+; CHECK-FP16-NEXT: bl __fixhfti
+; CHECK-FP16-NEXT: mov r5, r0
+; CHECK-FP16-NEXT: mvn r8, #0
+; CHECK-FP16-NEXT: subs r0, r0, r8
+; CHECK-FP16-NEXT: mvn r6, #-2147483648
+; CHECK-FP16-NEXT: sbcs r0, r1, r6
+; CHECK-FP16-NEXT: mov r10, r1
+; CHECK-FP16-NEXT: sbcs r0, r2, #0
+; CHECK-FP16-NEXT: vmov s0, r7
+; CHECK-FP16-NEXT: sbcs r0, r3, #0
+; CHECK-FP16-NEXT: mov r7, #0
+; CHECK-FP16-NEXT: mov r0, #0
+; CHECK-FP16-NEXT: mov r9, #0
+; CHECK-FP16-NEXT: movwlt r0, #1
+; CHECK-FP16-NEXT: cmp r0, #0
+; CHECK-FP16-NEXT: moveq r3, r0
+; CHECK-FP16-NEXT: movne r0, r2
+; CHECK-FP16-NEXT: moveq r10, r6
+; CHECK-FP16-NEXT: moveq r5, r8
+; CHECK-FP16-NEXT: rsbs r1, r5, #0
+; CHECK-FP16-NEXT: rscs r1, r10, #-2147483648
+; CHECK-FP16-NEXT: sbcs r0, r8, r0
+; CHECK-FP16-NEXT: sbcs r0, r8, r3
+; CHECK-FP16-NEXT: movwlt r7, #1
+; CHECK-FP16-NEXT: cmp r7, #0
+; CHECK-FP16-NEXT: moveq r5, r7
+; CHECK-FP16-NEXT: bl __fixhfti
+; CHECK-FP16-NEXT: subs r4, r0, r8
+; CHECK-FP16-NEXT: vmov.32 d1[0], r5
+; CHECK-FP16-NEXT: sbcs r4, r1, r6
+; CHECK-FP16-NEXT: sbcs r4, r2, #0
+; CHECK-FP16-NEXT: sbcs r4, r3, #0
+; CHECK-FP16-NEXT: mov r4, #0
+; CHECK-FP16-NEXT: movwlt r4, #1
+; CHECK-FP16-NEXT: cmp r4, #0
+; CHECK-FP16-NEXT: moveq r3, r4
+; CHECK-FP16-NEXT: movne r6, r1
+; CHECK-FP16-NEXT: movne r4, r2
+; CHECK-FP16-NEXT: moveq r0, r8
+; CHECK-FP16-NEXT: rsbs r1, r0, #0
+; CHECK-FP16-NEXT: rscs r1, r6, #-2147483648
+; CHECK-FP16-NEXT: sbcs r1, r8, r4
+; CHECK-FP16-NEXT: sbcs r1, r8, r3
+; CHECK-FP16-NEXT: movwlt r9, #1
+; CHECK-FP16-NEXT: cmp r9, #0
+; CHECK-FP16-NEXT: moveq r0, r9
+; CHECK-FP16-NEXT: mov r1, #-2147483648
+; CHECK-FP16-NEXT: cmp r7, #0
+; CHECK-FP16-NEXT: vmov.32 d0[0], r0
+; CHECK-FP16-NEXT: moveq r10, r1
+; CHECK-FP16-NEXT: cmp r9, #0
+; CHECK-FP16-NEXT: vmov.32 d1[1], r10
+; CHECK-FP16-NEXT: moveq r6, r1
+; CHECK-FP16-NEXT: vmov.32 d0[1], r6
+; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
+; fptosi <2 x half> to <2 x i128>, signed clamp to [INT64_MIN, INT64_MAX], then trunc to <2 x i64>.
+entry:
+ %conv = fptosi <2 x half> %x to <2 x i128>
+ %0 = icmp slt <2 x i128> %conv, <i128 9223372036854775807, i128 9223372036854775807>
+ %spec.store.select = select <2 x i1> %0, <2 x i128> %conv, <2 x i128> <i128 9223372036854775807, i128 9223372036854775807>
+ %1 = icmp sgt <2 x i128> %spec.store.select, <i128 -9223372036854775808, i128 -9223372036854775808>
+ %spec.store.select7 = select <2 x i1> %1, <2 x i128> %spec.store.select, <2 x i128> <i128 -9223372036854775808, i128 -9223372036854775808>
+ %conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+define <2 x i64> @utesth_f16i64(<2 x half> %x) {
+; CHECK-NEON-LABEL: utesth_f16i64:
+; CHECK-NEON: @ %bb.0: @ %entry
+; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r11, lr}
+; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r11, lr}
+; CHECK-NEON-NEXT: .vsave {d8}
+; CHECK-NEON-NEXT: vpush {d8}
+; CHECK-NEON-NEXT: vmov r0, s0
+; CHECK-NEON-NEXT: vmov.f32 s16, s1
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: mov r5, r0
+; CHECK-NEON-NEXT: vmov r0, s16
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: vmov s0, r0
+; CHECK-NEON-NEXT: bl __fixunssfti
+; CHECK-NEON-NEXT: mov r6, r0
+; CHECK-NEON-NEXT: subs r0, r2, #1
+; CHECK-NEON-NEXT: vmov s0, r5
+; CHECK-NEON-NEXT: sbcs r0, r3, #0
+; CHECK-NEON-NEXT: mov r5, #0
+; CHECK-NEON-NEXT: mov r4, r1
+; CHECK-NEON-NEXT: movwlo r5, #1
+; CHECK-NEON-NEXT: cmp r5, #0
+; CHECK-NEON-NEXT: mov r7, #0
+; CHECK-NEON-NEXT: moveq r6, r5
+; CHECK-NEON-NEXT: bl __fixunssfti
+; CHECK-NEON-NEXT: subs r2, r2, #1
+; CHECK-NEON-NEXT: vmov.32 d1[0], r6
+; CHECK-NEON-NEXT: sbcs r2, r3, #0
+; CHECK-NEON-NEXT: movwlo r7, #1
+; CHECK-NEON-NEXT: cmp r7, #0
+; CHECK-NEON-NEXT: moveq r0, r7
+; CHECK-NEON-NEXT: cmp r5, #0
+; CHECK-NEON-NEXT: movne r5, r4
+; CHECK-NEON-NEXT: vmov.32 d0[0], r0
+; CHECK-NEON-NEXT: cmp r7, #0
+; CHECK-NEON-NEXT: vmov.32 d1[1], r5
+; CHECK-NEON-NEXT: movne r7, r1
+; CHECK-NEON-NEXT: vmov.32 d0[1], r7
+; CHECK-NEON-NEXT: vpop {d8}
+; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r11, pc}
+;
+; CHECK-FP16-LABEL: utesth_f16i64:
+; CHECK-FP16: @ %bb.0: @ %entry
+; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r11, lr}
+; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r11, lr}
+; CHECK-FP16-NEXT: vmov.u16 r0, d0[1]
+; CHECK-FP16-NEXT: vmov.u16 r7, d0[0]
+; CHECK-FP16-NEXT: vmov s0, r0
+; CHECK-FP16-NEXT: bl __fixunshfti
+; CHECK-FP16-NEXT: mov r5, r0
+; CHECK-FP16-NEXT: subs r0, r2, #1
+; CHECK-FP16-NEXT: vmov s0, r7
+; CHECK-FP16-NEXT: sbcs r0, r3, #0
+; CHECK-FP16-NEXT: mov r7, #0
+; CHECK-FP16-NEXT: mov r4, r1
+; CHECK-FP16-NEXT: movwlo r7, #1
+; CHECK-FP16-NEXT: cmp r7, #0
+; CHECK-FP16-NEXT: mov r6, #0
+; CHECK-FP16-NEXT: moveq r5, r7
+; CHECK-FP16-NEXT: bl __fixunshfti
+; CHECK-FP16-NEXT: subs r2, r2, #1
+; CHECK-FP16-NEXT: vmov.32 d1[0], r5
+; CHECK-FP16-NEXT: sbcs r2, r3, #0
+; CHECK-FP16-NEXT: movwlo r6, #1
+; CHECK-FP16-NEXT: cmp r6, #0
+; CHECK-FP16-NEXT: moveq r0, r6
+; CHECK-FP16-NEXT: cmp r7, #0
+; CHECK-FP16-NEXT: movne r7, r4
+; CHECK-FP16-NEXT: vmov.32 d0[0], r0
+; CHECK-FP16-NEXT: cmp r6, #0
+; CHECK-FP16-NEXT: vmov.32 d1[1], r7
+; CHECK-FP16-NEXT: movne r6, r1
+; CHECK-FP16-NEXT: vmov.32 d0[1], r6
+; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r11, pc}
+; fptoui <2 x half> to <2 x i128>, unsigned clamp below 2^64, then trunc to <2 x i64>.
+entry:
+ %conv = fptoui <2 x half> %x to <2 x i128>
+ %0 = icmp ult <2 x i128> %conv, <i128 18446744073709551616, i128 18446744073709551616>
+ %spec.store.select = select <2 x i1> %0, <2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>
+ %conv6 = trunc <2 x i128> %spec.store.select to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+define <2 x i64> @ustest_f16i64(<2 x half> %x) {
+; CHECK-NEON-LABEL: ustest_f16i64:
+; CHECK-NEON: @ %bb.0: @ %entry
+; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-NEON-NEXT: .vsave {d8}
+; CHECK-NEON-NEXT: vpush {d8}
+; CHECK-NEON-NEXT: vmov r0, s0
+; CHECK-NEON-NEXT: vmov.f32 s16, s1
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: mov r5, r0
+; CHECK-NEON-NEXT: vmov r0, s16
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: vmov s0, r0
+; CHECK-NEON-NEXT: bl __fixsfti
+; CHECK-NEON-NEXT: mov r8, r1
+; CHECK-NEON-NEXT: subs r1, r2, #1
+; CHECK-NEON-NEXT: vmov s0, r5
+; CHECK-NEON-NEXT: sbcs r1, r3, #0
+; CHECK-NEON-NEXT: mov r5, #0
+; CHECK-NEON-NEXT: mov r9, #1
+; CHECK-NEON-NEXT: movwlt r5, #1
+; CHECK-NEON-NEXT: cmp r5, #0
+; CHECK-NEON-NEXT: moveq r3, r5
+; CHECK-NEON-NEXT: moveq r8, r5
+; CHECK-NEON-NEXT: moveq r2, r9
+; CHECK-NEON-NEXT: movne r5, r0
+; CHECK-NEON-NEXT: rsbs r0, r5, #0
+; CHECK-NEON-NEXT: mov r7, #0
+; CHECK-NEON-NEXT: rscs r0, r8, #0
+; CHECK-NEON-NEXT: mov r6, #0
+; CHECK-NEON-NEXT: rscs r0, r2, #0
+; CHECK-NEON-NEXT: rscs r0, r3, #0
+; CHECK-NEON-NEXT: movwlt r7, #1
+; CHECK-NEON-NEXT: cmp r7, #0
+; CHECK-NEON-NEXT: moveq r5, r7
+; CHECK-NEON-NEXT: bl __fixsfti
+; CHECK-NEON-NEXT: subs r4, r2, #1
+; CHECK-NEON-NEXT: vmov.32 d1[0], r5
+; CHECK-NEON-NEXT: sbcs r4, r3, #0
+; CHECK-NEON-NEXT: mov r4, #0
+; CHECK-NEON-NEXT: movwlt r4, #1
+; CHECK-NEON-NEXT: cmp r4, #0
+; CHECK-NEON-NEXT: movne r9, r2
+; CHECK-NEON-NEXT: moveq r3, r4
+; CHECK-NEON-NEXT: moveq r1, r4
+; CHECK-NEON-NEXT: movne r4, r0
+; CHECK-NEON-NEXT: rsbs r0, r4, #0
+; CHECK-NEON-NEXT: rscs r0, r1, #0
+; CHECK-NEON-NEXT: rscs r0, r9, #0
+; CHECK-NEON-NEXT: rscs r0, r3, #0
+; CHECK-NEON-NEXT: movwlt r6, #1
+; CHECK-NEON-NEXT: cmp r6, #0
+; CHECK-NEON-NEXT: moveq r4, r6
+; CHECK-NEON-NEXT: cmp r7, #0
+; CHECK-NEON-NEXT: movne r7, r8
+; CHECK-NEON-NEXT: vmov.32 d0[0], r4
+; CHECK-NEON-NEXT: cmp r6, #0
+; CHECK-NEON-NEXT: vmov.32 d1[1], r7
+; CHECK-NEON-NEXT: movne r6, r1
+; CHECK-NEON-NEXT: vmov.32 d0[1], r6
+; CHECK-NEON-NEXT: vpop {d8}
+; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc}
+;
+; CHECK-FP16-LABEL: ustest_f16i64:
+; CHECK-FP16: @ %bb.0: @ %entry
+; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-FP16-NEXT: vmov.u16 r0, d0[1]
+; CHECK-FP16-NEXT: vmov.u16 r4, d0[0]
+; CHECK-FP16-NEXT: vmov s0, r0
+; CHECK-FP16-NEXT: bl __fixhfti
+; CHECK-FP16-NEXT: mov r8, r1
+; CHECK-FP16-NEXT: subs r1, r2, #1
+; CHECK-FP16-NEXT: sbcs r1, r3, #0
+; CHECK-FP16-NEXT: mov r6, #0
+; CHECK-FP16-NEXT: movwlt r6, #1
+; CHECK-FP16-NEXT: cmp r6, #0
+; CHECK-FP16-NEXT: mov r9, #1
+; CHECK-FP16-NEXT: moveq r3, r6
+; CHECK-FP16-NEXT: moveq r8, r6
+; CHECK-FP16-NEXT: moveq r2, r9
+; CHECK-FP16-NEXT: movne r6, r0
+; CHECK-FP16-NEXT: rsbs r0, r6, #0
+; CHECK-FP16-NEXT: rscs r0, r8, #0
+; CHECK-FP16-NEXT: vmov s0, r4
+; CHECK-FP16-NEXT: rscs r0, r2, #0
+; CHECK-FP16-NEXT: mov r7, #0
+; CHECK-FP16-NEXT: rscs r0, r3, #0
+; CHECK-FP16-NEXT: mov r5, #0
+; CHECK-FP16-NEXT: movwlt r7, #1
+; CHECK-FP16-NEXT: cmp r7, #0
+; CHECK-FP16-NEXT: moveq r6, r7
+; CHECK-FP16-NEXT: bl __fixhfti
+; CHECK-FP16-NEXT: subs r4, r2, #1
+; CHECK-FP16-NEXT: vmov.32 d1[0], r6
+; CHECK-FP16-NEXT: sbcs r4, r3, #0
+; CHECK-FP16-NEXT: mov r4, #0
+; CHECK-FP16-NEXT: movwlt r4, #1
+; CHECK-FP16-NEXT: cmp r4, #0
+; CHECK-FP16-NEXT: movne r9, r2
+; CHECK-FP16-NEXT: moveq r3, r4
+; CHECK-FP16-NEXT: moveq r1, r4
+; CHECK-FP16-NEXT: movne r4, r0
+; CHECK-FP16-NEXT: rsbs r0, r4, #0
+; CHECK-FP16-NEXT: rscs r0, r1, #0
+; CHECK-FP16-NEXT: rscs r0, r9, #0
+; CHECK-FP16-NEXT: rscs r0, r3, #0
+; CHECK-FP16-NEXT: movwlt r5, #1
+; CHECK-FP16-NEXT: cmp r5, #0
+; CHECK-FP16-NEXT: moveq r4, r5
+; CHECK-FP16-NEXT: cmp r7, #0
+; CHECK-FP16-NEXT: movne r7, r8
+; CHECK-FP16-NEXT: vmov.32 d0[0], r4
+; CHECK-FP16-NEXT: cmp r5, #0
+; CHECK-FP16-NEXT: vmov.32 d1[1], r7
+; CHECK-FP16-NEXT: movne r5, r1
+; CHECK-FP16-NEXT: vmov.32 d0[1], r5
+; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc}
+; fptosi <2 x half> to <2 x i128>, signed clamp below 2^64 and above 0, then trunc to <2 x i64>.
+entry:
+ %conv = fptosi <2 x half> %x to <2 x i128>
+ %0 = icmp slt <2 x i128> %conv, <i128 18446744073709551616, i128 18446744073709551616>
+ %spec.store.select = select <2 x i1> %0, <2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>
+ %1 = icmp sgt <2 x i128> %spec.store.select, zeroinitializer
+ %spec.store.select7 = select <2 x i1> %1, <2 x i128> %spec.store.select, <2 x i128> zeroinitializer
+ %conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+
+
+; i32 saturate
+
+define <2 x i32> @stest_f64i32_mm(<2 x double> %x) {
+; CHECK-LABEL: stest_f64i32_mm:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vorr q4, q0, q0
+; CHECK-NEXT: vmov r0, r1, d8
+; CHECK-NEXT: bl __aeabi_d2lz
+; CHECK-NEXT: mov r2, r1
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: mov r3, #0
+; CHECK-NEXT: clz r7, r2
+; CHECK-NEXT: movwmi r3, #1
+; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: mvn r3, #-2147483648
+; CHECK-NEXT: mvn r5, #-2147483648
+; CHECK-NEXT: movne r3, r4
+; CHECK-NEXT: cmn r4, #-2147483647
+; CHECK-NEXT: movhs r4, r5
+; CHECK-NEXT: lsr r7, r7, #5
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: mov r6, #0
+; CHECK-NEXT: moveq r4, r3
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: movpl r2, r6
+; CHECK-NEXT: cmn r2, #1
+; CHECK-NEXT: mov r3, #0
+; CHECK-NEXT: add r2, r2, #1
+; CHECK-NEXT: movwgt r3, #1
+; CHECK-NEXT: clz r2, r2
+; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: mov r3, #-2147483648
+; CHECK-NEXT: movne r3, r4
+; CHECK-NEXT: mov r7, #-2147483648
+; CHECK-NEXT: cmp r4, #-2147483648
+; CHECK-NEXT: lsr r2, r2, #5
+; CHECK-NEXT: movls r4, r7
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: moveq r4, r3
+; CHECK-NEXT: bl __aeabi_d2lz
+; CHECK-NEXT: cmp r1, #0
+; CHECK-NEXT: mov r2, #0
+; CHECK-NEXT: movwmi r2, #1
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: mvn r2, #-2147483648
+; CHECK-NEXT: vmov.32 d0[0], r4
+; CHECK-NEXT: movne r2, r0
+; CHECK-NEXT: cmn r0, #-2147483647
+; CHECK-NEXT: movlo r5, r0
+; CHECK-NEXT: clz r0, r1
+; CHECK-NEXT: lsr r0, r0, #5
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: moveq r5, r2
+; CHECK-NEXT: cmp r1, #0
+; CHECK-NEXT: movpl r1, r6
+; CHECK-NEXT: cmn r1, #1
+; CHECK-NEXT: add r1, r1, #1
+; CHECK-NEXT: movwgt r6, #1
+; CHECK-NEXT: clz r1, r1
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: mov r0, #-2147483648
+; CHECK-NEXT: movne r0, r5
+; CHECK-NEXT: cmp r5, #-2147483648
+; CHECK-NEXT: movls r5, r7
+; CHECK-NEXT: lsr r1, r1, #5
+; CHECK-NEXT: cmp r1, #0
+; CHECK-NEXT: moveq r5, r0
+; CHECK-NEXT: vmov.32 d0[1], r5
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r4, r5, r6, r7, r11, pc}
+; Same signed i32 clamp as the non-_mm variant, but expressed with llvm.smin/llvm.smax intrinsics on <2 x i64>.
+entry:
+ %conv = fptosi <2 x double> %x to <2 x i64>
+ %spec.store.select = call <2 x i64> @llvm.smin.v2i64(<2 x i64> %conv, <2 x i64> <i64 2147483647, i64 2147483647>)
+ %spec.store.select7 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> %spec.store.select, <2 x i64> <i64 -2147483648, i64 -2147483648>)
+ %conv6 = trunc <2 x i64> %spec.store.select7 to <2 x i32>
+ ret <2 x i32> %conv6
+}
+
+define <2 x i32> @utest_f64i32_mm(<2 x double> %x) {
+; CHECK-LABEL: utest_f64i32_mm:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r4, lr}
+; CHECK-NEXT: push {r4, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vorr q4, q0, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: bl __aeabi_d2ulz
+; CHECK-NEXT: mov r4, r1
+; CHECK-NEXT: vmov r2, r1, d8
+; CHECK-NEXT: vmov.32 d9[0], r0
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: bl __aeabi_d2ulz
+; CHECK-NEXT: vmov.32 d8[0], r0
+; CHECK-NEXT: vmov.i64 q8, #0xffffffff
+; CHECK-NEXT: vmov.32 d9[1], r4
+; CHECK-NEXT: vmov.32 d8[1], r1
+; CHECK-NEXT: vqsub.u64 q8, q4, q8
+; CHECK-NEXT: vsub.i64 q8, q4, q8
+; CHECK-NEXT: vmovn.i64 d0, q8
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r4, pc}
+; Unsigned i32 clamp via the llvm.umin intrinsic: fptoui to <2 x i64>, umin at UINT32_MAX, trunc to <2 x i32>.
+entry:
+ %conv = fptoui <2 x double> %x to <2 x i64>
+ %spec.store.select = call <2 x i64> @llvm.umin.v2i64(<2 x i64> %conv, <2 x i64> <i64 4294967295, i64 4294967295>)
+ %conv6 = trunc <2 x i64> %spec.store.select to <2 x i32>
+ ret <2 x i32> %conv6
+}
+
+define <2 x i32> @ustest_f64i32_mm(<2 x double> %x) {
+; CHECK-LABEL: ustest_f64i32_mm:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, r8, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vorr q4, q0, q0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: bl __aeabi_d2lz
+; CHECK-NEXT: mov r4, r1
+; CHECK-NEXT: vmov r2, r1, d8
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: mov r3, #0
+; CHECK-NEXT: movwmi r3, #1
+; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: clz r3, r4
+; CHECK-NEXT: mvn r8, #0
+; CHECK-NEXT: movne r8, r0
+; CHECK-NEXT: mov r5, #0
+; CHECK-NEXT: lsr r3, r3, #5
+; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: movne r8, r0
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: movpl r4, r5
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: mov r6, #0
+; CHECK-NEXT: mvn r7, #0
+; CHECK-NEXT: movwgt r6, #1
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: movne r6, r8
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: bl __aeabi_d2lz
+; CHECK-NEXT: cmp r1, #0
+; CHECK-NEXT: mov r2, #0
+; CHECK-NEXT: movwmi r2, #1
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: clz r2, r1
+; CHECK-NEXT: movne r7, r0
+; CHECK-NEXT: lsr r2, r2, #5
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: movne r7, r0
+; CHECK-NEXT: cmp r1, #0
+; CHECK-NEXT: movpl r1, r5
+; CHECK-NEXT: clz r0, r1
+; CHECK-NEXT: cmp r1, #0
+; CHECK-NEXT: movwgt r5, #1
+; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: movne r5, r7
+; CHECK-NEXT: lsr r0, r0, #5
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: clz r0, r4
+; CHECK-NEXT: movne r5, r7
+; CHECK-NEXT: vmov.32 d0[0], r5
+; CHECK-NEXT: lsr r0, r0, #5
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: movne r6, r8
+; CHECK-NEXT: vmov.32 d0[1], r6
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r4, r5, r6, r7, r8, pc}
+; Signed conversion clamped to [0, UINT32_MAX] via llvm.smin then llvm.smax, trunc to <2 x i32>.
+entry:
+ %conv = fptosi <2 x double> %x to <2 x i64>
+ %spec.store.select = call <2 x i64> @llvm.smin.v2i64(<2 x i64> %conv, <2 x i64> <i64 4294967295, i64 4294967295>)
+ %spec.store.select7 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> %spec.store.select, <2 x i64> zeroinitializer)
+ %conv6 = trunc <2 x i64> %spec.store.select7 to <2 x i32>
+ ret <2 x i32> %conv6
+}
+
+define <4 x i32> @stest_f32i32_mm(<4 x float> %x) {
+; CHECK-LABEL: stest_f32i32_mm:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vorr q4, q0, q0
+; CHECK-NEXT: mov r8, #-2147483648
+; CHECK-NEXT: mvn r7, #-2147483648
+; CHECK-NEXT: mov r9, #0
+; CHECK-NEXT: vmov r0, s19
+; CHECK-NEXT: vmov r5, s18
+; CHECK-NEXT: bl __aeabi_f2lz
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: cmp r1, #0
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: clz r2, r1
+; CHECK-NEXT: movwmi r0, #1
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mvn r0, #-2147483648
+; CHECK-NEXT: lsr r2, r2, #5
+; CHECK-NEXT: movne r0, r4
+; CHECK-NEXT: cmn r4, #-2147483647
+; CHECK-NEXT: movhs r4, r7
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: moveq r4, r0
+; CHECK-NEXT: cmp r1, #0
+; CHECK-NEXT: movpl r1, r9
+; CHECK-NEXT: cmn r1, #1
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: add r1, r1, #1
+; CHECK-NEXT: movwgt r0, #1
+; CHECK-NEXT: clz r1, r1
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mov r0, #-2147483648
+; CHECK-NEXT: movne r0, r4
+; CHECK-NEXT: cmp r4, #-2147483648
+; CHECK-NEXT: movls r4, r8
+; CHECK-NEXT: lsr r1, r1, #5
+; CHECK-NEXT: cmp r1, #0
+; CHECK-NEXT: moveq r4, r0
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: bl __aeabi_f2lz
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: cmp r1, #0
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: clz r2, r1
+; CHECK-NEXT: movwmi r0, #1
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mvn r0, #-2147483648
+; CHECK-NEXT: lsr r2, r2, #5
+; CHECK-NEXT: movne r0, r5
+; CHECK-NEXT: cmn r5, #-2147483647
+; CHECK-NEXT: movhs r5, r7
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: moveq r5, r0
+; CHECK-NEXT: cmp r1, #0
+; CHECK-NEXT: movpl r1, r9
+; CHECK-NEXT: cmn r1, #1
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: mov r2, #-2147483648
+; CHECK-NEXT: movwgt r0, #1
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: add r0, r1, #1
+; CHECK-NEXT: movne r2, r5
+; CHECK-NEXT: clz r0, r0
+; CHECK-NEXT: cmp r5, #-2147483648
+; CHECK-NEXT: movls r5, r8
+; CHECK-NEXT: lsr r1, r0, #5
+; CHECK-NEXT: vmov r0, s16
+; CHECK-NEXT: cmp r1, #0
+; CHECK-NEXT: moveq r5, r2
+; CHECK-NEXT: bl __aeabi_f2lz
+; CHECK-NEXT: mov r6, r0
+; CHECK-NEXT: cmp r1, #0
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: clz r2, r1
+; CHECK-NEXT: movwmi r0, #1
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mvn r0, #-2147483648
+; CHECK-NEXT: lsr r2, r2, #5
+; CHECK-NEXT: movne r0, r6
+; CHECK-NEXT: cmn r6, #-2147483647
+; CHECK-NEXT: movhs r6, r7
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: moveq r6, r0
+; CHECK-NEXT: cmp r1, #0
+; CHECK-NEXT: movpl r1, r9
+; CHECK-NEXT: cmn r1, #1
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: mov r2, #-2147483648
+; CHECK-NEXT: movwgt r0, #1
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: add r0, r1, #1
+; CHECK-NEXT: movne r2, r6
+; CHECK-NEXT: clz r0, r0
+; CHECK-NEXT: cmp r6, #-2147483648
+; CHECK-NEXT: movls r6, r8
+; CHECK-NEXT: lsr r1, r0, #5
+; CHECK-NEXT: vmov r0, s17
+; CHECK-NEXT: cmp r1, #0
+; CHECK-NEXT: moveq r6, r2
+; CHECK-NEXT: bl __aeabi_f2lz
+; CHECK-NEXT: cmp r1, #0
+; CHECK-NEXT: mov r2, #0
+; CHECK-NEXT: movwmi r2, #1
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: mvn r2, #-2147483648
+; CHECK-NEXT: vmov.32 d0[0], r6
+; CHECK-NEXT: movne r2, r0
+; CHECK-NEXT: cmn r0, #-2147483647
+; CHECK-NEXT: movlo r7, r0
+; CHECK-NEXT: clz r0, r1
+; CHECK-NEXT: vmov.32 d1[0], r5
+; CHECK-NEXT: lsr r0, r0, #5
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: moveq r7, r2
+; CHECK-NEXT: cmp r1, #0
+; CHECK-NEXT: movpl r1, r9
+; CHECK-NEXT: cmn r1, #1
+; CHECK-NEXT: add r1, r1, #1
+; CHECK-NEXT: movwgt r9, #1
+; CHECK-NEXT: clz r1, r1
+; CHECK-NEXT: cmp r9, #0
+; CHECK-NEXT: mov r0, #-2147483648
+; CHECK-NEXT: vmov.32 d1[1], r4
+; CHECK-NEXT: movne r0, r7
+; CHECK-NEXT: cmp r7, #-2147483648
+; CHECK-NEXT: movls r7, r8
+; CHECK-NEXT: lsr r1, r1, #5
+; CHECK-NEXT: cmp r1, #0
+; CHECK-NEXT: moveq r7, r0
+; CHECK-NEXT: vmov.32 d0[1], r7
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc}
+; 4-lane float variant: fptosi to <4 x i64>, llvm.smin/llvm.smax clamp to [INT32_MIN, INT32_MAX], trunc to <4 x i32>.
+entry:
+ %conv = fptosi <4 x float> %x to <4 x i64>
+ %spec.store.select = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>)
+ %spec.store.select7 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> %spec.store.select, <4 x i64> <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648>)
+ %conv6 = trunc <4 x i64> %spec.store.select7 to <4 x i32>
+ ret <4 x i32> %conv6
+}
+
+; Unsigned saturation test: fptoui <4 x float> to <4 x i64>, clamped with
+; llvm.umin to 0xffffffff, then truncated to <4 x i32>.  Lowered here as four
+; __aeabi_f2ulz libcalls plus a vqsub.u64/vsub.i64 clamp and vmovn narrows;
+; presumably a target for a native saturating-narrow lowering later.
define <4 x i32> @utest_f32i32_mm(<4 x float> %x) {
; CHECK-LABEL: utest_f32i32_mm:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr}
; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr}
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
; CHECK-NEXT: vpush {d8, d9, d10, d11}
; CHECK-NEXT: vorr q4, q0, q0
; CHECK-NEXT: vmov r0, s17
; CHECK-NEXT: bl __aeabi_f2ulz
; CHECK-NEXT: mov r4, r1
; CHECK-NEXT: vmov r1, s18
; CHECK-NEXT: vmov r5, s19
; CHECK-NEXT: vmov r6, s16
; CHECK-NEXT: vmov.32 d9[0], r0
; CHECK-NEXT: mov r0, r1
; CHECK-NEXT: bl __aeabi_f2ulz
; CHECK-NEXT: vmov.32 d10[0], r0
; CHECK-NEXT: mov r0, r5
; CHECK-NEXT: mov r7, r1
; CHECK-NEXT: bl __aeabi_f2ulz
; CHECK-NEXT: vmov.32 d11[0], r0
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r5, r1
; CHECK-NEXT: bl __aeabi_f2ulz
; CHECK-NEXT: vmov.32 d8[0], r0
; CHECK-NEXT: vmov.i64 q8, #0xffffffff
; CHECK-NEXT: vmov.32 d11[1], r5
; CHECK-NEXT: vmov.32 d9[1], r4
; CHECK-NEXT: vmov.32 d10[1], r7
; CHECK-NEXT: vmov.32 d8[1], r1
; CHECK-NEXT: vqsub.u64 q9, q5, q8
; CHECK-NEXT: vqsub.u64 q8, q4, q8
; CHECK-NEXT: vsub.i64 q9, q5, q9
; CHECK-NEXT: vsub.i64 q8, q4, q8
; CHECK-NEXT: vmovn.i64 d1, q9
; CHECK-NEXT: vmovn.i64 d0, q8
; CHECK-NEXT: vpop {d8, d9, d10, d11}
; CHECK-NEXT: pop {r4, r5, r6, r7, r11, pc}
entry:
 %conv = fptoui <4 x float> %x to <4 x i64>
 %spec.store.select = call <4 x i64> @llvm.umin.v4i64(<4 x i64> %conv, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
 %conv6 = trunc <4 x i64> %spec.store.select to <4 x i32>
 ret <4 x i32> %conv6
}
+
+; Unsigned-range saturation via signed ops: fptosi <4 x float> to <4 x i64>,
+; clamped to [0, 0xffffffff] with llvm.smin/llvm.smax, then truncated to
+; <4 x i32>.  Currently scalarised through __aeabi_f2lz with a long chain of
+; compare/conditional-move sequences per lane.
define <4 x i32> @ustest_f32i32_mm(<4 x float> %x) {
; CHECK-LABEL: ustest_f32i32_mm:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-NEXT: .pad #4
; CHECK-NEXT: sub sp, sp, #4
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vorr q4, q0, q0
; CHECK-NEXT: vmov r0, s19
; CHECK-NEXT: bl __aeabi_f2lz
; CHECK-NEXT: mov r2, r0
; CHECK-NEXT: vmov r0, s17
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: mov r3, #0
; CHECK-NEXT: movwmi r3, #1
; CHECK-NEXT: clz r6, r1
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: mvn r3, #0
; CHECK-NEXT: movne r3, r2
; CHECK-NEXT: lsr r6, r6, #5
; CHECK-NEXT: cmp r6, #0
; CHECK-NEXT: mov r7, #0
; CHECK-NEXT: movne r3, r2
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: movpl r1, r7
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: mov r11, #0
; CHECK-NEXT: clz r1, r1
; CHECK-NEXT: movwgt r11, #1
; CHECK-NEXT: cmp r11, #0
; CHECK-NEXT: movne r11, r3
; CHECK-NEXT: lsr r1, r1, #5
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: mvn r9, #0
; CHECK-NEXT: vmov r8, s16
; CHECK-NEXT: movne r11, r3
; CHECK-NEXT: bl __aeabi_f2lz
; CHECK-NEXT: mov r4, r1
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: mov r1, #0
; CHECK-NEXT: mvn r10, #0
; CHECK-NEXT: movwmi r1, #1
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: clz r1, r4
; CHECK-NEXT: movne r10, r0
; CHECK-NEXT: mov r6, #0
; CHECK-NEXT: lsr r1, r1, #5
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: movne r10, r0
; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: movpl r4, r7
; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: movwgt r6, #1
; CHECK-NEXT: cmp r6, #0
; CHECK-NEXT: mov r0, r8
; CHECK-NEXT: movne r6, r10
; CHECK-NEXT: bl __aeabi_f2lz
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: movwmi r2, #1
; CHECK-NEXT: clz r3, r1
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: mvn r2, #0
; CHECK-NEXT: movne r2, r0
; CHECK-NEXT: lsr r3, r3, #5
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: mov r5, #0
; CHECK-NEXT: movne r2, r0
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: movpl r1, r7
; CHECK-NEXT: clz r0, r1
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: movwgt r5, #1
; CHECK-NEXT: cmp r5, #0
; CHECK-NEXT: lsr r0, r0, #5
; CHECK-NEXT: movne r5, r2
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: vmov r0, s18
; CHECK-NEXT: movne r5, r2
; CHECK-NEXT: bl __aeabi_f2lz
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: movwmi r2, #1
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: clz r2, r1
; CHECK-NEXT: movne r9, r0
; CHECK-NEXT: vmov.32 d0[0], r5
; CHECK-NEXT: lsr r2, r2, #5
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: movne r9, r0
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: movpl r1, r7
; CHECK-NEXT: clz r0, r1
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: movwgt r7, #1
; CHECK-NEXT: cmp r7, #0
; CHECK-NEXT: movne r7, r9
; CHECK-NEXT: lsr r0, r0, #5
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: clz r0, r4
; CHECK-NEXT: movne r7, r9
; CHECK-NEXT: vmov.32 d1[0], r7
; CHECK-NEXT: lsr r0, r0, #5
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: movne r6, r10
; CHECK-NEXT: vmov.32 d1[1], r11
; CHECK-NEXT: vmov.32 d0[1], r6
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: add sp, sp, #4
; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
entry:
 %conv = fptosi <4 x float> %x to <4 x i64>
 %spec.store.select = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
 %spec.store.select7 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> %spec.store.select, <4 x i64> zeroinitializer)
 %conv6 = trunc <4 x i64> %spec.store.select7 to <4 x i32>
 ret <4 x i32> %conv6
}
+
+; Signed saturation of <4 x half> to <4 x i32>: fptosi to <4 x i64>, clamped to
+; [i32min, i32max] with llvm.smin/llvm.smax, then truncated.  The NEON variant
+; promotes each half via __aeabi_h2f before __aeabi_f2lz; the fullfp16 variant
+; converts directly with __fixhfdi.  Both then clamp lane-by-lane in scalar code.
define <4 x i32> @stest_f16i32_mm(<4 x half> %x) {
; CHECK-NEON-LABEL: stest_f16i32_mm:
; CHECK-NEON: @ %bb.0: @ %entry
; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
; CHECK-NEON-NEXT: .vsave {d8, d9, d10}
; CHECK-NEON-NEXT: vpush {d8, d9, d10}
; CHECK-NEON-NEXT: vmov r0, s3
; CHECK-NEON-NEXT: vmov.f32 s20, s2
; CHECK-NEON-NEXT: vmov.f32 s16, s1
; CHECK-NEON-NEXT: vmov.f32 s18, s0
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: bl __aeabi_f2lz
; CHECK-NEON-NEXT: mov r4, r0
; CHECK-NEON-NEXT: vmov r0, s20
; CHECK-NEON-NEXT: cmp r1, #0
; CHECK-NEON-NEXT: mov r2, #0
; CHECK-NEON-NEXT: movwmi r2, #1
; CHECK-NEON-NEXT: clz r3, r1
; CHECK-NEON-NEXT: cmp r2, #0
; CHECK-NEON-NEXT: mvn r2, #-2147483648
; CHECK-NEON-NEXT: movne r2, r4
; CHECK-NEON-NEXT: mvn r7, #-2147483648
; CHECK-NEON-NEXT: cmn r4, #-2147483647
; CHECK-NEON-NEXT: lsr r3, r3, #5
; CHECK-NEON-NEXT: movhs r4, r7
; CHECK-NEON-NEXT: cmp r3, #0
; CHECK-NEON-NEXT: moveq r4, r2
; CHECK-NEON-NEXT: mov r9, #0
; CHECK-NEON-NEXT: cmp r1, #0
; CHECK-NEON-NEXT: mov r2, #0
; CHECK-NEON-NEXT: movpl r1, r9
; CHECK-NEON-NEXT: cmn r1, #1
; CHECK-NEON-NEXT: movwgt r2, #1
; CHECK-NEON-NEXT: add r1, r1, #1
; CHECK-NEON-NEXT: clz r1, r1
; CHECK-NEON-NEXT: cmp r2, #0
; CHECK-NEON-NEXT: mov r2, #-2147483648
; CHECK-NEON-NEXT: mov r8, #-2147483648
; CHECK-NEON-NEXT: movne r2, r4
; CHECK-NEON-NEXT: cmp r4, #-2147483648
; CHECK-NEON-NEXT: movls r4, r8
; CHECK-NEON-NEXT: lsr r1, r1, #5
; CHECK-NEON-NEXT: cmp r1, #0
; CHECK-NEON-NEXT: moveq r4, r2
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: bl __aeabi_f2lz
; CHECK-NEON-NEXT: mov r5, r0
; CHECK-NEON-NEXT: cmp r1, #0
; CHECK-NEON-NEXT: mov r0, #0
; CHECK-NEON-NEXT: clz r2, r1
; CHECK-NEON-NEXT: movwmi r0, #1
; CHECK-NEON-NEXT: cmp r0, #0
; CHECK-NEON-NEXT: mvn r0, #-2147483648
; CHECK-NEON-NEXT: lsr r2, r2, #5
; CHECK-NEON-NEXT: movne r0, r5
; CHECK-NEON-NEXT: cmn r5, #-2147483647
; CHECK-NEON-NEXT: movhs r5, r7
; CHECK-NEON-NEXT: cmp r2, #0
; CHECK-NEON-NEXT: moveq r5, r0
; CHECK-NEON-NEXT: cmp r1, #0
; CHECK-NEON-NEXT: movpl r1, r9
; CHECK-NEON-NEXT: cmn r1, #1
; CHECK-NEON-NEXT: mov r0, #0
; CHECK-NEON-NEXT: mov r2, #-2147483648
; CHECK-NEON-NEXT: movwgt r0, #1
; CHECK-NEON-NEXT: cmp r0, #0
; CHECK-NEON-NEXT: add r0, r1, #1
; CHECK-NEON-NEXT: movne r2, r5
; CHECK-NEON-NEXT: clz r0, r0
; CHECK-NEON-NEXT: cmp r5, #-2147483648
; CHECK-NEON-NEXT: movls r5, r8
; CHECK-NEON-NEXT: lsr r1, r0, #5
; CHECK-NEON-NEXT: vmov r0, s18
; CHECK-NEON-NEXT: cmp r1, #0
; CHECK-NEON-NEXT: moveq r5, r2
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: bl __aeabi_f2lz
; CHECK-NEON-NEXT: mov r6, r0
; CHECK-NEON-NEXT: cmp r1, #0
; CHECK-NEON-NEXT: mov r0, #0
; CHECK-NEON-NEXT: clz r2, r1
; CHECK-NEON-NEXT: movwmi r0, #1
; CHECK-NEON-NEXT: cmp r0, #0
; CHECK-NEON-NEXT: mvn r0, #-2147483648
; CHECK-NEON-NEXT: lsr r2, r2, #5
; CHECK-NEON-NEXT: movne r0, r6
; CHECK-NEON-NEXT: cmn r6, #-2147483647
; CHECK-NEON-NEXT: movhs r6, r7
; CHECK-NEON-NEXT: cmp r2, #0
; CHECK-NEON-NEXT: moveq r6, r0
; CHECK-NEON-NEXT: cmp r1, #0
; CHECK-NEON-NEXT: movpl r1, r9
; CHECK-NEON-NEXT: cmn r1, #1
; CHECK-NEON-NEXT: mov r0, #0
; CHECK-NEON-NEXT: mov r2, #-2147483648
; CHECK-NEON-NEXT: movwgt r0, #1
; CHECK-NEON-NEXT: cmp r0, #0
; CHECK-NEON-NEXT: add r0, r1, #1
; CHECK-NEON-NEXT: movne r2, r6
; CHECK-NEON-NEXT: clz r0, r0
; CHECK-NEON-NEXT: cmp r6, #-2147483648
; CHECK-NEON-NEXT: movls r6, r8
; CHECK-NEON-NEXT: lsr r1, r0, #5
; CHECK-NEON-NEXT: vmov r0, s16
; CHECK-NEON-NEXT: cmp r1, #0
; CHECK-NEON-NEXT: moveq r6, r2
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: bl __aeabi_f2lz
; CHECK-NEON-NEXT: cmp r1, #0
; CHECK-NEON-NEXT: mov r2, #0
; CHECK-NEON-NEXT: movwmi r2, #1
; CHECK-NEON-NEXT: cmp r2, #0
; CHECK-NEON-NEXT: mvn r2, #-2147483648
; CHECK-NEON-NEXT: vmov.32 d0[0], r6
; CHECK-NEON-NEXT: movne r2, r0
; CHECK-NEON-NEXT: cmn r0, #-2147483647
; CHECK-NEON-NEXT: movlo r7, r0
; CHECK-NEON-NEXT: clz r0, r1
; CHECK-NEON-NEXT: vmov.32 d1[0], r5
; CHECK-NEON-NEXT: lsr r0, r0, #5
; CHECK-NEON-NEXT: cmp r0, #0
; CHECK-NEON-NEXT: moveq r7, r2
; CHECK-NEON-NEXT: cmp r1, #0
; CHECK-NEON-NEXT: movpl r1, r9
; CHECK-NEON-NEXT: cmn r1, #1
; CHECK-NEON-NEXT: add r1, r1, #1
; CHECK-NEON-NEXT: movwgt r9, #1
; CHECK-NEON-NEXT: clz r1, r1
; CHECK-NEON-NEXT: cmp r9, #0
; CHECK-NEON-NEXT: mov r0, #-2147483648
; CHECK-NEON-NEXT: vmov.32 d1[1], r4
; CHECK-NEON-NEXT: movne r0, r7
; CHECK-NEON-NEXT: cmp r7, #-2147483648
; CHECK-NEON-NEXT: movls r7, r8
; CHECK-NEON-NEXT: lsr r1, r1, #5
; CHECK-NEON-NEXT: cmp r1, #0
; CHECK-NEON-NEXT: moveq r7, r0
; CHECK-NEON-NEXT: vmov.32 d0[1], r7
; CHECK-NEON-NEXT: vpop {d8, d9, d10}
; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc}
;
; CHECK-FP16-LABEL: stest_f16i32_mm:
; CHECK-FP16: @ %bb.0: @ %entry
; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
; CHECK-FP16-NEXT: .vsave {d8, d9}
; CHECK-FP16-NEXT: vpush {d8, d9}
; CHECK-FP16-NEXT: vmov.u16 r0, d0[3]
; CHECK-FP16-NEXT: vorr d8, d0, d0
; CHECK-FP16-NEXT: vmov s0, r0
; CHECK-FP16-NEXT: bl __fixhfdi
; CHECK-FP16-NEXT: mov r4, r0
; CHECK-FP16-NEXT: vmov.u16 r0, d8[0]
; CHECK-FP16-NEXT: vmov.u16 r2, d8[2]
; CHECK-FP16-NEXT: cmp r1, #0
; CHECK-FP16-NEXT: mvn r7, #-2147483648
; CHECK-FP16-NEXT: mov r9, #0
; CHECK-FP16-NEXT: mov r8, #-2147483648
; CHECK-FP16-NEXT: vmov s18, r0
; CHECK-FP16-NEXT: mov r0, #0
; CHECK-FP16-NEXT: movwmi r0, #1
; CHECK-FP16-NEXT: vmov s0, r2
; CHECK-FP16-NEXT: clz r2, r1
; CHECK-FP16-NEXT: cmp r0, #0
; CHECK-FP16-NEXT: mvn r0, #-2147483648
; CHECK-FP16-NEXT: movne r0, r4
; CHECK-FP16-NEXT: cmn r4, #-2147483647
; CHECK-FP16-NEXT: movhs r4, r7
; CHECK-FP16-NEXT: lsr r2, r2, #5
; CHECK-FP16-NEXT: cmp r2, #0
; CHECK-FP16-NEXT: moveq r4, r0
; CHECK-FP16-NEXT: cmp r1, #0
; CHECK-FP16-NEXT: movpl r1, r9
; CHECK-FP16-NEXT: cmn r1, #1
; CHECK-FP16-NEXT: mov r0, #0
; CHECK-FP16-NEXT: add r1, r1, #1
; CHECK-FP16-NEXT: movwgt r0, #1
; CHECK-FP16-NEXT: clz r1, r1
; CHECK-FP16-NEXT: cmp r0, #0
; CHECK-FP16-NEXT: mov r0, #-2147483648
; CHECK-FP16-NEXT: movne r0, r4
; CHECK-FP16-NEXT: cmp r4, #-2147483648
; CHECK-FP16-NEXT: movls r4, r8
; CHECK-FP16-NEXT: lsr r1, r1, #5
; CHECK-FP16-NEXT: cmp r1, #0
; CHECK-FP16-NEXT: moveq r4, r0
; CHECK-FP16-NEXT: bl __fixhfdi
; CHECK-FP16-NEXT: mov r5, r0
; CHECK-FP16-NEXT: cmp r1, #0
; CHECK-FP16-NEXT: mov r0, #0
; CHECK-FP16-NEXT: clz r2, r1
; CHECK-FP16-NEXT: movwmi r0, #1
; CHECK-FP16-NEXT: cmp r0, #0
; CHECK-FP16-NEXT: mvn r0, #-2147483648
; CHECK-FP16-NEXT: vmov.f32 s0, s18
; CHECK-FP16-NEXT: movne r0, r5
; CHECK-FP16-NEXT: cmn r5, #-2147483647
; CHECK-FP16-NEXT: lsr r2, r2, #5
; CHECK-FP16-NEXT: movhs r5, r7
; CHECK-FP16-NEXT: cmp r2, #0
; CHECK-FP16-NEXT: moveq r5, r0
; CHECK-FP16-NEXT: cmp r1, #0
; CHECK-FP16-NEXT: movpl r1, r9
; CHECK-FP16-NEXT: cmn r1, #1
; CHECK-FP16-NEXT: mov r0, #0
; CHECK-FP16-NEXT: add r1, r1, #1
; CHECK-FP16-NEXT: movwgt r0, #1
; CHECK-FP16-NEXT: clz r1, r1
; CHECK-FP16-NEXT: cmp r0, #0
; CHECK-FP16-NEXT: mov r0, #-2147483648
; CHECK-FP16-NEXT: movne r0, r5
; CHECK-FP16-NEXT: cmp r5, #-2147483648
; CHECK-FP16-NEXT: movls r5, r8
; CHECK-FP16-NEXT: lsr r1, r1, #5
; CHECK-FP16-NEXT: cmp r1, #0
; CHECK-FP16-NEXT: moveq r5, r0
; CHECK-FP16-NEXT: bl __fixhfdi
; CHECK-FP16-NEXT: mov r6, r0
; CHECK-FP16-NEXT: cmp r1, #0
; CHECK-FP16-NEXT: mov r0, #0
; CHECK-FP16-NEXT: clz r2, r1
; CHECK-FP16-NEXT: movwmi r0, #1
; CHECK-FP16-NEXT: cmp r0, #0
; CHECK-FP16-NEXT: mvn r0, #-2147483648
; CHECK-FP16-NEXT: lsr r2, r2, #5
; CHECK-FP16-NEXT: movne r0, r6
; CHECK-FP16-NEXT: cmn r6, #-2147483647
; CHECK-FP16-NEXT: movhs r6, r7
; CHECK-FP16-NEXT: cmp r2, #0
; CHECK-FP16-NEXT: vmov.u16 r2, d8[1]
; CHECK-FP16-NEXT: moveq r6, r0
; CHECK-FP16-NEXT: cmp r1, #0
; CHECK-FP16-NEXT: mov r0, #0
; CHECK-FP16-NEXT: movpl r1, r9
; CHECK-FP16-NEXT: cmn r1, #1
; CHECK-FP16-NEXT: movwgt r0, #1
; CHECK-FP16-NEXT: add r1, r1, #1
; CHECK-FP16-NEXT: clz r1, r1
; CHECK-FP16-NEXT: cmp r0, #0
; CHECK-FP16-NEXT: mov r0, #-2147483648
; CHECK-FP16-NEXT: movne r0, r6
; CHECK-FP16-NEXT: cmp r6, #-2147483648
; CHECK-FP16-NEXT: movls r6, r8
; CHECK-FP16-NEXT: lsr r1, r1, #5
; CHECK-FP16-NEXT: cmp r1, #0
; CHECK-FP16-NEXT: moveq r6, r0
; CHECK-FP16-NEXT: vmov s0, r2
; CHECK-FP16-NEXT: bl __fixhfdi
; CHECK-FP16-NEXT: cmp r1, #0
; CHECK-FP16-NEXT: mov r2, #0
; CHECK-FP16-NEXT: movwmi r2, #1
; CHECK-FP16-NEXT: cmp r2, #0
; CHECK-FP16-NEXT: mvn r2, #-2147483648
; CHECK-FP16-NEXT: vmov.32 d0[0], r6
; CHECK-FP16-NEXT: movne r2, r0
; CHECK-FP16-NEXT: cmn r0, #-2147483647
; CHECK-FP16-NEXT: movlo r7, r0
; CHECK-FP16-NEXT: clz r0, r1
; CHECK-FP16-NEXT: vmov.32 d1[0], r5
; CHECK-FP16-NEXT: lsr r0, r0, #5
; CHECK-FP16-NEXT: cmp r0, #0
; CHECK-FP16-NEXT: moveq r7, r2
; CHECK-FP16-NEXT: cmp r1, #0
; CHECK-FP16-NEXT: movpl r1, r9
; CHECK-FP16-NEXT: cmn r1, #1
; CHECK-FP16-NEXT: add r1, r1, #1
; CHECK-FP16-NEXT: movwgt r9, #1
; CHECK-FP16-NEXT: clz r1, r1
; CHECK-FP16-NEXT: cmp r9, #0
; CHECK-FP16-NEXT: mov r0, #-2147483648
; CHECK-FP16-NEXT: vmov.32 d1[1], r4
; CHECK-FP16-NEXT: movne r0, r7
; CHECK-FP16-NEXT: cmp r7, #-2147483648
; CHECK-FP16-NEXT: movls r7, r8
; CHECK-FP16-NEXT: lsr r1, r1, #5
; CHECK-FP16-NEXT: cmp r1, #0
; CHECK-FP16-NEXT: moveq r7, r0
; CHECK-FP16-NEXT: vmov.32 d0[1], r7
; CHECK-FP16-NEXT: vpop {d8, d9}
; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc}
entry:
 %conv = fptosi <4 x half> %x to <4 x i64>
 %spec.store.select = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>)
 %spec.store.select7 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> %spec.store.select, <4 x i64> <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648>)
 %conv6 = trunc <4 x i64> %spec.store.select7 to <4 x i32>
 ret <4 x i32> %conv6
}
+
+; Unsigned saturation of <4 x half> to <4 x i32>: fptoui to <4 x i64>, llvm.umin
+; against 0xffffffff, then trunc.  NEON variant goes through __aeabi_h2f +
+; __aeabi_f2ulz; the fullfp16 variant calls __fixunshfdi per lane.  Both clamp
+; with the vqsub.u64/vsub.i64 idiom and narrow with vmovn.
define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
; CHECK-NEON-LABEL: utesth_f16i32_mm:
; CHECK-NEON: @ %bb.0: @ %entry
; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r11, lr}
; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r11, lr}
; CHECK-NEON-NEXT: .vsave {d8, d9, d10, d11}
; CHECK-NEON-NEXT: vpush {d8, d9, d10, d11}
; CHECK-NEON-NEXT: vmov r0, s1
; CHECK-NEON-NEXT: vmov.f32 s16, s3
; CHECK-NEON-NEXT: vmov.f32 s18, s2
; CHECK-NEON-NEXT: vmov.f32 s20, s0
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: bl __aeabi_f2ulz
; CHECK-NEON-NEXT: mov r4, r1
; CHECK-NEON-NEXT: vmov r1, s18
; CHECK-NEON-NEXT: vmov r6, s16
; CHECK-NEON-NEXT: vmov.32 d9[0], r0
; CHECK-NEON-NEXT: vmov r7, s20
; CHECK-NEON-NEXT: mov r0, r1
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: bl __aeabi_f2ulz
; CHECK-NEON-NEXT: vmov.32 d10[0], r0
; CHECK-NEON-NEXT: mov r0, r6
; CHECK-NEON-NEXT: mov r5, r1
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: bl __aeabi_f2ulz
; CHECK-NEON-NEXT: vmov.32 d11[0], r0
; CHECK-NEON-NEXT: mov r0, r7
; CHECK-NEON-NEXT: mov r6, r1
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: bl __aeabi_f2ulz
; CHECK-NEON-NEXT: vmov.32 d8[0], r0
; CHECK-NEON-NEXT: vmov.i64 q8, #0xffffffff
; CHECK-NEON-NEXT: vmov.32 d11[1], r6
; CHECK-NEON-NEXT: vmov.32 d9[1], r4
; CHECK-NEON-NEXT: vmov.32 d10[1], r5
; CHECK-NEON-NEXT: vmov.32 d8[1], r1
; CHECK-NEON-NEXT: vqsub.u64 q9, q5, q8
; CHECK-NEON-NEXT: vqsub.u64 q8, q4, q8
; CHECK-NEON-NEXT: vsub.i64 q9, q5, q9
; CHECK-NEON-NEXT: vsub.i64 q8, q4, q8
; CHECK-NEON-NEXT: vmovn.i64 d1, q9
; CHECK-NEON-NEXT: vmovn.i64 d0, q8
; CHECK-NEON-NEXT: vpop {d8, d9, d10, d11}
; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r11, pc}
;
; CHECK-FP16-LABEL: utesth_f16i32_mm:
; CHECK-FP16: @ %bb.0: @ %entry
; CHECK-FP16-NEXT: .save {r4, r5, r6, lr}
; CHECK-FP16-NEXT: push {r4, r5, r6, lr}
; CHECK-FP16-NEXT: .vsave {d10, d11, d12, d13}
; CHECK-FP16-NEXT: vpush {d10, d11, d12, d13}
; CHECK-FP16-NEXT: .vsave {d8}
; CHECK-FP16-NEXT: vpush {d8}
; CHECK-FP16-NEXT: vmov.u16 r0, d0[1]
; CHECK-FP16-NEXT: vorr d8, d0, d0
; CHECK-FP16-NEXT: vmov.u16 r6, d0[3]
; CHECK-FP16-NEXT: vmov s0, r0
; CHECK-FP16-NEXT: bl __fixunshfdi
; CHECK-FP16-NEXT: mov r4, r1
; CHECK-FP16-NEXT: vmov.u16 r1, d8[2]
; CHECK-FP16-NEXT: vmov.32 d11[0], r0
; CHECK-FP16-NEXT: vmov s0, r1
; CHECK-FP16-NEXT: bl __fixunshfdi
; CHECK-FP16-NEXT: vmov s0, r6
; CHECK-FP16-NEXT: mov r5, r1
; CHECK-FP16-NEXT: vmov.32 d12[0], r0
; CHECK-FP16-NEXT: bl __fixunshfdi
; CHECK-FP16-NEXT: mov r6, r1
; CHECK-FP16-NEXT: vmov.u16 r1, d8[0]
; CHECK-FP16-NEXT: vmov.32 d13[0], r0
; CHECK-FP16-NEXT: vmov s0, r1
; CHECK-FP16-NEXT: bl __fixunshfdi
; CHECK-FP16-NEXT: vmov.32 d10[0], r0
; CHECK-FP16-NEXT: vmov.i64 q8, #0xffffffff
; CHECK-FP16-NEXT: vmov.32 d13[1], r6
; CHECK-FP16-NEXT: vmov.32 d11[1], r4
; CHECK-FP16-NEXT: vmov.32 d12[1], r5
; CHECK-FP16-NEXT: vmov.32 d10[1], r1
; CHECK-FP16-NEXT: vqsub.u64 q9, q6, q8
; CHECK-FP16-NEXT: vqsub.u64 q8, q5, q8
; CHECK-FP16-NEXT: vsub.i64 q9, q6, q9
; CHECK-FP16-NEXT: vsub.i64 q8, q5, q8
; CHECK-FP16-NEXT: vmovn.i64 d1, q9
; CHECK-FP16-NEXT: vmovn.i64 d0, q8
; CHECK-FP16-NEXT: vpop {d8}
; CHECK-FP16-NEXT: vpop {d10, d11, d12, d13}
; CHECK-FP16-NEXT: pop {r4, r5, r6, pc}
entry:
 %conv = fptoui <4 x half> %x to <4 x i64>
 %spec.store.select = call <4 x i64> @llvm.umin.v4i64(<4 x i64> %conv, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
 %conv6 = trunc <4 x i64> %spec.store.select to <4 x i32>
 ret <4 x i32> %conv6
}
+
+; Unsigned-range saturation via signed ops for <4 x half>: fptosi to <4 x i64>,
+; clamped to [0, 0xffffffff] with llvm.smin/llvm.smax, then trunc to <4 x i32>.
+; NEON variant converts through __aeabi_h2f + __aeabi_f2lz, FP16 variant through
+; __fixhfdi; both clamp lane-by-lane with scalar compare/conditional-move code.
define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) {
; CHECK-NEON-LABEL: ustest_f16i32_mm:
; CHECK-NEON: @ %bb.0: @ %entry
; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-NEON-NEXT: .pad #4
; CHECK-NEON-NEXT: sub sp, sp, #4
; CHECK-NEON-NEXT: .vsave {d8, d9, d10}
; CHECK-NEON-NEXT: vpush {d8, d9, d10}
; CHECK-NEON-NEXT: vmov r0, s3
; CHECK-NEON-NEXT: vmov.f32 s16, s2
; CHECK-NEON-NEXT: vmov.f32 s18, s1
; CHECK-NEON-NEXT: vmov.f32 s20, s0
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: bl __aeabi_f2lz
; CHECK-NEON-NEXT: mov r2, r0
; CHECK-NEON-NEXT: vmov r0, s18
; CHECK-NEON-NEXT: cmp r1, #0
; CHECK-NEON-NEXT: mov r3, #0
; CHECK-NEON-NEXT: movwmi r3, #1
; CHECK-NEON-NEXT: clz r6, r1
; CHECK-NEON-NEXT: cmp r3, #0
; CHECK-NEON-NEXT: mvn r3, #0
; CHECK-NEON-NEXT: movne r3, r2
; CHECK-NEON-NEXT: lsr r6, r6, #5
; CHECK-NEON-NEXT: cmp r6, #0
; CHECK-NEON-NEXT: mov r7, #0
; CHECK-NEON-NEXT: movne r3, r2
; CHECK-NEON-NEXT: cmp r1, #0
; CHECK-NEON-NEXT: movpl r1, r7
; CHECK-NEON-NEXT: cmp r1, #0
; CHECK-NEON-NEXT: mov r11, #0
; CHECK-NEON-NEXT: clz r1, r1
; CHECK-NEON-NEXT: movwgt r11, #1
; CHECK-NEON-NEXT: cmp r11, #0
; CHECK-NEON-NEXT: movne r11, r3
; CHECK-NEON-NEXT: lsr r1, r1, #5
; CHECK-NEON-NEXT: cmp r1, #0
; CHECK-NEON-NEXT: mvn r9, #0
; CHECK-NEON-NEXT: vmov r8, s20
; CHECK-NEON-NEXT: movne r11, r3
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: bl __aeabi_f2lz
; CHECK-NEON-NEXT: mov r4, r1
; CHECK-NEON-NEXT: cmp r1, #0
; CHECK-NEON-NEXT: mov r1, #0
; CHECK-NEON-NEXT: mvn r10, #0
; CHECK-NEON-NEXT: movwmi r1, #1
; CHECK-NEON-NEXT: cmp r1, #0
; CHECK-NEON-NEXT: clz r1, r4
; CHECK-NEON-NEXT: movne r10, r0
; CHECK-NEON-NEXT: mov r6, #0
; CHECK-NEON-NEXT: lsr r1, r1, #5
; CHECK-NEON-NEXT: cmp r1, #0
; CHECK-NEON-NEXT: movne r10, r0
; CHECK-NEON-NEXT: cmp r4, #0
; CHECK-NEON-NEXT: movpl r4, r7
; CHECK-NEON-NEXT: cmp r4, #0
; CHECK-NEON-NEXT: movwgt r6, #1
; CHECK-NEON-NEXT: cmp r6, #0
; CHECK-NEON-NEXT: mov r0, r8
; CHECK-NEON-NEXT: movne r6, r10
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: bl __aeabi_f2lz
; CHECK-NEON-NEXT: cmp r1, #0
; CHECK-NEON-NEXT: mov r2, #0
; CHECK-NEON-NEXT: movwmi r2, #1
; CHECK-NEON-NEXT: clz r3, r1
; CHECK-NEON-NEXT: cmp r2, #0
; CHECK-NEON-NEXT: mvn r2, #0
; CHECK-NEON-NEXT: movne r2, r0
; CHECK-NEON-NEXT: lsr r3, r3, #5
; CHECK-NEON-NEXT: cmp r3, #0
; CHECK-NEON-NEXT: mov r5, #0
; CHECK-NEON-NEXT: movne r2, r0
; CHECK-NEON-NEXT: cmp r1, #0
; CHECK-NEON-NEXT: movpl r1, r7
; CHECK-NEON-NEXT: clz r0, r1
; CHECK-NEON-NEXT: cmp r1, #0
; CHECK-NEON-NEXT: movwgt r5, #1
; CHECK-NEON-NEXT: cmp r5, #0
; CHECK-NEON-NEXT: lsr r0, r0, #5
; CHECK-NEON-NEXT: movne r5, r2
; CHECK-NEON-NEXT: cmp r0, #0
; CHECK-NEON-NEXT: vmov r0, s16
; CHECK-NEON-NEXT: movne r5, r2
; CHECK-NEON-NEXT: bl __aeabi_h2f
; CHECK-NEON-NEXT: bl __aeabi_f2lz
; CHECK-NEON-NEXT: cmp r1, #0
; CHECK-NEON-NEXT: mov r2, #0
; CHECK-NEON-NEXT: movwmi r2, #1
; CHECK-NEON-NEXT: cmp r2, #0
; CHECK-NEON-NEXT: clz r2, r1
; CHECK-NEON-NEXT: movne r9, r0
; CHECK-NEON-NEXT: vmov.32 d0[0], r5
; CHECK-NEON-NEXT: lsr r2, r2, #5
; CHECK-NEON-NEXT: cmp r2, #0
; CHECK-NEON-NEXT: movne r9, r0
; CHECK-NEON-NEXT: cmp r1, #0
; CHECK-NEON-NEXT: movpl r1, r7
; CHECK-NEON-NEXT: clz r0, r1
; CHECK-NEON-NEXT: cmp r1, #0
; CHECK-NEON-NEXT: movwgt r7, #1
; CHECK-NEON-NEXT: cmp r7, #0
; CHECK-NEON-NEXT: movne r7, r9
; CHECK-NEON-NEXT: lsr r0, r0, #5
; CHECK-NEON-NEXT: cmp r0, #0
; CHECK-NEON-NEXT: clz r0, r4
; CHECK-NEON-NEXT: movne r7, r9
; CHECK-NEON-NEXT: vmov.32 d1[0], r7
; CHECK-NEON-NEXT: lsr r0, r0, #5
; CHECK-NEON-NEXT: cmp r0, #0
; CHECK-NEON-NEXT: movne r6, r10
; CHECK-NEON-NEXT: vmov.32 d1[1], r11
; CHECK-NEON-NEXT: vmov.32 d0[1], r6
; CHECK-NEON-NEXT: vpop {d8, d9, d10}
; CHECK-NEON-NEXT: add sp, sp, #4
; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
;
; CHECK-FP16-LABEL: ustest_f16i32_mm:
; CHECK-FP16: @ %bb.0: @ %entry
; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
; CHECK-FP16-NEXT: .vsave {d8, d9}
; CHECK-FP16-NEXT: vpush {d8, d9}
; CHECK-FP16-NEXT: vmov.u16 r0, d0[3]
; CHECK-FP16-NEXT: vorr d8, d0, d0
; CHECK-FP16-NEXT: vmov.u16 r4, d0[1]
; CHECK-FP16-NEXT: vmov s0, r0
; CHECK-FP16-NEXT: bl __fixhfdi
; CHECK-FP16-NEXT: vmov.u16 r2, d8[0]
; CHECK-FP16-NEXT: cmp r1, #0
; CHECK-FP16-NEXT: clz r3, r1
; CHECK-FP16-NEXT: mov r6, #0
; CHECK-FP16-NEXT: mov r10, #0
; CHECK-FP16-NEXT: vmov s0, r4
; CHECK-FP16-NEXT: lsr r3, r3, #5
; CHECK-FP16-NEXT: mvn r8, #0
; CHECK-FP16-NEXT: vmov s18, r2
; CHECK-FP16-NEXT: mov r2, #0
; CHECK-FP16-NEXT: movwmi r2, #1
; CHECK-FP16-NEXT: cmp r2, #0
; CHECK-FP16-NEXT: mvn r2, #0
; CHECK-FP16-NEXT: movne r2, r0
; CHECK-FP16-NEXT: cmp r3, #0
; CHECK-FP16-NEXT: movne r2, r0
; CHECK-FP16-NEXT: cmp r1, #0
; CHECK-FP16-NEXT: movpl r1, r6
; CHECK-FP16-NEXT: clz r0, r1
; CHECK-FP16-NEXT: cmp r1, #0
; CHECK-FP16-NEXT: movwgt r10, #1
; CHECK-FP16-NEXT: cmp r10, #0
; CHECK-FP16-NEXT: movne r10, r2
; CHECK-FP16-NEXT: lsr r0, r0, #5
; CHECK-FP16-NEXT: cmp r0, #0
; CHECK-FP16-NEXT: movne r10, r2
; CHECK-FP16-NEXT: bl __fixhfdi
; CHECK-FP16-NEXT: mov r4, r1
; CHECK-FP16-NEXT: cmp r1, #0
; CHECK-FP16-NEXT: mov r1, #0
; CHECK-FP16-NEXT: vmov.f32 s0, s18
; CHECK-FP16-NEXT: movwmi r1, #1
; CHECK-FP16-NEXT: cmp r1, #0
; CHECK-FP16-NEXT: clz r1, r4
; CHECK-FP16-NEXT: mvn r9, #0
; CHECK-FP16-NEXT: movne r9, r0
; CHECK-FP16-NEXT: mov r5, #0
; CHECK-FP16-NEXT: lsr r1, r1, #5
; CHECK-FP16-NEXT: cmp r1, #0
; CHECK-FP16-NEXT: movne r9, r0
; CHECK-FP16-NEXT: cmp r4, #0
; CHECK-FP16-NEXT: movpl r4, r6
; CHECK-FP16-NEXT: cmp r4, #0
; CHECK-FP16-NEXT: movwgt r5, #1
; CHECK-FP16-NEXT: cmp r5, #0
; CHECK-FP16-NEXT: movne r5, r9
; CHECK-FP16-NEXT: bl __fixhfdi
; CHECK-FP16-NEXT: cmp r1, #0
; CHECK-FP16-NEXT: mov r2, #0
; CHECK-FP16-NEXT: movwmi r2, #1
; CHECK-FP16-NEXT: clz r3, r1
; CHECK-FP16-NEXT: cmp r2, #0
; CHECK-FP16-NEXT: mvn r2, #0
; CHECK-FP16-NEXT: movne r2, r0
; CHECK-FP16-NEXT: lsr r3, r3, #5
; CHECK-FP16-NEXT: cmp r3, #0
; CHECK-FP16-NEXT: mov r7, #0
; CHECK-FP16-NEXT: movne r2, r0
; CHECK-FP16-NEXT: cmp r1, #0
; CHECK-FP16-NEXT: movpl r1, r6
; CHECK-FP16-NEXT: clz r0, r1
; CHECK-FP16-NEXT: cmp r1, #0
; CHECK-FP16-NEXT: vmov.u16 r1, d8[2]
; CHECK-FP16-NEXT: movwgt r7, #1
; CHECK-FP16-NEXT: cmp r7, #0
; CHECK-FP16-NEXT: movne r7, r2
; CHECK-FP16-NEXT: lsr r0, r0, #5
; CHECK-FP16-NEXT: cmp r0, #0
; CHECK-FP16-NEXT: movne r7, r2
; CHECK-FP16-NEXT: vmov s0, r1
; CHECK-FP16-NEXT: bl __fixhfdi
; CHECK-FP16-NEXT: cmp r1, #0
; CHECK-FP16-NEXT: mov r2, #0
; CHECK-FP16-NEXT: movwmi r2, #1
; CHECK-FP16-NEXT: cmp r2, #0
; CHECK-FP16-NEXT: clz r2, r1
; CHECK-FP16-NEXT: movne r8, r0
; CHECK-FP16-NEXT: vmov.32 d0[0], r7
; CHECK-FP16-NEXT: lsr r2, r2, #5
; CHECK-FP16-NEXT: cmp r2, #0
; CHECK-FP16-NEXT: movne r8, r0
; CHECK-FP16-NEXT: cmp r1, #0
; CHECK-FP16-NEXT: movpl r1, r6
; CHECK-FP16-NEXT: clz r0, r1
; CHECK-FP16-NEXT: cmp r1, #0
; CHECK-FP16-NEXT: movwgt r6, #1
; CHECK-FP16-NEXT: cmp r6, #0
; CHECK-FP16-NEXT: movne r6, r8
; CHECK-FP16-NEXT: lsr r0, r0, #5
; CHECK-FP16-NEXT: cmp r0, #0
; CHECK-FP16-NEXT: clz r0, r4
; CHECK-FP16-NEXT: movne r6, r8
; CHECK-FP16-NEXT: vmov.32 d1[0], r6
; CHECK-FP16-NEXT: lsr r0, r0, #5
; CHECK-FP16-NEXT: cmp r0, #0
; CHECK-FP16-NEXT: movne r5, r9
; CHECK-FP16-NEXT: vmov.32 d1[1], r10
; CHECK-FP16-NEXT: vmov.32 d0[1], r5
; CHECK-FP16-NEXT: vpop {d8, d9}
; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
entry:
 %conv = fptosi <4 x half> %x to <4 x i64>
 %spec.store.select = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
 %spec.store.select7 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> %spec.store.select, <4 x i64> zeroinitializer)
 %conv6 = trunc <4 x i64> %spec.store.select7 to <4 x i32>
 ret <4 x i32> %conv6
}
+
+; i16 saturate
+
+; Signed i16 saturation: fptosi <2 x double> to <2 x i32>, clamped to
+; [-32768, 32767] with llvm.smin/llvm.smax, then trunc to <2 x i16>.
+; Lowered with scalar vcvt.s32.f64 conversions and a vector vmin.s32/vmax.s32.
define <2 x i16> @stest_f64i16_mm(<2 x double> %x) {
; CHECK-LABEL: stest_f64i16_mm:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcvt.s32.f64 s4, d0
; CHECK-NEXT: vmov r0, s4
; CHECK-NEXT: vcvt.s32.f64 s0, d1
; CHECK-NEXT: vmov.i32 d17, #0x7fff
; CHECK-NEXT: vmvn.i32 d18, #0x7fff
; CHECK-NEXT: vmov.32 d16[0], r0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: vmov.32 d16[1], r0
; CHECK-NEXT: vmin.s32 d16, d16, d17
; CHECK-NEXT: vmax.s32 d0, d16, d18
; CHECK-NEXT: bx lr
entry:
 %conv = fptosi <2 x double> %x to <2 x i32>
 %spec.store.select = call <2 x i32> @llvm.smin.v2i32(<2 x i32> %conv, <2 x i32> <i32 32767, i32 32767>)
 %spec.store.select7 = call <2 x i32> @llvm.smax.v2i32(<2 x i32> %spec.store.select, <2 x i32> <i32 -32768, i32 -32768>)
 %conv6 = trunc <2 x i32> %spec.store.select7 to <2 x i16>
 ret <2 x i16> %conv6
}
+
+; Unsigned i16 saturation: fptoui <2 x double> to <2 x i32>, llvm.umin against
+; 65535, trunc to <2 x i16>.  Lowered with vcvt.u32.f64 plus a single vmin.u32.
define <2 x i16> @utest_f64i16_mm(<2 x double> %x) {
; CHECK-LABEL: utest_f64i16_mm:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcvt.u32.f64 s4, d0
; CHECK-NEXT: vmov r0, s4
; CHECK-NEXT: vcvt.u32.f64 s0, d1
; CHECK-NEXT: vmov.i32 d17, #0xffff
; CHECK-NEXT: vmov.32 d16[0], r0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: vmov.32 d16[1], r0
; CHECK-NEXT: vmin.u32 d0, d16, d17
; CHECK-NEXT: bx lr
entry:
 %conv = fptoui <2 x double> %x to <2 x i32>
 %spec.store.select = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %conv, <2 x i32> <i32 65535, i32 65535>)
 %conv6 = trunc <2 x i32> %spec.store.select to <2 x i16>
 ret <2 x i16> %conv6
}
+
+; Unsigned-range saturation via signed ops: fptosi <2 x double> to <2 x i32>,
+; clamped to [0, 65535] with llvm.smin/llvm.smax, then trunc to <2 x i16>.
define <2 x i16> @ustest_f64i16_mm(<2 x double> %x) {
; CHECK-LABEL: ustest_f64i16_mm:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcvt.s32.f64 s4, d0
; CHECK-NEXT: vmov r0, s4
; CHECK-NEXT: vcvt.s32.f64 s0, d1
; CHECK-NEXT: vmov.i32 d17, #0xffff
; CHECK-NEXT: vmov.i32 d18, #0x0
; CHECK-NEXT: vmov.32 d16[0], r0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: vmov.32 d16[1], r0
; CHECK-NEXT: vmin.s32 d16, d16, d17
; CHECK-NEXT: vmax.s32 d0, d16, d18
; CHECK-NEXT: bx lr
entry:
 %conv = fptosi <2 x double> %x to <2 x i32>
 %spec.store.select = call <2 x i32> @llvm.smin.v2i32(<2 x i32> %conv, <2 x i32> <i32 65535, i32 65535>)
 %spec.store.select7 = call <2 x i32> @llvm.smax.v2i32(<2 x i32> %spec.store.select, <2 x i32> zeroinitializer)
 %conv6 = trunc <2 x i32> %spec.store.select7 to <2 x i16>
 ret <2 x i16> %conv6
}
+
+; Signed i16 saturation for floats: fptosi <4 x float> to <4 x i32>, clamped to
+; [-32768, 32767], then narrowed.  Fully vectorised: vcvt + vmin/vmax + vmovn.
define <4 x i16> @stest_f32i16_mm(<4 x float> %x) {
; CHECK-LABEL: stest_f32i16_mm:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcvt.s32.f32 q8, q0
; CHECK-NEXT: vmov.i32 q9, #0x7fff
; CHECK-NEXT: vmvn.i32 q10, #0x7fff
; CHECK-NEXT: vmin.s32 q8, q8, q9
; CHECK-NEXT: vmax.s32 q8, q8, q10
; CHECK-NEXT: vmovn.i32 d0, q8
; CHECK-NEXT: bx lr
entry:
 %conv = fptosi <4 x float> %x to <4 x i32>
 %spec.store.select = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %conv, <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767>)
 %spec.store.select7 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %spec.store.select, <4 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768>)
 %conv6 = trunc <4 x i32> %spec.store.select7 to <4 x i16>
 ret <4 x i16> %conv6
}
+
+; Unsigned i16 saturation for floats: fptoui <4 x float> to <4 x i32>, umin
+; against 65535, then narrowed.  Vectorised as vcvt.u32 + vmin.u32 + vmovn.
define <4 x i16> @utest_f32i16_mm(<4 x float> %x) {
; CHECK-LABEL: utest_f32i16_mm:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcvt.u32.f32 q8, q0
; CHECK-NEXT: vmov.i32 q9, #0xffff
; CHECK-NEXT: vmin.u32 q8, q8, q9
; CHECK-NEXT: vmovn.i32 d0, q8
; CHECK-NEXT: bx lr
entry:
 %conv = fptoui <4 x float> %x to <4 x i32>
 %spec.store.select = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %conv, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>)
 %conv6 = trunc <4 x i32> %spec.store.select to <4 x i16>
 ret <4 x i16> %conv6
}
+
+; Unsigned-range saturation via signed ops: fptosi <4 x float> to <4 x i32>,
+; clamped to [0, 65535] with llvm.smin/llvm.smax, then narrowed to <4 x i16>.
define <4 x i16> @ustest_f32i16_mm(<4 x float> %x) {
; CHECK-LABEL: ustest_f32i16_mm:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcvt.s32.f32 q8, q0
; CHECK-NEXT: vmov.i32 q9, #0xffff
; CHECK-NEXT: vmov.i32 q10, #0x0
; CHECK-NEXT: vmin.s32 q8, q8, q9
; CHECK-NEXT: vmax.s32 q8, q8, q10
; CHECK-NEXT: vmovn.i32 d0, q8
; CHECK-NEXT: bx lr
entry:
 %conv = fptosi <4 x float> %x to <4 x i32>
 %spec.store.select = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %conv, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>)
 %spec.store.select7 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %spec.store.select, <4 x i32> zeroinitializer)
 %conv6 = trunc <4 x i32> %spec.store.select7 to <4 x i16>
 ret <4 x i16> %conv6
}
+
+define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
+; CHECK-NEON-LABEL: stest_f16i16_mm:
+; CHECK-NEON: @ %bb.0: @ %entry
+; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r11, lr}
+; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r11, lr}
+; CHECK-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; CHECK-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; CHECK-NEON-NEXT: vmov r0, s1
+; CHECK-NEON-NEXT: vmov.f32 s16, s7
+; CHECK-NEON-NEXT: vmov.f32 s18, s6
+; CHECK-NEON-NEXT: vmov.f32 s20, s5
+; CHECK-NEON-NEXT: vmov.f32 s22, s4
+; CHECK-NEON-NEXT: vmov.f32 s24, s3
+; CHECK-NEON-NEXT: vmov.f32 s26, s2
+; CHECK-NEON-NEXT: vmov.f32 s28, s0
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: mov r4, r0
+; CHECK-NEON-NEXT: vmov r0, s26
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: mov r5, r0
+; CHECK-NEON-NEXT: vmov r0, s22
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: mov r6, r0
+; CHECK-NEON-NEXT: vmov r0, s24
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: mov r7, r0
+; CHECK-NEON-NEXT: vmov r0, s18
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: vmov s0, r0
+; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s0
+; CHECK-NEON-NEXT: vmov r0, s0
+; CHECK-NEON-NEXT: vmov.32 d13[0], r0
+; CHECK-NEON-NEXT: vmov r0, s16
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: vmov s0, r0
+; CHECK-NEON-NEXT: vmov s22, r7
+; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s0
+; CHECK-NEON-NEXT: vmov s30, r6
+; CHECK-NEON-NEXT: vmov r0, s0
+; CHECK-NEON-NEXT: vmov.32 d13[1], r0
+; CHECK-NEON-NEXT: vmov r0, s28
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: vmov s0, r0
+; CHECK-NEON-NEXT: vmov r1, s20
+; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s0
+; CHECK-NEON-NEXT: vmov s2, r5
+; CHECK-NEON-NEXT: vcvt.s32.f32 s20, s2
+; CHECK-NEON-NEXT: vmov r0, s0
+; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s30
+; CHECK-NEON-NEXT: vmov.32 d8[0], r0
+; CHECK-NEON-NEXT: vmov r0, s0
+; CHECK-NEON-NEXT: vmov.32 d12[0], r0
+; CHECK-NEON-NEXT: mov r0, r1
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: vmov s0, r0
+; CHECK-NEON-NEXT: vmov r0, s20
+; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s0
+; CHECK-NEON-NEXT: vmov s2, r4
+; CHECK-NEON-NEXT: vmov.i32 q8, #0x7fff
+; CHECK-NEON-NEXT: vcvt.s32.f32 s2, s2
+; CHECK-NEON-NEXT: vmvn.i32 q9, #0x7fff
+; CHECK-NEON-NEXT: vmov.32 d9[0], r0
+; CHECK-NEON-NEXT: vmov r0, s0
+; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s22
+; CHECK-NEON-NEXT: vmov.32 d12[1], r0
+; CHECK-NEON-NEXT: vmov r0, s0
+; CHECK-NEON-NEXT: vmin.s32 q10, q6, q8
+; CHECK-NEON-NEXT: vmax.s32 q10, q10, q9
+; CHECK-NEON-NEXT: vmov.32 d9[1], r0
+; CHECK-NEON-NEXT: vmov r0, s2
+; CHECK-NEON-NEXT: vmovn.i32 d1, q10
+; CHECK-NEON-NEXT: vmov.32 d8[1], r0
+; CHECK-NEON-NEXT: vmin.s32 q8, q4, q8
+; CHECK-NEON-NEXT: vmax.s32 q8, q8, q9
+; CHECK-NEON-NEXT: vmovn.i32 d0, q8
+; CHECK-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r11, pc}
+;
+; CHECK-FP16-LABEL: stest_f16i16_mm:
+; CHECK-FP16: @ %bb.0: @ %entry
+; CHECK-FP16-NEXT: vmovx.f16 s4, s0
+; CHECK-FP16-NEXT: vcvt.s32.f16 s12, s0
+; CHECK-FP16-NEXT: vcvt.s32.f16 s0, s3
+; CHECK-FP16-NEXT: vcvt.s32.f16 s5, s2
+; CHECK-FP16-NEXT: vmov r0, s0
+; CHECK-FP16-NEXT: vcvt.s32.f16 s14, s1
+; CHECK-FP16-NEXT: vmovx.f16 s10, s3
+; CHECK-FP16-NEXT: vmovx.f16 s8, s2
+; CHECK-FP16-NEXT: vcvt.s32.f16 s10, s10
+; CHECK-FP16-NEXT: vcvt.s32.f16 s8, s8
+; CHECK-FP16-NEXT: vmovx.f16 s6, s1
+; CHECK-FP16-NEXT: vcvt.s32.f16 s4, s4
+; CHECK-FP16-NEXT: vcvt.s32.f16 s6, s6
+; CHECK-FP16-NEXT: vmov.i32 q10, #0x7fff
+; CHECK-FP16-NEXT: vmvn.i32 q11, #0x7fff
+; CHECK-FP16-NEXT: vmov.32 d17[0], r0
+; CHECK-FP16-NEXT: vmov r0, s5
+; CHECK-FP16-NEXT: vmov.32 d16[0], r0
+; CHECK-FP16-NEXT: vmov r0, s14
+; CHECK-FP16-NEXT: vmov.32 d19[0], r0
+; CHECK-FP16-NEXT: vmov r0, s12
+; CHECK-FP16-NEXT: vmov.32 d18[0], r0
+; CHECK-FP16-NEXT: vmov r0, s10
+; CHECK-FP16-NEXT: vmov.32 d17[1], r0
+; CHECK-FP16-NEXT: vmov r0, s8
+; CHECK-FP16-NEXT: vmov.32 d16[1], r0
+; CHECK-FP16-NEXT: vmov r0, s6
+; CHECK-FP16-NEXT: vmin.s32 q8, q8, q10
+; CHECK-FP16-NEXT: vmax.s32 q8, q8, q11
+; CHECK-FP16-NEXT: vmovn.i32 d1, q8
+; CHECK-FP16-NEXT: vmov.32 d19[1], r0
+; CHECK-FP16-NEXT: vmov r0, s4
+; CHECK-FP16-NEXT: vmov.32 d18[1], r0
+; CHECK-FP16-NEXT: vmin.s32 q9, q9, q10
+; CHECK-FP16-NEXT: vmax.s32 q9, q9, q11
+; CHECK-FP16-NEXT: vmovn.i32 d0, q9
+; CHECK-FP16-NEXT: bx lr
+entry:
+ %conv = fptosi <8 x half> %x to <8 x i32>
+ %spec.store.select = call <8 x i32> @llvm.smin.v8i32(<8 x i32> %conv, <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>)
+ %spec.store.select7 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> %spec.store.select, <8 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>)
+ %conv6 = trunc <8 x i32> %spec.store.select7 to <8 x i16>
+ ret <8 x i16> %conv6
+}
+
+define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
+; CHECK-NEON-LABEL: utesth_f16i16_mm:
+; CHECK-NEON: @ %bb.0: @ %entry
+; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r11, lr}
+; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r11, lr}
+; CHECK-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14}
+; CHECK-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14}
+; CHECK-NEON-NEXT: vmov r0, s1
+; CHECK-NEON-NEXT: vmov.f32 s16, s7
+; CHECK-NEON-NEXT: vmov.f32 s18, s6
+; CHECK-NEON-NEXT: vmov.f32 s20, s5
+; CHECK-NEON-NEXT: vmov.f32 s22, s4
+; CHECK-NEON-NEXT: vmov.f32 s24, s3
+; CHECK-NEON-NEXT: vmov.f32 s26, s2
+; CHECK-NEON-NEXT: vmov.f32 s28, s0
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: mov r4, r0
+; CHECK-NEON-NEXT: vmov r0, s26
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: mov r5, r0
+; CHECK-NEON-NEXT: vmov r0, s22
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: mov r6, r0
+; CHECK-NEON-NEXT: vmov r0, s24
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: mov r7, r0
+; CHECK-NEON-NEXT: vmov r0, s18
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: vmov s0, r0
+; CHECK-NEON-NEXT: vcvt.u32.f32 s0, s0
+; CHECK-NEON-NEXT: vmov r0, s0
+; CHECK-NEON-NEXT: vmov.32 d13[0], r0
+; CHECK-NEON-NEXT: vmov r0, s16
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: vmov s0, r0
+; CHECK-NEON-NEXT: vmov s16, r7
+; CHECK-NEON-NEXT: vcvt.u32.f32 s0, s0
+; CHECK-NEON-NEXT: vmov s18, r6
+; CHECK-NEON-NEXT: vmov r0, s0
+; CHECK-NEON-NEXT: vmov.32 d13[1], r0
+; CHECK-NEON-NEXT: vmov r0, s28
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: vmov s0, r0
+; CHECK-NEON-NEXT: vmov r1, s20
+; CHECK-NEON-NEXT: vcvt.u32.f32 s0, s0
+; CHECK-NEON-NEXT: vmov s2, r5
+; CHECK-NEON-NEXT: vmov r0, s0
+; CHECK-NEON-NEXT: vcvt.u32.f32 s0, s18
+; CHECK-NEON-NEXT: vcvt.u32.f32 s18, s2
+; CHECK-NEON-NEXT: vmov.32 d10[0], r0
+; CHECK-NEON-NEXT: vmov r0, s0
+; CHECK-NEON-NEXT: vmov.32 d12[0], r0
+; CHECK-NEON-NEXT: mov r0, r1
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: vmov s0, r0
+; CHECK-NEON-NEXT: vmov r0, s18
+; CHECK-NEON-NEXT: vcvt.u32.f32 s0, s0
+; CHECK-NEON-NEXT: vmov s2, r4
+; CHECK-NEON-NEXT: vmov.i32 q8, #0xffff
+; CHECK-NEON-NEXT: vcvt.u32.f32 s2, s2
+; CHECK-NEON-NEXT: vmov.32 d11[0], r0
+; CHECK-NEON-NEXT: vmov r0, s0
+; CHECK-NEON-NEXT: vcvt.u32.f32 s0, s16
+; CHECK-NEON-NEXT: vmov.32 d12[1], r0
+; CHECK-NEON-NEXT: vmov r0, s0
+; CHECK-NEON-NEXT: vmin.u32 q9, q6, q8
+; CHECK-NEON-NEXT: vmov.32 d11[1], r0
+; CHECK-NEON-NEXT: vmov r0, s2
+; CHECK-NEON-NEXT: vmovn.i32 d1, q9
+; CHECK-NEON-NEXT: vmov.32 d10[1], r0
+; CHECK-NEON-NEXT: vmin.u32 q8, q5, q8
+; CHECK-NEON-NEXT: vmovn.i32 d0, q8
+; CHECK-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14}
+; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r11, pc}
+;
+; CHECK-FP16-LABEL: utesth_f16i16_mm:
+; CHECK-FP16: @ %bb.0: @ %entry
+; CHECK-FP16-NEXT: vmovx.f16 s4, s0
+; CHECK-FP16-NEXT: vcvt.u32.f16 s12, s0
+; CHECK-FP16-NEXT: vcvt.u32.f16 s0, s3
+; CHECK-FP16-NEXT: vcvt.u32.f16 s5, s2
+; CHECK-FP16-NEXT: vmov r0, s0
+; CHECK-FP16-NEXT: vcvt.u32.f16 s14, s1
+; CHECK-FP16-NEXT: vmovx.f16 s10, s3
+; CHECK-FP16-NEXT: vmovx.f16 s8, s2
+; CHECK-FP16-NEXT: vcvt.u32.f16 s10, s10
+; CHECK-FP16-NEXT: vcvt.u32.f16 s8, s8
+; CHECK-FP16-NEXT: vmovx.f16 s6, s1
+; CHECK-FP16-NEXT: vcvt.u32.f16 s4, s4
+; CHECK-FP16-NEXT: vcvt.u32.f16 s6, s6
+; CHECK-FP16-NEXT: vmov.i32 q10, #0xffff
+; CHECK-FP16-NEXT: vmov.32 d17[0], r0
+; CHECK-FP16-NEXT: vmov r0, s5
+; CHECK-FP16-NEXT: vmov.32 d16[0], r0
+; CHECK-FP16-NEXT: vmov r0, s14
+; CHECK-FP16-NEXT: vmov.32 d19[0], r0
+; CHECK-FP16-NEXT: vmov r0, s12
+; CHECK-FP16-NEXT: vmov.32 d18[0], r0
+; CHECK-FP16-NEXT: vmov r0, s10
+; CHECK-FP16-NEXT: vmov.32 d17[1], r0
+; CHECK-FP16-NEXT: vmov r0, s8
+; CHECK-FP16-NEXT: vmov.32 d16[1], r0
+; CHECK-FP16-NEXT: vmov r0, s6
+; CHECK-FP16-NEXT: vmin.u32 q8, q8, q10
+; CHECK-FP16-NEXT: vmovn.i32 d1, q8
+; CHECK-FP16-NEXT: vmov.32 d19[1], r0
+; CHECK-FP16-NEXT: vmov r0, s4
+; CHECK-FP16-NEXT: vmov.32 d18[1], r0
+; CHECK-FP16-NEXT: vmin.u32 q9, q9, q10
+; CHECK-FP16-NEXT: vmovn.i32 d0, q9
+; CHECK-FP16-NEXT: bx lr
+entry:
+ %conv = fptoui <8 x half> %x to <8 x i32>
+ %spec.store.select = call <8 x i32> @llvm.umin.v8i32(<8 x i32> %conv, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>)
+ %conv6 = trunc <8 x i32> %spec.store.select to <8 x i16>
+ ret <8 x i16> %conv6
+}
+
+define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
+; CHECK-NEON-LABEL: ustest_f16i16_mm:
+; CHECK-NEON: @ %bb.0: @ %entry
+; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r11, lr}
+; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r11, lr}
+; CHECK-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; CHECK-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; CHECK-NEON-NEXT: vmov r0, s1
+; CHECK-NEON-NEXT: vmov.f32 s16, s7
+; CHECK-NEON-NEXT: vmov.f32 s18, s6
+; CHECK-NEON-NEXT: vmov.f32 s20, s5
+; CHECK-NEON-NEXT: vmov.f32 s22, s4
+; CHECK-NEON-NEXT: vmov.f32 s24, s3
+; CHECK-NEON-NEXT: vmov.f32 s26, s2
+; CHECK-NEON-NEXT: vmov.f32 s28, s0
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: mov r4, r0
+; CHECK-NEON-NEXT: vmov r0, s26
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: mov r5, r0
+; CHECK-NEON-NEXT: vmov r0, s22
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: mov r6, r0
+; CHECK-NEON-NEXT: vmov r0, s24
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: mov r7, r0
+; CHECK-NEON-NEXT: vmov r0, s18
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: vmov s0, r0
+; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s0
+; CHECK-NEON-NEXT: vmov r0, s0
+; CHECK-NEON-NEXT: vmov.32 d13[0], r0
+; CHECK-NEON-NEXT: vmov r0, s16
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: vmov s0, r0
+; CHECK-NEON-NEXT: vmov s22, r7
+; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s0
+; CHECK-NEON-NEXT: vmov s30, r6
+; CHECK-NEON-NEXT: vmov r0, s0
+; CHECK-NEON-NEXT: vmov.32 d13[1], r0
+; CHECK-NEON-NEXT: vmov r0, s28
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: vmov s0, r0
+; CHECK-NEON-NEXT: vmov r1, s20
+; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s0
+; CHECK-NEON-NEXT: vmov s2, r5
+; CHECK-NEON-NEXT: vcvt.s32.f32 s20, s2
+; CHECK-NEON-NEXT: vmov r0, s0
+; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s30
+; CHECK-NEON-NEXT: vmov.32 d8[0], r0
+; CHECK-NEON-NEXT: vmov r0, s0
+; CHECK-NEON-NEXT: vmov.32 d12[0], r0
+; CHECK-NEON-NEXT: mov r0, r1
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: vmov s0, r0
+; CHECK-NEON-NEXT: vmov r0, s20
+; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s0
+; CHECK-NEON-NEXT: vmov s2, r4
+; CHECK-NEON-NEXT: vmov.i32 q8, #0xffff
+; CHECK-NEON-NEXT: vcvt.s32.f32 s2, s2
+; CHECK-NEON-NEXT: vmov.i32 q9, #0x0
+; CHECK-NEON-NEXT: vmov.32 d9[0], r0
+; CHECK-NEON-NEXT: vmov r0, s0
+; CHECK-NEON-NEXT: vcvt.s32.f32 s0, s22
+; CHECK-NEON-NEXT: vmov.32 d12[1], r0
+; CHECK-NEON-NEXT: vmov r0, s0
+; CHECK-NEON-NEXT: vmin.s32 q10, q6, q8
+; CHECK-NEON-NEXT: vmax.s32 q10, q10, q9
+; CHECK-NEON-NEXT: vmov.32 d9[1], r0
+; CHECK-NEON-NEXT: vmov r0, s2
+; CHECK-NEON-NEXT: vmovn.i32 d1, q10
+; CHECK-NEON-NEXT: vmov.32 d8[1], r0
+; CHECK-NEON-NEXT: vmin.s32 q8, q4, q8
+; CHECK-NEON-NEXT: vmax.s32 q8, q8, q9
+; CHECK-NEON-NEXT: vmovn.i32 d0, q8
+; CHECK-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r11, pc}
+;
+; CHECK-FP16-LABEL: ustest_f16i16_mm:
+; CHECK-FP16: @ %bb.0: @ %entry
+; CHECK-FP16-NEXT: vmovx.f16 s4, s0
+; CHECK-FP16-NEXT: vcvt.s32.f16 s12, s0
+; CHECK-FP16-NEXT: vcvt.s32.f16 s0, s3
+; CHECK-FP16-NEXT: vcvt.s32.f16 s5, s2
+; CHECK-FP16-NEXT: vmov r0, s0
+; CHECK-FP16-NEXT: vcvt.s32.f16 s14, s1
+; CHECK-FP16-NEXT: vmovx.f16 s10, s3
+; CHECK-FP16-NEXT: vmovx.f16 s8, s2
+; CHECK-FP16-NEXT: vcvt.s32.f16 s10, s10
+; CHECK-FP16-NEXT: vcvt.s32.f16 s8, s8
+; CHECK-FP16-NEXT: vmovx.f16 s6, s1
+; CHECK-FP16-NEXT: vcvt.s32.f16 s4, s4
+; CHECK-FP16-NEXT: vcvt.s32.f16 s6, s6
+; CHECK-FP16-NEXT: vmov.i32 q10, #0xffff
+; CHECK-FP16-NEXT: vmov.i32 q11, #0x0
+; CHECK-FP16-NEXT: vmov.32 d17[0], r0
+; CHECK-FP16-NEXT: vmov r0, s5
+; CHECK-FP16-NEXT: vmov.32 d16[0], r0
+; CHECK-FP16-NEXT: vmov r0, s14
+; CHECK-FP16-NEXT: vmov.32 d19[0], r0
+; CHECK-FP16-NEXT: vmov r0, s12
+; CHECK-FP16-NEXT: vmov.32 d18[0], r0
+; CHECK-FP16-NEXT: vmov r0, s10
+; CHECK-FP16-NEXT: vmov.32 d17[1], r0
+; CHECK-FP16-NEXT: vmov r0, s8
+; CHECK-FP16-NEXT: vmov.32 d16[1], r0
+; CHECK-FP16-NEXT: vmov r0, s6
+; CHECK-FP16-NEXT: vmin.s32 q8, q8, q10
+; CHECK-FP16-NEXT: vmax.s32 q8, q8, q11
+; CHECK-FP16-NEXT: vmovn.i32 d1, q8
+; CHECK-FP16-NEXT: vmov.32 d19[1], r0
+; CHECK-FP16-NEXT: vmov r0, s4
+; CHECK-FP16-NEXT: vmov.32 d18[1], r0
+; CHECK-FP16-NEXT: vmin.s32 q9, q9, q10
+; CHECK-FP16-NEXT: vmax.s32 q9, q9, q11
+; CHECK-FP16-NEXT: vmovn.i32 d0, q9
+; CHECK-FP16-NEXT: bx lr
+entry:
+ %conv = fptosi <8 x half> %x to <8 x i32>
+ %spec.store.select = call <8 x i32> @llvm.smin.v8i32(<8 x i32> %conv, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>)
+ %spec.store.select7 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> %spec.store.select, <8 x i32> zeroinitializer)
+ %conv6 = trunc <8 x i32> %spec.store.select7 to <8 x i16>
+ ret <8 x i16> %conv6
+}
+
+; i64 saturate
+
+define <2 x i64> @stest_f64i64_mm(<2 x double> %x) {
+; CHECK-LABEL: stest_f64i64_mm:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT: .pad #4
+; CHECK-NEXT: sub sp, sp, #4
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: .pad #16
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: vorr q4, q0, q0
+; CHECK-NEXT: vorr d0, d9, d9
+; CHECK-NEXT: bl __fixdfti
+; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: mov r0, r3
+; CHECK-NEXT: mov r10, #0
+; CHECK-NEXT: andne r0, r2, r0, asr #31
+; CHECK-NEXT: mov r11, r1
+; CHECK-NEXT: movmi r10, r3
+; CHECK-NEXT: and r1, r0, r10
+; CHECK-NEXT: cmn r11, #-2147483647
+; CHECK-NEXT: mvn r0, #-2147483648
+; CHECK-NEXT: movlo r0, r11
+; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: mvn r8, #-2147483648
+; CHECK-NEXT: vorr d0, d8, d8
+; CHECK-NEXT: movmi r8, r11
+; CHECK-NEXT: orrs r2, r2, r3
+; CHECK-NEXT: moveq r8, r0
+; CHECK-NEXT: cmn r10, #1
+; CHECK-NEXT: mov r0, #-2147483648
+; CHECK-NEXT: mov r9, #-2147483648
+; CHECK-NEXT: movgt r0, r8
+; CHECK-NEXT: cmp r8, #-2147483648
+; CHECK-NEXT: movhi r9, r8
+; CHECK-NEXT: cmn r1, #1
+; CHECK-NEXT: mov r6, r3
+; CHECK-NEXT: str r1, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT: mvn r7, #-2147483648
+; CHECK-NEXT: str r2, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: movne r9, r0
+; CHECK-NEXT: bl __fixdfti
+; CHECK-NEXT: cmn r1, #-2147483647
+; CHECK-NEXT: mvn r5, #0
+; CHECK-NEXT: movlo r5, r0
+; CHECK-NEXT: mvn r4, #0
+; CHECK-NEXT: moveq r5, r0
+; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: movpl r0, r4
+; CHECK-NEXT: orrs r12, r2, r3
+; CHECK-NEXT: moveq r0, r5
+; CHECK-NEXT: cmn r1, #-2147483647
+; CHECK-NEXT: mvn r5, #-2147483648
+; CHECK-NEXT: movlo r5, r1
+; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: movmi r7, r1
+; CHECK-NEXT: cmp r12, #0
+; CHECK-NEXT: moveq r7, r5
+; CHECK-NEXT: cmp r7, #-2147483648
+; CHECK-NEXT: mov r1, #0
+; CHECK-NEXT: ldr r5, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: movhi r1, r0
+; CHECK-NEXT: mov r12, #0
+; CHECK-NEXT: moveq r1, r0
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: mvn r6, #0
+; CHECK-NEXT: movmi r6, r5
+; CHECK-NEXT: cmn r11, #-2147483647
+; CHECK-NEXT: movlo r4, r5
+; CHECK-NEXT: moveq r4, r5
+; CHECK-NEXT: ldr r5, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT: movne r4, r6
+; CHECK-NEXT: cmp r8, #-2147483648
+; CHECK-NEXT: mov r6, #0
+; CHECK-NEXT: movhi r6, r4
+; CHECK-NEXT: moveq r6, r4
+; CHECK-NEXT: cmn r10, #1
+; CHECK-NEXT: movle r4, r12
+; CHECK-NEXT: cmn r5, #1
+; CHECK-NEXT: moveq r4, r6
+; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: mov r6, #0
+; CHECK-NEXT: vmov.32 d1[0], r4
+; CHECK-NEXT: movmi r6, r3
+; CHECK-NEXT: cmn r6, #1
+; CHECK-NEXT: movle r0, r12
+; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: andne r3, r2, r3, asr #31
+; CHECK-NEXT: and r2, r3, r6
+; CHECK-NEXT: cmn r2, #1
+; CHECK-NEXT: moveq r0, r1
+; CHECK-NEXT: cmn r6, #1
+; CHECK-NEXT: mov r1, #-2147483648
+; CHECK-NEXT: vmov.32 d0[0], r0
+; CHECK-NEXT: movgt r1, r7
+; CHECK-NEXT: cmp r7, #-2147483648
+; CHECK-NEXT: mov r0, #-2147483648
+; CHECK-NEXT: vmov.32 d1[1], r9
+; CHECK-NEXT: movls r7, r0
+; CHECK-NEXT: cmn r2, #1
+; CHECK-NEXT: movne r7, r1
+; CHECK-NEXT: vmov.32 d0[1], r7
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: add sp, sp, #4
+; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+entry:
+ %conv = fptosi <2 x double> %x to <2 x i128>
+ %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 9223372036854775807, i128 9223372036854775807>)
+ %spec.store.select7 = call <2 x i128> @llvm.smax.v2i128(<2 x i128> %spec.store.select, <2 x i128> <i128 -9223372036854775808, i128 -9223372036854775808>)
+ %conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+define <2 x i64> @utest_f64i64_mm(<2 x double> %x) {
+; CHECK-LABEL: utest_f64i64_mm:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr}
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vorr q4, q0, q0
+; CHECK-NEXT: vorr d0, d9, d9
+; CHECK-NEXT: bl __fixunsdfti
+; CHECK-NEXT: mov r7, r1
+; CHECK-NEXT: eor r1, r2, #1
+; CHECK-NEXT: subs r2, r2, #1
+; CHECK-NEXT: mov r6, #0
+; CHECK-NEXT: sbcs r2, r3, #0
+; CHECK-NEXT: orr r1, r1, r3
+; CHECK-NEXT: movwlo r6, #1
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: moveq r7, r6
+; CHECK-NEXT: cmp r1, #0
+; CHECK-NEXT: vorr d0, d8, d8
+; CHECK-NEXT: moveq r7, r1
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: mov r5, #0
+; CHECK-NEXT: movne r6, r0
+; CHECK-NEXT: cmp r1, #0
+; CHECK-NEXT: moveq r6, r1
+; CHECK-NEXT: bl __fixunsdfti
+; CHECK-NEXT: eor r4, r2, #1
+; CHECK-NEXT: subs r2, r2, #1
+; CHECK-NEXT: sbcs r2, r3, #0
+; CHECK-NEXT: orr r4, r4, r3
+; CHECK-NEXT: movwlo r5, #1
+; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: moveq r0, r5
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: moveq r0, r4
+; CHECK-NEXT: vmov.32 d1[0], r6
+; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: vmov.32 d0[0], r0
+; CHECK-NEXT: movne r5, r1
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: vmov.32 d1[1], r7
+; CHECK-NEXT: moveq r5, r4
+; CHECK-NEXT: vmov.32 d0[1], r5
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: pop {r4, r5, r6, r7, r11, pc}
+entry:
+ %conv = fptoui <2 x double> %x to <2 x i128>
+ %spec.store.select = call <2 x i128> @llvm.umin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)
+ %conv6 = trunc <2 x i128> %spec.store.select to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) {
+; CHECK-LABEL: ustest_f64i64_mm:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT: .pad #4
+; CHECK-NEXT: sub sp, sp, #4
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vorr q4, q0, q0
+; CHECK-NEXT: bl __fixdfti
+; CHECK-NEXT: subs r7, r2, #1
+; CHECK-NEXT: mov r10, r0
+; CHECK-NEXT: eor r0, r2, #1
+; CHECK-NEXT: sbcs r7, r3, #0
+; CHECK-NEXT: mov r5, #0
+; CHECK-NEXT: orr r0, r0, r3
+; CHECK-NEXT: movwlt r5, #1
+; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: moveq r10, r5
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: moveq r10, r0
+; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: movne r5, r1
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: moveq r5, r0
+; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: mov r1, #1
+; CHECK-NEXT: movne r0, r10
+; CHECK-NEXT: mov r8, #1
+; CHECK-NEXT: moveq r0, r10
+; CHECK-NEXT: cmp r2, #1
+; CHECK-NEXT: movlo r1, r2
+; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: movpl r2, r8
+; CHECK-NEXT: mov r11, #0
+; CHECK-NEXT: moveq r2, r1
+; CHECK-NEXT: movpl r3, r11
+; CHECK-NEXT: rsbs r1, r2, #0
+; CHECK-NEXT: vorr d0, d9, d9
+; CHECK-NEXT: rscs r1, r3, #0
+; CHECK-NEXT: mov r7, #0
+; CHECK-NEXT: movwlt r7, #1
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: moveq r10, r7
+; CHECK-NEXT: orrs r9, r2, r3
+; CHECK-NEXT: moveq r10, r0
+; CHECK-NEXT: bl __fixdfti
+; CHECK-NEXT: eor r4, r2, #1
+; CHECK-NEXT: orr r6, r4, r3
+; CHECK-NEXT: subs r4, r2, #1
+; CHECK-NEXT: sbcs r4, r3, #0
+; CHECK-NEXT: mov r4, #0
+; CHECK-NEXT: movwlt r4, #1
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: moveq r0, r4
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: moveq r0, r6
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: movne r4, r1
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: moveq r4, r6
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: mov r1, r4
+; CHECK-NEXT: mov r6, #1
+; CHECK-NEXT: movne r1, r0
+; CHECK-NEXT: moveq r1, r0
+; CHECK-NEXT: cmp r2, #1
+; CHECK-NEXT: movlo r6, r2
+; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: movmi r8, r2
+; CHECK-NEXT: movpl r3, r11
+; CHECK-NEXT: moveq r8, r6
+; CHECK-NEXT: rsbs r2, r8, #0
+; CHECK-NEXT: rscs r2, r3, #0
+; CHECK-NEXT: movwlt r11, #1
+; CHECK-NEXT: cmp r11, #0
+; CHECK-NEXT: moveq r0, r11
+; CHECK-NEXT: orrs r2, r8, r3
+; CHECK-NEXT: moveq r0, r1
+; CHECK-NEXT: cmp r11, #0
+; CHECK-NEXT: movne r11, r4
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: vmov.32 d1[0], r0
+; CHECK-NEXT: moveq r11, r4
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: vmov.32 d0[0], r10
+; CHECK-NEXT: movne r7, r5
+; CHECK-NEXT: cmp r9, #0
+; CHECK-NEXT: vmov.32 d1[1], r11
+; CHECK-NEXT: moveq r7, r5
+; CHECK-NEXT: vmov.32 d0[1], r7
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: add sp, sp, #4
+; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+entry:
+ %conv = fptosi <2 x double> %x to <2 x i128>
+ %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)
+ %spec.store.select7 = call <2 x i128> @llvm.smax.v2i128(<2 x i128> %spec.store.select, <2 x i128> zeroinitializer)
+ %conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+define <2 x i64> @stest_f32i64_mm(<2 x float> %x) {
+; CHECK-LABEL: stest_f32i64_mm:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT: .pad #4
+; CHECK-NEXT: sub sp, sp, #4
+; CHECK-NEXT: .vsave {d8}
+; CHECK-NEXT: vpush {d8}
+; CHECK-NEXT: .pad #16
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: vmov.f64 d8, d0
+; CHECK-NEXT: vmov.f32 s0, s17
+; CHECK-NEXT: bl __fixsfti
+; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: mov r0, r3
+; CHECK-NEXT: mov r10, #0
+; CHECK-NEXT: vmov.f32 s0, s16
+; CHECK-NEXT: andne r0, r2, r0, asr #31
+; CHECK-NEXT: mov r11, r1
+; CHECK-NEXT: movmi r10, r3
+; CHECK-NEXT: and r1, r0, r10
+; CHECK-NEXT: cmn r11, #-2147483647
+; CHECK-NEXT: mvn r0, #-2147483648
+; CHECK-NEXT: mvn r8, #-2147483648
+; CHECK-NEXT: movlo r0, r11
+; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: movmi r8, r11
+; CHECK-NEXT: orrs r2, r2, r3
+; CHECK-NEXT: moveq r8, r0
+; CHECK-NEXT: cmn r10, #1
+; CHECK-NEXT: mov r0, #-2147483648
+; CHECK-NEXT: mov r9, #-2147483648
+; CHECK-NEXT: movgt r0, r8
+; CHECK-NEXT: cmp r8, #-2147483648
+; CHECK-NEXT: movhi r9, r8
+; CHECK-NEXT: cmn r1, #1
+; CHECK-NEXT: mov r6, r3
+; CHECK-NEXT: str r1, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT: mvn r7, #-2147483648
+; CHECK-NEXT: str r2, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: movne r9, r0
+; CHECK-NEXT: bl __fixsfti
+; CHECK-NEXT: cmn r1, #-2147483647
+; CHECK-NEXT: mvn r5, #0
+; CHECK-NEXT: movlo r5, r0
+; CHECK-NEXT: mvn r4, #0
+; CHECK-NEXT: moveq r5, r0
+; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: movpl r0, r4
+; CHECK-NEXT: orrs r12, r2, r3
+; CHECK-NEXT: moveq r0, r5
+; CHECK-NEXT: cmn r1, #-2147483647
+; CHECK-NEXT: mvn r5, #-2147483648
+; CHECK-NEXT: movlo r5, r1
+; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: movmi r7, r1
+; CHECK-NEXT: cmp r12, #0
+; CHECK-NEXT: moveq r7, r5
+; CHECK-NEXT: cmp r7, #-2147483648
+; CHECK-NEXT: mov r1, #0
+; CHECK-NEXT: ldr r5, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: movhi r1, r0
+; CHECK-NEXT: mov r12, #0
+; CHECK-NEXT: moveq r1, r0
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: mvn r6, #0
+; CHECK-NEXT: movmi r6, r5
+; CHECK-NEXT: cmn r11, #-2147483647
+; CHECK-NEXT: movlo r4, r5
+; CHECK-NEXT: moveq r4, r5
+; CHECK-NEXT: ldr r5, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT: movne r4, r6
+; CHECK-NEXT: cmp r8, #-2147483648
+; CHECK-NEXT: mov r6, #0
+; CHECK-NEXT: movhi r6, r4
+; CHECK-NEXT: moveq r6, r4
+; CHECK-NEXT: cmn r10, #1
+; CHECK-NEXT: movle r4, r12
+; CHECK-NEXT: cmn r5, #1
+; CHECK-NEXT: moveq r4, r6
+; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: mov r6, #0
+; CHECK-NEXT: vmov.32 d1[0], r4
+; CHECK-NEXT: movmi r6, r3
+; CHECK-NEXT: cmn r6, #1
+; CHECK-NEXT: movle r0, r12
+; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: andne r3, r2, r3, asr #31
+; CHECK-NEXT: and r2, r3, r6
+; CHECK-NEXT: cmn r2, #1
+; CHECK-NEXT: moveq r0, r1
+; CHECK-NEXT: cmn r6, #1
+; CHECK-NEXT: mov r1, #-2147483648
+; CHECK-NEXT: vmov.32 d0[0], r0
+; CHECK-NEXT: movgt r1, r7
+; CHECK-NEXT: cmp r7, #-2147483648
+; CHECK-NEXT: mov r0, #-2147483648
+; CHECK-NEXT: vmov.32 d1[1], r9
+; CHECK-NEXT: movls r7, r0
+; CHECK-NEXT: cmn r2, #1
+; CHECK-NEXT: movne r7, r1
+; CHECK-NEXT: vmov.32 d0[1], r7
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: vpop {d8}
+; CHECK-NEXT: add sp, sp, #4
+; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+entry:
+ %conv = fptosi <2 x float> %x to <2 x i128>
+ %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 9223372036854775807, i128 9223372036854775807>)
+ %spec.store.select7 = call <2 x i128> @llvm.smax.v2i128(<2 x i128> %spec.store.select, <2 x i128> <i128 -9223372036854775808, i128 -9223372036854775808>)
+ %conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+define <2 x i64> @utest_f32i64_mm(<2 x float> %x) {
+; CHECK-LABEL: utest_f32i64_mm:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr}
+; CHECK-NEXT: .vsave {d8}
+; CHECK-NEXT: vpush {d8}
+; CHECK-NEXT: vmov.f64 d8, d0
+; CHECK-NEXT: vmov.f32 s0, s17
+; CHECK-NEXT: bl __fixunssfti
+; CHECK-NEXT: vmov.f32 s0, s16
+; CHECK-NEXT: mov r7, r1
+; CHECK-NEXT: eor r1, r2, #1
+; CHECK-NEXT: subs r2, r2, #1
+; CHECK-NEXT: sbcs r2, r3, #0
+; CHECK-NEXT: mov r6, #0
+; CHECK-NEXT: movwlo r6, #1
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: orr r1, r1, r3
+; CHECK-NEXT: moveq r7, r6
+; CHECK-NEXT: cmp r1, #0
+; CHECK-NEXT: mov r5, #0
+; CHECK-NEXT: moveq r7, r1
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: movne r6, r0
+; CHECK-NEXT: cmp r1, #0
+; CHECK-NEXT: moveq r6, r1
+; CHECK-NEXT: bl __fixunssfti
+; CHECK-NEXT: eor r4, r2, #1
+; CHECK-NEXT: subs r2, r2, #1
+; CHECK-NEXT: sbcs r2, r3, #0
+; CHECK-NEXT: orr r4, r4, r3
+; CHECK-NEXT: movwlo r5, #1
+; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: moveq r0, r5
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: moveq r0, r4
+; CHECK-NEXT: vmov.32 d1[0], r6
+; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: vmov.32 d0[0], r0
+; CHECK-NEXT: movne r5, r1
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: vmov.32 d1[1], r7
+; CHECK-NEXT: moveq r5, r4
+; CHECK-NEXT: vmov.32 d0[1], r5
+; CHECK-NEXT: vpop {d8}
+; CHECK-NEXT: pop {r4, r5, r6, r7, r11, pc}
+entry:
+ %conv = fptoui <2 x float> %x to <2 x i128>
+ %spec.store.select = call <2 x i128> @llvm.umin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)
+ %conv6 = trunc <2 x i128> %spec.store.select to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) {
+; CHECK-LABEL: ustest_f32i64_mm:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT: .pad #4
+; CHECK-NEXT: sub sp, sp, #4
+; CHECK-NEXT: .vsave {d8}
+; CHECK-NEXT: vpush {d8}
+; CHECK-NEXT: vmov.f64 d8, d0
+; CHECK-NEXT: bl __fixsfti
+; CHECK-NEXT: subs r7, r2, #1
+; CHECK-NEXT: mov r10, r0
+; CHECK-NEXT: eor r0, r2, #1
+; CHECK-NEXT: sbcs r7, r3, #0
+; CHECK-NEXT: mov r5, #0
+; CHECK-NEXT: orr r0, r0, r3
+; CHECK-NEXT: movwlt r5, #1
+; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: moveq r10, r5
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: moveq r10, r0
+; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: movne r5, r1
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: moveq r5, r0
+; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: vmov.f32 s0, s17
+; CHECK-NEXT: movne r0, r10
+; CHECK-NEXT: mov r1, #1
+; CHECK-NEXT: moveq r0, r10
+; CHECK-NEXT: cmp r2, #1
+; CHECK-NEXT: movlo r1, r2
+; CHECK-NEXT: mov r8, #1
+; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: mov r11, #0
+; CHECK-NEXT: movpl r2, r8
+; CHECK-NEXT: movpl r3, r11
+; CHECK-NEXT: moveq r2, r1
+; CHECK-NEXT: rsbs r1, r2, #0
+; CHECK-NEXT: rscs r1, r3, #0
+; CHECK-NEXT: mov r7, #0
+; CHECK-NEXT: movwlt r7, #1
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: moveq r10, r7
+; CHECK-NEXT: orrs r9, r2, r3
+; CHECK-NEXT: moveq r10, r0
+; CHECK-NEXT: bl __fixsfti
+; CHECK-NEXT: eor r4, r2, #1
+; CHECK-NEXT: orr r6, r4, r3
+; CHECK-NEXT: subs r4, r2, #1
+; CHECK-NEXT: sbcs r4, r3, #0
+; CHECK-NEXT: mov r4, #0
+; CHECK-NEXT: movwlt r4, #1
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: moveq r0, r4
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: moveq r0, r6
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: movne r4, r1
+; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: moveq r4, r6
+; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: mov r1, r4
+; CHECK-NEXT: mov r6, #1
+; CHECK-NEXT: movne r1, r0
+; CHECK-NEXT: moveq r1, r0
+; CHECK-NEXT: cmp r2, #1
+; CHECK-NEXT: movlo r6, r2
+; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: movmi r8, r2
+; CHECK-NEXT: movpl r3, r11
+; CHECK-NEXT: moveq r8, r6
+; CHECK-NEXT: rsbs r2, r8, #0
+; CHECK-NEXT: rscs r2, r3, #0
+; CHECK-NEXT: movwlt r11, #1
+; CHECK-NEXT: cmp r11, #0
+; CHECK-NEXT: moveq r0, r11
+; CHECK-NEXT: orrs r2, r8, r3
+; CHECK-NEXT: moveq r0, r1
+; CHECK-NEXT: cmp r11, #0
+; CHECK-NEXT: movne r11, r4
+; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: vmov.32 d1[0], r0
+; CHECK-NEXT: moveq r11, r4
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: vmov.32 d0[0], r10
+; CHECK-NEXT: movne r7, r5
+; CHECK-NEXT: cmp r9, #0
+; CHECK-NEXT: vmov.32 d1[1], r11
+; CHECK-NEXT: moveq r7, r5
+; CHECK-NEXT: vmov.32 d0[1], r7
+; CHECK-NEXT: vpop {d8}
+; CHECK-NEXT: add sp, sp, #4
+; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+entry:
+ %conv = fptosi <2 x float> %x to <2 x i128>
+ %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)
+ %spec.store.select7 = call <2 x i128> @llvm.smax.v2i128(<2 x i128> %spec.store.select, <2 x i128> zeroinitializer)
+ %conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+define <2 x i64> @stest_f16i64_mm(<2 x half> %x) {
+; CHECK-NEON-LABEL: stest_f16i64_mm:
+; CHECK-NEON: @ %bb.0: @ %entry
+; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEON-NEXT: .pad #4
+; CHECK-NEON-NEXT: sub sp, sp, #4
+; CHECK-NEON-NEXT: .vsave {d8}
+; CHECK-NEON-NEXT: vpush {d8}
+; CHECK-NEON-NEXT: .pad #16
+; CHECK-NEON-NEXT: sub sp, sp, #16
+; CHECK-NEON-NEXT: vmov r0, s1
+; CHECK-NEON-NEXT: vmov.f32 s16, s0
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: vmov s0, r0
+; CHECK-NEON-NEXT: bl __fixsfti
+; CHECK-NEON-NEXT: str r0, [sp, #12] @ 4-byte Spill
+; CHECK-NEON-NEXT: cmp r3, #0
+; CHECK-NEON-NEXT: mov r0, r3
+; CHECK-NEON-NEXT: mov r10, #0
+; CHECK-NEON-NEXT: andne r0, r2, r0, asr #31
+; CHECK-NEON-NEXT: mov r11, r1
+; CHECK-NEON-NEXT: movmi r10, r3
+; CHECK-NEON-NEXT: and r1, r0, r10
+; CHECK-NEON-NEXT: cmn r11, #-2147483647
+; CHECK-NEON-NEXT: mvn r0, #-2147483648
+; CHECK-NEON-NEXT: movlo r0, r11
+; CHECK-NEON-NEXT: cmp r3, #0
+; CHECK-NEON-NEXT: mvn r8, #-2147483648
+; CHECK-NEON-NEXT: mov r9, #-2147483648
+; CHECK-NEON-NEXT: movmi r8, r11
+; CHECK-NEON-NEXT: orrs r2, r2, r3
+; CHECK-NEON-NEXT: moveq r8, r0
+; CHECK-NEON-NEXT: cmn r10, #1
+; CHECK-NEON-NEXT: mov r0, #-2147483648
+; CHECK-NEON-NEXT: mov r6, r3
+; CHECK-NEON-NEXT: movgt r0, r8
+; CHECK-NEON-NEXT: cmp r8, #-2147483648
+; CHECK-NEON-NEXT: movhi r9, r8
+; CHECK-NEON-NEXT: cmn r1, #1
+; CHECK-NEON-NEXT: movne r9, r0
+; CHECK-NEON-NEXT: vmov r0, s16
+; CHECK-NEON-NEXT: str r1, [sp, #8] @ 4-byte Spill
+; CHECK-NEON-NEXT: mvn r7, #-2147483648
+; CHECK-NEON-NEXT: str r2, [sp, #4] @ 4-byte Spill
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: vmov s0, r0
+; CHECK-NEON-NEXT: bl __fixsfti
+; CHECK-NEON-NEXT: cmn r1, #-2147483647
+; CHECK-NEON-NEXT: mvn r5, #0
+; CHECK-NEON-NEXT: movlo r5, r0
+; CHECK-NEON-NEXT: mvn r4, #0
+; CHECK-NEON-NEXT: moveq r5, r0
+; CHECK-NEON-NEXT: cmp r3, #0
+; CHECK-NEON-NEXT: movpl r0, r4
+; CHECK-NEON-NEXT: orrs r12, r2, r3
+; CHECK-NEON-NEXT: moveq r0, r5
+; CHECK-NEON-NEXT: cmn r1, #-2147483647
+; CHECK-NEON-NEXT: mvn r5, #-2147483648
+; CHECK-NEON-NEXT: movlo r5, r1
+; CHECK-NEON-NEXT: cmp r3, #0
+; CHECK-NEON-NEXT: movmi r7, r1
+; CHECK-NEON-NEXT: cmp r12, #0
+; CHECK-NEON-NEXT: moveq r7, r5
+; CHECK-NEON-NEXT: cmp r7, #-2147483648
+; CHECK-NEON-NEXT: mov r1, #0
+; CHECK-NEON-NEXT: ldr r5, [sp, #12] @ 4-byte Reload
+; CHECK-NEON-NEXT: movhi r1, r0
+; CHECK-NEON-NEXT: mov r12, #0
+; CHECK-NEON-NEXT: moveq r1, r0
+; CHECK-NEON-NEXT: cmp r6, #0
+; CHECK-NEON-NEXT: mvn r6, #0
+; CHECK-NEON-NEXT: movmi r6, r5
+; CHECK-NEON-NEXT: cmn r11, #-2147483647
+; CHECK-NEON-NEXT: movlo r4, r5
+; CHECK-NEON-NEXT: moveq r4, r5
+; CHECK-NEON-NEXT: ldr r5, [sp, #4] @ 4-byte Reload
+; CHECK-NEON-NEXT: cmp r5, #0
+; CHECK-NEON-NEXT: ldr r5, [sp, #8] @ 4-byte Reload
+; CHECK-NEON-NEXT: movne r4, r6
+; CHECK-NEON-NEXT: cmp r8, #-2147483648
+; CHECK-NEON-NEXT: mov r6, #0
+; CHECK-NEON-NEXT: movhi r6, r4
+; CHECK-NEON-NEXT: moveq r6, r4
+; CHECK-NEON-NEXT: cmn r10, #1
+; CHECK-NEON-NEXT: movle r4, r12
+; CHECK-NEON-NEXT: cmn r5, #1
+; CHECK-NEON-NEXT: moveq r4, r6
+; CHECK-NEON-NEXT: cmp r3, #0
+; CHECK-NEON-NEXT: mov r6, #0
+; CHECK-NEON-NEXT: vmov.32 d1[0], r4
+; CHECK-NEON-NEXT: movmi r6, r3
+; CHECK-NEON-NEXT: cmn r6, #1
+; CHECK-NEON-NEXT: movle r0, r12
+; CHECK-NEON-NEXT: cmp r3, #0
+; CHECK-NEON-NEXT: andne r3, r2, r3, asr #31
+; CHECK-NEON-NEXT: and r2, r3, r6
+; CHECK-NEON-NEXT: cmn r2, #1
+; CHECK-NEON-NEXT: moveq r0, r1
+; CHECK-NEON-NEXT: cmn r6, #1
+; CHECK-NEON-NEXT: mov r1, #-2147483648
+; CHECK-NEON-NEXT: vmov.32 d0[0], r0
+; CHECK-NEON-NEXT: movgt r1, r7
+; CHECK-NEON-NEXT: cmp r7, #-2147483648
+; CHECK-NEON-NEXT: mov r0, #-2147483648
+; CHECK-NEON-NEXT: vmov.32 d1[1], r9
+; CHECK-NEON-NEXT: movls r7, r0
+; CHECK-NEON-NEXT: cmn r2, #1
+; CHECK-NEON-NEXT: movne r7, r1
+; CHECK-NEON-NEXT: vmov.32 d0[1], r7
+; CHECK-NEON-NEXT: add sp, sp, #16
+; CHECK-NEON-NEXT: vpop {d8}
+; CHECK-NEON-NEXT: add sp, sp, #4
+; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; CHECK-FP16-LABEL: stest_f16i64_mm:
+; CHECK-FP16: @ %bb.0: @ %entry
+; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-FP16-NEXT: .pad #4
+; CHECK-FP16-NEXT: sub sp, sp, #4
+; CHECK-FP16-NEXT: .vsave {d8}
+; CHECK-FP16-NEXT: vpush {d8}
+; CHECK-FP16-NEXT: .pad #16
+; CHECK-FP16-NEXT: sub sp, sp, #16
+; CHECK-FP16-NEXT: vmov.u16 r0, d0[1]
+; CHECK-FP16-NEXT: vorr d8, d0, d0
+; CHECK-FP16-NEXT: vmov s0, r0
+; CHECK-FP16-NEXT: bl __fixhfti
+; CHECK-FP16-NEXT: str r0, [sp, #12] @ 4-byte Spill
+; CHECK-FP16-NEXT: cmp r3, #0
+; CHECK-FP16-NEXT: mov r0, r3
+; CHECK-FP16-NEXT: mov r10, #0
+; CHECK-FP16-NEXT: andne r0, r2, r0, asr #31
+; CHECK-FP16-NEXT: mov r11, r1
+; CHECK-FP16-NEXT: movmi r10, r3
+; CHECK-FP16-NEXT: and r1, r0, r10
+; CHECK-FP16-NEXT: cmn r11, #-2147483647
+; CHECK-FP16-NEXT: mvn r0, #-2147483648
+; CHECK-FP16-NEXT: movlo r0, r11
+; CHECK-FP16-NEXT: cmp r3, #0
+; CHECK-FP16-NEXT: mvn r8, #-2147483648
+; CHECK-FP16-NEXT: mov r9, #-2147483648
+; CHECK-FP16-NEXT: movmi r8, r11
+; CHECK-FP16-NEXT: orrs r2, r2, r3
+; CHECK-FP16-NEXT: moveq r8, r0
+; CHECK-FP16-NEXT: cmn r10, #1
+; CHECK-FP16-NEXT: mov r0, #-2147483648
+; CHECK-FP16-NEXT: mov r6, r3
+; CHECK-FP16-NEXT: movgt r0, r8
+; CHECK-FP16-NEXT: cmp r8, #-2147483648
+; CHECK-FP16-NEXT: movhi r9, r8
+; CHECK-FP16-NEXT: cmn r1, #1
+; CHECK-FP16-NEXT: movne r9, r0
+; CHECK-FP16-NEXT: vmov.u16 r0, d8[0]
+; CHECK-FP16-NEXT: str r1, [sp, #8] @ 4-byte Spill
+; CHECK-FP16-NEXT: mvn r7, #-2147483648
+; CHECK-FP16-NEXT: str r2, [sp, #4] @ 4-byte Spill
+; CHECK-FP16-NEXT: vmov s0, r0
+; CHECK-FP16-NEXT: bl __fixhfti
+; CHECK-FP16-NEXT: cmn r1, #-2147483647
+; CHECK-FP16-NEXT: mvn r5, #0
+; CHECK-FP16-NEXT: movlo r5, r0
+; CHECK-FP16-NEXT: mvn r4, #0
+; CHECK-FP16-NEXT: moveq r5, r0
+; CHECK-FP16-NEXT: cmp r3, #0
+; CHECK-FP16-NEXT: movpl r0, r4
+; CHECK-FP16-NEXT: orrs r12, r2, r3
+; CHECK-FP16-NEXT: moveq r0, r5
+; CHECK-FP16-NEXT: cmn r1, #-2147483647
+; CHECK-FP16-NEXT: mvn r5, #-2147483648
+; CHECK-FP16-NEXT: movlo r5, r1
+; CHECK-FP16-NEXT: cmp r3, #0
+; CHECK-FP16-NEXT: movmi r7, r1
+; CHECK-FP16-NEXT: cmp r12, #0
+; CHECK-FP16-NEXT: moveq r7, r5
+; CHECK-FP16-NEXT: cmp r7, #-2147483648
+; CHECK-FP16-NEXT: mov r1, #0
+; CHECK-FP16-NEXT: ldr r5, [sp, #12] @ 4-byte Reload
+; CHECK-FP16-NEXT: movhi r1, r0
+; CHECK-FP16-NEXT: mov r12, #0
+; CHECK-FP16-NEXT: moveq r1, r0
+; CHECK-FP16-NEXT: cmp r6, #0
+; CHECK-FP16-NEXT: mvn r6, #0
+; CHECK-FP16-NEXT: movmi r6, r5
+; CHECK-FP16-NEXT: cmn r11, #-2147483647
+; CHECK-FP16-NEXT: movlo r4, r5
+; CHECK-FP16-NEXT: moveq r4, r5
+; CHECK-FP16-NEXT: ldr r5, [sp, #4] @ 4-byte Reload
+; CHECK-FP16-NEXT: cmp r5, #0
+; CHECK-FP16-NEXT: ldr r5, [sp, #8] @ 4-byte Reload
+; CHECK-FP16-NEXT: movne r4, r6
+; CHECK-FP16-NEXT: cmp r8, #-2147483648
+; CHECK-FP16-NEXT: mov r6, #0
+; CHECK-FP16-NEXT: movhi r6, r4
+; CHECK-FP16-NEXT: moveq r6, r4
+; CHECK-FP16-NEXT: cmn r10, #1
+; CHECK-FP16-NEXT: movle r4, r12
+; CHECK-FP16-NEXT: cmn r5, #1
+; CHECK-FP16-NEXT: moveq r4, r6
+; CHECK-FP16-NEXT: cmp r3, #0
+; CHECK-FP16-NEXT: mov r6, #0
+; CHECK-FP16-NEXT: vmov.32 d1[0], r4
+; CHECK-FP16-NEXT: movmi r6, r3
+; CHECK-FP16-NEXT: cmn r6, #1
+; CHECK-FP16-NEXT: movle r0, r12
+; CHECK-FP16-NEXT: cmp r3, #0
+; CHECK-FP16-NEXT: andne r3, r2, r3, asr #31
+; CHECK-FP16-NEXT: and r2, r3, r6
+; CHECK-FP16-NEXT: cmn r2, #1
+; CHECK-FP16-NEXT: moveq r0, r1
+; CHECK-FP16-NEXT: cmn r6, #1
+; CHECK-FP16-NEXT: mov r1, #-2147483648
+; CHECK-FP16-NEXT: vmov.32 d0[0], r0
+; CHECK-FP16-NEXT: movgt r1, r7
+; CHECK-FP16-NEXT: cmp r7, #-2147483648
+; CHECK-FP16-NEXT: mov r0, #-2147483648
+; CHECK-FP16-NEXT: vmov.32 d1[1], r9
+; CHECK-FP16-NEXT: movls r7, r0
+; CHECK-FP16-NEXT: cmn r2, #1
+; CHECK-FP16-NEXT: movne r7, r1
+; CHECK-FP16-NEXT: vmov.32 d0[1], r7
+; CHECK-FP16-NEXT: add sp, sp, #16
+; CHECK-FP16-NEXT: vpop {d8}
+; CHECK-FP16-NEXT: add sp, sp, #4
+; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+entry:
+ %conv = fptosi <2 x half> %x to <2 x i128>
+ %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 9223372036854775807, i128 9223372036854775807>)
+ %spec.store.select7 = call <2 x i128> @llvm.smax.v2i128(<2 x i128> %spec.store.select, <2 x i128> <i128 -9223372036854775808, i128 -9223372036854775808>)
+ %conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
+; CHECK-NEON-LABEL: utesth_f16i64_mm:
+; CHECK-NEON: @ %bb.0: @ %entry
+; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r11, lr}
+; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r11, lr}
+; CHECK-NEON-NEXT: .vsave {d8}
+; CHECK-NEON-NEXT: vpush {d8}
+; CHECK-NEON-NEXT: vmov r0, s0
+; CHECK-NEON-NEXT: vmov.f32 s16, s1
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: mov r5, r0
+; CHECK-NEON-NEXT: vmov r0, s16
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: vmov s0, r0
+; CHECK-NEON-NEXT: bl __fixunssfti
+; CHECK-NEON-NEXT: mov r7, r1
+; CHECK-NEON-NEXT: eor r1, r2, #1
+; CHECK-NEON-NEXT: subs r2, r2, #1
+; CHECK-NEON-NEXT: mov r6, #0
+; CHECK-NEON-NEXT: sbcs r2, r3, #0
+; CHECK-NEON-NEXT: orr r1, r1, r3
+; CHECK-NEON-NEXT: movwlo r6, #1
+; CHECK-NEON-NEXT: cmp r6, #0
+; CHECK-NEON-NEXT: moveq r7, r6
+; CHECK-NEON-NEXT: cmp r1, #0
+; CHECK-NEON-NEXT: vmov s0, r5
+; CHECK-NEON-NEXT: moveq r7, r1
+; CHECK-NEON-NEXT: cmp r6, #0
+; CHECK-NEON-NEXT: mov r5, #0
+; CHECK-NEON-NEXT: movne r6, r0
+; CHECK-NEON-NEXT: cmp r1, #0
+; CHECK-NEON-NEXT: moveq r6, r1
+; CHECK-NEON-NEXT: bl __fixunssfti
+; CHECK-NEON-NEXT: eor r4, r2, #1
+; CHECK-NEON-NEXT: subs r2, r2, #1
+; CHECK-NEON-NEXT: sbcs r2, r3, #0
+; CHECK-NEON-NEXT: orr r4, r4, r3
+; CHECK-NEON-NEXT: movwlo r5, #1
+; CHECK-NEON-NEXT: cmp r5, #0
+; CHECK-NEON-NEXT: moveq r0, r5
+; CHECK-NEON-NEXT: cmp r4, #0
+; CHECK-NEON-NEXT: moveq r0, r4
+; CHECK-NEON-NEXT: vmov.32 d1[0], r6
+; CHECK-NEON-NEXT: cmp r5, #0
+; CHECK-NEON-NEXT: vmov.32 d0[0], r0
+; CHECK-NEON-NEXT: movne r5, r1
+; CHECK-NEON-NEXT: cmp r4, #0
+; CHECK-NEON-NEXT: vmov.32 d1[1], r7
+; CHECK-NEON-NEXT: moveq r5, r4
+; CHECK-NEON-NEXT: vmov.32 d0[1], r5
+; CHECK-NEON-NEXT: vpop {d8}
+; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r11, pc}
+;
+; CHECK-FP16-LABEL: utesth_f16i64_mm:
+; CHECK-FP16: @ %bb.0: @ %entry
+; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r11, lr}
+; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r11, lr}
+; CHECK-FP16-NEXT: vmov.u16 r0, d0[1]
+; CHECK-FP16-NEXT: vmov.u16 r5, d0[0]
+; CHECK-FP16-NEXT: vmov s0, r0
+; CHECK-FP16-NEXT: bl __fixunshfti
+; CHECK-FP16-NEXT: mov r7, r1
+; CHECK-FP16-NEXT: eor r1, r2, #1
+; CHECK-FP16-NEXT: subs r2, r2, #1
+; CHECK-FP16-NEXT: mov r6, #0
+; CHECK-FP16-NEXT: sbcs r2, r3, #0
+; CHECK-FP16-NEXT: orr r1, r1, r3
+; CHECK-FP16-NEXT: movwlo r6, #1
+; CHECK-FP16-NEXT: cmp r6, #0
+; CHECK-FP16-NEXT: moveq r7, r6
+; CHECK-FP16-NEXT: cmp r1, #0
+; CHECK-FP16-NEXT: vmov s0, r5
+; CHECK-FP16-NEXT: moveq r7, r1
+; CHECK-FP16-NEXT: cmp r6, #0
+; CHECK-FP16-NEXT: mov r5, #0
+; CHECK-FP16-NEXT: movne r6, r0
+; CHECK-FP16-NEXT: cmp r1, #0
+; CHECK-FP16-NEXT: moveq r6, r1
+; CHECK-FP16-NEXT: bl __fixunshfti
+; CHECK-FP16-NEXT: eor r4, r2, #1
+; CHECK-FP16-NEXT: subs r2, r2, #1
+; CHECK-FP16-NEXT: sbcs r2, r3, #0
+; CHECK-FP16-NEXT: orr r4, r4, r3
+; CHECK-FP16-NEXT: movwlo r5, #1
+; CHECK-FP16-NEXT: cmp r5, #0
+; CHECK-FP16-NEXT: moveq r0, r5
+; CHECK-FP16-NEXT: cmp r4, #0
+; CHECK-FP16-NEXT: moveq r0, r4
+; CHECK-FP16-NEXT: vmov.32 d1[0], r6
+; CHECK-FP16-NEXT: cmp r5, #0
+; CHECK-FP16-NEXT: vmov.32 d0[0], r0
+; CHECK-FP16-NEXT: movne r5, r1
+; CHECK-FP16-NEXT: cmp r4, #0
+; CHECK-FP16-NEXT: vmov.32 d1[1], r7
+; CHECK-FP16-NEXT: moveq r5, r4
+; CHECK-FP16-NEXT: vmov.32 d0[1], r5
+; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r11, pc}
+entry:
+ %conv = fptoui <2 x half> %x to <2 x i128>
+ %spec.store.select = call <2 x i128> @llvm.umin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)
+ %conv6 = trunc <2 x i128> %spec.store.select to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) {
+; CHECK-NEON-LABEL: ustest_f16i64_mm:
+; CHECK-NEON: @ %bb.0: @ %entry
+; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEON-NEXT: .pad #4
+; CHECK-NEON-NEXT: sub sp, sp, #4
+; CHECK-NEON-NEXT: .vsave {d8}
+; CHECK-NEON-NEXT: vpush {d8}
+; CHECK-NEON-NEXT: vmov r0, s0
+; CHECK-NEON-NEXT: vmov.f32 s16, s1
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: vmov s0, r0
+; CHECK-NEON-NEXT: bl __fixsfti
+; CHECK-NEON-NEXT: mov r8, r0
+; CHECK-NEON-NEXT: eor r0, r2, #1
+; CHECK-NEON-NEXT: mov r5, r2
+; CHECK-NEON-NEXT: subs r2, r2, #1
+; CHECK-NEON-NEXT: sbcs r2, r3, #0
+; CHECK-NEON-NEXT: mov r4, #0
+; CHECK-NEON-NEXT: movwlt r4, #1
+; CHECK-NEON-NEXT: cmp r4, #0
+; CHECK-NEON-NEXT: orr r0, r0, r3
+; CHECK-NEON-NEXT: moveq r8, r4
+; CHECK-NEON-NEXT: cmp r0, #0
+; CHECK-NEON-NEXT: mov r10, #1
+; CHECK-NEON-NEXT: moveq r8, r0
+; CHECK-NEON-NEXT: cmp r4, #0
+; CHECK-NEON-NEXT: movne r4, r1
+; CHECK-NEON-NEXT: cmp r0, #0
+; CHECK-NEON-NEXT: moveq r4, r0
+; CHECK-NEON-NEXT: cmp r4, #0
+; CHECK-NEON-NEXT: mov r7, r4
+; CHECK-NEON-NEXT: mov r0, #1
+; CHECK-NEON-NEXT: movne r7, r8
+; CHECK-NEON-NEXT: mov r6, r3
+; CHECK-NEON-NEXT: moveq r7, r8
+; CHECK-NEON-NEXT: cmp r5, #1
+; CHECK-NEON-NEXT: movlo r0, r5
+; CHECK-NEON-NEXT: cmp r3, #0
+; CHECK-NEON-NEXT: movpl r5, r10
+; CHECK-NEON-NEXT: mov r9, #0
+; CHECK-NEON-NEXT: moveq r5, r0
+; CHECK-NEON-NEXT: movpl r6, r9
+; CHECK-NEON-NEXT: rsbs r0, r5, #0
+; CHECK-NEON-NEXT: mov r11, #0
+; CHECK-NEON-NEXT: rscs r0, r6, #0
+; CHECK-NEON-NEXT: vmov r0, s16
+; CHECK-NEON-NEXT: movwlt r11, #1
+; CHECK-NEON-NEXT: cmp r11, #0
+; CHECK-NEON-NEXT: moveq r8, r11
+; CHECK-NEON-NEXT: bl __aeabi_h2f
+; CHECK-NEON-NEXT: vmov s0, r0
+; CHECK-NEON-NEXT: orrs r5, r5, r6
+; CHECK-NEON-NEXT: moveq r8, r7
+; CHECK-NEON-NEXT: bl __fixsfti
+; CHECK-NEON-NEXT: subs r6, r2, #1
+; CHECK-NEON-NEXT: eor r7, r2, #1
+; CHECK-NEON-NEXT: sbcs r6, r3, #0
+; CHECK-NEON-NEXT: orr r7, r7, r3
+; CHECK-NEON-NEXT: mov r6, #0
+; CHECK-NEON-NEXT: movwlt r6, #1
+; CHECK-NEON-NEXT: cmp r6, #0
+; CHECK-NEON-NEXT: moveq r0, r6
+; CHECK-NEON-NEXT: cmp r7, #0
+; CHECK-NEON-NEXT: moveq r0, r7
+; CHECK-NEON-NEXT: cmp r6, #0
+; CHECK-NEON-NEXT: movne r6, r1
+; CHECK-NEON-NEXT: cmp r7, #0
+; CHECK-NEON-NEXT: moveq r6, r7
+; CHECK-NEON-NEXT: cmp r6, #0
+; CHECK-NEON-NEXT: mov r1, r6
+; CHECK-NEON-NEXT: mov r7, #1
+; CHECK-NEON-NEXT: movne r1, r0
+; CHECK-NEON-NEXT: moveq r1, r0
+; CHECK-NEON-NEXT: cmp r2, #1
+; CHECK-NEON-NEXT: movlo r7, r2
+; CHECK-NEON-NEXT: cmp r3, #0
+; CHECK-NEON-NEXT: movmi r10, r2
+; CHECK-NEON-NEXT: movpl r3, r9
+; CHECK-NEON-NEXT: moveq r10, r7
+; CHECK-NEON-NEXT: rsbs r2, r10, #0
+; CHECK-NEON-NEXT: rscs r2, r3, #0
+; CHECK-NEON-NEXT: movwlt r9, #1
+; CHECK-NEON-NEXT: cmp r9, #0
+; CHECK-NEON-NEXT: moveq r0, r9
+; CHECK-NEON-NEXT: orrs r2, r10, r3
+; CHECK-NEON-NEXT: moveq r0, r1
+; CHECK-NEON-NEXT: cmp r9, #0
+; CHECK-NEON-NEXT: movne r9, r6
+; CHECK-NEON-NEXT: cmp r2, #0
+; CHECK-NEON-NEXT: vmov.32 d1[0], r0
+; CHECK-NEON-NEXT: moveq r9, r6
+; CHECK-NEON-NEXT: cmp r11, #0
+; CHECK-NEON-NEXT: vmov.32 d0[0], r8
+; CHECK-NEON-NEXT: movne r11, r4
+; CHECK-NEON-NEXT: cmp r5, #0
+; CHECK-NEON-NEXT: vmov.32 d1[1], r9
+; CHECK-NEON-NEXT: moveq r11, r4
+; CHECK-NEON-NEXT: vmov.32 d0[1], r11
+; CHECK-NEON-NEXT: vpop {d8}
+; CHECK-NEON-NEXT: add sp, sp, #4
+; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; CHECK-FP16-LABEL: ustest_f16i64_mm:
+; CHECK-FP16: @ %bb.0: @ %entry
+; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-FP16-NEXT: .pad #4
+; CHECK-FP16-NEXT: sub sp, sp, #4
+; CHECK-FP16-NEXT: .vsave {d8}
+; CHECK-FP16-NEXT: vpush {d8}
+; CHECK-FP16-NEXT: vmov.u16 r0, d0[0]
+; CHECK-FP16-NEXT: vorr d8, d0, d0
+; CHECK-FP16-NEXT: vmov s0, r0
+; CHECK-FP16-NEXT: bl __fixhfti
+; CHECK-FP16-NEXT: subs r7, r2, #1
+; CHECK-FP16-NEXT: mov r10, r0
+; CHECK-FP16-NEXT: eor r0, r2, #1
+; CHECK-FP16-NEXT: sbcs r7, r3, #0
+; CHECK-FP16-NEXT: mov r5, #0
+; CHECK-FP16-NEXT: orr r0, r0, r3
+; CHECK-FP16-NEXT: movwlt r5, #1
+; CHECK-FP16-NEXT: cmp r5, #0
+; CHECK-FP16-NEXT: moveq r10, r5
+; CHECK-FP16-NEXT: cmp r0, #0
+; CHECK-FP16-NEXT: moveq r10, r0
+; CHECK-FP16-NEXT: cmp r5, #0
+; CHECK-FP16-NEXT: movne r5, r1
+; CHECK-FP16-NEXT: cmp r0, #0
+; CHECK-FP16-NEXT: moveq r5, r0
+; CHECK-FP16-NEXT: cmp r5, #0
+; CHECK-FP16-NEXT: mov r0, r5
+; CHECK-FP16-NEXT: mov r1, #1
+; CHECK-FP16-NEXT: movne r0, r10
+; CHECK-FP16-NEXT: mov r8, #1
+; CHECK-FP16-NEXT: moveq r0, r10
+; CHECK-FP16-NEXT: cmp r2, #1
+; CHECK-FP16-NEXT: movlo r1, r2
+; CHECK-FP16-NEXT: cmp r3, #0
+; CHECK-FP16-NEXT: movpl r2, r8
+; CHECK-FP16-NEXT: mov r11, #0
+; CHECK-FP16-NEXT: moveq r2, r1
+; CHECK-FP16-NEXT: movpl r3, r11
+; CHECK-FP16-NEXT: rsbs r1, r2, #0
+; CHECK-FP16-NEXT: mov r7, #0
+; CHECK-FP16-NEXT: rscs r1, r3, #0
+; CHECK-FP16-NEXT: vmov.u16 r1, d8[1]
+; CHECK-FP16-NEXT: movwlt r7, #1
+; CHECK-FP16-NEXT: cmp r7, #0
+; CHECK-FP16-NEXT: moveq r10, r7
+; CHECK-FP16-NEXT: orrs r9, r2, r3
+; CHECK-FP16-NEXT: moveq r10, r0
+; CHECK-FP16-NEXT: vmov s0, r1
+; CHECK-FP16-NEXT: bl __fixhfti
+; CHECK-FP16-NEXT: eor r4, r2, #1
+; CHECK-FP16-NEXT: orr r6, r4, r3
+; CHECK-FP16-NEXT: subs r4, r2, #1
+; CHECK-FP16-NEXT: sbcs r4, r3, #0
+; CHECK-FP16-NEXT: mov r4, #0
+; CHECK-FP16-NEXT: movwlt r4, #1
+; CHECK-FP16-NEXT: cmp r4, #0
+; CHECK-FP16-NEXT: moveq r0, r4
+; CHECK-FP16-NEXT: cmp r6, #0
+; CHECK-FP16-NEXT: moveq r0, r6
+; CHECK-FP16-NEXT: cmp r4, #0
+; CHECK-FP16-NEXT: movne r4, r1
+; CHECK-FP16-NEXT: cmp r6, #0
+; CHECK-FP16-NEXT: moveq r4, r6
+; CHECK-FP16-NEXT: cmp r4, #0
+; CHECK-FP16-NEXT: mov r1, r4
+; CHECK-FP16-NEXT: mov r6, #1
+; CHECK-FP16-NEXT: movne r1, r0
+; CHECK-FP16-NEXT: moveq r1, r0
+; CHECK-FP16-NEXT: cmp r2, #1
+; CHECK-FP16-NEXT: movlo r6, r2
+; CHECK-FP16-NEXT: cmp r3, #0
+; CHECK-FP16-NEXT: movmi r8, r2
+; CHECK-FP16-NEXT: movpl r3, r11
+; CHECK-FP16-NEXT: moveq r8, r6
+; CHECK-FP16-NEXT: rsbs r2, r8, #0
+; CHECK-FP16-NEXT: rscs r2, r3, #0
+; CHECK-FP16-NEXT: movwlt r11, #1
+; CHECK-FP16-NEXT: cmp r11, #0
+; CHECK-FP16-NEXT: moveq r0, r11
+; CHECK-FP16-NEXT: orrs r2, r8, r3
+; CHECK-FP16-NEXT: moveq r0, r1
+; CHECK-FP16-NEXT: cmp r11, #0
+; CHECK-FP16-NEXT: movne r11, r4
+; CHECK-FP16-NEXT: cmp r2, #0
+; CHECK-FP16-NEXT: vmov.32 d1[0], r0
+; CHECK-FP16-NEXT: moveq r11, r4
+; CHECK-FP16-NEXT: cmp r7, #0
+; CHECK-FP16-NEXT: vmov.32 d0[0], r10
+; CHECK-FP16-NEXT: movne r7, r5
+; CHECK-FP16-NEXT: cmp r9, #0
+; CHECK-FP16-NEXT: vmov.32 d1[1], r11
+; CHECK-FP16-NEXT: moveq r7, r5
+; CHECK-FP16-NEXT: vmov.32 d0[1], r7
+; CHECK-FP16-NEXT: vpop {d8}
+; CHECK-FP16-NEXT: add sp, sp, #4
+; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+entry:
+ %conv = fptosi <2 x half> %x to <2 x i128>
+ %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)
+ %spec.store.select7 = call <2 x i128> @llvm.smax.v2i128(<2 x i128> %spec.store.select, <2 x i128> zeroinitializer)
+ %conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+declare <2 x i32> @llvm.smin.v2i32(<2 x i32>, <2 x i32>)
+declare <2 x i32> @llvm.smax.v2i32(<2 x i32>, <2 x i32>)
+declare <2 x i32> @llvm.umin.v2i32(<2 x i32>, <2 x i32>)
+declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.umin.v4i32(<4 x i32>, <4 x i32>)
+declare <8 x i32> @llvm.smin.v8i32(<8 x i32>, <8 x i32>)
+declare <8 x i32> @llvm.smax.v8i32(<8 x i32>, <8 x i32>)
+declare <8 x i32> @llvm.umin.v8i32(<8 x i32>, <8 x i32>)
+declare <2 x i64> @llvm.smin.v2i64(<2 x i64>, <2 x i64>)
+declare <2 x i64> @llvm.smax.v2i64(<2 x i64>, <2 x i64>)
+declare <2 x i64> @llvm.umin.v2i64(<2 x i64>, <2 x i64>)
+declare <4 x i64> @llvm.smin.v4i64(<4 x i64>, <4 x i64>)
+declare <4 x i64> @llvm.smax.v4i64(<4 x i64>, <4 x i64>)
+declare <4 x i64> @llvm.umin.v4i64(<4 x i64>, <4 x i64>)
+declare <2 x i128> @llvm.smin.v2i128(<2 x i128>, <2 x i128>)
+declare <2 x i128> @llvm.smax.v2i128(<2 x i128>, <2 x i128>)
+declare <2 x i128> @llvm.umin.v2i128(<2 x i128>, <2 x i128>)
diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat.ll b/llvm/test/CodeGen/RISCV/fpclamptosat.ll
new file mode 100644
index 0000000000000..f32dab798a4cf
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/fpclamptosat.ll
@@ -0,0 +1,4384 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+f -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV32,RV32IF %s
+; RUN: llc -mtriple=riscv64 -mattr=+f -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV64,RV64IF %s
+; RUN: llc -mtriple=riscv32 -mattr=+f,+d -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV32,RV32IFD %s
+; RUN: llc -mtriple=riscv64 -mattr=+f,+d -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV64,RV64IFD %s
+
+; i32 saturate
+
+define i32 @stest_f64i32(double %x) {
+; RV32-LABEL: stest_f64i32:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: call __fixdfdi at plt
+; RV32-NEXT: lui a2, 524288
+; RV32-NEXT: addi a3, a2, -1
+; RV32-NEXT: beqz a1, .LBB0_2
+; RV32-NEXT: # %bb.1: # %entry
+; RV32-NEXT: slti a4, a1, 0
+; RV32-NEXT: beqz a4, .LBB0_3
+; RV32-NEXT: j .LBB0_4
+; RV32-NEXT: .LBB0_2:
+; RV32-NEXT: sltu a4, a0, a3
+; RV32-NEXT: bnez a4, .LBB0_4
+; RV32-NEXT: .LBB0_3: # %entry
+; RV32-NEXT: li a1, 0
+; RV32-NEXT: mv a0, a3
+; RV32-NEXT: .LBB0_4: # %entry
+; RV32-NEXT: li a3, -1
+; RV32-NEXT: beq a1, a3, .LBB0_6
+; RV32-NEXT: # %bb.5: # %entry
+; RV32-NEXT: slt a1, a3, a1
+; RV32-NEXT: beqz a1, .LBB0_7
+; RV32-NEXT: j .LBB0_8
+; RV32-NEXT: .LBB0_6:
+; RV32-NEXT: sltu a1, a2, a0
+; RV32-NEXT: bnez a1, .LBB0_8
+; RV32-NEXT: .LBB0_7: # %entry
+; RV32-NEXT: lui a0, 524288
+; RV32-NEXT: .LBB0_8: # %entry
+; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64IF-LABEL: stest_f64i32:
+; RV64IF: # %bb.0: # %entry
+; RV64IF-NEXT: addi sp, sp, -16
+; RV64IF-NEXT: .cfi_def_cfa_offset 16
+; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64IF-NEXT: .cfi_offset ra, -8
+; RV64IF-NEXT: call __fixdfdi at plt
+; RV64IF-NEXT: lui a1, 524288
+; RV64IF-NEXT: addiw a2, a1, -1
+; RV64IF-NEXT: blt a0, a2, .LBB0_2
+; RV64IF-NEXT: # %bb.1: # %entry
+; RV64IF-NEXT: mv a0, a2
+; RV64IF-NEXT: .LBB0_2: # %entry
+; RV64IF-NEXT: blt a1, a0, .LBB0_4
+; RV64IF-NEXT: # %bb.3: # %entry
+; RV64IF-NEXT: lui a0, 524288
+; RV64IF-NEXT: .LBB0_4: # %entry
+; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IF-NEXT: addi sp, sp, 16
+; RV64IF-NEXT: ret
+;
+; RV64IFD-LABEL: stest_f64i32:
+; RV64IFD: # %bb.0: # %entry
+; RV64IFD-NEXT: fmv.d.x ft0, a0
+; RV64IFD-NEXT: fcvt.l.d a0, ft0, rtz
+; RV64IFD-NEXT: lui a1, 524288
+; RV64IFD-NEXT: addiw a2, a1, -1
+; RV64IFD-NEXT: bge a0, a2, .LBB0_3
+; RV64IFD-NEXT: # %bb.1: # %entry
+; RV64IFD-NEXT: bge a1, a0, .LBB0_4
+; RV64IFD-NEXT: .LBB0_2: # %entry
+; RV64IFD-NEXT: ret
+; RV64IFD-NEXT: .LBB0_3: # %entry
+; RV64IFD-NEXT: mv a0, a2
+; RV64IFD-NEXT: blt a1, a0, .LBB0_2
+; RV64IFD-NEXT: .LBB0_4: # %entry
+; RV64IFD-NEXT: lui a0, 524288
+; RV64IFD-NEXT: ret
+entry:
+ %conv = fptosi double %x to i64
+ %0 = icmp slt i64 %conv, 2147483647
+ %spec.store.select = select i1 %0, i64 %conv, i64 2147483647
+ %1 = icmp sgt i64 %spec.store.select, -2147483648
+ %spec.store.select7 = select i1 %1, i64 %spec.store.select, i64 -2147483648
+ %conv6 = trunc i64 %spec.store.select7 to i32
+ ret i32 %conv6
+}
+
+define i32 @utest_f64i32(double %x) {
+; RV32-LABEL: utest_f64i32:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: call __fixunsdfdi at plt
+; RV32-NEXT: beqz a1, .LBB1_2
+; RV32-NEXT: # %bb.1: # %entry
+; RV32-NEXT: li a1, 0
+; RV32-NEXT: beqz a1, .LBB1_3
+; RV32-NEXT: j .LBB1_4
+; RV32-NEXT: .LBB1_2:
+; RV32-NEXT: addi a1, a0, 1
+; RV32-NEXT: snez a1, a1
+; RV32-NEXT: bnez a1, .LBB1_4
+; RV32-NEXT: .LBB1_3: # %entry
+; RV32-NEXT: li a0, -1
+; RV32-NEXT: .LBB1_4: # %entry
+; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64IF-LABEL: utest_f64i32:
+; RV64IF: # %bb.0: # %entry
+; RV64IF-NEXT: addi sp, sp, -16
+; RV64IF-NEXT: .cfi_def_cfa_offset 16
+; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64IF-NEXT: .cfi_offset ra, -8
+; RV64IF-NEXT: call __fixunsdfdi at plt
+; RV64IF-NEXT: li a1, -1
+; RV64IF-NEXT: srli a1, a1, 32
+; RV64IF-NEXT: bltu a0, a1, .LBB1_2
+; RV64IF-NEXT: # %bb.1: # %entry
+; RV64IF-NEXT: mv a0, a1
+; RV64IF-NEXT: .LBB1_2: # %entry
+; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IF-NEXT: addi sp, sp, 16
+; RV64IF-NEXT: ret
+;
+; RV64IFD-LABEL: utest_f64i32:
+; RV64IFD: # %bb.0: # %entry
+; RV64IFD-NEXT: fmv.d.x ft0, a0
+; RV64IFD-NEXT: fcvt.lu.d a0, ft0, rtz
+; RV64IFD-NEXT: li a1, -1
+; RV64IFD-NEXT: srli a1, a1, 32
+; RV64IFD-NEXT: bltu a0, a1, .LBB1_2
+; RV64IFD-NEXT: # %bb.1: # %entry
+; RV64IFD-NEXT: mv a0, a1
+; RV64IFD-NEXT: .LBB1_2: # %entry
+; RV64IFD-NEXT: ret
+entry:
+ %conv = fptoui double %x to i64
+ %0 = icmp ult i64 %conv, 4294967295
+ %spec.store.select = select i1 %0, i64 %conv, i64 4294967295
+ %conv6 = trunc i64 %spec.store.select to i32
+ ret i32 %conv6
+}
+
+define i32 @ustest_f64i32(double %x) {
+; RV32-LABEL: ustest_f64i32:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: call __fixdfdi at plt
+; RV32-NEXT: beqz a1, .LBB2_2
+; RV32-NEXT: # %bb.1: # %entry
+; RV32-NEXT: slti a2, a1, 0
+; RV32-NEXT: beqz a2, .LBB2_3
+; RV32-NEXT: j .LBB2_4
+; RV32-NEXT: .LBB2_2:
+; RV32-NEXT: addi a2, a0, 1
+; RV32-NEXT: snez a2, a2
+; RV32-NEXT: bnez a2, .LBB2_4
+; RV32-NEXT: .LBB2_3: # %entry
+; RV32-NEXT: li a1, 0
+; RV32-NEXT: li a0, -1
+; RV32-NEXT: .LBB2_4: # %entry
+; RV32-NEXT: beqz a1, .LBB2_6
+; RV32-NEXT: # %bb.5: # %entry
+; RV32-NEXT: sgtz a1, a1
+; RV32-NEXT: beqz a1, .LBB2_7
+; RV32-NEXT: j .LBB2_8
+; RV32-NEXT: .LBB2_6:
+; RV32-NEXT: snez a1, a0
+; RV32-NEXT: bnez a1, .LBB2_8
+; RV32-NEXT: .LBB2_7: # %entry
+; RV32-NEXT: li a0, 0
+; RV32-NEXT: .LBB2_8: # %entry
+; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64IF-LABEL: ustest_f64i32:
+; RV64IF: # %bb.0: # %entry
+; RV64IF-NEXT: addi sp, sp, -16
+; RV64IF-NEXT: .cfi_def_cfa_offset 16
+; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64IF-NEXT: .cfi_offset ra, -8
+; RV64IF-NEXT: call __fixdfdi at plt
+; RV64IF-NEXT: li a1, -1
+; RV64IF-NEXT: srli a1, a1, 32
+; RV64IF-NEXT: blt a0, a1, .LBB2_2
+; RV64IF-NEXT: # %bb.1: # %entry
+; RV64IF-NEXT: mv a0, a1
+; RV64IF-NEXT: .LBB2_2: # %entry
+; RV64IF-NEXT: bgtz a0, .LBB2_4
+; RV64IF-NEXT: # %bb.3: # %entry
+; RV64IF-NEXT: li a0, 0
+; RV64IF-NEXT: .LBB2_4: # %entry
+; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IF-NEXT: addi sp, sp, 16
+; RV64IF-NEXT: ret
+;
+; RV64IFD-LABEL: ustest_f64i32:
+; RV64IFD: # %bb.0: # %entry
+; RV64IFD-NEXT: fmv.d.x ft0, a0
+; RV64IFD-NEXT: fcvt.l.d a0, ft0, rtz
+; RV64IFD-NEXT: li a1, -1
+; RV64IFD-NEXT: srli a1, a1, 32
+; RV64IFD-NEXT: bge a0, a1, .LBB2_3
+; RV64IFD-NEXT: # %bb.1: # %entry
+; RV64IFD-NEXT: blez a0, .LBB2_4
+; RV64IFD-NEXT: .LBB2_2: # %entry
+; RV64IFD-NEXT: ret
+; RV64IFD-NEXT: .LBB2_3: # %entry
+; RV64IFD-NEXT: mv a0, a1
+; RV64IFD-NEXT: bgtz a0, .LBB2_2
+; RV64IFD-NEXT: .LBB2_4: # %entry
+; RV64IFD-NEXT: li a0, 0
+; RV64IFD-NEXT: ret
+entry:
+ %conv = fptosi double %x to i64
+ %0 = icmp slt i64 %conv, 4294967295
+ %spec.store.select = select i1 %0, i64 %conv, i64 4294967295
+ %1 = icmp sgt i64 %spec.store.select, 0
+ %spec.store.select7 = select i1 %1, i64 %spec.store.select, i64 0
+ %conv6 = trunc i64 %spec.store.select7 to i32
+ ret i32 %conv6
+}
+
+define i32 @stest_f32i32(float %x) {
+; RV32-LABEL: stest_f32i32:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: call __fixsfdi at plt
+; RV32-NEXT: lui a2, 524288
+; RV32-NEXT: addi a3, a2, -1
+; RV32-NEXT: beqz a1, .LBB3_2
+; RV32-NEXT: # %bb.1: # %entry
+; RV32-NEXT: slti a4, a1, 0
+; RV32-NEXT: beqz a4, .LBB3_3
+; RV32-NEXT: j .LBB3_4
+; RV32-NEXT: .LBB3_2:
+; RV32-NEXT: sltu a4, a0, a3
+; RV32-NEXT: bnez a4, .LBB3_4
+; RV32-NEXT: .LBB3_3: # %entry
+; RV32-NEXT: li a1, 0
+; RV32-NEXT: mv a0, a3
+; RV32-NEXT: .LBB3_4: # %entry
+; RV32-NEXT: li a3, -1
+; RV32-NEXT: beq a1, a3, .LBB3_6
+; RV32-NEXT: # %bb.5: # %entry
+; RV32-NEXT: slt a1, a3, a1
+; RV32-NEXT: beqz a1, .LBB3_7
+; RV32-NEXT: j .LBB3_8
+; RV32-NEXT: .LBB3_6:
+; RV32-NEXT: sltu a1, a2, a0
+; RV32-NEXT: bnez a1, .LBB3_8
+; RV32-NEXT: .LBB3_7: # %entry
+; RV32-NEXT: lui a0, 524288
+; RV32-NEXT: .LBB3_8: # %entry
+; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: stest_f32i32:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: fmv.w.x ft0, a0
+; RV64-NEXT: fcvt.l.s a0, ft0, rtz
+; RV64-NEXT: lui a1, 524288
+; RV64-NEXT: addiw a2, a1, -1
+; RV64-NEXT: bge a0, a2, .LBB3_3
+; RV64-NEXT: # %bb.1: # %entry
+; RV64-NEXT: bge a1, a0, .LBB3_4
+; RV64-NEXT: .LBB3_2: # %entry
+; RV64-NEXT: ret
+; RV64-NEXT: .LBB3_3: # %entry
+; RV64-NEXT: mv a0, a2
+; RV64-NEXT: blt a1, a0, .LBB3_2
+; RV64-NEXT: .LBB3_4: # %entry
+; RV64-NEXT: lui a0, 524288
+; RV64-NEXT: ret
+entry:
+ %conv = fptosi float %x to i64
+ %0 = icmp slt i64 %conv, 2147483647
+ %spec.store.select = select i1 %0, i64 %conv, i64 2147483647
+ %1 = icmp sgt i64 %spec.store.select, -2147483648
+ %spec.store.select7 = select i1 %1, i64 %spec.store.select, i64 -2147483648
+ %conv6 = trunc i64 %spec.store.select7 to i32
+ ret i32 %conv6
+}
+
+define i32 @utest_f32i32(float %x) {
+; RV32-LABEL: utest_f32i32:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: call __fixunssfdi at plt
+; RV32-NEXT: beqz a1, .LBB4_2
+; RV32-NEXT: # %bb.1: # %entry
+; RV32-NEXT: li a1, 0
+; RV32-NEXT: beqz a1, .LBB4_3
+; RV32-NEXT: j .LBB4_4
+; RV32-NEXT: .LBB4_2:
+; RV32-NEXT: addi a1, a0, 1
+; RV32-NEXT: snez a1, a1
+; RV32-NEXT: bnez a1, .LBB4_4
+; RV32-NEXT: .LBB4_3: # %entry
+; RV32-NEXT: li a0, -1
+; RV32-NEXT: .LBB4_4: # %entry
+; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: utest_f32i32:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: fmv.w.x ft0, a0
+; RV64-NEXT: fcvt.lu.s a0, ft0, rtz
+; RV64-NEXT: li a1, -1
+; RV64-NEXT: srli a1, a1, 32
+; RV64-NEXT: bltu a0, a1, .LBB4_2
+; RV64-NEXT: # %bb.1: # %entry
+; RV64-NEXT: mv a0, a1
+; RV64-NEXT: .LBB4_2: # %entry
+; RV64-NEXT: ret
+entry:
+ %conv = fptoui float %x to i64
+ %0 = icmp ult i64 %conv, 4294967295
+ %spec.store.select = select i1 %0, i64 %conv, i64 4294967295
+ %conv6 = trunc i64 %spec.store.select to i32
+ ret i32 %conv6
+}
+
+; ustest: unsigned-saturating float -> i32 built from signed compares:
+; fptosi to i64, clamp to [0, 4294967295], then truncate to i32.
+; Assembly CHECK lines are autogenerated (update_llc_test_checks.py) - regenerate, do not hand-edit.
+define i32 @ustest_f32i32(float %x) {
+; RV32-LABEL: ustest_f32i32:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    call __fixsfdi at plt
+; RV32-NEXT:    beqz a1, .LBB5_2
+; RV32-NEXT:  # %bb.1: # %entry
+; RV32-NEXT:    slti a2, a1, 0
+; RV32-NEXT:    beqz a2, .LBB5_3
+; RV32-NEXT:    j .LBB5_4
+; RV32-NEXT:  .LBB5_2:
+; RV32-NEXT:    addi a2, a0, 1
+; RV32-NEXT:    snez a2, a2
+; RV32-NEXT:    bnez a2, .LBB5_4
+; RV32-NEXT:  .LBB5_3: # %entry
+; RV32-NEXT:    li a1, 0
+; RV32-NEXT:    li a0, -1
+; RV32-NEXT:  .LBB5_4: # %entry
+; RV32-NEXT:    beqz a1, .LBB5_6
+; RV32-NEXT:  # %bb.5: # %entry
+; RV32-NEXT:    sgtz a1, a1
+; RV32-NEXT:    beqz a1, .LBB5_7
+; RV32-NEXT:    j .LBB5_8
+; RV32-NEXT:  .LBB5_6:
+; RV32-NEXT:    snez a1, a0
+; RV32-NEXT:    bnez a1, .LBB5_8
+; RV32-NEXT:  .LBB5_7: # %entry
+; RV32-NEXT:    li a0, 0
+; RV32-NEXT:  .LBB5_8: # %entry
+; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: ustest_f32i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    fmv.w.x ft0, a0
+; RV64-NEXT:    fcvt.l.s a0, ft0, rtz
+; RV64-NEXT:    li a1, -1
+; RV64-NEXT:    srli a1, a1, 32
+; RV64-NEXT:    bge a0, a1, .LBB5_3
+; RV64-NEXT:  # %bb.1: # %entry
+; RV64-NEXT:    blez a0, .LBB5_4
+; RV64-NEXT:  .LBB5_2: # %entry
+; RV64-NEXT:    ret
+; RV64-NEXT:  .LBB5_3: # %entry
+; RV64-NEXT:    mv a0, a1
+; RV64-NEXT:    bgtz a0, .LBB5_2
+; RV64-NEXT:  .LBB5_4: # %entry
+; RV64-NEXT:    li a0, 0
+; RV64-NEXT:    ret
+entry:
+  %conv = fptosi float %x to i64
+  %0 = icmp slt i64 %conv, 4294967295
+  %spec.store.select = select i1 %0, i64 %conv, i64 4294967295
+  %1 = icmp sgt i64 %spec.store.select, 0
+  %spec.store.select7 = select i1 %1, i64 %spec.store.select, i64 0
+  %conv6 = trunc i64 %spec.store.select7 to i32
+  ret i32 %conv6
+}
+
+; stest: signed-saturating half -> i32: fptosi to i64, clamp to
+; [-2147483648, 2147483647], then truncate to i32. Half is widened via
+; the __gnu_h2f_ieee libcall before conversion on both RV32 and RV64.
+define i32 @stest_f16i32(half %x) {
+; RV32-LABEL: stest_f16i32:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    call __gnu_h2f_ieee at plt
+; RV32-NEXT:    call __fixsfdi at plt
+; RV32-NEXT:    lui a2, 524288
+; RV32-NEXT:    addi a3, a2, -1
+; RV32-NEXT:    beqz a1, .LBB6_2
+; RV32-NEXT:  # %bb.1: # %entry
+; RV32-NEXT:    slti a4, a1, 0
+; RV32-NEXT:    beqz a4, .LBB6_3
+; RV32-NEXT:    j .LBB6_4
+; RV32-NEXT:  .LBB6_2:
+; RV32-NEXT:    sltu a4, a0, a3
+; RV32-NEXT:    bnez a4, .LBB6_4
+; RV32-NEXT:  .LBB6_3: # %entry
+; RV32-NEXT:    li a1, 0
+; RV32-NEXT:    mv a0, a3
+; RV32-NEXT:  .LBB6_4: # %entry
+; RV32-NEXT:    li a3, -1
+; RV32-NEXT:    beq a1, a3, .LBB6_6
+; RV32-NEXT:  # %bb.5: # %entry
+; RV32-NEXT:    slt a1, a3, a1
+; RV32-NEXT:    beqz a1, .LBB6_7
+; RV32-NEXT:    j .LBB6_8
+; RV32-NEXT:  .LBB6_6:
+; RV32-NEXT:    sltu a1, a2, a0
+; RV32-NEXT:    bnez a1, .LBB6_8
+; RV32-NEXT:  .LBB6_7: # %entry
+; RV32-NEXT:    lui a0, 524288
+; RV32-NEXT:  .LBB6_8: # %entry
+; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: stest_f16i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT:    .cfi_offset ra, -8
+; RV64-NEXT:    call __gnu_h2f_ieee at plt
+; RV64-NEXT:    fmv.w.x ft0, a0
+; RV64-NEXT:    fcvt.l.s a0, ft0, rtz
+; RV64-NEXT:    lui a1, 524288
+; RV64-NEXT:    addiw a2, a1, -1
+; RV64-NEXT:    blt a0, a2, .LBB6_2
+; RV64-NEXT:  # %bb.1: # %entry
+; RV64-NEXT:    mv a0, a2
+; RV64-NEXT:  .LBB6_2: # %entry
+; RV64-NEXT:    blt a1, a0, .LBB6_4
+; RV64-NEXT:  # %bb.3: # %entry
+; RV64-NEXT:    lui a0, 524288
+; RV64-NEXT:  .LBB6_4: # %entry
+; RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+entry:
+  %conv = fptosi half %x to i64
+  %0 = icmp slt i64 %conv, 2147483647
+  %spec.store.select = select i1 %0, i64 %conv, i64 2147483647
+  %1 = icmp sgt i64 %spec.store.select, -2147483648
+  %spec.store.select7 = select i1 %1, i64 %spec.store.select, i64 -2147483648
+  %conv6 = trunc i64 %spec.store.select7 to i32
+  ret i32 %conv6
+}
+
+; utest: unsigned-saturating half -> i32: fptoui to i64, unsigned clamp
+; at 4294967295, then truncate to i32.
+define i32 @utesth_f16i32(half %x) {
+; RV32-LABEL: utesth_f16i32:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    call __gnu_h2f_ieee at plt
+; RV32-NEXT:    call __fixunssfdi at plt
+; RV32-NEXT:    beqz a1, .LBB7_2
+; RV32-NEXT:  # %bb.1: # %entry
+; RV32-NEXT:    li a1, 0
+; RV32-NEXT:    beqz a1, .LBB7_3
+; RV32-NEXT:    j .LBB7_4
+; RV32-NEXT:  .LBB7_2:
+; RV32-NEXT:    addi a1, a0, 1
+; RV32-NEXT:    snez a1, a1
+; RV32-NEXT:    bnez a1, .LBB7_4
+; RV32-NEXT:  .LBB7_3: # %entry
+; RV32-NEXT:    li a0, -1
+; RV32-NEXT:  .LBB7_4: # %entry
+; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: utesth_f16i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT:    .cfi_offset ra, -8
+; RV64-NEXT:    call __gnu_h2f_ieee at plt
+; RV64-NEXT:    fmv.w.x ft0, a0
+; RV64-NEXT:    fcvt.lu.s a0, ft0, rtz
+; RV64-NEXT:    li a1, -1
+; RV64-NEXT:    srli a1, a1, 32
+; RV64-NEXT:    bltu a0, a1, .LBB7_2
+; RV64-NEXT:  # %bb.1: # %entry
+; RV64-NEXT:    mv a0, a1
+; RV64-NEXT:  .LBB7_2: # %entry
+; RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+entry:
+  %conv = fptoui half %x to i64
+  %0 = icmp ult i64 %conv, 4294967295
+  %spec.store.select = select i1 %0, i64 %conv, i64 4294967295
+  %conv6 = trunc i64 %spec.store.select to i32
+  ret i32 %conv6
+}
+
+; ustest: unsigned-saturating half -> i32 built from signed compares:
+; fptosi to i64, clamp to [0, 4294967295], then truncate to i32.
+define i32 @ustest_f16i32(half %x) {
+; RV32-LABEL: ustest_f16i32:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    call __gnu_h2f_ieee at plt
+; RV32-NEXT:    call __fixsfdi at plt
+; RV32-NEXT:    beqz a1, .LBB8_2
+; RV32-NEXT:  # %bb.1: # %entry
+; RV32-NEXT:    slti a2, a1, 0
+; RV32-NEXT:    beqz a2, .LBB8_3
+; RV32-NEXT:    j .LBB8_4
+; RV32-NEXT:  .LBB8_2:
+; RV32-NEXT:    addi a2, a0, 1
+; RV32-NEXT:    snez a2, a2
+; RV32-NEXT:    bnez a2, .LBB8_4
+; RV32-NEXT:  .LBB8_3: # %entry
+; RV32-NEXT:    li a1, 0
+; RV32-NEXT:    li a0, -1
+; RV32-NEXT:  .LBB8_4: # %entry
+; RV32-NEXT:    beqz a1, .LBB8_6
+; RV32-NEXT:  # %bb.5: # %entry
+; RV32-NEXT:    sgtz a1, a1
+; RV32-NEXT:    beqz a1, .LBB8_7
+; RV32-NEXT:    j .LBB8_8
+; RV32-NEXT:  .LBB8_6:
+; RV32-NEXT:    snez a1, a0
+; RV32-NEXT:    bnez a1, .LBB8_8
+; RV32-NEXT:  .LBB8_7: # %entry
+; RV32-NEXT:    li a0, 0
+; RV32-NEXT:  .LBB8_8: # %entry
+; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: ustest_f16i32:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT:    .cfi_offset ra, -8
+; RV64-NEXT:    call __gnu_h2f_ieee at plt
+; RV64-NEXT:    fmv.w.x ft0, a0
+; RV64-NEXT:    fcvt.l.s a0, ft0, rtz
+; RV64-NEXT:    li a1, -1
+; RV64-NEXT:    srli a1, a1, 32
+; RV64-NEXT:    blt a0, a1, .LBB8_2
+; RV64-NEXT:  # %bb.1: # %entry
+; RV64-NEXT:    mv a0, a1
+; RV64-NEXT:  .LBB8_2: # %entry
+; RV64-NEXT:    bgtz a0, .LBB8_4
+; RV64-NEXT:  # %bb.3: # %entry
+; RV64-NEXT:    li a0, 0
+; RV64-NEXT:  .LBB8_4: # %entry
+; RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+entry:
+  %conv = fptosi half %x to i64
+  %0 = icmp slt i64 %conv, 4294967295
+  %spec.store.select = select i1 %0, i64 %conv, i64 4294967295
+  %1 = icmp sgt i64 %spec.store.select, 0
+  %spec.store.select7 = select i1 %1, i64 %spec.store.select, i64 0
+  %conv6 = trunc i64 %spec.store.select7 to i32
+  ret i32 %conv6
+}
+
+; i16 saturate
+
+; stest: signed-saturating double -> i16: fptosi to i32, clamp to
+; [-32768, 32767], then truncate to i16. Checked under four configs:
+; soft-double (RV32IF/RV64IF via __fixdfsi) and hard-double (RV32IFD/RV64IFD).
+define i16 @stest_f64i16(double %x) {
+; RV32IF-LABEL: stest_f64i16:
+; RV32IF:       # %bb.0: # %entry
+; RV32IF-NEXT:    addi sp, sp, -16
+; RV32IF-NEXT:    .cfi_def_cfa_offset 16
+; RV32IF-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    .cfi_offset ra, -4
+; RV32IF-NEXT:    call __fixdfsi at plt
+; RV32IF-NEXT:    lui a1, 8
+; RV32IF-NEXT:    addi a1, a1, -1
+; RV32IF-NEXT:    blt a0, a1, .LBB9_2
+; RV32IF-NEXT:  # %bb.1: # %entry
+; RV32IF-NEXT:    mv a0, a1
+; RV32IF-NEXT:  .LBB9_2: # %entry
+; RV32IF-NEXT:    lui a1, 1048568
+; RV32IF-NEXT:    blt a1, a0, .LBB9_4
+; RV32IF-NEXT:  # %bb.3: # %entry
+; RV32IF-NEXT:    lui a0, 1048568
+; RV32IF-NEXT:  .LBB9_4: # %entry
+; RV32IF-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    addi sp, sp, 16
+; RV32IF-NEXT:    ret
+;
+; RV64IF-LABEL: stest_f64i16:
+; RV64IF:       # %bb.0: # %entry
+; RV64IF-NEXT:    addi sp, sp, -16
+; RV64IF-NEXT:    .cfi_def_cfa_offset 16
+; RV64IF-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64IF-NEXT:    .cfi_offset ra, -8
+; RV64IF-NEXT:    call __fixdfsi at plt
+; RV64IF-NEXT:    lui a1, 8
+; RV64IF-NEXT:    addiw a1, a1, -1
+; RV64IF-NEXT:    blt a0, a1, .LBB9_2
+; RV64IF-NEXT:  # %bb.1: # %entry
+; RV64IF-NEXT:    mv a0, a1
+; RV64IF-NEXT:  .LBB9_2: # %entry
+; RV64IF-NEXT:    lui a1, 1048568
+; RV64IF-NEXT:    blt a1, a0, .LBB9_4
+; RV64IF-NEXT:  # %bb.3: # %entry
+; RV64IF-NEXT:    lui a0, 1048568
+; RV64IF-NEXT:  .LBB9_4: # %entry
+; RV64IF-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IF-NEXT:    addi sp, sp, 16
+; RV64IF-NEXT:    ret
+;
+; RV32IFD-LABEL: stest_f64i16:
+; RV32IFD:       # %bb.0: # %entry
+; RV32IFD-NEXT:    addi sp, sp, -16
+; RV32IFD-NEXT:    .cfi_def_cfa_offset 16
+; RV32IFD-NEXT:    sw a0, 8(sp)
+; RV32IFD-NEXT:    sw a1, 12(sp)
+; RV32IFD-NEXT:    fld ft0, 8(sp)
+; RV32IFD-NEXT:    fcvt.w.d a0, ft0, rtz
+; RV32IFD-NEXT:    lui a1, 8
+; RV32IFD-NEXT:    addi a1, a1, -1
+; RV32IFD-NEXT:    bge a0, a1, .LBB9_3
+; RV32IFD-NEXT:  # %bb.1: # %entry
+; RV32IFD-NEXT:    lui a1, 1048568
+; RV32IFD-NEXT:    bge a1, a0, .LBB9_4
+; RV32IFD-NEXT:  .LBB9_2: # %entry
+; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB9_3: # %entry
+; RV32IFD-NEXT:    mv a0, a1
+; RV32IFD-NEXT:    lui a1, 1048568
+; RV32IFD-NEXT:    blt a1, a0, .LBB9_2
+; RV32IFD-NEXT:  .LBB9_4: # %entry
+; RV32IFD-NEXT:    lui a0, 1048568
+; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    ret
+;
+; RV64IFD-LABEL: stest_f64i16:
+; RV64IFD:       # %bb.0: # %entry
+; RV64IFD-NEXT:    fmv.d.x ft0, a0
+; RV64IFD-NEXT:    fcvt.w.d a0, ft0, rtz
+; RV64IFD-NEXT:    lui a1, 8
+; RV64IFD-NEXT:    addiw a1, a1, -1
+; RV64IFD-NEXT:    bge a0, a1, .LBB9_3
+; RV64IFD-NEXT:  # %bb.1: # %entry
+; RV64IFD-NEXT:    lui a1, 1048568
+; RV64IFD-NEXT:    bge a1, a0, .LBB9_4
+; RV64IFD-NEXT:  .LBB9_2: # %entry
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB9_3: # %entry
+; RV64IFD-NEXT:    mv a0, a1
+; RV64IFD-NEXT:    lui a1, 1048568
+; RV64IFD-NEXT:    blt a1, a0, .LBB9_2
+; RV64IFD-NEXT:  .LBB9_4: # %entry
+; RV64IFD-NEXT:    lui a0, 1048568
+; RV64IFD-NEXT:    ret
+entry:
+  %conv = fptosi double %x to i32
+  %0 = icmp slt i32 %conv, 32767
+  %spec.store.select = select i1 %0, i32 %conv, i32 32767
+  %1 = icmp sgt i32 %spec.store.select, -32768
+  %spec.store.select7 = select i1 %1, i32 %spec.store.select, i32 -32768
+  %conv6 = trunc i32 %spec.store.select7 to i16
+  ret i16 %conv6
+}
+
+; utest: unsigned-saturating double -> i16: fptoui to i32, unsigned
+; clamp at 65535, then truncate to i16.
+define i16 @utest_f64i16(double %x) {
+; RV32IF-LABEL: utest_f64i16:
+; RV32IF:       # %bb.0: # %entry
+; RV32IF-NEXT:    addi sp, sp, -16
+; RV32IF-NEXT:    .cfi_def_cfa_offset 16
+; RV32IF-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    .cfi_offset ra, -4
+; RV32IF-NEXT:    call __fixunsdfsi at plt
+; RV32IF-NEXT:    lui a1, 16
+; RV32IF-NEXT:    addi a1, a1, -1
+; RV32IF-NEXT:    bltu a0, a1, .LBB10_2
+; RV32IF-NEXT:  # %bb.1: # %entry
+; RV32IF-NEXT:    mv a0, a1
+; RV32IF-NEXT:  .LBB10_2: # %entry
+; RV32IF-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    addi sp, sp, 16
+; RV32IF-NEXT:    ret
+;
+; RV64IF-LABEL: utest_f64i16:
+; RV64IF:       # %bb.0: # %entry
+; RV64IF-NEXT:    addi sp, sp, -16
+; RV64IF-NEXT:    .cfi_def_cfa_offset 16
+; RV64IF-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64IF-NEXT:    .cfi_offset ra, -8
+; RV64IF-NEXT:    call __fixunsdfsi at plt
+; RV64IF-NEXT:    lui a1, 16
+; RV64IF-NEXT:    addiw a1, a1, -1
+; RV64IF-NEXT:    bltu a0, a1, .LBB10_2
+; RV64IF-NEXT:  # %bb.1: # %entry
+; RV64IF-NEXT:    mv a0, a1
+; RV64IF-NEXT:  .LBB10_2: # %entry
+; RV64IF-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IF-NEXT:    addi sp, sp, 16
+; RV64IF-NEXT:    ret
+;
+; RV32IFD-LABEL: utest_f64i16:
+; RV32IFD:       # %bb.0: # %entry
+; RV32IFD-NEXT:    addi sp, sp, -16
+; RV32IFD-NEXT:    .cfi_def_cfa_offset 16
+; RV32IFD-NEXT:    sw a0, 8(sp)
+; RV32IFD-NEXT:    sw a1, 12(sp)
+; RV32IFD-NEXT:    fld ft0, 8(sp)
+; RV32IFD-NEXT:    fcvt.wu.d a0, ft0, rtz
+; RV32IFD-NEXT:    lui a1, 16
+; RV32IFD-NEXT:    addi a1, a1, -1
+; RV32IFD-NEXT:    bltu a0, a1, .LBB10_2
+; RV32IFD-NEXT:  # %bb.1: # %entry
+; RV32IFD-NEXT:    mv a0, a1
+; RV32IFD-NEXT:  .LBB10_2: # %entry
+; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    ret
+;
+; RV64IFD-LABEL: utest_f64i16:
+; RV64IFD:       # %bb.0: # %entry
+; RV64IFD-NEXT:    fmv.d.x ft0, a0
+; RV64IFD-NEXT:    fcvt.wu.d a0, ft0, rtz
+; RV64IFD-NEXT:    lui a1, 16
+; RV64IFD-NEXT:    addiw a1, a1, -1
+; RV64IFD-NEXT:    bltu a0, a1, .LBB10_2
+; RV64IFD-NEXT:  # %bb.1: # %entry
+; RV64IFD-NEXT:    mv a0, a1
+; RV64IFD-NEXT:  .LBB10_2: # %entry
+; RV64IFD-NEXT:    ret
+entry:
+  %conv = fptoui double %x to i32
+  %0 = icmp ult i32 %conv, 65535
+  %spec.store.select = select i1 %0, i32 %conv, i32 65535
+  %conv6 = trunc i32 %spec.store.select to i16
+  ret i16 %conv6
+}
+
+; ustest: unsigned-saturating double -> i16 built from signed compares:
+; fptosi to i32, clamp to [0, 65535], then truncate to i16.
+define i16 @ustest_f64i16(double %x) {
+; RV32IF-LABEL: ustest_f64i16:
+; RV32IF:       # %bb.0: # %entry
+; RV32IF-NEXT:    addi sp, sp, -16
+; RV32IF-NEXT:    .cfi_def_cfa_offset 16
+; RV32IF-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    .cfi_offset ra, -4
+; RV32IF-NEXT:    call __fixdfsi at plt
+; RV32IF-NEXT:    lui a1, 16
+; RV32IF-NEXT:    addi a1, a1, -1
+; RV32IF-NEXT:    blt a0, a1, .LBB11_2
+; RV32IF-NEXT:  # %bb.1: # %entry
+; RV32IF-NEXT:    mv a0, a1
+; RV32IF-NEXT:  .LBB11_2: # %entry
+; RV32IF-NEXT:    bgtz a0, .LBB11_4
+; RV32IF-NEXT:  # %bb.3: # %entry
+; RV32IF-NEXT:    li a0, 0
+; RV32IF-NEXT:  .LBB11_4: # %entry
+; RV32IF-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    addi sp, sp, 16
+; RV32IF-NEXT:    ret
+;
+; RV64IF-LABEL: ustest_f64i16:
+; RV64IF:       # %bb.0: # %entry
+; RV64IF-NEXT:    addi sp, sp, -16
+; RV64IF-NEXT:    .cfi_def_cfa_offset 16
+; RV64IF-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64IF-NEXT:    .cfi_offset ra, -8
+; RV64IF-NEXT:    call __fixdfsi at plt
+; RV64IF-NEXT:    lui a1, 16
+; RV64IF-NEXT:    addiw a1, a1, -1
+; RV64IF-NEXT:    blt a0, a1, .LBB11_2
+; RV64IF-NEXT:  # %bb.1: # %entry
+; RV64IF-NEXT:    mv a0, a1
+; RV64IF-NEXT:  .LBB11_2: # %entry
+; RV64IF-NEXT:    bgtz a0, .LBB11_4
+; RV64IF-NEXT:  # %bb.3: # %entry
+; RV64IF-NEXT:    li a0, 0
+; RV64IF-NEXT:  .LBB11_4: # %entry
+; RV64IF-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IF-NEXT:    addi sp, sp, 16
+; RV64IF-NEXT:    ret
+;
+; RV32IFD-LABEL: ustest_f64i16:
+; RV32IFD:       # %bb.0: # %entry
+; RV32IFD-NEXT:    addi sp, sp, -16
+; RV32IFD-NEXT:    .cfi_def_cfa_offset 16
+; RV32IFD-NEXT:    sw a0, 8(sp)
+; RV32IFD-NEXT:    sw a1, 12(sp)
+; RV32IFD-NEXT:    fld ft0, 8(sp)
+; RV32IFD-NEXT:    fcvt.w.d a0, ft0, rtz
+; RV32IFD-NEXT:    lui a1, 16
+; RV32IFD-NEXT:    addi a1, a1, -1
+; RV32IFD-NEXT:    bge a0, a1, .LBB11_3
+; RV32IFD-NEXT:  # %bb.1: # %entry
+; RV32IFD-NEXT:    blez a0, .LBB11_4
+; RV32IFD-NEXT:  .LBB11_2: # %entry
+; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB11_3: # %entry
+; RV32IFD-NEXT:    mv a0, a1
+; RV32IFD-NEXT:    bgtz a0, .LBB11_2
+; RV32IFD-NEXT:  .LBB11_4: # %entry
+; RV32IFD-NEXT:    li a0, 0
+; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    ret
+;
+; RV64IFD-LABEL: ustest_f64i16:
+; RV64IFD:       # %bb.0: # %entry
+; RV64IFD-NEXT:    fmv.d.x ft0, a0
+; RV64IFD-NEXT:    fcvt.w.d a0, ft0, rtz
+; RV64IFD-NEXT:    lui a1, 16
+; RV64IFD-NEXT:    addiw a1, a1, -1
+; RV64IFD-NEXT:    bge a0, a1, .LBB11_3
+; RV64IFD-NEXT:  # %bb.1: # %entry
+; RV64IFD-NEXT:    blez a0, .LBB11_4
+; RV64IFD-NEXT:  .LBB11_2: # %entry
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB11_3: # %entry
+; RV64IFD-NEXT:    mv a0, a1
+; RV64IFD-NEXT:    bgtz a0, .LBB11_2
+; RV64IFD-NEXT:  .LBB11_4: # %entry
+; RV64IFD-NEXT:    li a0, 0
+; RV64IFD-NEXT:    ret
+entry:
+  %conv = fptosi double %x to i32
+  %0 = icmp slt i32 %conv, 65535
+  %spec.store.select = select i1 %0, i32 %conv, i32 65535
+  %1 = icmp sgt i32 %spec.store.select, 0
+  %spec.store.select7 = select i1 %1, i32 %spec.store.select, i32 0
+  %conv6 = trunc i32 %spec.store.select7 to i16
+  ret i16 %conv6
+}
+
+; stest: signed-saturating float -> i16: fptosi to i32, clamp to
+; [-32768, 32767], then truncate to i16.
+define i16 @stest_f32i16(float %x) {
+; RV32-LABEL: stest_f32i16:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    fmv.w.x ft0, a0
+; RV32-NEXT:    fcvt.w.s a0, ft0, rtz
+; RV32-NEXT:    lui a1, 8
+; RV32-NEXT:    addi a1, a1, -1
+; RV32-NEXT:    bge a0, a1, .LBB12_3
+; RV32-NEXT:  # %bb.1: # %entry
+; RV32-NEXT:    lui a1, 1048568
+; RV32-NEXT:    bge a1, a0, .LBB12_4
+; RV32-NEXT:  .LBB12_2: # %entry
+; RV32-NEXT:    ret
+; RV32-NEXT:  .LBB12_3: # %entry
+; RV32-NEXT:    mv a0, a1
+; RV32-NEXT:    lui a1, 1048568
+; RV32-NEXT:    blt a1, a0, .LBB12_2
+; RV32-NEXT:  .LBB12_4: # %entry
+; RV32-NEXT:    lui a0, 1048568
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: stest_f32i16:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    fmv.w.x ft0, a0
+; RV64-NEXT:    fcvt.w.s a0, ft0, rtz
+; RV64-NEXT:    lui a1, 8
+; RV64-NEXT:    addiw a1, a1, -1
+; RV64-NEXT:    bge a0, a1, .LBB12_3
+; RV64-NEXT:  # %bb.1: # %entry
+; RV64-NEXT:    lui a1, 1048568
+; RV64-NEXT:    bge a1, a0, .LBB12_4
+; RV64-NEXT:  .LBB12_2: # %entry
+; RV64-NEXT:    ret
+; RV64-NEXT:  .LBB12_3: # %entry
+; RV64-NEXT:    mv a0, a1
+; RV64-NEXT:    lui a1, 1048568
+; RV64-NEXT:    blt a1, a0, .LBB12_2
+; RV64-NEXT:  .LBB12_4: # %entry
+; RV64-NEXT:    lui a0, 1048568
+; RV64-NEXT:    ret
+entry:
+  %conv = fptosi float %x to i32
+  %0 = icmp slt i32 %conv, 32767
+  %spec.store.select = select i1 %0, i32 %conv, i32 32767
+  %1 = icmp sgt i32 %spec.store.select, -32768
+  %spec.store.select7 = select i1 %1, i32 %spec.store.select, i32 -32768
+  %conv6 = trunc i32 %spec.store.select7 to i16
+  ret i16 %conv6
+}
+
+; utest: unsigned-saturating float -> i16: fptoui to i32, unsigned clamp
+; at 65535, then truncate to i16.
+define i16 @utest_f32i16(float %x) {
+; RV32-LABEL: utest_f32i16:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    fmv.w.x ft0, a0
+; RV32-NEXT:    fcvt.wu.s a0, ft0, rtz
+; RV32-NEXT:    lui a1, 16
+; RV32-NEXT:    addi a1, a1, -1
+; RV32-NEXT:    bltu a0, a1, .LBB13_2
+; RV32-NEXT:  # %bb.1: # %entry
+; RV32-NEXT:    mv a0, a1
+; RV32-NEXT:  .LBB13_2: # %entry
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: utest_f32i16:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    fmv.w.x ft0, a0
+; RV64-NEXT:    fcvt.wu.s a0, ft0, rtz
+; RV64-NEXT:    lui a1, 16
+; RV64-NEXT:    addiw a1, a1, -1
+; RV64-NEXT:    bltu a0, a1, .LBB13_2
+; RV64-NEXT:  # %bb.1: # %entry
+; RV64-NEXT:    mv a0, a1
+; RV64-NEXT:  .LBB13_2: # %entry
+; RV64-NEXT:    ret
+entry:
+  %conv = fptoui float %x to i32
+  %0 = icmp ult i32 %conv, 65535
+  %spec.store.select = select i1 %0, i32 %conv, i32 65535
+  %conv6 = trunc i32 %spec.store.select to i16
+  ret i16 %conv6
+}
+
+; ustest: unsigned-saturating float -> i16 built from signed compares:
+; fptosi to i32, clamp to [0, 65535], then truncate to i16.
+define i16 @ustest_f32i16(float %x) {
+; RV32-LABEL: ustest_f32i16:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    fmv.w.x ft0, a0
+; RV32-NEXT:    fcvt.w.s a0, ft0, rtz
+; RV32-NEXT:    lui a1, 16
+; RV32-NEXT:    addi a1, a1, -1
+; RV32-NEXT:    bge a0, a1, .LBB14_3
+; RV32-NEXT:  # %bb.1: # %entry
+; RV32-NEXT:    blez a0, .LBB14_4
+; RV32-NEXT:  .LBB14_2: # %entry
+; RV32-NEXT:    ret
+; RV32-NEXT:  .LBB14_3: # %entry
+; RV32-NEXT:    mv a0, a1
+; RV32-NEXT:    bgtz a0, .LBB14_2
+; RV32-NEXT:  .LBB14_4: # %entry
+; RV32-NEXT:    li a0, 0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: ustest_f32i16:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    fmv.w.x ft0, a0
+; RV64-NEXT:    fcvt.w.s a0, ft0, rtz
+; RV64-NEXT:    lui a1, 16
+; RV64-NEXT:    addiw a1, a1, -1
+; RV64-NEXT:    bge a0, a1, .LBB14_3
+; RV64-NEXT:  # %bb.1: # %entry
+; RV64-NEXT:    blez a0, .LBB14_4
+; RV64-NEXT:  .LBB14_2: # %entry
+; RV64-NEXT:    ret
+; RV64-NEXT:  .LBB14_3: # %entry
+; RV64-NEXT:    mv a0, a1
+; RV64-NEXT:    bgtz a0, .LBB14_2
+; RV64-NEXT:  .LBB14_4: # %entry
+; RV64-NEXT:    li a0, 0
+; RV64-NEXT:    ret
+entry:
+  %conv = fptosi float %x to i32
+  %0 = icmp slt i32 %conv, 65535
+  %spec.store.select = select i1 %0, i32 %conv, i32 65535
+  %1 = icmp sgt i32 %spec.store.select, 0
+  %spec.store.select7 = select i1 %1, i32 %spec.store.select, i32 0
+  %conv6 = trunc i32 %spec.store.select7 to i16
+  ret i16 %conv6
+}
+
+; stest: signed-saturating half -> i16: fptosi to i32, clamp to
+; [-32768, 32767], then truncate to i16.
+define i16 @stest_f16i16(half %x) {
+; RV32-LABEL: stest_f16i16:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    call __gnu_h2f_ieee at plt
+; RV32-NEXT:    fmv.w.x ft0, a0
+; RV32-NEXT:    fcvt.w.s a0, ft0, rtz
+; RV32-NEXT:    lui a1, 8
+; RV32-NEXT:    addi a1, a1, -1
+; RV32-NEXT:    blt a0, a1, .LBB15_2
+; RV32-NEXT:  # %bb.1: # %entry
+; RV32-NEXT:    mv a0, a1
+; RV32-NEXT:  .LBB15_2: # %entry
+; RV32-NEXT:    lui a1, 1048568
+; RV32-NEXT:    blt a1, a0, .LBB15_4
+; RV32-NEXT:  # %bb.3: # %entry
+; RV32-NEXT:    lui a0, 1048568
+; RV32-NEXT:  .LBB15_4: # %entry
+; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: stest_f16i16:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT:    .cfi_offset ra, -8
+; RV64-NEXT:    call __gnu_h2f_ieee at plt
+; RV64-NEXT:    fmv.w.x ft0, a0
+; RV64-NEXT:    fcvt.l.s a0, ft0, rtz
+; RV64-NEXT:    lui a1, 8
+; RV64-NEXT:    addiw a1, a1, -1
+; RV64-NEXT:    blt a0, a1, .LBB15_2
+; RV64-NEXT:  # %bb.1: # %entry
+; RV64-NEXT:    mv a0, a1
+; RV64-NEXT:  .LBB15_2: # %entry
+; RV64-NEXT:    lui a1, 1048568
+; RV64-NEXT:    blt a1, a0, .LBB15_4
+; RV64-NEXT:  # %bb.3: # %entry
+; RV64-NEXT:    lui a0, 1048568
+; RV64-NEXT:  .LBB15_4: # %entry
+; RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+entry:
+  %conv = fptosi half %x to i32
+  %0 = icmp slt i32 %conv, 32767
+  %spec.store.select = select i1 %0, i32 %conv, i32 32767
+  %1 = icmp sgt i32 %spec.store.select, -32768
+  %spec.store.select7 = select i1 %1, i32 %spec.store.select, i32 -32768
+  %conv6 = trunc i32 %spec.store.select7 to i16
+  ret i16 %conv6
+}
+
+; utest: unsigned-saturating half -> i16: fptoui to i32, unsigned clamp
+; at 65535, then truncate to i16.
+define i16 @utesth_f16i16(half %x) {
+; RV32-LABEL: utesth_f16i16:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    call __gnu_h2f_ieee at plt
+; RV32-NEXT:    fmv.w.x ft0, a0
+; RV32-NEXT:    fcvt.wu.s a0, ft0, rtz
+; RV32-NEXT:    lui a1, 16
+; RV32-NEXT:    addi a1, a1, -1
+; RV32-NEXT:    bltu a0, a1, .LBB16_2
+; RV32-NEXT:  # %bb.1: # %entry
+; RV32-NEXT:    mv a0, a1
+; RV32-NEXT:  .LBB16_2: # %entry
+; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: utesth_f16i16:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT:    .cfi_offset ra, -8
+; RV64-NEXT:    call __gnu_h2f_ieee at plt
+; RV64-NEXT:    fmv.w.x ft0, a0
+; RV64-NEXT:    fcvt.lu.s a0, ft0, rtz
+; RV64-NEXT:    sext.w a2, a0
+; RV64-NEXT:    lui a1, 16
+; RV64-NEXT:    addiw a1, a1, -1
+; RV64-NEXT:    bltu a2, a1, .LBB16_2
+; RV64-NEXT:  # %bb.1: # %entry
+; RV64-NEXT:    mv a0, a1
+; RV64-NEXT:  .LBB16_2: # %entry
+; RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+entry:
+  %conv = fptoui half %x to i32
+  %0 = icmp ult i32 %conv, 65535
+  %spec.store.select = select i1 %0, i32 %conv, i32 65535
+  %conv6 = trunc i32 %spec.store.select to i16
+  ret i16 %conv6
+}
+
+; ustest: unsigned-saturating half -> i16 built from signed compares:
+; fptosi to i32, clamp to [0, 65535], then truncate to i16.
+define i16 @ustest_f16i16(half %x) {
+; RV32-LABEL: ustest_f16i16:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    call __gnu_h2f_ieee at plt
+; RV32-NEXT:    fmv.w.x ft0, a0
+; RV32-NEXT:    fcvt.w.s a0, ft0, rtz
+; RV32-NEXT:    lui a1, 16
+; RV32-NEXT:    addi a1, a1, -1
+; RV32-NEXT:    blt a0, a1, .LBB17_2
+; RV32-NEXT:  # %bb.1: # %entry
+; RV32-NEXT:    mv a0, a1
+; RV32-NEXT:  .LBB17_2: # %entry
+; RV32-NEXT:    bgtz a0, .LBB17_4
+; RV32-NEXT:  # %bb.3: # %entry
+; RV32-NEXT:    li a0, 0
+; RV32-NEXT:  .LBB17_4: # %entry
+; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: ustest_f16i16:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT:    .cfi_offset ra, -8
+; RV64-NEXT:    call __gnu_h2f_ieee at plt
+; RV64-NEXT:    fmv.w.x ft0, a0
+; RV64-NEXT:    fcvt.l.s a0, ft0, rtz
+; RV64-NEXT:    lui a1, 16
+; RV64-NEXT:    addiw a1, a1, -1
+; RV64-NEXT:    blt a0, a1, .LBB17_2
+; RV64-NEXT:  # %bb.1: # %entry
+; RV64-NEXT:    mv a0, a1
+; RV64-NEXT:  .LBB17_2: # %entry
+; RV64-NEXT:    bgtz a0, .LBB17_4
+; RV64-NEXT:  # %bb.3: # %entry
+; RV64-NEXT:    li a0, 0
+; RV64-NEXT:  .LBB17_4: # %entry
+; RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+entry:
+  %conv = fptosi half %x to i32
+  %0 = icmp slt i32 %conv, 65535
+  %spec.store.select = select i1 %0, i32 %conv, i32 65535
+  %1 = icmp sgt i32 %spec.store.select, 0
+  %spec.store.select7 = select i1 %1, i32 %spec.store.select, i32 0
+  %conv6 = trunc i32 %spec.store.select7 to i16
+  ret i16 %conv6
+}
+
+; i64 saturate
+
+; stest: signed-saturating double -> i64: fptosi to i128 (via the
+; __fixdfti libcall; on RV32 the i128 result is returned indirectly on
+; the stack), clamp to [i64 min, i64 max], then truncate to i64.
+define i64 @stest_f64i64(double %x) {
+; RV32-LABEL: stest_f64i64:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    addi sp, sp, -32
+; RV32-NEXT:    .cfi_def_cfa_offset 32
+; RV32-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    mv a2, a1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    call __fixdfti at plt
+; RV32-NEXT:    lw a2, 20(sp)
+; RV32-NEXT:    lw t0, 16(sp)
+; RV32-NEXT:    lw a1, 12(sp)
+; RV32-NEXT:    lw a0, 8(sp)
+; RV32-NEXT:    lui a7, 524288
+; RV32-NEXT:    addi a5, a7, -1
+; RV32-NEXT:    beq a1, a5, .LBB18_2
+; RV32-NEXT:  # %bb.1: # %entry
+; RV32-NEXT:    sltu a4, a1, a5
+; RV32-NEXT:    or a3, t0, a2
+; RV32-NEXT:    bnez a3, .LBB18_3
+; RV32-NEXT:    j .LBB18_4
+; RV32-NEXT:  .LBB18_2:
+; RV32-NEXT:    addi a4, a0, 1
+; RV32-NEXT:    snez a4, a4
+; RV32-NEXT:    or a3, t0, a2
+; RV32-NEXT:    beqz a3, .LBB18_4
+; RV32-NEXT:  .LBB18_3: # %entry
+; RV32-NEXT:    slti a4, a2, 0
+; RV32-NEXT:  .LBB18_4: # %entry
+; RV32-NEXT:    li a6, -1
+; RV32-NEXT:    beqz a4, .LBB18_7
+; RV32-NEXT:  # %bb.5: # %entry
+; RV32-NEXT:    beq a1, a7, .LBB18_8
+; RV32-NEXT:  .LBB18_6: # %entry
+; RV32-NEXT:    sltu a4, a7, a1
+; RV32-NEXT:    and a3, t0, a2
+; RV32-NEXT:    bne a3, a6, .LBB18_9
+; RV32-NEXT:    j .LBB18_10
+; RV32-NEXT:  .LBB18_7: # %entry
+; RV32-NEXT:    li a2, 0
+; RV32-NEXT:    li t0, 0
+; RV32-NEXT:    li a0, -1
+; RV32-NEXT:    mv a1, a5
+; RV32-NEXT:    bne a1, a7, .LBB18_6
+; RV32-NEXT:  .LBB18_8:
+; RV32-NEXT:    snez a4, a0
+; RV32-NEXT:    and a3, t0, a2
+; RV32-NEXT:    beq a3, a6, .LBB18_10
+; RV32-NEXT:  .LBB18_9: # %entry
+; RV32-NEXT:    slt a4, a6, a2
+; RV32-NEXT:  .LBB18_10: # %entry
+; RV32-NEXT:    bnez a4, .LBB18_12
+; RV32-NEXT:  # %bb.11: # %entry
+; RV32-NEXT:    li a0, 0
+; RV32-NEXT:    lui a1, 524288
+; RV32-NEXT:  .LBB18_12: # %entry
+; RV32-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 32
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: stest_f64i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT:    .cfi_offset ra, -8
+; RV64-NEXT:    call __fixdfti at plt
+; RV64-NEXT:    li a2, -1
+; RV64-NEXT:    srli a3, a2, 1
+; RV64-NEXT:    beqz a1, .LBB18_2
+; RV64-NEXT:  # %bb.1: # %entry
+; RV64-NEXT:    slti a4, a1, 0
+; RV64-NEXT:    beqz a4, .LBB18_3
+; RV64-NEXT:    j .LBB18_4
+; RV64-NEXT:  .LBB18_2:
+; RV64-NEXT:    sltu a4, a0, a3
+; RV64-NEXT:    bnez a4, .LBB18_4
+; RV64-NEXT:  .LBB18_3: # %entry
+; RV64-NEXT:    li a1, 0
+; RV64-NEXT:    mv a0, a3
+; RV64-NEXT:  .LBB18_4: # %entry
+; RV64-NEXT:    slli a3, a2, 63
+; RV64-NEXT:    beq a1, a2, .LBB18_6
+; RV64-NEXT:  # %bb.5: # %entry
+; RV64-NEXT:    slt a1, a2, a1
+; RV64-NEXT:    beqz a1, .LBB18_7
+; RV64-NEXT:    j .LBB18_8
+; RV64-NEXT:  .LBB18_6:
+; RV64-NEXT:    sltu a1, a3, a0
+; RV64-NEXT:    bnez a1, .LBB18_8
+; RV64-NEXT:  .LBB18_7: # %entry
+; RV64-NEXT:    mv a0, a3
+; RV64-NEXT:  .LBB18_8: # %entry
+; RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+entry:
+  %conv = fptosi double %x to i128
+  %0 = icmp slt i128 %conv, 9223372036854775807
+  %spec.store.select = select i1 %0, i128 %conv, i128 9223372036854775807
+  %1 = icmp sgt i128 %spec.store.select, -9223372036854775808
+  %spec.store.select7 = select i1 %1, i128 %spec.store.select, i128 -9223372036854775808
+  %conv6 = trunc i128 %spec.store.select7 to i64
+  ret i64 %conv6
+}
+
+; utest: double -> i64 via fptoui to i128, unsigned clamp at 2^64
+; (18446744073709551616), then truncate to i64. Note the clamp constant
+; itself truncates to 0, which the generated code must respect.
+define i64 @utest_f64i64(double %x) {
+; RV32-LABEL: utest_f64i64:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    addi sp, sp, -32
+; RV32-NEXT:    .cfi_def_cfa_offset 32
+; RV32-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    mv a2, a1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    call __fixunsdfti at plt
+; RV32-NEXT:    lw a0, 20(sp)
+; RV32-NEXT:    lw a1, 16(sp)
+; RV32-NEXT:    beqz a0, .LBB19_2
+; RV32-NEXT:  # %bb.1: # %entry
+; RV32-NEXT:    li a2, 0
+; RV32-NEXT:    j .LBB19_3
+; RV32-NEXT:  .LBB19_2:
+; RV32-NEXT:    seqz a2, a1
+; RV32-NEXT:  .LBB19_3: # %entry
+; RV32-NEXT:    xori a1, a1, 1
+; RV32-NEXT:    or a1, a1, a0
+; RV32-NEXT:    li a0, 0
+; RV32-NEXT:    beqz a1, .LBB19_5
+; RV32-NEXT:  # %bb.4: # %entry
+; RV32-NEXT:    mv a0, a2
+; RV32-NEXT:  .LBB19_5: # %entry
+; RV32-NEXT:    bnez a0, .LBB19_7
+; RV32-NEXT:  # %bb.6: # %entry
+; RV32-NEXT:    li a0, 0
+; RV32-NEXT:    li a1, 0
+; RV32-NEXT:    j .LBB19_8
+; RV32-NEXT:  .LBB19_7:
+; RV32-NEXT:    lw a1, 12(sp)
+; RV32-NEXT:    lw a0, 8(sp)
+; RV32-NEXT:  .LBB19_8: # %entry
+; RV32-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 32
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: utest_f64i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT:    .cfi_offset ra, -8
+; RV64-NEXT:    call __fixunsdfti at plt
+; RV64-NEXT:    beqz a1, .LBB19_2
+; RV64-NEXT:  # %bb.1: # %entry
+; RV64-NEXT:    li a0, 0
+; RV64-NEXT:  .LBB19_2: # %entry
+; RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+entry:
+  %conv = fptoui double %x to i128
+  %0 = icmp ult i128 %conv, 18446744073709551616
+  %spec.store.select = select i1 %0, i128 %conv, i128 18446744073709551616
+  %conv6 = trunc i128 %spec.store.select to i64
+  ret i64 %conv6
+}
+
+; ustest: double -> i64 via fptosi to i128, signed clamp to [0, 2^64],
+; then truncate to i64 (the upper clamp constant truncates to 0).
+define i64 @ustest_f64i64(double %x) {
+; RV32-LABEL: ustest_f64i64:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    addi sp, sp, -32
+; RV32-NEXT:    .cfi_def_cfa_offset 32
+; RV32-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    mv a2, a1
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    call __fixdfti at plt
+; RV32-NEXT:    lw a2, 20(sp)
+; RV32-NEXT:    lw a3, 16(sp)
+; RV32-NEXT:    beqz a2, .LBB20_2
+; RV32-NEXT:  # %bb.1: # %entry
+; RV32-NEXT:    slti a0, a2, 0
+; RV32-NEXT:    j .LBB20_3
+; RV32-NEXT:  .LBB20_2:
+; RV32-NEXT:    seqz a0, a3
+; RV32-NEXT:  .LBB20_3: # %entry
+; RV32-NEXT:    xori a1, a3, 1
+; RV32-NEXT:    or a4, a1, a2
+; RV32-NEXT:    li a1, 0
+; RV32-NEXT:    beqz a4, .LBB20_5
+; RV32-NEXT:  # %bb.4: # %entry
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:  .LBB20_5: # %entry
+; RV32-NEXT:    bnez a1, .LBB20_9
+; RV32-NEXT:  # %bb.6: # %entry
+; RV32-NEXT:    li a0, 0
+; RV32-NEXT:    li a1, 0
+; RV32-NEXT:    li a2, 0
+; RV32-NEXT:    li a3, 1
+; RV32-NEXT:    bnez a2, .LBB20_10
+; RV32-NEXT:  .LBB20_7:
+; RV32-NEXT:    snez a4, a3
+; RV32-NEXT:    bnez a1, .LBB20_11
+; RV32-NEXT:  .LBB20_8:
+; RV32-NEXT:    snez a5, a0
+; RV32-NEXT:    or a2, a3, a2
+; RV32-NEXT:    bnez a2, .LBB20_12
+; RV32-NEXT:    j .LBB20_13
+; RV32-NEXT:  .LBB20_9:
+; RV32-NEXT:    lw a1, 12(sp)
+; RV32-NEXT:    lw a0, 8(sp)
+; RV32-NEXT:    beqz a2, .LBB20_7
+; RV32-NEXT:  .LBB20_10: # %entry
+; RV32-NEXT:    sgtz a4, a2
+; RV32-NEXT:    beqz a1, .LBB20_8
+; RV32-NEXT:  .LBB20_11: # %entry
+; RV32-NEXT:    snez a5, a1
+; RV32-NEXT:    or a2, a3, a2
+; RV32-NEXT:    beqz a2, .LBB20_13
+; RV32-NEXT:  .LBB20_12: # %entry
+; RV32-NEXT:    mv a5, a4
+; RV32-NEXT:  .LBB20_13: # %entry
+; RV32-NEXT:    bnez a5, .LBB20_15
+; RV32-NEXT:  # %bb.14: # %entry
+; RV32-NEXT:    li a0, 0
+; RV32-NEXT:    li a1, 0
+; RV32-NEXT:  .LBB20_15: # %entry
+; RV32-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 32
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: ustest_f64i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT:    .cfi_offset ra, -8
+; RV64-NEXT:    call __fixdfti at plt
+; RV64-NEXT:    blez a1, .LBB20_2
+; RV64-NEXT:  # %bb.1: # %entry
+; RV64-NEXT:    li a0, 0
+; RV64-NEXT:    li a1, 1
+; RV64-NEXT:  .LBB20_2: # %entry
+; RV64-NEXT:    beqz a1, .LBB20_4
+; RV64-NEXT:  # %bb.3: # %entry
+; RV64-NEXT:    sgtz a1, a1
+; RV64-NEXT:    beqz a1, .LBB20_5
+; RV64-NEXT:    j .LBB20_6
+; RV64-NEXT:  .LBB20_4:
+; RV64-NEXT:    snez a1, a0
+; RV64-NEXT:    bnez a1, .LBB20_6
+; RV64-NEXT:  .LBB20_5: # %entry
+; RV64-NEXT:    li a0, 0
+; RV64-NEXT:  .LBB20_6: # %entry
+; RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+entry:
+  %conv = fptosi double %x to i128
+  %0 = icmp slt i128 %conv, 18446744073709551616
+  %spec.store.select = select i1 %0, i128 %conv, i128 18446744073709551616
+  %1 = icmp sgt i128 %spec.store.select, 0
+  %spec.store.select7 = select i1 %1, i128 %spec.store.select, i128 0
+  %conv6 = trunc i128 %spec.store.select7 to i64
+  ret i64 %conv6
+}
+
+define i64 @stest_f32i64(float %x) {
+; Signed i64 saturation: fptosi float -> i128, clamped to
+; [INT64_MIN, INT64_MAX] via slt/sgt + select, then truncated to i64.
+; Exercises lowering through the __fixsfti libcall on both RV32 and RV64.
+; RV32-LABEL: stest_f32i64:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    addi sp, sp, -32
+; RV32-NEXT:    .cfi_def_cfa_offset 32
+; RV32-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    call __fixsfti at plt
+; RV32-NEXT:    lw a2, 20(sp)
+; RV32-NEXT:    lw t0, 16(sp)
+; RV32-NEXT:    lw a1, 12(sp)
+; RV32-NEXT:    lw a0, 8(sp)
+; RV32-NEXT:    lui a7, 524288
+; RV32-NEXT:    addi a5, a7, -1
+; RV32-NEXT:    beq a1, a5, .LBB21_2
+; RV32-NEXT:  # %bb.1: # %entry
+; RV32-NEXT:    sltu a4, a1, a5
+; RV32-NEXT:    or a3, t0, a2
+; RV32-NEXT:    bnez a3, .LBB21_3
+; RV32-NEXT:    j .LBB21_4
+; RV32-NEXT:  .LBB21_2:
+; RV32-NEXT:    addi a4, a0, 1
+; RV32-NEXT:    snez a4, a4
+; RV32-NEXT:    or a3, t0, a2
+; RV32-NEXT:    beqz a3, .LBB21_4
+; RV32-NEXT:  .LBB21_3: # %entry
+; RV32-NEXT:    slti a4, a2, 0
+; RV32-NEXT:  .LBB21_4: # %entry
+; RV32-NEXT:    li a6, -1
+; RV32-NEXT:    beqz a4, .LBB21_7
+; RV32-NEXT:  # %bb.5: # %entry
+; RV32-NEXT:    beq a1, a7, .LBB21_8
+; RV32-NEXT:  .LBB21_6: # %entry
+; RV32-NEXT:    sltu a4, a7, a1
+; RV32-NEXT:    and a3, t0, a2
+; RV32-NEXT:    bne a3, a6, .LBB21_9
+; RV32-NEXT:    j .LBB21_10
+; RV32-NEXT:  .LBB21_7: # %entry
+; RV32-NEXT:    li a2, 0
+; RV32-NEXT:    li t0, 0
+; RV32-NEXT:    li a0, -1
+; RV32-NEXT:    mv a1, a5
+; RV32-NEXT:    bne a1, a7, .LBB21_6
+; RV32-NEXT:  .LBB21_8:
+; RV32-NEXT:    snez a4, a0
+; RV32-NEXT:    and a3, t0, a2
+; RV32-NEXT:    beq a3, a6, .LBB21_10
+; RV32-NEXT:  .LBB21_9: # %entry
+; RV32-NEXT:    slt a4, a6, a2
+; RV32-NEXT:  .LBB21_10: # %entry
+; RV32-NEXT:    bnez a4, .LBB21_12
+; RV32-NEXT:  # %bb.11: # %entry
+; RV32-NEXT:    li a0, 0
+; RV32-NEXT:    lui a1, 524288
+; RV32-NEXT:  .LBB21_12: # %entry
+; RV32-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 32
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: stest_f32i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT:    .cfi_offset ra, -8
+; RV64-NEXT:    call __fixsfti at plt
+; RV64-NEXT:    li a2, -1
+; RV64-NEXT:    srli a3, a2, 1
+; RV64-NEXT:    beqz a1, .LBB21_2
+; RV64-NEXT:  # %bb.1: # %entry
+; RV64-NEXT:    slti a4, a1, 0
+; RV64-NEXT:    beqz a4, .LBB21_3
+; RV64-NEXT:    j .LBB21_4
+; RV64-NEXT:  .LBB21_2:
+; RV64-NEXT:    sltu a4, a0, a3
+; RV64-NEXT:    bnez a4, .LBB21_4
+; RV64-NEXT:  .LBB21_3: # %entry
+; RV64-NEXT:    li a1, 0
+; RV64-NEXT:    mv a0, a3
+; RV64-NEXT:  .LBB21_4: # %entry
+; RV64-NEXT:    slli a3, a2, 63
+; RV64-NEXT:    beq a1, a2, .LBB21_6
+; RV64-NEXT:  # %bb.5: # %entry
+; RV64-NEXT:    slt a1, a2, a1
+; RV64-NEXT:    beqz a1, .LBB21_7
+; RV64-NEXT:    j .LBB21_8
+; RV64-NEXT:  .LBB21_6:
+; RV64-NEXT:    sltu a1, a3, a0
+; RV64-NEXT:    bnez a1, .LBB21_8
+; RV64-NEXT:  .LBB21_7: # %entry
+; RV64-NEXT:    mv a0, a3
+; RV64-NEXT:  .LBB21_8: # %entry
+; RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+entry:
+  %conv = fptosi float %x to i128
+  %0 = icmp slt i128 %conv, 9223372036854775807
+  %spec.store.select = select i1 %0, i128 %conv, i128 9223372036854775807
+  %1 = icmp sgt i128 %spec.store.select, -9223372036854775808
+  %spec.store.select7 = select i1 %1, i128 %spec.store.select, i128 -9223372036854775808
+  %conv6 = trunc i128 %spec.store.select7 to i64
+  ret i64 %conv6
+}
+
+define i64 @utest_f32i64(float %x) {
+; Unsigned i64 saturation: fptoui float -> i128, clamped with
+; umin-style ult/select against 2^64, then truncated to i64.
+; Exercises lowering through the __fixunssfti libcall.
+; RV32-LABEL: utest_f32i64:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    addi sp, sp, -32
+; RV32-NEXT:    .cfi_def_cfa_offset 32
+; RV32-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    call __fixunssfti at plt
+; RV32-NEXT:    lw a0, 20(sp)
+; RV32-NEXT:    lw a1, 16(sp)
+; RV32-NEXT:    beqz a0, .LBB22_2
+; RV32-NEXT:  # %bb.1: # %entry
+; RV32-NEXT:    li a2, 0
+; RV32-NEXT:    j .LBB22_3
+; RV32-NEXT:  .LBB22_2:
+; RV32-NEXT:    seqz a2, a1
+; RV32-NEXT:  .LBB22_3: # %entry
+; RV32-NEXT:    xori a1, a1, 1
+; RV32-NEXT:    or a1, a1, a0
+; RV32-NEXT:    li a0, 0
+; RV32-NEXT:    beqz a1, .LBB22_5
+; RV32-NEXT:  # %bb.4: # %entry
+; RV32-NEXT:    mv a0, a2
+; RV32-NEXT:  .LBB22_5: # %entry
+; RV32-NEXT:    bnez a0, .LBB22_7
+; RV32-NEXT:  # %bb.6: # %entry
+; RV32-NEXT:    li a0, 0
+; RV32-NEXT:    li a1, 0
+; RV32-NEXT:    j .LBB22_8
+; RV32-NEXT:  .LBB22_7:
+; RV32-NEXT:    lw a1, 12(sp)
+; RV32-NEXT:    lw a0, 8(sp)
+; RV32-NEXT:  .LBB22_8: # %entry
+; RV32-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 32
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: utest_f32i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT:    .cfi_offset ra, -8
+; RV64-NEXT:    call __fixunssfti at plt
+; RV64-NEXT:    beqz a1, .LBB22_2
+; RV64-NEXT:  # %bb.1: # %entry
+; RV64-NEXT:    li a0, 0
+; RV64-NEXT:  .LBB22_2: # %entry
+; RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+entry:
+  %conv = fptoui float %x to i128
+  %0 = icmp ult i128 %conv, 18446744073709551616
+  %spec.store.select = select i1 %0, i128 %conv, i128 18446744073709551616
+  %conv6 = trunc i128 %spec.store.select to i64
+  ret i64 %conv6
+}
+
+define i64 @ustest_f32i64(float %x) {
+; Unsigned-range saturation of a signed conversion: fptosi float -> i128,
+; clamped to [0, 2^64] with slt/sgt + select, then truncated to i64.
+; NOTE(review): the upper bound is 2^64 (not 2^64-1), which truncates to 0
+; in i64 — presumably intentional for this autogenerated test; verify
+; against the other targets' fpclamptosat.ll before regenerating.
+; RV32-LABEL: ustest_f32i64:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    addi sp, sp, -32
+; RV32-NEXT:    .cfi_def_cfa_offset 32
+; RV32-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    call __fixsfti at plt
+; RV32-NEXT:    lw a2, 20(sp)
+; RV32-NEXT:    lw a3, 16(sp)
+; RV32-NEXT:    beqz a2, .LBB23_2
+; RV32-NEXT:  # %bb.1: # %entry
+; RV32-NEXT:    slti a0, a2, 0
+; RV32-NEXT:    j .LBB23_3
+; RV32-NEXT:  .LBB23_2:
+; RV32-NEXT:    seqz a0, a3
+; RV32-NEXT:  .LBB23_3: # %entry
+; RV32-NEXT:    xori a1, a3, 1
+; RV32-NEXT:    or a4, a1, a2
+; RV32-NEXT:    li a1, 0
+; RV32-NEXT:    beqz a4, .LBB23_5
+; RV32-NEXT:  # %bb.4: # %entry
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:  .LBB23_5: # %entry
+; RV32-NEXT:    bnez a1, .LBB23_9
+; RV32-NEXT:  # %bb.6: # %entry
+; RV32-NEXT:    li a0, 0
+; RV32-NEXT:    li a1, 0
+; RV32-NEXT:    li a2, 0
+; RV32-NEXT:    li a3, 1
+; RV32-NEXT:    bnez a2, .LBB23_10
+; RV32-NEXT:  .LBB23_7:
+; RV32-NEXT:    snez a4, a3
+; RV32-NEXT:    bnez a1, .LBB23_11
+; RV32-NEXT:  .LBB23_8:
+; RV32-NEXT:    snez a5, a0
+; RV32-NEXT:    or a2, a3, a2
+; RV32-NEXT:    bnez a2, .LBB23_12
+; RV32-NEXT:    j .LBB23_13
+; RV32-NEXT:  .LBB23_9:
+; RV32-NEXT:    lw a1, 12(sp)
+; RV32-NEXT:    lw a0, 8(sp)
+; RV32-NEXT:    beqz a2, .LBB23_7
+; RV32-NEXT:  .LBB23_10: # %entry
+; RV32-NEXT:    sgtz a4, a2
+; RV32-NEXT:    beqz a1, .LBB23_8
+; RV32-NEXT:  .LBB23_11: # %entry
+; RV32-NEXT:    snez a5, a1
+; RV32-NEXT:    or a2, a3, a2
+; RV32-NEXT:    beqz a2, .LBB23_13
+; RV32-NEXT:  .LBB23_12: # %entry
+; RV32-NEXT:    mv a5, a4
+; RV32-NEXT:  .LBB23_13: # %entry
+; RV32-NEXT:    bnez a5, .LBB23_15
+; RV32-NEXT:  # %bb.14: # %entry
+; RV32-NEXT:    li a0, 0
+; RV32-NEXT:    li a1, 0
+; RV32-NEXT:  .LBB23_15: # %entry
+; RV32-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 32
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: ustest_f32i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT:    .cfi_offset ra, -8
+; RV64-NEXT:    call __fixsfti at plt
+; RV64-NEXT:    blez a1, .LBB23_2
+; RV64-NEXT:  # %bb.1: # %entry
+; RV64-NEXT:    li a0, 0
+; RV64-NEXT:    li a1, 1
+; RV64-NEXT:  .LBB23_2: # %entry
+; RV64-NEXT:    beqz a1, .LBB23_4
+; RV64-NEXT:  # %bb.3: # %entry
+; RV64-NEXT:    sgtz a1, a1
+; RV64-NEXT:    beqz a1, .LBB23_5
+; RV64-NEXT:    j .LBB23_6
+; RV64-NEXT:  .LBB23_4:
+; RV64-NEXT:    snez a1, a0
+; RV64-NEXT:    bnez a1, .LBB23_6
+; RV64-NEXT:  .LBB23_5: # %entry
+; RV64-NEXT:    li a0, 0
+; RV64-NEXT:  .LBB23_6: # %entry
+; RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+entry:
+  %conv = fptosi float %x to i128
+  %0 = icmp slt i128 %conv, 18446744073709551616
+  %spec.store.select = select i1 %0, i128 %conv, i128 18446744073709551616
+  %1 = icmp sgt i128 %spec.store.select, 0
+  %spec.store.select7 = select i1 %1, i128 %spec.store.select, i128 0
+  %conv6 = trunc i128 %spec.store.select7 to i64
+  ret i64 %conv6
+}
+
+define i64 @stest_f16i64(half %x) {
+; Signed i64 saturation from half: half is first promoted to float via
+; __gnu_h2f_ieee, then converted with __fixsfti and clamped to
+; [INT64_MIN, INT64_MAX] via slt/sgt + select, truncated to i64.
+; RV32-LABEL: stest_f16i64:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    addi sp, sp, -32
+; RV32-NEXT:    .cfi_def_cfa_offset 32
+; RV32-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    call __gnu_h2f_ieee at plt
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    call __fixsfti at plt
+; RV32-NEXT:    lw a2, 20(sp)
+; RV32-NEXT:    lw t0, 16(sp)
+; RV32-NEXT:    lw a1, 12(sp)
+; RV32-NEXT:    lw a0, 8(sp)
+; RV32-NEXT:    lui a7, 524288
+; RV32-NEXT:    addi a5, a7, -1
+; RV32-NEXT:    beq a1, a5, .LBB24_2
+; RV32-NEXT:  # %bb.1: # %entry
+; RV32-NEXT:    sltu a4, a1, a5
+; RV32-NEXT:    or a3, t0, a2
+; RV32-NEXT:    bnez a3, .LBB24_3
+; RV32-NEXT:    j .LBB24_4
+; RV32-NEXT:  .LBB24_2:
+; RV32-NEXT:    addi a4, a0, 1
+; RV32-NEXT:    snez a4, a4
+; RV32-NEXT:    or a3, t0, a2
+; RV32-NEXT:    beqz a3, .LBB24_4
+; RV32-NEXT:  .LBB24_3: # %entry
+; RV32-NEXT:    slti a4, a2, 0
+; RV32-NEXT:  .LBB24_4: # %entry
+; RV32-NEXT:    li a6, -1
+; RV32-NEXT:    beqz a4, .LBB24_7
+; RV32-NEXT:  # %bb.5: # %entry
+; RV32-NEXT:    beq a1, a7, .LBB24_8
+; RV32-NEXT:  .LBB24_6: # %entry
+; RV32-NEXT:    sltu a4, a7, a1
+; RV32-NEXT:    and a3, t0, a2
+; RV32-NEXT:    bne a3, a6, .LBB24_9
+; RV32-NEXT:    j .LBB24_10
+; RV32-NEXT:  .LBB24_7: # %entry
+; RV32-NEXT:    li a2, 0
+; RV32-NEXT:    li t0, 0
+; RV32-NEXT:    li a0, -1
+; RV32-NEXT:    mv a1, a5
+; RV32-NEXT:    bne a1, a7, .LBB24_6
+; RV32-NEXT:  .LBB24_8:
+; RV32-NEXT:    snez a4, a0
+; RV32-NEXT:    and a3, t0, a2
+; RV32-NEXT:    beq a3, a6, .LBB24_10
+; RV32-NEXT:  .LBB24_9: # %entry
+; RV32-NEXT:    slt a4, a6, a2
+; RV32-NEXT:  .LBB24_10: # %entry
+; RV32-NEXT:    bnez a4, .LBB24_12
+; RV32-NEXT:  # %bb.11: # %entry
+; RV32-NEXT:    li a0, 0
+; RV32-NEXT:    lui a1, 524288
+; RV32-NEXT:  .LBB24_12: # %entry
+; RV32-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 32
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: stest_f16i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT:    .cfi_offset ra, -8
+; RV64-NEXT:    call __gnu_h2f_ieee at plt
+; RV64-NEXT:    call __fixsfti at plt
+; RV64-NEXT:    li a2, -1
+; RV64-NEXT:    srli a3, a2, 1
+; RV64-NEXT:    beqz a1, .LBB24_2
+; RV64-NEXT:  # %bb.1: # %entry
+; RV64-NEXT:    slti a4, a1, 0
+; RV64-NEXT:    beqz a4, .LBB24_3
+; RV64-NEXT:    j .LBB24_4
+; RV64-NEXT:  .LBB24_2:
+; RV64-NEXT:    sltu a4, a0, a3
+; RV64-NEXT:    bnez a4, .LBB24_4
+; RV64-NEXT:  .LBB24_3: # %entry
+; RV64-NEXT:    li a1, 0
+; RV64-NEXT:    mv a0, a3
+; RV64-NEXT:  .LBB24_4: # %entry
+; RV64-NEXT:    slli a3, a2, 63
+; RV64-NEXT:    beq a1, a2, .LBB24_6
+; RV64-NEXT:  # %bb.5: # %entry
+; RV64-NEXT:    slt a1, a2, a1
+; RV64-NEXT:    beqz a1, .LBB24_7
+; RV64-NEXT:    j .LBB24_8
+; RV64-NEXT:  .LBB24_6:
+; RV64-NEXT:    sltu a1, a3, a0
+; RV64-NEXT:    bnez a1, .LBB24_8
+; RV64-NEXT:  .LBB24_7: # %entry
+; RV64-NEXT:    mv a0, a3
+; RV64-NEXT:  .LBB24_8: # %entry
+; RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+entry:
+  %conv = fptosi half %x to i128
+  %0 = icmp slt i128 %conv, 9223372036854775807
+  %spec.store.select = select i1 %0, i128 %conv, i128 9223372036854775807
+  %1 = icmp sgt i128 %spec.store.select, -9223372036854775808
+  %spec.store.select7 = select i1 %1, i128 %spec.store.select, i128 -9223372036854775808
+  %conv6 = trunc i128 %spec.store.select7 to i64
+  ret i64 %conv6
+}
+
+define i64 @utesth_f16i64(half %x) {
+; Unsigned i64 saturation from half: promote via __gnu_h2f_ieee, convert
+; with __fixunssfti, clamp with ult/select against 2^64, truncate to i64.
+; RV32-LABEL: utesth_f16i64:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    addi sp, sp, -32
+; RV32-NEXT:    .cfi_def_cfa_offset 32
+; RV32-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    call __gnu_h2f_ieee at plt
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    call __fixunssfti at plt
+; RV32-NEXT:    lw a0, 20(sp)
+; RV32-NEXT:    lw a1, 16(sp)
+; RV32-NEXT:    beqz a0, .LBB25_2
+; RV32-NEXT:  # %bb.1: # %entry
+; RV32-NEXT:    li a2, 0
+; RV32-NEXT:    j .LBB25_3
+; RV32-NEXT:  .LBB25_2:
+; RV32-NEXT:    seqz a2, a1
+; RV32-NEXT:  .LBB25_3: # %entry
+; RV32-NEXT:    xori a1, a1, 1
+; RV32-NEXT:    or a1, a1, a0
+; RV32-NEXT:    li a0, 0
+; RV32-NEXT:    beqz a1, .LBB25_5
+; RV32-NEXT:  # %bb.4: # %entry
+; RV32-NEXT:    mv a0, a2
+; RV32-NEXT:  .LBB25_5: # %entry
+; RV32-NEXT:    bnez a0, .LBB25_7
+; RV32-NEXT:  # %bb.6: # %entry
+; RV32-NEXT:    li a0, 0
+; RV32-NEXT:    li a1, 0
+; RV32-NEXT:    j .LBB25_8
+; RV32-NEXT:  .LBB25_7:
+; RV32-NEXT:    lw a1, 12(sp)
+; RV32-NEXT:    lw a0, 8(sp)
+; RV32-NEXT:  .LBB25_8: # %entry
+; RV32-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 32
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: utesth_f16i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT:    .cfi_offset ra, -8
+; RV64-NEXT:    call __gnu_h2f_ieee at plt
+; RV64-NEXT:    call __fixunssfti at plt
+; RV64-NEXT:    beqz a1, .LBB25_2
+; RV64-NEXT:  # %bb.1: # %entry
+; RV64-NEXT:    li a0, 0
+; RV64-NEXT:  .LBB25_2: # %entry
+; RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+entry:
+  %conv = fptoui half %x to i128
+  %0 = icmp ult i128 %conv, 18446744073709551616
+  %spec.store.select = select i1 %0, i128 %conv, i128 18446744073709551616
+  %conv6 = trunc i128 %spec.store.select to i64
+  ret i64 %conv6
+}
+
+define i64 @ustest_f16i64(half %x) {
+; Unsigned-range saturation of a signed conversion from half: promote via
+; __gnu_h2f_ieee, convert with __fixsfti, clamp to [0, 2^64] with slt/sgt
+; + select, then truncate to i64.
+; NOTE(review): upper bound is 2^64 (not 2^64-1) — presumably intentional
+; for this autogenerated test; verify before regenerating.
+; RV32-LABEL: ustest_f16i64:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    addi sp, sp, -32
+; RV32-NEXT:    .cfi_def_cfa_offset 32
+; RV32-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    call __gnu_h2f_ieee at plt
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    call __fixsfti at plt
+; RV32-NEXT:    lw a2, 20(sp)
+; RV32-NEXT:    lw a3, 16(sp)
+; RV32-NEXT:    beqz a2, .LBB26_2
+; RV32-NEXT:  # %bb.1: # %entry
+; RV32-NEXT:    slti a0, a2, 0
+; RV32-NEXT:    j .LBB26_3
+; RV32-NEXT:  .LBB26_2:
+; RV32-NEXT:    seqz a0, a3
+; RV32-NEXT:  .LBB26_3: # %entry
+; RV32-NEXT:    xori a1, a3, 1
+; RV32-NEXT:    or a4, a1, a2
+; RV32-NEXT:    li a1, 0
+; RV32-NEXT:    beqz a4, .LBB26_5
+; RV32-NEXT:  # %bb.4: # %entry
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:  .LBB26_5: # %entry
+; RV32-NEXT:    bnez a1, .LBB26_9
+; RV32-NEXT:  # %bb.6: # %entry
+; RV32-NEXT:    li a0, 0
+; RV32-NEXT:    li a1, 0
+; RV32-NEXT:    li a2, 0
+; RV32-NEXT:    li a3, 1
+; RV32-NEXT:    bnez a2, .LBB26_10
+; RV32-NEXT:  .LBB26_7:
+; RV32-NEXT:    snez a4, a3
+; RV32-NEXT:    bnez a1, .LBB26_11
+; RV32-NEXT:  .LBB26_8:
+; RV32-NEXT:    snez a5, a0
+; RV32-NEXT:    or a2, a3, a2
+; RV32-NEXT:    bnez a2, .LBB26_12
+; RV32-NEXT:    j .LBB26_13
+; RV32-NEXT:  .LBB26_9:
+; RV32-NEXT:    lw a1, 12(sp)
+; RV32-NEXT:    lw a0, 8(sp)
+; RV32-NEXT:    beqz a2, .LBB26_7
+; RV32-NEXT:  .LBB26_10: # %entry
+; RV32-NEXT:    sgtz a4, a2
+; RV32-NEXT:    beqz a1, .LBB26_8
+; RV32-NEXT:  .LBB26_11: # %entry
+; RV32-NEXT:    snez a5, a1
+; RV32-NEXT:    or a2, a3, a2
+; RV32-NEXT:    beqz a2, .LBB26_13
+; RV32-NEXT:  .LBB26_12: # %entry
+; RV32-NEXT:    mv a5, a4
+; RV32-NEXT:  .LBB26_13: # %entry
+; RV32-NEXT:    bnez a5, .LBB26_15
+; RV32-NEXT:  # %bb.14: # %entry
+; RV32-NEXT:    li a0, 0
+; RV32-NEXT:    li a1, 0
+; RV32-NEXT:  .LBB26_15: # %entry
+; RV32-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 32
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: ustest_f16i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT:    .cfi_offset ra, -8
+; RV64-NEXT:    call __gnu_h2f_ieee at plt
+; RV64-NEXT:    call __fixsfti at plt
+; RV64-NEXT:    blez a1, .LBB26_2
+; RV64-NEXT:  # %bb.1: # %entry
+; RV64-NEXT:    li a0, 0
+; RV64-NEXT:    li a1, 1
+; RV64-NEXT:  .LBB26_2: # %entry
+; RV64-NEXT:    beqz a1, .LBB26_4
+; RV64-NEXT:  # %bb.3: # %entry
+; RV64-NEXT:    sgtz a1, a1
+; RV64-NEXT:    beqz a1, .LBB26_5
+; RV64-NEXT:    j .LBB26_6
+; RV64-NEXT:  .LBB26_4:
+; RV64-NEXT:    snez a1, a0
+; RV64-NEXT:    bnez a1, .LBB26_6
+; RV64-NEXT:  .LBB26_5: # %entry
+; RV64-NEXT:    li a0, 0
+; RV64-NEXT:  .LBB26_6: # %entry
+; RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+entry:
+  %conv = fptosi half %x to i128
+  %0 = icmp slt i128 %conv, 18446744073709551616
+  %spec.store.select = select i1 %0, i128 %conv, i128 18446744073709551616
+  %1 = icmp sgt i128 %spec.store.select, 0
+  %spec.store.select7 = select i1 %1, i128 %spec.store.select, i128 0
+  %conv6 = trunc i128 %spec.store.select7 to i64
+  ret i64 %conv6
+}
+
+
+
+
+; i32 saturate
+
+define i32 @stest_f64i32_mm(double %x) {
+; Same clamp as stest_f64i32 but expressed with llvm.smin/llvm.smax
+; intrinsics instead of icmp+select: fptosi double -> i64, clamp to
+; [INT32_MIN, INT32_MAX], truncate to i32. Three check prefixes: RV32
+; (soft i64 via __fixdfdi), RV64IF (libcall), RV64IFD (inline fcvt.l.d).
+; RV32-LABEL: stest_f64i32_mm:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    call __fixdfdi at plt
+; RV32-NEXT:    lui a2, 524288
+; RV32-NEXT:    addi a4, a2, -1
+; RV32-NEXT:    mv a3, a0
+; RV32-NEXT:    bgez a1, .LBB27_9
+; RV32-NEXT:  # %bb.1: # %entry
+; RV32-NEXT:    bgeu a0, a4, .LBB27_10
+; RV32-NEXT:  .LBB27_2: # %entry
+; RV32-NEXT:    bnez a1, .LBB27_11
+; RV32-NEXT:  .LBB27_3: # %entry
+; RV32-NEXT:    bgez a1, .LBB27_12
+; RV32-NEXT:  .LBB27_4: # %entry
+; RV32-NEXT:    mv a3, a0
+; RV32-NEXT:    bltz a1, .LBB27_13
+; RV32-NEXT:  .LBB27_5: # %entry
+; RV32-NEXT:    bgeu a2, a0, .LBB27_14
+; RV32-NEXT:  .LBB27_6: # %entry
+; RV32-NEXT:    li a2, -1
+; RV32-NEXT:    beq a1, a2, .LBB27_8
+; RV32-NEXT:  .LBB27_7: # %entry
+; RV32-NEXT:    mv a0, a3
+; RV32-NEXT:  .LBB27_8: # %entry
+; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+; RV32-NEXT:  .LBB27_9: # %entry
+; RV32-NEXT:    mv a3, a4
+; RV32-NEXT:    bltu a0, a4, .LBB27_2
+; RV32-NEXT:  .LBB27_10: # %entry
+; RV32-NEXT:    mv a0, a4
+; RV32-NEXT:    beqz a1, .LBB27_3
+; RV32-NEXT:  .LBB27_11: # %entry
+; RV32-NEXT:    mv a0, a3
+; RV32-NEXT:    bltz a1, .LBB27_4
+; RV32-NEXT:  .LBB27_12: # %entry
+; RV32-NEXT:    li a1, 0
+; RV32-NEXT:    mv a3, a0
+; RV32-NEXT:    bgez a1, .LBB27_5
+; RV32-NEXT:  .LBB27_13: # %entry
+; RV32-NEXT:    lui a3, 524288
+; RV32-NEXT:    bltu a2, a0, .LBB27_6
+; RV32-NEXT:  .LBB27_14: # %entry
+; RV32-NEXT:    lui a0, 524288
+; RV32-NEXT:    li a2, -1
+; RV32-NEXT:    bne a1, a2, .LBB27_7
+; RV32-NEXT:    j .LBB27_8
+;
+; RV64IF-LABEL: stest_f64i32_mm:
+; RV64IF:       # %bb.0: # %entry
+; RV64IF-NEXT:    addi sp, sp, -16
+; RV64IF-NEXT:    .cfi_def_cfa_offset 16
+; RV64IF-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64IF-NEXT:    .cfi_offset ra, -8
+; RV64IF-NEXT:    call __fixdfdi at plt
+; RV64IF-NEXT:    lui a1, 524288
+; RV64IF-NEXT:    addiw a2, a1, -1
+; RV64IF-NEXT:    blt a0, a2, .LBB27_2
+; RV64IF-NEXT:  # %bb.1: # %entry
+; RV64IF-NEXT:    mv a0, a2
+; RV64IF-NEXT:  .LBB27_2: # %entry
+; RV64IF-NEXT:    blt a1, a0, .LBB27_4
+; RV64IF-NEXT:  # %bb.3: # %entry
+; RV64IF-NEXT:    lui a0, 524288
+; RV64IF-NEXT:  .LBB27_4: # %entry
+; RV64IF-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IF-NEXT:    addi sp, sp, 16
+; RV64IF-NEXT:    ret
+;
+; RV64IFD-LABEL: stest_f64i32_mm:
+; RV64IFD:       # %bb.0: # %entry
+; RV64IFD-NEXT:    fmv.d.x ft0, a0
+; RV64IFD-NEXT:    fcvt.l.d a0, ft0, rtz
+; RV64IFD-NEXT:    lui a1, 524288
+; RV64IFD-NEXT:    addiw a2, a1, -1
+; RV64IFD-NEXT:    bge a0, a2, .LBB27_3
+; RV64IFD-NEXT:  # %bb.1: # %entry
+; RV64IFD-NEXT:    bge a1, a0, .LBB27_4
+; RV64IFD-NEXT:  .LBB27_2: # %entry
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB27_3: # %entry
+; RV64IFD-NEXT:    mv a0, a2
+; RV64IFD-NEXT:    blt a1, a0, .LBB27_2
+; RV64IFD-NEXT:  .LBB27_4: # %entry
+; RV64IFD-NEXT:    lui a0, 524288
+; RV64IFD-NEXT:    ret
+entry:
+  %conv = fptosi double %x to i64
+  %spec.store.select = call i64 @llvm.smin.i64(i64 %conv, i64 2147483647)
+  %spec.store.select7 = call i64 @llvm.smax.i64(i64 %spec.store.select, i64 -2147483648)
+  %conv6 = trunc i64 %spec.store.select7 to i32
+  ret i32 %conv6
+}
+
+define i32 @utest_f64i32_mm(double %x) {
+; Unsigned clamp via llvm.umin: fptoui double -> i64, umin with
+; UINT32_MAX, truncate to i32. RV32 lowers through __fixunsdfdi;
+; RV64IFD uses inline fcvt.lu.d.
+; RV32-LABEL: utest_f64i32_mm:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    call __fixunsdfdi at plt
+; RV32-NEXT:    beqz a1, .LBB28_2
+; RV32-NEXT:  # %bb.1: # %entry
+; RV32-NEXT:    li a0, -1
+; RV32-NEXT:  .LBB28_2: # %entry
+; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64IF-LABEL: utest_f64i32_mm:
+; RV64IF:       # %bb.0: # %entry
+; RV64IF-NEXT:    addi sp, sp, -16
+; RV64IF-NEXT:    .cfi_def_cfa_offset 16
+; RV64IF-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64IF-NEXT:    .cfi_offset ra, -8
+; RV64IF-NEXT:    call __fixunsdfdi at plt
+; RV64IF-NEXT:    li a1, -1
+; RV64IF-NEXT:    srli a1, a1, 32
+; RV64IF-NEXT:    bltu a0, a1, .LBB28_2
+; RV64IF-NEXT:  # %bb.1: # %entry
+; RV64IF-NEXT:    mv a0, a1
+; RV64IF-NEXT:  .LBB28_2: # %entry
+; RV64IF-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IF-NEXT:    addi sp, sp, 16
+; RV64IF-NEXT:    ret
+;
+; RV64IFD-LABEL: utest_f64i32_mm:
+; RV64IFD:       # %bb.0: # %entry
+; RV64IFD-NEXT:    fmv.d.x ft0, a0
+; RV64IFD-NEXT:    fcvt.lu.d a0, ft0, rtz
+; RV64IFD-NEXT:    li a1, -1
+; RV64IFD-NEXT:    srli a1, a1, 32
+; RV64IFD-NEXT:    bltu a0, a1, .LBB28_2
+; RV64IFD-NEXT:  # %bb.1: # %entry
+; RV64IFD-NEXT:    mv a0, a1
+; RV64IFD-NEXT:  .LBB28_2: # %entry
+; RV64IFD-NEXT:    ret
+entry:
+  %conv = fptoui double %x to i64
+  %spec.store.select = call i64 @llvm.umin.i64(i64 %conv, i64 4294967295)
+  %conv6 = trunc i64 %spec.store.select to i32
+  ret i32 %conv6
+}
+
+define i32 @ustest_f64i32_mm(double %x) {
+; Unsigned-range clamp of a signed conversion via smin/smax intrinsics:
+; fptosi double -> i64, smin with UINT32_MAX then smax with 0,
+; truncate to i32.
+; RV32-LABEL: ustest_f64i32_mm:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    call __fixdfdi at plt
+; RV32-NEXT:    mv a2, a0
+; RV32-NEXT:    bgez a1, .LBB29_7
+; RV32-NEXT:  # %bb.1: # %entry
+; RV32-NEXT:    bnez a1, .LBB29_8
+; RV32-NEXT:  .LBB29_2: # %entry
+; RV32-NEXT:    bgez a1, .LBB29_9
+; RV32-NEXT:  .LBB29_3: # %entry
+; RV32-NEXT:    mv a2, a0
+; RV32-NEXT:    blez a1, .LBB29_10
+; RV32-NEXT:  .LBB29_4: # %entry
+; RV32-NEXT:    beqz a1, .LBB29_6
+; RV32-NEXT:  .LBB29_5: # %entry
+; RV32-NEXT:    mv a0, a2
+; RV32-NEXT:  .LBB29_6: # %entry
+; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+; RV32-NEXT:  .LBB29_7: # %entry
+; RV32-NEXT:    li a2, -1
+; RV32-NEXT:    beqz a1, .LBB29_2
+; RV32-NEXT:  .LBB29_8: # %entry
+; RV32-NEXT:    mv a0, a2
+; RV32-NEXT:    bltz a1, .LBB29_3
+; RV32-NEXT:  .LBB29_9: # %entry
+; RV32-NEXT:    li a1, 0
+; RV32-NEXT:    mv a2, a0
+; RV32-NEXT:    bgtz a1, .LBB29_4
+; RV32-NEXT:  .LBB29_10: # %entry
+; RV32-NEXT:    li a2, 0
+; RV32-NEXT:    bnez a1, .LBB29_5
+; RV32-NEXT:    j .LBB29_6
+;
+; RV64IF-LABEL: ustest_f64i32_mm:
+; RV64IF:       # %bb.0: # %entry
+; RV64IF-NEXT:    addi sp, sp, -16
+; RV64IF-NEXT:    .cfi_def_cfa_offset 16
+; RV64IF-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64IF-NEXT:    .cfi_offset ra, -8
+; RV64IF-NEXT:    call __fixdfdi at plt
+; RV64IF-NEXT:    li a1, -1
+; RV64IF-NEXT:    srli a1, a1, 32
+; RV64IF-NEXT:    blt a0, a1, .LBB29_2
+; RV64IF-NEXT:  # %bb.1: # %entry
+; RV64IF-NEXT:    mv a0, a1
+; RV64IF-NEXT:  .LBB29_2: # %entry
+; RV64IF-NEXT:    bgtz a0, .LBB29_4
+; RV64IF-NEXT:  # %bb.3: # %entry
+; RV64IF-NEXT:    li a0, 0
+; RV64IF-NEXT:  .LBB29_4: # %entry
+; RV64IF-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IF-NEXT:    addi sp, sp, 16
+; RV64IF-NEXT:    ret
+;
+; RV64IFD-LABEL: ustest_f64i32_mm:
+; RV64IFD:       # %bb.0: # %entry
+; RV64IFD-NEXT:    fmv.d.x ft0, a0
+; RV64IFD-NEXT:    fcvt.l.d a0, ft0, rtz
+; RV64IFD-NEXT:    li a1, -1
+; RV64IFD-NEXT:    srli a1, a1, 32
+; RV64IFD-NEXT:    bge a0, a1, .LBB29_3
+; RV64IFD-NEXT:  # %bb.1: # %entry
+; RV64IFD-NEXT:    blez a0, .LBB29_4
+; RV64IFD-NEXT:  .LBB29_2: # %entry
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB29_3: # %entry
+; RV64IFD-NEXT:    mv a0, a1
+; RV64IFD-NEXT:    bgtz a0, .LBB29_2
+; RV64IFD-NEXT:  .LBB29_4: # %entry
+; RV64IFD-NEXT:    li a0, 0
+; RV64IFD-NEXT:    ret
+entry:
+  %conv = fptosi double %x to i64
+  %spec.store.select = call i64 @llvm.smin.i64(i64 %conv, i64 4294967295)
+  %spec.store.select7 = call i64 @llvm.smax.i64(i64 %spec.store.select, i64 0)
+  %conv6 = trunc i64 %spec.store.select7 to i32
+  ret i32 %conv6
+}
+
+define i32 @stest_f32i32_mm(float %x) {
+; Signed clamp via smin/smax intrinsics: fptosi float -> i64, clamp to
+; [INT32_MIN, INT32_MAX], truncate to i32. RV32 lowers through
+; __fixsfdi; RV64 uses inline fcvt.l.s.
+; RV32-LABEL: stest_f32i32_mm:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    call __fixsfdi at plt
+; RV32-NEXT:    lui a2, 524288
+; RV32-NEXT:    addi a4, a2, -1
+; RV32-NEXT:    mv a3, a0
+; RV32-NEXT:    bgez a1, .LBB30_9
+; RV32-NEXT:  # %bb.1: # %entry
+; RV32-NEXT:    bgeu a0, a4, .LBB30_10
+; RV32-NEXT:  .LBB30_2: # %entry
+; RV32-NEXT:    bnez a1, .LBB30_11
+; RV32-NEXT:  .LBB30_3: # %entry
+; RV32-NEXT:    bgez a1, .LBB30_12
+; RV32-NEXT:  .LBB30_4: # %entry
+; RV32-NEXT:    mv a3, a0
+; RV32-NEXT:    bltz a1, .LBB30_13
+; RV32-NEXT:  .LBB30_5: # %entry
+; RV32-NEXT:    bgeu a2, a0, .LBB30_14
+; RV32-NEXT:  .LBB30_6: # %entry
+; RV32-NEXT:    li a2, -1
+; RV32-NEXT:    beq a1, a2, .LBB30_8
+; RV32-NEXT:  .LBB30_7: # %entry
+; RV32-NEXT:    mv a0, a3
+; RV32-NEXT:  .LBB30_8: # %entry
+; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+; RV32-NEXT:  .LBB30_9: # %entry
+; RV32-NEXT:    mv a3, a4
+; RV32-NEXT:    bltu a0, a4, .LBB30_2
+; RV32-NEXT:  .LBB30_10: # %entry
+; RV32-NEXT:    mv a0, a4
+; RV32-NEXT:    beqz a1, .LBB30_3
+; RV32-NEXT:  .LBB30_11: # %entry
+; RV32-NEXT:    mv a0, a3
+; RV32-NEXT:    bltz a1, .LBB30_4
+; RV32-NEXT:  .LBB30_12: # %entry
+; RV32-NEXT:    li a1, 0
+; RV32-NEXT:    mv a3, a0
+; RV32-NEXT:    bgez a1, .LBB30_5
+; RV32-NEXT:  .LBB30_13: # %entry
+; RV32-NEXT:    lui a3, 524288
+; RV32-NEXT:    bltu a2, a0, .LBB30_6
+; RV32-NEXT:  .LBB30_14: # %entry
+; RV32-NEXT:    lui a0, 524288
+; RV32-NEXT:    li a2, -1
+; RV32-NEXT:    bne a1, a2, .LBB30_7
+; RV32-NEXT:    j .LBB30_8
+;
+; RV64-LABEL: stest_f32i32_mm:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    fmv.w.x ft0, a0
+; RV64-NEXT:    fcvt.l.s a0, ft0, rtz
+; RV64-NEXT:    lui a1, 524288
+; RV64-NEXT:    addiw a2, a1, -1
+; RV64-NEXT:    bge a0, a2, .LBB30_3
+; RV64-NEXT:  # %bb.1: # %entry
+; RV64-NEXT:    bge a1, a0, .LBB30_4
+; RV64-NEXT:  .LBB30_2: # %entry
+; RV64-NEXT:    ret
+; RV64-NEXT:  .LBB30_3: # %entry
+; RV64-NEXT:    mv a0, a2
+; RV64-NEXT:    blt a1, a0, .LBB30_2
+; RV64-NEXT:  .LBB30_4: # %entry
+; RV64-NEXT:    lui a0, 524288
+; RV64-NEXT:    ret
+entry:
+  %conv = fptosi float %x to i64
+  %spec.store.select = call i64 @llvm.smin.i64(i64 %conv, i64 2147483647)
+  %spec.store.select7 = call i64 @llvm.smax.i64(i64 %spec.store.select, i64 -2147483648)
+  %conv6 = trunc i64 %spec.store.select7 to i32
+  ret i32 %conv6
+}
+
+define i32 @utest_f32i32_mm(float %x) {
+; Unsigned clamp via llvm.umin: fptoui float -> i64, umin with
+; UINT32_MAX, truncate to i32. RV32 lowers through __fixunssfdi;
+; RV64 uses inline fcvt.lu.s.
+; RV32-LABEL: utest_f32i32_mm:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    call __fixunssfdi at plt
+; RV32-NEXT:    beqz a1, .LBB31_2
+; RV32-NEXT:  # %bb.1: # %entry
+; RV32-NEXT:    li a0, -1
+; RV32-NEXT:  .LBB31_2: # %entry
+; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: utest_f32i32_mm:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    fmv.w.x ft0, a0
+; RV64-NEXT:    fcvt.lu.s a0, ft0, rtz
+; RV64-NEXT:    li a1, -1
+; RV64-NEXT:    srli a1, a1, 32
+; RV64-NEXT:    bltu a0, a1, .LBB31_2
+; RV64-NEXT:  # %bb.1: # %entry
+; RV64-NEXT:    mv a0, a1
+; RV64-NEXT:  .LBB31_2: # %entry
+; RV64-NEXT:    ret
+entry:
+  %conv = fptoui float %x to i64
+  %spec.store.select = call i64 @llvm.umin.i64(i64 %conv, i64 4294967295)
+  %conv6 = trunc i64 %spec.store.select to i32
+  ret i32 %conv6
+}
+
+define i32 @ustest_f32i32_mm(float %x) {
+; Unsigned-range clamp of a signed conversion via smin/smax intrinsics:
+; fptosi float -> i64, smin with UINT32_MAX then smax with 0,
+; truncate to i32.
+; RV32-LABEL: ustest_f32i32_mm:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    call __fixsfdi at plt
+; RV32-NEXT:    mv a2, a0
+; RV32-NEXT:    bgez a1, .LBB32_7
+; RV32-NEXT:  # %bb.1: # %entry
+; RV32-NEXT:    bnez a1, .LBB32_8
+; RV32-NEXT:  .LBB32_2: # %entry
+; RV32-NEXT:    bgez a1, .LBB32_9
+; RV32-NEXT:  .LBB32_3: # %entry
+; RV32-NEXT:    mv a2, a0
+; RV32-NEXT:    blez a1, .LBB32_10
+; RV32-NEXT:  .LBB32_4: # %entry
+; RV32-NEXT:    beqz a1, .LBB32_6
+; RV32-NEXT:  .LBB32_5: # %entry
+; RV32-NEXT:    mv a0, a2
+; RV32-NEXT:  .LBB32_6: # %entry
+; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+; RV32-NEXT:  .LBB32_7: # %entry
+; RV32-NEXT:    li a2, -1
+; RV32-NEXT:    beqz a1, .LBB32_2
+; RV32-NEXT:  .LBB32_8: # %entry
+; RV32-NEXT:    mv a0, a2
+; RV32-NEXT:    bltz a1, .LBB32_3
+; RV32-NEXT:  .LBB32_9: # %entry
+; RV32-NEXT:    li a1, 0
+; RV32-NEXT:    mv a2, a0
+; RV32-NEXT:    bgtz a1, .LBB32_4
+; RV32-NEXT:  .LBB32_10: # %entry
+; RV32-NEXT:    li a2, 0
+; RV32-NEXT:    bnez a1, .LBB32_5
+; RV32-NEXT:    j .LBB32_6
+;
+; RV64-LABEL: ustest_f32i32_mm:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    fmv.w.x ft0, a0
+; RV64-NEXT:    fcvt.l.s a0, ft0, rtz
+; RV64-NEXT:    li a1, -1
+; RV64-NEXT:    srli a1, a1, 32
+; RV64-NEXT:    bge a0, a1, .LBB32_3
+; RV64-NEXT:  # %bb.1: # %entry
+; RV64-NEXT:    blez a0, .LBB32_4
+; RV64-NEXT:  .LBB32_2: # %entry
+; RV64-NEXT:    ret
+; RV64-NEXT:  .LBB32_3: # %entry
+; RV64-NEXT:    mv a0, a1
+; RV64-NEXT:    bgtz a0, .LBB32_2
+; RV64-NEXT:  .LBB32_4: # %entry
+; RV64-NEXT:    li a0, 0
+; RV64-NEXT:    ret
+entry:
+  %conv = fptosi float %x to i64
+  %spec.store.select = call i64 @llvm.smin.i64(i64 %conv, i64 4294967295)
+  %spec.store.select7 = call i64 @llvm.smax.i64(i64 %spec.store.select, i64 0)
+  %conv6 = trunc i64 %spec.store.select7 to i32
+  ret i32 %conv6
+}
+
+define i32 @stest_f16i32_mm(half %x) {
+; Signed clamp from half via smin/smax intrinsics: promote with
+; __gnu_h2f_ieee, fptosi to i64 (RV32: __fixsfdi; RV64: fcvt.l.s),
+; clamp to [INT32_MIN, INT32_MAX], truncate to i32.
+; RV32-LABEL: stest_f16i32_mm:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    call __gnu_h2f_ieee at plt
+; RV32-NEXT:    call __fixsfdi at plt
+; RV32-NEXT:    lui a2, 524288
+; RV32-NEXT:    addi a4, a2, -1
+; RV32-NEXT:    mv a3, a0
+; RV32-NEXT:    bgez a1, .LBB33_9
+; RV32-NEXT:  # %bb.1: # %entry
+; RV32-NEXT:    bgeu a0, a4, .LBB33_10
+; RV32-NEXT:  .LBB33_2: # %entry
+; RV32-NEXT:    bnez a1, .LBB33_11
+; RV32-NEXT:  .LBB33_3: # %entry
+; RV32-NEXT:    bgez a1, .LBB33_12
+; RV32-NEXT:  .LBB33_4: # %entry
+; RV32-NEXT:    mv a3, a0
+; RV32-NEXT:    bltz a1, .LBB33_13
+; RV32-NEXT:  .LBB33_5: # %entry
+; RV32-NEXT:    bgeu a2, a0, .LBB33_14
+; RV32-NEXT:  .LBB33_6: # %entry
+; RV32-NEXT:    li a2, -1
+; RV32-NEXT:    beq a1, a2, .LBB33_8
+; RV32-NEXT:  .LBB33_7: # %entry
+; RV32-NEXT:    mv a0, a3
+; RV32-NEXT:  .LBB33_8: # %entry
+; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+; RV32-NEXT:  .LBB33_9: # %entry
+; RV32-NEXT:    mv a3, a4
+; RV32-NEXT:    bltu a0, a4, .LBB33_2
+; RV32-NEXT:  .LBB33_10: # %entry
+; RV32-NEXT:    mv a0, a4
+; RV32-NEXT:    beqz a1, .LBB33_3
+; RV32-NEXT:  .LBB33_11: # %entry
+; RV32-NEXT:    mv a0, a3
+; RV32-NEXT:    bltz a1, .LBB33_4
+; RV32-NEXT:  .LBB33_12: # %entry
+; RV32-NEXT:    li a1, 0
+; RV32-NEXT:    mv a3, a0
+; RV32-NEXT:    bgez a1, .LBB33_5
+; RV32-NEXT:  .LBB33_13: # %entry
+; RV32-NEXT:    lui a3, 524288
+; RV32-NEXT:    bltu a2, a0, .LBB33_6
+; RV32-NEXT:  .LBB33_14: # %entry
+; RV32-NEXT:    lui a0, 524288
+; RV32-NEXT:    li a2, -1
+; RV32-NEXT:    bne a1, a2, .LBB33_7
+; RV32-NEXT:    j .LBB33_8
+;
+; RV64-LABEL: stest_f16i32_mm:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT:    .cfi_offset ra, -8
+; RV64-NEXT:    call __gnu_h2f_ieee at plt
+; RV64-NEXT:    fmv.w.x ft0, a0
+; RV64-NEXT:    fcvt.l.s a0, ft0, rtz
+; RV64-NEXT:    lui a1, 524288
+; RV64-NEXT:    addiw a2, a1, -1
+; RV64-NEXT:    blt a0, a2, .LBB33_2
+; RV64-NEXT:  # %bb.1: # %entry
+; RV64-NEXT:    mv a0, a2
+; RV64-NEXT:  .LBB33_2: # %entry
+; RV64-NEXT:    blt a1, a0, .LBB33_4
+; RV64-NEXT:  # %bb.3: # %entry
+; RV64-NEXT:    lui a0, 524288
+; RV64-NEXT:  .LBB33_4: # %entry
+; RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+entry:
+  %conv = fptosi half %x to i64
+  %spec.store.select = call i64 @llvm.smin.i64(i64 %conv, i64 2147483647)
+  %spec.store.select7 = call i64 @llvm.smax.i64(i64 %spec.store.select, i64 -2147483648)
+  %conv6 = trunc i64 %spec.store.select7 to i32
+  ret i32 %conv6
+}
+
+define i32 @utesth_f16i32_mm(half %x) {
+; RV32-LABEL: utesth_f16i32_mm:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: call __gnu_h2f_ieee at plt
+; RV32-NEXT: call __fixunssfdi at plt
+; RV32-NEXT: beqz a1, .LBB34_2
+; RV32-NEXT: # %bb.1: # %entry
+; RV32-NEXT: li a0, -1
+; RV32-NEXT: .LBB34_2: # %entry
+; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: utesth_f16i32_mm:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: call __gnu_h2f_ieee at plt
+; RV64-NEXT: fmv.w.x ft0, a0
+; RV64-NEXT: fcvt.lu.s a0, ft0, rtz
+; RV64-NEXT: li a1, -1
+; RV64-NEXT: srli a1, a1, 32
+; RV64-NEXT: bltu a0, a1, .LBB34_2
+; RV64-NEXT: # %bb.1: # %entry
+; RV64-NEXT: mv a0, a1
+; RV64-NEXT: .LBB34_2: # %entry
+; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
+entry:
+ %conv = fptoui half %x to i64
+ %spec.store.select = call i64 @llvm.umin.i64(i64 %conv, i64 4294967295)
+ %conv6 = trunc i64 %spec.store.select to i32
+ ret i32 %conv6
+}
+
+define i32 @ustest_f16i32_mm(half %x) {
+; RV32-LABEL: ustest_f16i32_mm:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: call __gnu_h2f_ieee at plt
+; RV32-NEXT: call __fixsfdi at plt
+; RV32-NEXT: mv a2, a0
+; RV32-NEXT: bgez a1, .LBB35_7
+; RV32-NEXT: # %bb.1: # %entry
+; RV32-NEXT: bnez a1, .LBB35_8
+; RV32-NEXT: .LBB35_2: # %entry
+; RV32-NEXT: bgez a1, .LBB35_9
+; RV32-NEXT: .LBB35_3: # %entry
+; RV32-NEXT: mv a2, a0
+; RV32-NEXT: blez a1, .LBB35_10
+; RV32-NEXT: .LBB35_4: # %entry
+; RV32-NEXT: beqz a1, .LBB35_6
+; RV32-NEXT: .LBB35_5: # %entry
+; RV32-NEXT: mv a0, a2
+; RV32-NEXT: .LBB35_6: # %entry
+; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+; RV32-NEXT: .LBB35_7: # %entry
+; RV32-NEXT: li a2, -1
+; RV32-NEXT: beqz a1, .LBB35_2
+; RV32-NEXT: .LBB35_8: # %entry
+; RV32-NEXT: mv a0, a2
+; RV32-NEXT: bltz a1, .LBB35_3
+; RV32-NEXT: .LBB35_9: # %entry
+; RV32-NEXT: li a1, 0
+; RV32-NEXT: mv a2, a0
+; RV32-NEXT: bgtz a1, .LBB35_4
+; RV32-NEXT: .LBB35_10: # %entry
+; RV32-NEXT: li a2, 0
+; RV32-NEXT: bnez a1, .LBB35_5
+; RV32-NEXT: j .LBB35_6
+;
+; RV64-LABEL: ustest_f16i32_mm:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: call __gnu_h2f_ieee at plt
+; RV64-NEXT: fmv.w.x ft0, a0
+; RV64-NEXT: fcvt.l.s a0, ft0, rtz
+; RV64-NEXT: li a1, -1
+; RV64-NEXT: srli a1, a1, 32
+; RV64-NEXT: blt a0, a1, .LBB35_2
+; RV64-NEXT: # %bb.1: # %entry
+; RV64-NEXT: mv a0, a1
+; RV64-NEXT: .LBB35_2: # %entry
+; RV64-NEXT: bgtz a0, .LBB35_4
+; RV64-NEXT: # %bb.3: # %entry
+; RV64-NEXT: li a0, 0
+; RV64-NEXT: .LBB35_4: # %entry
+; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
+entry:
+ %conv = fptosi half %x to i64
+ %spec.store.select = call i64 @llvm.smin.i64(i64 %conv, i64 4294967295)
+ %spec.store.select7 = call i64 @llvm.smax.i64(i64 %spec.store.select, i64 0)
+ %conv6 = trunc i64 %spec.store.select7 to i32
+ ret i32 %conv6
+}
+
+; i16 saturate
+
+define i16 @stest_f64i16_mm(double %x) {
+; RV32IF-LABEL: stest_f64i16_mm:
+; RV32IF: # %bb.0: # %entry
+; RV32IF-NEXT: addi sp, sp, -16
+; RV32IF-NEXT: .cfi_def_cfa_offset 16
+; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IF-NEXT: .cfi_offset ra, -4
+; RV32IF-NEXT: call __fixdfsi at plt
+; RV32IF-NEXT: lui a1, 8
+; RV32IF-NEXT: addi a1, a1, -1
+; RV32IF-NEXT: blt a0, a1, .LBB36_2
+; RV32IF-NEXT: # %bb.1: # %entry
+; RV32IF-NEXT: mv a0, a1
+; RV32IF-NEXT: .LBB36_2: # %entry
+; RV32IF-NEXT: lui a1, 1048568
+; RV32IF-NEXT: blt a1, a0, .LBB36_4
+; RV32IF-NEXT: # %bb.3: # %entry
+; RV32IF-NEXT: lui a0, 1048568
+; RV32IF-NEXT: .LBB36_4: # %entry
+; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IF-NEXT: addi sp, sp, 16
+; RV32IF-NEXT: ret
+;
+; RV64IF-LABEL: stest_f64i16_mm:
+; RV64IF: # %bb.0: # %entry
+; RV64IF-NEXT: addi sp, sp, -16
+; RV64IF-NEXT: .cfi_def_cfa_offset 16
+; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64IF-NEXT: .cfi_offset ra, -8
+; RV64IF-NEXT: call __fixdfsi at plt
+; RV64IF-NEXT: lui a1, 8
+; RV64IF-NEXT: addiw a1, a1, -1
+; RV64IF-NEXT: blt a0, a1, .LBB36_2
+; RV64IF-NEXT: # %bb.1: # %entry
+; RV64IF-NEXT: mv a0, a1
+; RV64IF-NEXT: .LBB36_2: # %entry
+; RV64IF-NEXT: lui a1, 1048568
+; RV64IF-NEXT: blt a1, a0, .LBB36_4
+; RV64IF-NEXT: # %bb.3: # %entry
+; RV64IF-NEXT: lui a0, 1048568
+; RV64IF-NEXT: .LBB36_4: # %entry
+; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IF-NEXT: addi sp, sp, 16
+; RV64IF-NEXT: ret
+;
+; RV32IFD-LABEL: stest_f64i16_mm:
+; RV32IFD: # %bb.0: # %entry
+; RV32IFD-NEXT: addi sp, sp, -16
+; RV32IFD-NEXT: .cfi_def_cfa_offset 16
+; RV32IFD-NEXT: sw a0, 8(sp)
+; RV32IFD-NEXT: sw a1, 12(sp)
+; RV32IFD-NEXT: fld ft0, 8(sp)
+; RV32IFD-NEXT: fcvt.w.d a0, ft0, rtz
+; RV32IFD-NEXT: lui a1, 8
+; RV32IFD-NEXT: addi a1, a1, -1
+; RV32IFD-NEXT: bge a0, a1, .LBB36_3
+; RV32IFD-NEXT: # %bb.1: # %entry
+; RV32IFD-NEXT: lui a1, 1048568
+; RV32IFD-NEXT: bge a1, a0, .LBB36_4
+; RV32IFD-NEXT: .LBB36_2: # %entry
+; RV32IFD-NEXT: addi sp, sp, 16
+; RV32IFD-NEXT: ret
+; RV32IFD-NEXT: .LBB36_3: # %entry
+; RV32IFD-NEXT: mv a0, a1
+; RV32IFD-NEXT: lui a1, 1048568
+; RV32IFD-NEXT: blt a1, a0, .LBB36_2
+; RV32IFD-NEXT: .LBB36_4: # %entry
+; RV32IFD-NEXT: lui a0, 1048568
+; RV32IFD-NEXT: addi sp, sp, 16
+; RV32IFD-NEXT: ret
+;
+; RV64IFD-LABEL: stest_f64i16_mm:
+; RV64IFD: # %bb.0: # %entry
+; RV64IFD-NEXT: fmv.d.x ft0, a0
+; RV64IFD-NEXT: fcvt.w.d a0, ft0, rtz
+; RV64IFD-NEXT: lui a1, 8
+; RV64IFD-NEXT: addiw a1, a1, -1
+; RV64IFD-NEXT: bge a0, a1, .LBB36_3
+; RV64IFD-NEXT: # %bb.1: # %entry
+; RV64IFD-NEXT: lui a1, 1048568
+; RV64IFD-NEXT: bge a1, a0, .LBB36_4
+; RV64IFD-NEXT: .LBB36_2: # %entry
+; RV64IFD-NEXT: ret
+; RV64IFD-NEXT: .LBB36_3: # %entry
+; RV64IFD-NEXT: mv a0, a1
+; RV64IFD-NEXT: lui a1, 1048568
+; RV64IFD-NEXT: blt a1, a0, .LBB36_2
+; RV64IFD-NEXT: .LBB36_4: # %entry
+; RV64IFD-NEXT: lui a0, 1048568
+; RV64IFD-NEXT: ret
+entry:
+ %conv = fptosi double %x to i32
+ %spec.store.select = call i32 @llvm.smin.i32(i32 %conv, i32 32767)
+ %spec.store.select7 = call i32 @llvm.smax.i32(i32 %spec.store.select, i32 -32768)
+ %conv6 = trunc i32 %spec.store.select7 to i16
+ ret i16 %conv6
+}
+
+define i16 @utest_f64i16_mm(double %x) {
+; RV32IF-LABEL: utest_f64i16_mm:
+; RV32IF: # %bb.0: # %entry
+; RV32IF-NEXT: addi sp, sp, -16
+; RV32IF-NEXT: .cfi_def_cfa_offset 16
+; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IF-NEXT: .cfi_offset ra, -4
+; RV32IF-NEXT: call __fixunsdfsi at plt
+; RV32IF-NEXT: lui a1, 16
+; RV32IF-NEXT: addi a1, a1, -1
+; RV32IF-NEXT: bltu a0, a1, .LBB37_2
+; RV32IF-NEXT: # %bb.1: # %entry
+; RV32IF-NEXT: mv a0, a1
+; RV32IF-NEXT: .LBB37_2: # %entry
+; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IF-NEXT: addi sp, sp, 16
+; RV32IF-NEXT: ret
+;
+; RV64IF-LABEL: utest_f64i16_mm:
+; RV64IF: # %bb.0: # %entry
+; RV64IF-NEXT: addi sp, sp, -16
+; RV64IF-NEXT: .cfi_def_cfa_offset 16
+; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64IF-NEXT: .cfi_offset ra, -8
+; RV64IF-NEXT: call __fixunsdfsi at plt
+; RV64IF-NEXT: lui a1, 16
+; RV64IF-NEXT: addiw a1, a1, -1
+; RV64IF-NEXT: bltu a0, a1, .LBB37_2
+; RV64IF-NEXT: # %bb.1: # %entry
+; RV64IF-NEXT: mv a0, a1
+; RV64IF-NEXT: .LBB37_2: # %entry
+; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IF-NEXT: addi sp, sp, 16
+; RV64IF-NEXT: ret
+;
+; RV32IFD-LABEL: utest_f64i16_mm:
+; RV32IFD: # %bb.0: # %entry
+; RV32IFD-NEXT: addi sp, sp, -16
+; RV32IFD-NEXT: .cfi_def_cfa_offset 16
+; RV32IFD-NEXT: sw a0, 8(sp)
+; RV32IFD-NEXT: sw a1, 12(sp)
+; RV32IFD-NEXT: fld ft0, 8(sp)
+; RV32IFD-NEXT: fcvt.wu.d a0, ft0, rtz
+; RV32IFD-NEXT: lui a1, 16
+; RV32IFD-NEXT: addi a1, a1, -1
+; RV32IFD-NEXT: bltu a0, a1, .LBB37_2
+; RV32IFD-NEXT: # %bb.1: # %entry
+; RV32IFD-NEXT: mv a0, a1
+; RV32IFD-NEXT: .LBB37_2: # %entry
+; RV32IFD-NEXT: addi sp, sp, 16
+; RV32IFD-NEXT: ret
+;
+; RV64IFD-LABEL: utest_f64i16_mm:
+; RV64IFD: # %bb.0: # %entry
+; RV64IFD-NEXT: fmv.d.x ft0, a0
+; RV64IFD-NEXT: fcvt.wu.d a0, ft0, rtz
+; RV64IFD-NEXT: lui a1, 16
+; RV64IFD-NEXT: addiw a1, a1, -1
+; RV64IFD-NEXT: bltu a0, a1, .LBB37_2
+; RV64IFD-NEXT: # %bb.1: # %entry
+; RV64IFD-NEXT: mv a0, a1
+; RV64IFD-NEXT: .LBB37_2: # %entry
+; RV64IFD-NEXT: ret
+entry:
+ %conv = fptoui double %x to i32
+ %spec.store.select = call i32 @llvm.umin.i32(i32 %conv, i32 65535)
+ %conv6 = trunc i32 %spec.store.select to i16
+ ret i16 %conv6
+}
+
+define i16 @ustest_f64i16_mm(double %x) {
+; RV32IF-LABEL: ustest_f64i16_mm:
+; RV32IF: # %bb.0: # %entry
+; RV32IF-NEXT: addi sp, sp, -16
+; RV32IF-NEXT: .cfi_def_cfa_offset 16
+; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IF-NEXT: .cfi_offset ra, -4
+; RV32IF-NEXT: call __fixdfsi at plt
+; RV32IF-NEXT: lui a1, 16
+; RV32IF-NEXT: addi a1, a1, -1
+; RV32IF-NEXT: blt a0, a1, .LBB38_2
+; RV32IF-NEXT: # %bb.1: # %entry
+; RV32IF-NEXT: mv a0, a1
+; RV32IF-NEXT: .LBB38_2: # %entry
+; RV32IF-NEXT: bgtz a0, .LBB38_4
+; RV32IF-NEXT: # %bb.3: # %entry
+; RV32IF-NEXT: li a0, 0
+; RV32IF-NEXT: .LBB38_4: # %entry
+; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IF-NEXT: addi sp, sp, 16
+; RV32IF-NEXT: ret
+;
+; RV64IF-LABEL: ustest_f64i16_mm:
+; RV64IF: # %bb.0: # %entry
+; RV64IF-NEXT: addi sp, sp, -16
+; RV64IF-NEXT: .cfi_def_cfa_offset 16
+; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64IF-NEXT: .cfi_offset ra, -8
+; RV64IF-NEXT: call __fixdfsi at plt
+; RV64IF-NEXT: lui a1, 16
+; RV64IF-NEXT: addiw a1, a1, -1
+; RV64IF-NEXT: blt a0, a1, .LBB38_2
+; RV64IF-NEXT: # %bb.1: # %entry
+; RV64IF-NEXT: mv a0, a1
+; RV64IF-NEXT: .LBB38_2: # %entry
+; RV64IF-NEXT: bgtz a0, .LBB38_4
+; RV64IF-NEXT: # %bb.3: # %entry
+; RV64IF-NEXT: li a0, 0
+; RV64IF-NEXT: .LBB38_4: # %entry
+; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IF-NEXT: addi sp, sp, 16
+; RV64IF-NEXT: ret
+;
+; RV32IFD-LABEL: ustest_f64i16_mm:
+; RV32IFD: # %bb.0: # %entry
+; RV32IFD-NEXT: addi sp, sp, -16
+; RV32IFD-NEXT: .cfi_def_cfa_offset 16
+; RV32IFD-NEXT: sw a0, 8(sp)
+; RV32IFD-NEXT: sw a1, 12(sp)
+; RV32IFD-NEXT: fld ft0, 8(sp)
+; RV32IFD-NEXT: fcvt.w.d a0, ft0, rtz
+; RV32IFD-NEXT: lui a1, 16
+; RV32IFD-NEXT: addi a1, a1, -1
+; RV32IFD-NEXT: bge a0, a1, .LBB38_3
+; RV32IFD-NEXT: # %bb.1: # %entry
+; RV32IFD-NEXT: blez a0, .LBB38_4
+; RV32IFD-NEXT: .LBB38_2: # %entry
+; RV32IFD-NEXT: addi sp, sp, 16
+; RV32IFD-NEXT: ret
+; RV32IFD-NEXT: .LBB38_3: # %entry
+; RV32IFD-NEXT: mv a0, a1
+; RV32IFD-NEXT: bgtz a0, .LBB38_2
+; RV32IFD-NEXT: .LBB38_4: # %entry
+; RV32IFD-NEXT: li a0, 0
+; RV32IFD-NEXT: addi sp, sp, 16
+; RV32IFD-NEXT: ret
+;
+; RV64IFD-LABEL: ustest_f64i16_mm:
+; RV64IFD: # %bb.0: # %entry
+; RV64IFD-NEXT: fmv.d.x ft0, a0
+; RV64IFD-NEXT: fcvt.w.d a0, ft0, rtz
+; RV64IFD-NEXT: lui a1, 16
+; RV64IFD-NEXT: addiw a1, a1, -1
+; RV64IFD-NEXT: bge a0, a1, .LBB38_3
+; RV64IFD-NEXT: # %bb.1: # %entry
+; RV64IFD-NEXT: blez a0, .LBB38_4
+; RV64IFD-NEXT: .LBB38_2: # %entry
+; RV64IFD-NEXT: ret
+; RV64IFD-NEXT: .LBB38_3: # %entry
+; RV64IFD-NEXT: mv a0, a1
+; RV64IFD-NEXT: bgtz a0, .LBB38_2
+; RV64IFD-NEXT: .LBB38_4: # %entry
+; RV64IFD-NEXT: li a0, 0
+; RV64IFD-NEXT: ret
+entry:
+ %conv = fptosi double %x to i32
+ %spec.store.select = call i32 @llvm.smin.i32(i32 %conv, i32 65535)
+ %spec.store.select7 = call i32 @llvm.smax.i32(i32 %spec.store.select, i32 0)
+ %conv6 = trunc i32 %spec.store.select7 to i16
+ ret i16 %conv6
+}
+
+define i16 @stest_f32i16_mm(float %x) {
+; RV32-LABEL: stest_f32i16_mm:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: fmv.w.x ft0, a0
+; RV32-NEXT: fcvt.w.s a0, ft0, rtz
+; RV32-NEXT: lui a1, 8
+; RV32-NEXT: addi a1, a1, -1
+; RV32-NEXT: bge a0, a1, .LBB39_3
+; RV32-NEXT: # %bb.1: # %entry
+; RV32-NEXT: lui a1, 1048568
+; RV32-NEXT: bge a1, a0, .LBB39_4
+; RV32-NEXT: .LBB39_2: # %entry
+; RV32-NEXT: ret
+; RV32-NEXT: .LBB39_3: # %entry
+; RV32-NEXT: mv a0, a1
+; RV32-NEXT: lui a1, 1048568
+; RV32-NEXT: blt a1, a0, .LBB39_2
+; RV32-NEXT: .LBB39_4: # %entry
+; RV32-NEXT: lui a0, 1048568
+; RV32-NEXT: ret
+;
+; RV64-LABEL: stest_f32i16_mm:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: fmv.w.x ft0, a0
+; RV64-NEXT: fcvt.w.s a0, ft0, rtz
+; RV64-NEXT: lui a1, 8
+; RV64-NEXT: addiw a1, a1, -1
+; RV64-NEXT: bge a0, a1, .LBB39_3
+; RV64-NEXT: # %bb.1: # %entry
+; RV64-NEXT: lui a1, 1048568
+; RV64-NEXT: bge a1, a0, .LBB39_4
+; RV64-NEXT: .LBB39_2: # %entry
+; RV64-NEXT: ret
+; RV64-NEXT: .LBB39_3: # %entry
+; RV64-NEXT: mv a0, a1
+; RV64-NEXT: lui a1, 1048568
+; RV64-NEXT: blt a1, a0, .LBB39_2
+; RV64-NEXT: .LBB39_4: # %entry
+; RV64-NEXT: lui a0, 1048568
+; RV64-NEXT: ret
+entry:
+ %conv = fptosi float %x to i32
+ %spec.store.select = call i32 @llvm.smin.i32(i32 %conv, i32 32767)
+ %spec.store.select7 = call i32 @llvm.smax.i32(i32 %spec.store.select, i32 -32768)
+ %conv6 = trunc i32 %spec.store.select7 to i16
+ ret i16 %conv6
+}
+
+define i16 @utest_f32i16_mm(float %x) {
+; RV32-LABEL: utest_f32i16_mm:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: fmv.w.x ft0, a0
+; RV32-NEXT: fcvt.wu.s a0, ft0, rtz
+; RV32-NEXT: lui a1, 16
+; RV32-NEXT: addi a1, a1, -1
+; RV32-NEXT: bltu a0, a1, .LBB40_2
+; RV32-NEXT: # %bb.1: # %entry
+; RV32-NEXT: mv a0, a1
+; RV32-NEXT: .LBB40_2: # %entry
+; RV32-NEXT: ret
+;
+; RV64-LABEL: utest_f32i16_mm:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: fmv.w.x ft0, a0
+; RV64-NEXT: fcvt.wu.s a0, ft0, rtz
+; RV64-NEXT: lui a1, 16
+; RV64-NEXT: addiw a1, a1, -1
+; RV64-NEXT: bltu a0, a1, .LBB40_2
+; RV64-NEXT: # %bb.1: # %entry
+; RV64-NEXT: mv a0, a1
+; RV64-NEXT: .LBB40_2: # %entry
+; RV64-NEXT: ret
+entry:
+ %conv = fptoui float %x to i32
+ %spec.store.select = call i32 @llvm.umin.i32(i32 %conv, i32 65535)
+ %conv6 = trunc i32 %spec.store.select to i16
+ ret i16 %conv6
+}
+
+define i16 @ustest_f32i16_mm(float %x) {
+; RV32-LABEL: ustest_f32i16_mm:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: fmv.w.x ft0, a0
+; RV32-NEXT: fcvt.w.s a0, ft0, rtz
+; RV32-NEXT: lui a1, 16
+; RV32-NEXT: addi a1, a1, -1
+; RV32-NEXT: bge a0, a1, .LBB41_3
+; RV32-NEXT: # %bb.1: # %entry
+; RV32-NEXT: blez a0, .LBB41_4
+; RV32-NEXT: .LBB41_2: # %entry
+; RV32-NEXT: ret
+; RV32-NEXT: .LBB41_3: # %entry
+; RV32-NEXT: mv a0, a1
+; RV32-NEXT: bgtz a0, .LBB41_2
+; RV32-NEXT: .LBB41_4: # %entry
+; RV32-NEXT: li a0, 0
+; RV32-NEXT: ret
+;
+; RV64-LABEL: ustest_f32i16_mm:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: fmv.w.x ft0, a0
+; RV64-NEXT: fcvt.w.s a0, ft0, rtz
+; RV64-NEXT: lui a1, 16
+; RV64-NEXT: addiw a1, a1, -1
+; RV64-NEXT: bge a0, a1, .LBB41_3
+; RV64-NEXT: # %bb.1: # %entry
+; RV64-NEXT: blez a0, .LBB41_4
+; RV64-NEXT: .LBB41_2: # %entry
+; RV64-NEXT: ret
+; RV64-NEXT: .LBB41_3: # %entry
+; RV64-NEXT: mv a0, a1
+; RV64-NEXT: bgtz a0, .LBB41_2
+; RV64-NEXT: .LBB41_4: # %entry
+; RV64-NEXT: li a0, 0
+; RV64-NEXT: ret
+entry:
+ %conv = fptosi float %x to i32
+ %spec.store.select = call i32 @llvm.smin.i32(i32 %conv, i32 65535)
+ %spec.store.select7 = call i32 @llvm.smax.i32(i32 %spec.store.select, i32 0)
+ %conv6 = trunc i32 %spec.store.select7 to i16
+ ret i16 %conv6
+}
+
+define i16 @stest_f16i16_mm(half %x) {
+; RV32-LABEL: stest_f16i16_mm:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: call __gnu_h2f_ieee at plt
+; RV32-NEXT: fmv.w.x ft0, a0
+; RV32-NEXT: fcvt.w.s a0, ft0, rtz
+; RV32-NEXT: lui a1, 8
+; RV32-NEXT: addi a1, a1, -1
+; RV32-NEXT: blt a0, a1, .LBB42_2
+; RV32-NEXT: # %bb.1: # %entry
+; RV32-NEXT: mv a0, a1
+; RV32-NEXT: .LBB42_2: # %entry
+; RV32-NEXT: lui a1, 1048568
+; RV32-NEXT: blt a1, a0, .LBB42_4
+; RV32-NEXT: # %bb.3: # %entry
+; RV32-NEXT: lui a0, 1048568
+; RV32-NEXT: .LBB42_4: # %entry
+; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: stest_f16i16_mm:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: call __gnu_h2f_ieee at plt
+; RV64-NEXT: fmv.w.x ft0, a0
+; RV64-NEXT: fcvt.l.s a0, ft0, rtz
+; RV64-NEXT: lui a1, 8
+; RV64-NEXT: addiw a1, a1, -1
+; RV64-NEXT: blt a0, a1, .LBB42_2
+; RV64-NEXT: # %bb.1: # %entry
+; RV64-NEXT: mv a0, a1
+; RV64-NEXT: .LBB42_2: # %entry
+; RV64-NEXT: lui a1, 1048568
+; RV64-NEXT: blt a1, a0, .LBB42_4
+; RV64-NEXT: # %bb.3: # %entry
+; RV64-NEXT: lui a0, 1048568
+; RV64-NEXT: .LBB42_4: # %entry
+; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
+entry:
+ %conv = fptosi half %x to i32
+ %spec.store.select = call i32 @llvm.smin.i32(i32 %conv, i32 32767)
+ %spec.store.select7 = call i32 @llvm.smax.i32(i32 %spec.store.select, i32 -32768)
+ %conv6 = trunc i32 %spec.store.select7 to i16
+ ret i16 %conv6
+}
+
+define i16 @utesth_f16i16_mm(half %x) {
+; RV32-LABEL: utesth_f16i16_mm:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: call __gnu_h2f_ieee at plt
+; RV32-NEXT: fmv.w.x ft0, a0
+; RV32-NEXT: fcvt.wu.s a0, ft0, rtz
+; RV32-NEXT: lui a1, 16
+; RV32-NEXT: addi a1, a1, -1
+; RV32-NEXT: bltu a0, a1, .LBB43_2
+; RV32-NEXT: # %bb.1: # %entry
+; RV32-NEXT: mv a0, a1
+; RV32-NEXT: .LBB43_2: # %entry
+; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: utesth_f16i16_mm:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: call __gnu_h2f_ieee at plt
+; RV64-NEXT: fmv.w.x ft0, a0
+; RV64-NEXT: fcvt.lu.s a0, ft0, rtz
+; RV64-NEXT: sext.w a0, a0
+; RV64-NEXT: lui a1, 16
+; RV64-NEXT: addiw a1, a1, -1
+; RV64-NEXT: bltu a0, a1, .LBB43_2
+; RV64-NEXT: # %bb.1: # %entry
+; RV64-NEXT: mv a0, a1
+; RV64-NEXT: .LBB43_2: # %entry
+; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
+entry:
+ %conv = fptoui half %x to i32
+ %spec.store.select = call i32 @llvm.umin.i32(i32 %conv, i32 65535)
+ %conv6 = trunc i32 %spec.store.select to i16
+ ret i16 %conv6
+}
+
+define i16 @ustest_f16i16_mm(half %x) {
+; RV32-LABEL: ustest_f16i16_mm:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: call __gnu_h2f_ieee at plt
+; RV32-NEXT: fmv.w.x ft0, a0
+; RV32-NEXT: fcvt.w.s a0, ft0, rtz
+; RV32-NEXT: lui a1, 16
+; RV32-NEXT: addi a1, a1, -1
+; RV32-NEXT: blt a0, a1, .LBB44_2
+; RV32-NEXT: # %bb.1: # %entry
+; RV32-NEXT: mv a0, a1
+; RV32-NEXT: .LBB44_2: # %entry
+; RV32-NEXT: bgtz a0, .LBB44_4
+; RV32-NEXT: # %bb.3: # %entry
+; RV32-NEXT: li a0, 0
+; RV32-NEXT: .LBB44_4: # %entry
+; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: ustest_f16i16_mm:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: call __gnu_h2f_ieee at plt
+; RV64-NEXT: fmv.w.x ft0, a0
+; RV64-NEXT: fcvt.l.s a0, ft0, rtz
+; RV64-NEXT: lui a1, 16
+; RV64-NEXT: addiw a1, a1, -1
+; RV64-NEXT: blt a0, a1, .LBB44_2
+; RV64-NEXT: # %bb.1: # %entry
+; RV64-NEXT: mv a0, a1
+; RV64-NEXT: .LBB44_2: # %entry
+; RV64-NEXT: bgtz a0, .LBB44_4
+; RV64-NEXT: # %bb.3: # %entry
+; RV64-NEXT: li a0, 0
+; RV64-NEXT: .LBB44_4: # %entry
+; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
+entry:
+ %conv = fptosi half %x to i32
+ %spec.store.select = call i32 @llvm.smin.i32(i32 %conv, i32 65535)
+ %spec.store.select7 = call i32 @llvm.smax.i32(i32 %spec.store.select, i32 0)
+ %conv6 = trunc i32 %spec.store.select7 to i16
+ ret i16 %conv6
+}
+
+; i64 saturate
+
+define i64 @stest_f64i64_mm(double %x) {
+; RV32-LABEL: stest_f64i64_mm:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: addi sp, sp, -32
+; RV32-NEXT: .cfi_def_cfa_offset 32
+; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: mv a2, a1
+; RV32-NEXT: mv a1, a0
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: call __fixdfti at plt
+; RV32-NEXT: lw a5, 8(sp)
+; RV32-NEXT: lw a3, 20(sp)
+; RV32-NEXT: lw a1, 12(sp)
+; RV32-NEXT: li a6, -1
+; RV32-NEXT: mv a4, a5
+; RV32-NEXT: bltz a3, .LBB45_2
+; RV32-NEXT: # %bb.1: # %entry
+; RV32-NEXT: li a4, -1
+; RV32-NEXT: .LBB45_2: # %entry
+; RV32-NEXT: lui a7, 524288
+; RV32-NEXT: addi a2, a7, -1
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: bgeu a1, a2, .LBB45_19
+; RV32-NEXT: # %bb.3: # %entry
+; RV32-NEXT: lw t0, 16(sp)
+; RV32-NEXT: bne a1, a2, .LBB45_20
+; RV32-NEXT: .LBB45_4: # %entry
+; RV32-NEXT: or a0, t0, a3
+; RV32-NEXT: bnez a0, .LBB45_21
+; RV32-NEXT: .LBB45_5: # %entry
+; RV32-NEXT: mv a4, a1
+; RV32-NEXT: bgez a3, .LBB45_22
+; RV32-NEXT: .LBB45_6: # %entry
+; RV32-NEXT: bgeu a1, a2, .LBB45_23
+; RV32-NEXT: .LBB45_7: # %entry
+; RV32-NEXT: bnez a0, .LBB45_24
+; RV32-NEXT: .LBB45_8: # %entry
+; RV32-NEXT: li a2, 0
+; RV32-NEXT: bnez a3, .LBB45_25
+; RV32-NEXT: .LBB45_9: # %entry
+; RV32-NEXT: bgez a3, .LBB45_26
+; RV32-NEXT: .LBB45_10: # %entry
+; RV32-NEXT: mv a4, a5
+; RV32-NEXT: bgeu a7, a1, .LBB45_27
+; RV32-NEXT: .LBB45_11: # %entry
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: bne a1, a7, .LBB45_28
+; RV32-NEXT: .LBB45_12: # %entry
+; RV32-NEXT: bltz a3, .LBB45_29
+; RV32-NEXT: .LBB45_13: # %entry
+; RV32-NEXT: and a2, a2, a3
+; RV32-NEXT: bne a2, a6, .LBB45_30
+; RV32-NEXT: .LBB45_14: # %entry
+; RV32-NEXT: mv a4, a1
+; RV32-NEXT: bltz a3, .LBB45_31
+; RV32-NEXT: .LBB45_15: # %entry
+; RV32-NEXT: bgeu a7, a1, .LBB45_32
+; RV32-NEXT: .LBB45_16: # %entry
+; RV32-NEXT: beq a2, a6, .LBB45_18
+; RV32-NEXT: .LBB45_17: # %entry
+; RV32-NEXT: mv a1, a4
+; RV32-NEXT: .LBB45_18: # %entry
+; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: ret
+; RV32-NEXT: .LBB45_19: # %entry
+; RV32-NEXT: li a0, -1
+; RV32-NEXT: lw t0, 16(sp)
+; RV32-NEXT: beq a1, a2, .LBB45_4
+; RV32-NEXT: .LBB45_20: # %entry
+; RV32-NEXT: mv a5, a0
+; RV32-NEXT: or a0, t0, a3
+; RV32-NEXT: beqz a0, .LBB45_5
+; RV32-NEXT: .LBB45_21: # %entry
+; RV32-NEXT: mv a5, a4
+; RV32-NEXT: mv a4, a1
+; RV32-NEXT: bltz a3, .LBB45_6
+; RV32-NEXT: .LBB45_22: # %entry
+; RV32-NEXT: mv a4, a2
+; RV32-NEXT: bltu a1, a2, .LBB45_7
+; RV32-NEXT: .LBB45_23: # %entry
+; RV32-NEXT: mv a1, a2
+; RV32-NEXT: beqz a0, .LBB45_8
+; RV32-NEXT: .LBB45_24: # %entry
+; RV32-NEXT: mv a1, a4
+; RV32-NEXT: li a2, 0
+; RV32-NEXT: beqz a3, .LBB45_9
+; RV32-NEXT: .LBB45_25: # %entry
+; RV32-NEXT: srai a0, a3, 31
+; RV32-NEXT: and a2, a0, t0
+; RV32-NEXT: bltz a3, .LBB45_10
+; RV32-NEXT: .LBB45_26: # %entry
+; RV32-NEXT: li a3, 0
+; RV32-NEXT: mv a4, a5
+; RV32-NEXT: bltu a7, a1, .LBB45_11
+; RV32-NEXT: .LBB45_27: # %entry
+; RV32-NEXT: li a4, 0
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: beq a1, a7, .LBB45_12
+; RV32-NEXT: .LBB45_28: # %entry
+; RV32-NEXT: mv a0, a4
+; RV32-NEXT: bgez a3, .LBB45_13
+; RV32-NEXT: .LBB45_29: # %entry
+; RV32-NEXT: li a5, 0
+; RV32-NEXT: and a2, a2, a3
+; RV32-NEXT: beq a2, a6, .LBB45_14
+; RV32-NEXT: .LBB45_30: # %entry
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: mv a4, a1
+; RV32-NEXT: bgez a3, .LBB45_15
+; RV32-NEXT: .LBB45_31: # %entry
+; RV32-NEXT: lui a4, 524288
+; RV32-NEXT: bltu a7, a1, .LBB45_16
+; RV32-NEXT: .LBB45_32: # %entry
+; RV32-NEXT: lui a1, 524288
+; RV32-NEXT: bne a2, a6, .LBB45_17
+; RV32-NEXT: j .LBB45_18
+;
+; RV64-LABEL: stest_f64i64_mm:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: call __fixdfti at plt
+; RV64-NEXT: li a2, -1
+; RV64-NEXT: srli a4, a2, 1
+; RV64-NEXT: mv a3, a0
+; RV64-NEXT: bgez a1, .LBB45_10
+; RV64-NEXT: # %bb.1: # %entry
+; RV64-NEXT: bgeu a0, a4, .LBB45_11
+; RV64-NEXT: .LBB45_2: # %entry
+; RV64-NEXT: bnez a1, .LBB45_12
+; RV64-NEXT: .LBB45_3: # %entry
+; RV64-NEXT: bltz a1, .LBB45_5
+; RV64-NEXT: .LBB45_4: # %entry
+; RV64-NEXT: li a1, 0
+; RV64-NEXT: .LBB45_5: # %entry
+; RV64-NEXT: slli a4, a2, 63
+; RV64-NEXT: mv a3, a0
+; RV64-NEXT: bltz a1, .LBB45_13
+; RV64-NEXT: # %bb.6: # %entry
+; RV64-NEXT: bgeu a4, a0, .LBB45_14
+; RV64-NEXT: .LBB45_7: # %entry
+; RV64-NEXT: beq a1, a2, .LBB45_9
+; RV64-NEXT: .LBB45_8: # %entry
+; RV64-NEXT: mv a0, a3
+; RV64-NEXT: .LBB45_9: # %entry
+; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
+; RV64-NEXT: .LBB45_10: # %entry
+; RV64-NEXT: mv a3, a4
+; RV64-NEXT: bltu a0, a4, .LBB45_2
+; RV64-NEXT: .LBB45_11: # %entry
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: beqz a1, .LBB45_3
+; RV64-NEXT: .LBB45_12: # %entry
+; RV64-NEXT: mv a0, a3
+; RV64-NEXT: bgez a1, .LBB45_4
+; RV64-NEXT: j .LBB45_5
+; RV64-NEXT: .LBB45_13: # %entry
+; RV64-NEXT: mv a3, a4
+; RV64-NEXT: bltu a4, a0, .LBB45_7
+; RV64-NEXT: .LBB45_14: # %entry
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: bne a1, a2, .LBB45_8
+; RV64-NEXT: j .LBB45_9
+entry:
+ %conv = fptosi double %x to i128
+ %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 9223372036854775807)
+ %spec.store.select7 = call i128 @llvm.smax.i128(i128 %spec.store.select, i128 -9223372036854775808)
+ %conv6 = trunc i128 %spec.store.select7 to i64
+ ret i64 %conv6
+}
+
+define i64 @utest_f64i64_mm(double %x) {
+; RV32-LABEL: utest_f64i64_mm:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: addi sp, sp, -32
+; RV32-NEXT: .cfi_def_cfa_offset 32
+; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: mv a2, a1
+; RV32-NEXT: mv a1, a0
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: call __fixunsdfti at plt
+; RV32-NEXT: lw a0, 20(sp)
+; RV32-NEXT: lw a3, 16(sp)
+; RV32-NEXT: li a1, 0
+; RV32-NEXT: beqz a0, .LBB46_3
+; RV32-NEXT: # %bb.1: # %entry
+; RV32-NEXT: mv a2, a1
+; RV32-NEXT: beq a2, a1, .LBB46_4
+; RV32-NEXT: .LBB46_2:
+; RV32-NEXT: lw a4, 8(sp)
+; RV32-NEXT: j .LBB46_5
+; RV32-NEXT: .LBB46_3:
+; RV32-NEXT: seqz a2, a3
+; RV32-NEXT: bne a2, a1, .LBB46_2
+; RV32-NEXT: .LBB46_4: # %entry
+; RV32-NEXT: mv a4, a1
+; RV32-NEXT: .LBB46_5: # %entry
+; RV32-NEXT: xori a3, a3, 1
+; RV32-NEXT: or a3, a3, a0
+; RV32-NEXT: mv a0, a1
+; RV32-NEXT: beq a3, a1, .LBB46_7
+; RV32-NEXT: # %bb.6: # %entry
+; RV32-NEXT: mv a0, a4
+; RV32-NEXT: .LBB46_7: # %entry
+; RV32-NEXT: bne a2, a1, .LBB46_9
+; RV32-NEXT: # %bb.8: # %entry
+; RV32-NEXT: mv a2, a1
+; RV32-NEXT: bne a3, a1, .LBB46_10
+; RV32-NEXT: j .LBB46_11
+; RV32-NEXT: .LBB46_9:
+; RV32-NEXT: lw a2, 12(sp)
+; RV32-NEXT: beq a3, a1, .LBB46_11
+; RV32-NEXT: .LBB46_10: # %entry
+; RV32-NEXT: mv a1, a2
+; RV32-NEXT: .LBB46_11: # %entry
+; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: ret
+;
+; RV64-LABEL: utest_f64i64_mm:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: call __fixunsdfti at plt
+; RV64-NEXT: mv a2, a0
+; RV64-NEXT: li a0, 0
+; RV64-NEXT: beqz a1, .LBB46_2
+; RV64-NEXT: # %bb.1: # %entry
+; RV64-NEXT: mv a2, a0
+; RV64-NEXT: .LBB46_2: # %entry
+; RV64-NEXT: li a3, 1
+; RV64-NEXT: beq a1, a3, .LBB46_4
+; RV64-NEXT: # %bb.3: # %entry
+; RV64-NEXT: mv a0, a2
+; RV64-NEXT: .LBB46_4: # %entry
+; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
+entry:
+ %conv = fptoui double %x to i128
+ %spec.store.select = call i128 @llvm.umin.i128(i128 %conv, i128 18446744073709551616)
+ %conv6 = trunc i128 %spec.store.select to i64
+ ret i64 %conv6
+}
+
+define i64 @ustest_f64i64_mm(double %x) {
+; RV32-LABEL: ustest_f64i64_mm:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: addi sp, sp, -32
+; RV32-NEXT: .cfi_def_cfa_offset 32
+; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: mv a2, a1
+; RV32-NEXT: mv a1, a0
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: call __fixdfti at plt
+; RV32-NEXT: lw a0, 16(sp)
+; RV32-NEXT: lw a2, 20(sp)
+; RV32-NEXT: li a1, 1
+; RV32-NEXT: mv a4, a0
+; RV32-NEXT: bgez a2, .LBB47_5
+; RV32-NEXT: # %bb.1: # %entry
+; RV32-NEXT: mv a3, a0
+; RV32-NEXT: bgeu a0, a1, .LBB47_6
+; RV32-NEXT: .LBB47_2: # %entry
+; RV32-NEXT: beqz a2, .LBB47_7
+; RV32-NEXT: .LBB47_3: # %entry
+; RV32-NEXT: slti a1, a2, 0
+; RV32-NEXT: mv a3, a4
+; RV32-NEXT: beqz a1, .LBB47_8
+; RV32-NEXT: .LBB47_4:
+; RV32-NEXT: lw a5, 8(sp)
+; RV32-NEXT: j .LBB47_9
+; RV32-NEXT: .LBB47_5: # %entry
+; RV32-NEXT: li a4, 1
+; RV32-NEXT: mv a3, a0
+; RV32-NEXT: bltu a0, a1, .LBB47_2
+; RV32-NEXT: .LBB47_6: # %entry
+; RV32-NEXT: li a3, 1
+; RV32-NEXT: bnez a2, .LBB47_3
+; RV32-NEXT: .LBB47_7:
+; RV32-NEXT: seqz a1, a0
+; RV32-NEXT: bnez a1, .LBB47_4
+; RV32-NEXT: .LBB47_8: # %entry
+; RV32-NEXT: li a5, 0
+; RV32-NEXT: .LBB47_9: # %entry
+; RV32-NEXT: xori a0, a0, 1
+; RV32-NEXT: or a0, a0, a2
+; RV32-NEXT: li a4, 0
+; RV32-NEXT: beqz a0, .LBB47_11
+; RV32-NEXT: # %bb.10: # %entry
+; RV32-NEXT: mv a4, a5
+; RV32-NEXT: .LBB47_11: # %entry
+; RV32-NEXT: bnez a1, .LBB47_13
+; RV32-NEXT: # %bb.12: # %entry
+; RV32-NEXT: li a5, 0
+; RV32-NEXT: li a1, 0
+; RV32-NEXT: bnez a0, .LBB47_14
+; RV32-NEXT: j .LBB47_15
+; RV32-NEXT: .LBB47_13:
+; RV32-NEXT: lw a5, 12(sp)
+; RV32-NEXT: li a1, 0
+; RV32-NEXT: beqz a0, .LBB47_15
+; RV32-NEXT: .LBB47_14: # %entry
+; RV32-NEXT: mv a1, a5
+; RV32-NEXT: .LBB47_15: # %entry
+; RV32-NEXT: bgez a2, .LBB47_20
+; RV32-NEXT: # %bb.16: # %entry
+; RV32-NEXT: mv a5, a4
+; RV32-NEXT: beqz a1, .LBB47_21
+; RV32-NEXT: .LBB47_17: # %entry
+; RV32-NEXT: mv a0, a4
+; RV32-NEXT: bnez a1, .LBB47_22
+; RV32-NEXT: .LBB47_18: # %entry
+; RV32-NEXT: beqz a2, .LBB47_23
+; RV32-NEXT: .LBB47_19: # %entry
+; RV32-NEXT: sgtz a5, a2
+; RV32-NEXT: beqz a5, .LBB47_24
+; RV32-NEXT: j .LBB47_25
+; RV32-NEXT: .LBB47_20: # %entry
+; RV32-NEXT: li a2, 0
+; RV32-NEXT: mv a5, a4
+; RV32-NEXT: bnez a1, .LBB47_17
+; RV32-NEXT: .LBB47_21: # %entry
+; RV32-NEXT: li a5, 0
+; RV32-NEXT: mv a0, a4
+; RV32-NEXT: beqz a1, .LBB47_18
+; RV32-NEXT: .LBB47_22: # %entry
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: bnez a2, .LBB47_19
+; RV32-NEXT: .LBB47_23:
+; RV32-NEXT: snez a5, a3
+; RV32-NEXT: bnez a5, .LBB47_25
+; RV32-NEXT: .LBB47_24: # %entry
+; RV32-NEXT: li a4, 0
+; RV32-NEXT: .LBB47_25: # %entry
+; RV32-NEXT: or a2, a3, a2
+; RV32-NEXT: bnez a2, .LBB47_30
+; RV32-NEXT: # %bb.26: # %entry
+; RV32-NEXT: mv a3, a1
+; RV32-NEXT: beqz a5, .LBB47_31
+; RV32-NEXT: .LBB47_27: # %entry
+; RV32-NEXT: beqz a2, .LBB47_29
+; RV32-NEXT: .LBB47_28: # %entry
+; RV32-NEXT: mv a1, a3
+; RV32-NEXT: .LBB47_29: # %entry
+; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: ret
+; RV32-NEXT: .LBB47_30: # %entry
+; RV32-NEXT: mv a0, a4
+; RV32-NEXT: mv a3, a1
+; RV32-NEXT: bnez a5, .LBB47_27
+; RV32-NEXT: .LBB47_31: # %entry
+; RV32-NEXT: li a3, 0
+; RV32-NEXT: bnez a2, .LBB47_28
+; RV32-NEXT: j .LBB47_29
+;
+; RV64-LABEL: ustest_f64i64_mm:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: call __fixdfti at plt
+; RV64-NEXT: mv a2, a0
+; RV64-NEXT: li a4, 1
+; RV64-NEXT: mv a3, a1
+; RV64-NEXT: bgtz a1, .LBB47_6
+; RV64-NEXT: # %bb.1: # %entry
+; RV64-NEXT: li a0, 0
+; RV64-NEXT: bne a1, a4, .LBB47_7
+; RV64-NEXT: .LBB47_2: # %entry
+; RV64-NEXT: mv a1, a0
+; RV64-NEXT: blez a3, .LBB47_8
+; RV64-NEXT: .LBB47_3: # %entry
+; RV64-NEXT: beqz a3, .LBB47_5
+; RV64-NEXT: .LBB47_4: # %entry
+; RV64-NEXT: mv a0, a1
+; RV64-NEXT: .LBB47_5: # %entry
+; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
+; RV64-NEXT: .LBB47_6: # %entry
+; RV64-NEXT: li a2, 0
+; RV64-NEXT: li a3, 1
+; RV64-NEXT: li a0, 0
+; RV64-NEXT: beq a1, a4, .LBB47_2
+; RV64-NEXT: .LBB47_7: # %entry
+; RV64-NEXT: mv a0, a2
+; RV64-NEXT: mv a1, a0
+; RV64-NEXT: bgtz a3, .LBB47_3
+; RV64-NEXT: .LBB47_8: # %entry
+; RV64-NEXT: li a1, 0
+; RV64-NEXT: bnez a3, .LBB47_4
+; RV64-NEXT: j .LBB47_5
+entry:
+ %conv = fptosi double %x to i128
+ %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 18446744073709551616)
+ %spec.store.select7 = call i128 @llvm.smax.i128(i128 %spec.store.select, i128 0)
+ %conv6 = trunc i128 %spec.store.select7 to i64
+ ret i64 %conv6
+}
+
+; Signed saturating float -> i64: convert to i128 (llc lowers this to the
+; __fixsfti libcall), clamp to [INT64_MIN, INT64_MAX] with llvm.smin/llvm.smax,
+; then truncate. The RV32 path works on the i128 result in four stack words;
+; the RV64 path builds the i64 bounds from -1 (srli/slli).
+; NOTE: the RV32/RV64 CHECK lines are autogenerated by
+; utils/update_llc_test_checks.py -- regenerate them, never hand-edit.
+define i64 @stest_f32i64_mm(float %x) {
+; RV32-LABEL: stest_f32i64_mm:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: addi sp, sp, -32
+; RV32-NEXT: .cfi_def_cfa_offset 32
+; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: mv a1, a0
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: call __fixsfti at plt
+; RV32-NEXT: lw a5, 8(sp)
+; RV32-NEXT: lw a3, 20(sp)
+; RV32-NEXT: lw a1, 12(sp)
+; RV32-NEXT: li a6, -1
+; RV32-NEXT: mv a4, a5
+; RV32-NEXT: bltz a3, .LBB48_2
+; RV32-NEXT: # %bb.1: # %entry
+; RV32-NEXT: li a4, -1
+; RV32-NEXT: .LBB48_2: # %entry
+; RV32-NEXT: lui a7, 524288
+; RV32-NEXT: addi a2, a7, -1
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: bgeu a1, a2, .LBB48_19
+; RV32-NEXT: # %bb.3: # %entry
+; RV32-NEXT: lw t0, 16(sp)
+; RV32-NEXT: bne a1, a2, .LBB48_20
+; RV32-NEXT: .LBB48_4: # %entry
+; RV32-NEXT: or a0, t0, a3
+; RV32-NEXT: bnez a0, .LBB48_21
+; RV32-NEXT: .LBB48_5: # %entry
+; RV32-NEXT: mv a4, a1
+; RV32-NEXT: bgez a3, .LBB48_22
+; RV32-NEXT: .LBB48_6: # %entry
+; RV32-NEXT: bgeu a1, a2, .LBB48_23
+; RV32-NEXT: .LBB48_7: # %entry
+; RV32-NEXT: bnez a0, .LBB48_24
+; RV32-NEXT: .LBB48_8: # %entry
+; RV32-NEXT: li a2, 0
+; RV32-NEXT: bnez a3, .LBB48_25
+; RV32-NEXT: .LBB48_9: # %entry
+; RV32-NEXT: bgez a3, .LBB48_26
+; RV32-NEXT: .LBB48_10: # %entry
+; RV32-NEXT: mv a4, a5
+; RV32-NEXT: bgeu a7, a1, .LBB48_27
+; RV32-NEXT: .LBB48_11: # %entry
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: bne a1, a7, .LBB48_28
+; RV32-NEXT: .LBB48_12: # %entry
+; RV32-NEXT: bltz a3, .LBB48_29
+; RV32-NEXT: .LBB48_13: # %entry
+; RV32-NEXT: and a2, a2, a3
+; RV32-NEXT: bne a2, a6, .LBB48_30
+; RV32-NEXT: .LBB48_14: # %entry
+; RV32-NEXT: mv a4, a1
+; RV32-NEXT: bltz a3, .LBB48_31
+; RV32-NEXT: .LBB48_15: # %entry
+; RV32-NEXT: bgeu a7, a1, .LBB48_32
+; RV32-NEXT: .LBB48_16: # %entry
+; RV32-NEXT: beq a2, a6, .LBB48_18
+; RV32-NEXT: .LBB48_17: # %entry
+; RV32-NEXT: mv a1, a4
+; RV32-NEXT: .LBB48_18: # %entry
+; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: ret
+; RV32-NEXT: .LBB48_19: # %entry
+; RV32-NEXT: li a0, -1
+; RV32-NEXT: lw t0, 16(sp)
+; RV32-NEXT: beq a1, a2, .LBB48_4
+; RV32-NEXT: .LBB48_20: # %entry
+; RV32-NEXT: mv a5, a0
+; RV32-NEXT: or a0, t0, a3
+; RV32-NEXT: beqz a0, .LBB48_5
+; RV32-NEXT: .LBB48_21: # %entry
+; RV32-NEXT: mv a5, a4
+; RV32-NEXT: mv a4, a1
+; RV32-NEXT: bltz a3, .LBB48_6
+; RV32-NEXT: .LBB48_22: # %entry
+; RV32-NEXT: mv a4, a2
+; RV32-NEXT: bltu a1, a2, .LBB48_7
+; RV32-NEXT: .LBB48_23: # %entry
+; RV32-NEXT: mv a1, a2
+; RV32-NEXT: beqz a0, .LBB48_8
+; RV32-NEXT: .LBB48_24: # %entry
+; RV32-NEXT: mv a1, a4
+; RV32-NEXT: li a2, 0
+; RV32-NEXT: beqz a3, .LBB48_9
+; RV32-NEXT: .LBB48_25: # %entry
+; RV32-NEXT: srai a0, a3, 31
+; RV32-NEXT: and a2, a0, t0
+; RV32-NEXT: bltz a3, .LBB48_10
+; RV32-NEXT: .LBB48_26: # %entry
+; RV32-NEXT: li a3, 0
+; RV32-NEXT: mv a4, a5
+; RV32-NEXT: bltu a7, a1, .LBB48_11
+; RV32-NEXT: .LBB48_27: # %entry
+; RV32-NEXT: li a4, 0
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: beq a1, a7, .LBB48_12
+; RV32-NEXT: .LBB48_28: # %entry
+; RV32-NEXT: mv a0, a4
+; RV32-NEXT: bgez a3, .LBB48_13
+; RV32-NEXT: .LBB48_29: # %entry
+; RV32-NEXT: li a5, 0
+; RV32-NEXT: and a2, a2, a3
+; RV32-NEXT: beq a2, a6, .LBB48_14
+; RV32-NEXT: .LBB48_30: # %entry
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: mv a4, a1
+; RV32-NEXT: bgez a3, .LBB48_15
+; RV32-NEXT: .LBB48_31: # %entry
+; RV32-NEXT: lui a4, 524288
+; RV32-NEXT: bltu a7, a1, .LBB48_16
+; RV32-NEXT: .LBB48_32: # %entry
+; RV32-NEXT: lui a1, 524288
+; RV32-NEXT: bne a2, a6, .LBB48_17
+; RV32-NEXT: j .LBB48_18
+;
+; RV64-LABEL: stest_f32i64_mm:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: call __fixsfti at plt
+; RV64-NEXT: li a2, -1
+; RV64-NEXT: srli a4, a2, 1
+; RV64-NEXT: mv a3, a0
+; RV64-NEXT: bgez a1, .LBB48_10
+; RV64-NEXT: # %bb.1: # %entry
+; RV64-NEXT: bgeu a0, a4, .LBB48_11
+; RV64-NEXT: .LBB48_2: # %entry
+; RV64-NEXT: bnez a1, .LBB48_12
+; RV64-NEXT: .LBB48_3: # %entry
+; RV64-NEXT: bltz a1, .LBB48_5
+; RV64-NEXT: .LBB48_4: # %entry
+; RV64-NEXT: li a1, 0
+; RV64-NEXT: .LBB48_5: # %entry
+; RV64-NEXT: slli a4, a2, 63
+; RV64-NEXT: mv a3, a0
+; RV64-NEXT: bltz a1, .LBB48_13
+; RV64-NEXT: # %bb.6: # %entry
+; RV64-NEXT: bgeu a4, a0, .LBB48_14
+; RV64-NEXT: .LBB48_7: # %entry
+; RV64-NEXT: beq a1, a2, .LBB48_9
+; RV64-NEXT: .LBB48_8: # %entry
+; RV64-NEXT: mv a0, a3
+; RV64-NEXT: .LBB48_9: # %entry
+; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
+; RV64-NEXT: .LBB48_10: # %entry
+; RV64-NEXT: mv a3, a4
+; RV64-NEXT: bltu a0, a4, .LBB48_2
+; RV64-NEXT: .LBB48_11: # %entry
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: beqz a1, .LBB48_3
+; RV64-NEXT: .LBB48_12: # %entry
+; RV64-NEXT: mv a0, a3
+; RV64-NEXT: bgez a1, .LBB48_4
+; RV64-NEXT: j .LBB48_5
+; RV64-NEXT: .LBB48_13: # %entry
+; RV64-NEXT: mv a3, a4
+; RV64-NEXT: bltu a4, a0, .LBB48_7
+; RV64-NEXT: .LBB48_14: # %entry
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: bne a1, a2, .LBB48_8
+; RV64-NEXT: j .LBB48_9
+entry:
+ %conv = fptosi float %x to i128
+ %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 9223372036854775807)
+ %spec.store.select7 = call i128 @llvm.smax.i128(i128 %spec.store.select, i128 -9223372036854775808)
+ %conv6 = trunc i128 %spec.store.select7 to i64
+ ret i64 %conv6
+}
+
+; Unsigned saturating float -> i64: convert to unsigned i128 (__fixunssfti
+; libcall) and clamp with llvm.umin before truncating.
+; NOTE(review): the umin bound is 18446744073709551616 (2^64), one above
+; u64 max (2^64-1), so this does not fully saturate to u64 -- presumably a
+; constant slip in the test generator; confirm against the intended C source.
+; CHECK lines are autogenerated by update_llc_test_checks.py -- do not
+; hand-edit; regenerate instead.
+define i64 @utest_f32i64_mm(float %x) {
+; RV32-LABEL: utest_f32i64_mm:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: addi sp, sp, -32
+; RV32-NEXT: .cfi_def_cfa_offset 32
+; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: mv a1, a0
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: call __fixunssfti at plt
+; RV32-NEXT: lw a0, 20(sp)
+; RV32-NEXT: lw a3, 16(sp)
+; RV32-NEXT: li a1, 0
+; RV32-NEXT: beqz a0, .LBB49_3
+; RV32-NEXT: # %bb.1: # %entry
+; RV32-NEXT: mv a2, a1
+; RV32-NEXT: beq a2, a1, .LBB49_4
+; RV32-NEXT: .LBB49_2:
+; RV32-NEXT: lw a4, 8(sp)
+; RV32-NEXT: j .LBB49_5
+; RV32-NEXT: .LBB49_3:
+; RV32-NEXT: seqz a2, a3
+; RV32-NEXT: bne a2, a1, .LBB49_2
+; RV32-NEXT: .LBB49_4: # %entry
+; RV32-NEXT: mv a4, a1
+; RV32-NEXT: .LBB49_5: # %entry
+; RV32-NEXT: xori a3, a3, 1
+; RV32-NEXT: or a3, a3, a0
+; RV32-NEXT: mv a0, a1
+; RV32-NEXT: beq a3, a1, .LBB49_7
+; RV32-NEXT: # %bb.6: # %entry
+; RV32-NEXT: mv a0, a4
+; RV32-NEXT: .LBB49_7: # %entry
+; RV32-NEXT: bne a2, a1, .LBB49_9
+; RV32-NEXT: # %bb.8: # %entry
+; RV32-NEXT: mv a2, a1
+; RV32-NEXT: bne a3, a1, .LBB49_10
+; RV32-NEXT: j .LBB49_11
+; RV32-NEXT: .LBB49_9:
+; RV32-NEXT: lw a2, 12(sp)
+; RV32-NEXT: beq a3, a1, .LBB49_11
+; RV32-NEXT: .LBB49_10: # %entry
+; RV32-NEXT: mv a1, a2
+; RV32-NEXT: .LBB49_11: # %entry
+; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: ret
+;
+; RV64-LABEL: utest_f32i64_mm:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: call __fixunssfti at plt
+; RV64-NEXT: mv a2, a0
+; RV64-NEXT: li a0, 0
+; RV64-NEXT: beqz a1, .LBB49_2
+; RV64-NEXT: # %bb.1: # %entry
+; RV64-NEXT: mv a2, a0
+; RV64-NEXT: .LBB49_2: # %entry
+; RV64-NEXT: li a3, 1
+; RV64-NEXT: beq a1, a3, .LBB49_4
+; RV64-NEXT: # %bb.3: # %entry
+; RV64-NEXT: mv a0, a2
+; RV64-NEXT: .LBB49_4: # %entry
+; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
+entry:
+ %conv = fptoui float %x to i128
+ %spec.store.select = call i128 @llvm.umin.i128(i128 %conv, i128 18446744073709551616)
+ %conv6 = trunc i128 %spec.store.select to i64
+ ret i64 %conv6
+}
+
+; Unsigned-range saturating *signed* float -> i64: signed convert to i128
+; (__fixsfti libcall), clamp above with llvm.smin and below at zero with
+; llvm.smax, then truncate.
+; NOTE(review): the smin bound is 18446744073709551616 (2^64), one above
+; u64 max -- same suspect constant as utest_f32i64_mm; verify intent.
+; CHECK lines are autogenerated by update_llc_test_checks.py -- regenerate,
+; never hand-edit.
+define i64 @ustest_f32i64_mm(float %x) {
+; RV32-LABEL: ustest_f32i64_mm:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: addi sp, sp, -32
+; RV32-NEXT: .cfi_def_cfa_offset 32
+; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: mv a1, a0
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: call __fixsfti at plt
+; RV32-NEXT: lw a0, 16(sp)
+; RV32-NEXT: lw a2, 20(sp)
+; RV32-NEXT: li a1, 1
+; RV32-NEXT: mv a4, a0
+; RV32-NEXT: bgez a2, .LBB50_5
+; RV32-NEXT: # %bb.1: # %entry
+; RV32-NEXT: mv a3, a0
+; RV32-NEXT: bgeu a0, a1, .LBB50_6
+; RV32-NEXT: .LBB50_2: # %entry
+; RV32-NEXT: beqz a2, .LBB50_7
+; RV32-NEXT: .LBB50_3: # %entry
+; RV32-NEXT: slti a1, a2, 0
+; RV32-NEXT: mv a3, a4
+; RV32-NEXT: beqz a1, .LBB50_8
+; RV32-NEXT: .LBB50_4:
+; RV32-NEXT: lw a5, 8(sp)
+; RV32-NEXT: j .LBB50_9
+; RV32-NEXT: .LBB50_5: # %entry
+; RV32-NEXT: li a4, 1
+; RV32-NEXT: mv a3, a0
+; RV32-NEXT: bltu a0, a1, .LBB50_2
+; RV32-NEXT: .LBB50_6: # %entry
+; RV32-NEXT: li a3, 1
+; RV32-NEXT: bnez a2, .LBB50_3
+; RV32-NEXT: .LBB50_7:
+; RV32-NEXT: seqz a1, a0
+; RV32-NEXT: bnez a1, .LBB50_4
+; RV32-NEXT: .LBB50_8: # %entry
+; RV32-NEXT: li a5, 0
+; RV32-NEXT: .LBB50_9: # %entry
+; RV32-NEXT: xori a0, a0, 1
+; RV32-NEXT: or a0, a0, a2
+; RV32-NEXT: li a4, 0
+; RV32-NEXT: beqz a0, .LBB50_11
+; RV32-NEXT: # %bb.10: # %entry
+; RV32-NEXT: mv a4, a5
+; RV32-NEXT: .LBB50_11: # %entry
+; RV32-NEXT: bnez a1, .LBB50_13
+; RV32-NEXT: # %bb.12: # %entry
+; RV32-NEXT: li a5, 0
+; RV32-NEXT: li a1, 0
+; RV32-NEXT: bnez a0, .LBB50_14
+; RV32-NEXT: j .LBB50_15
+; RV32-NEXT: .LBB50_13:
+; RV32-NEXT: lw a5, 12(sp)
+; RV32-NEXT: li a1, 0
+; RV32-NEXT: beqz a0, .LBB50_15
+; RV32-NEXT: .LBB50_14: # %entry
+; RV32-NEXT: mv a1, a5
+; RV32-NEXT: .LBB50_15: # %entry
+; RV32-NEXT: bgez a2, .LBB50_20
+; RV32-NEXT: # %bb.16: # %entry
+; RV32-NEXT: mv a5, a4
+; RV32-NEXT: beqz a1, .LBB50_21
+; RV32-NEXT: .LBB50_17: # %entry
+; RV32-NEXT: mv a0, a4
+; RV32-NEXT: bnez a1, .LBB50_22
+; RV32-NEXT: .LBB50_18: # %entry
+; RV32-NEXT: beqz a2, .LBB50_23
+; RV32-NEXT: .LBB50_19: # %entry
+; RV32-NEXT: sgtz a5, a2
+; RV32-NEXT: beqz a5, .LBB50_24
+; RV32-NEXT: j .LBB50_25
+; RV32-NEXT: .LBB50_20: # %entry
+; RV32-NEXT: li a2, 0
+; RV32-NEXT: mv a5, a4
+; RV32-NEXT: bnez a1, .LBB50_17
+; RV32-NEXT: .LBB50_21: # %entry
+; RV32-NEXT: li a5, 0
+; RV32-NEXT: mv a0, a4
+; RV32-NEXT: beqz a1, .LBB50_18
+; RV32-NEXT: .LBB50_22: # %entry
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: bnez a2, .LBB50_19
+; RV32-NEXT: .LBB50_23:
+; RV32-NEXT: snez a5, a3
+; RV32-NEXT: bnez a5, .LBB50_25
+; RV32-NEXT: .LBB50_24: # %entry
+; RV32-NEXT: li a4, 0
+; RV32-NEXT: .LBB50_25: # %entry
+; RV32-NEXT: or a2, a3, a2
+; RV32-NEXT: bnez a2, .LBB50_30
+; RV32-NEXT: # %bb.26: # %entry
+; RV32-NEXT: mv a3, a1
+; RV32-NEXT: beqz a5, .LBB50_31
+; RV32-NEXT: .LBB50_27: # %entry
+; RV32-NEXT: beqz a2, .LBB50_29
+; RV32-NEXT: .LBB50_28: # %entry
+; RV32-NEXT: mv a1, a3
+; RV32-NEXT: .LBB50_29: # %entry
+; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: ret
+; RV32-NEXT: .LBB50_30: # %entry
+; RV32-NEXT: mv a0, a4
+; RV32-NEXT: mv a3, a1
+; RV32-NEXT: bnez a5, .LBB50_27
+; RV32-NEXT: .LBB50_31: # %entry
+; RV32-NEXT: li a3, 0
+; RV32-NEXT: bnez a2, .LBB50_28
+; RV32-NEXT: j .LBB50_29
+;
+; RV64-LABEL: ustest_f32i64_mm:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: call __fixsfti at plt
+; RV64-NEXT: mv a2, a0
+; RV64-NEXT: li a4, 1
+; RV64-NEXT: mv a3, a1
+; RV64-NEXT: bgtz a1, .LBB50_6
+; RV64-NEXT: # %bb.1: # %entry
+; RV64-NEXT: li a0, 0
+; RV64-NEXT: bne a1, a4, .LBB50_7
+; RV64-NEXT: .LBB50_2: # %entry
+; RV64-NEXT: mv a1, a0
+; RV64-NEXT: blez a3, .LBB50_8
+; RV64-NEXT: .LBB50_3: # %entry
+; RV64-NEXT: beqz a3, .LBB50_5
+; RV64-NEXT: .LBB50_4: # %entry
+; RV64-NEXT: mv a0, a1
+; RV64-NEXT: .LBB50_5: # %entry
+; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
+; RV64-NEXT: .LBB50_6: # %entry
+; RV64-NEXT: li a2, 0
+; RV64-NEXT: li a3, 1
+; RV64-NEXT: li a0, 0
+; RV64-NEXT: beq a1, a4, .LBB50_2
+; RV64-NEXT: .LBB50_7: # %entry
+; RV64-NEXT: mv a0, a2
+; RV64-NEXT: mv a1, a0
+; RV64-NEXT: bgtz a3, .LBB50_3
+; RV64-NEXT: .LBB50_8: # %entry
+; RV64-NEXT: li a1, 0
+; RV64-NEXT: bnez a3, .LBB50_4
+; RV64-NEXT: j .LBB50_5
+entry:
+ %conv = fptosi float %x to i128
+ %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 18446744073709551616)
+ %spec.store.select7 = call i128 @llvm.smax.i128(i128 %spec.store.select, i128 0)
+ %conv6 = trunc i128 %spec.store.select7 to i64
+ ret i64 %conv6
+}
+
+; Signed saturating half -> i64: half is first promoted to float via the
+; __gnu_h2f_ieee libcall, then converted to i128 (__fixsfti) and clamped to
+; [INT64_MIN, INT64_MAX] with llvm.smin/llvm.smax before truncation.
+; Same lowering as stest_f32i64_mm plus the half-to-float promotion.
+; CHECK lines are autogenerated by update_llc_test_checks.py -- regenerate,
+; never hand-edit.
+define i64 @stest_f16i64_mm(half %x) {
+; RV32-LABEL: stest_f16i64_mm:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: addi sp, sp, -32
+; RV32-NEXT: .cfi_def_cfa_offset 32
+; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: call __gnu_h2f_ieee at plt
+; RV32-NEXT: mv a1, a0
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: call __fixsfti at plt
+; RV32-NEXT: lw a5, 8(sp)
+; RV32-NEXT: lw a3, 20(sp)
+; RV32-NEXT: lw a1, 12(sp)
+; RV32-NEXT: li a6, -1
+; RV32-NEXT: mv a4, a5
+; RV32-NEXT: bltz a3, .LBB51_2
+; RV32-NEXT: # %bb.1: # %entry
+; RV32-NEXT: li a4, -1
+; RV32-NEXT: .LBB51_2: # %entry
+; RV32-NEXT: lui a7, 524288
+; RV32-NEXT: addi a2, a7, -1
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: bgeu a1, a2, .LBB51_19
+; RV32-NEXT: # %bb.3: # %entry
+; RV32-NEXT: lw t0, 16(sp)
+; RV32-NEXT: bne a1, a2, .LBB51_20
+; RV32-NEXT: .LBB51_4: # %entry
+; RV32-NEXT: or a0, t0, a3
+; RV32-NEXT: bnez a0, .LBB51_21
+; RV32-NEXT: .LBB51_5: # %entry
+; RV32-NEXT: mv a4, a1
+; RV32-NEXT: bgez a3, .LBB51_22
+; RV32-NEXT: .LBB51_6: # %entry
+; RV32-NEXT: bgeu a1, a2, .LBB51_23
+; RV32-NEXT: .LBB51_7: # %entry
+; RV32-NEXT: bnez a0, .LBB51_24
+; RV32-NEXT: .LBB51_8: # %entry
+; RV32-NEXT: li a2, 0
+; RV32-NEXT: bnez a3, .LBB51_25
+; RV32-NEXT: .LBB51_9: # %entry
+; RV32-NEXT: bgez a3, .LBB51_26
+; RV32-NEXT: .LBB51_10: # %entry
+; RV32-NEXT: mv a4, a5
+; RV32-NEXT: bgeu a7, a1, .LBB51_27
+; RV32-NEXT: .LBB51_11: # %entry
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: bne a1, a7, .LBB51_28
+; RV32-NEXT: .LBB51_12: # %entry
+; RV32-NEXT: bltz a3, .LBB51_29
+; RV32-NEXT: .LBB51_13: # %entry
+; RV32-NEXT: and a2, a2, a3
+; RV32-NEXT: bne a2, a6, .LBB51_30
+; RV32-NEXT: .LBB51_14: # %entry
+; RV32-NEXT: mv a4, a1
+; RV32-NEXT: bltz a3, .LBB51_31
+; RV32-NEXT: .LBB51_15: # %entry
+; RV32-NEXT: bgeu a7, a1, .LBB51_32
+; RV32-NEXT: .LBB51_16: # %entry
+; RV32-NEXT: beq a2, a6, .LBB51_18
+; RV32-NEXT: .LBB51_17: # %entry
+; RV32-NEXT: mv a1, a4
+; RV32-NEXT: .LBB51_18: # %entry
+; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: ret
+; RV32-NEXT: .LBB51_19: # %entry
+; RV32-NEXT: li a0, -1
+; RV32-NEXT: lw t0, 16(sp)
+; RV32-NEXT: beq a1, a2, .LBB51_4
+; RV32-NEXT: .LBB51_20: # %entry
+; RV32-NEXT: mv a5, a0
+; RV32-NEXT: or a0, t0, a3
+; RV32-NEXT: beqz a0, .LBB51_5
+; RV32-NEXT: .LBB51_21: # %entry
+; RV32-NEXT: mv a5, a4
+; RV32-NEXT: mv a4, a1
+; RV32-NEXT: bltz a3, .LBB51_6
+; RV32-NEXT: .LBB51_22: # %entry
+; RV32-NEXT: mv a4, a2
+; RV32-NEXT: bltu a1, a2, .LBB51_7
+; RV32-NEXT: .LBB51_23: # %entry
+; RV32-NEXT: mv a1, a2
+; RV32-NEXT: beqz a0, .LBB51_8
+; RV32-NEXT: .LBB51_24: # %entry
+; RV32-NEXT: mv a1, a4
+; RV32-NEXT: li a2, 0
+; RV32-NEXT: beqz a3, .LBB51_9
+; RV32-NEXT: .LBB51_25: # %entry
+; RV32-NEXT: srai a0, a3, 31
+; RV32-NEXT: and a2, a0, t0
+; RV32-NEXT: bltz a3, .LBB51_10
+; RV32-NEXT: .LBB51_26: # %entry
+; RV32-NEXT: li a3, 0
+; RV32-NEXT: mv a4, a5
+; RV32-NEXT: bltu a7, a1, .LBB51_11
+; RV32-NEXT: .LBB51_27: # %entry
+; RV32-NEXT: li a4, 0
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: beq a1, a7, .LBB51_12
+; RV32-NEXT: .LBB51_28: # %entry
+; RV32-NEXT: mv a0, a4
+; RV32-NEXT: bgez a3, .LBB51_13
+; RV32-NEXT: .LBB51_29: # %entry
+; RV32-NEXT: li a5, 0
+; RV32-NEXT: and a2, a2, a3
+; RV32-NEXT: beq a2, a6, .LBB51_14
+; RV32-NEXT: .LBB51_30: # %entry
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: mv a4, a1
+; RV32-NEXT: bgez a3, .LBB51_15
+; RV32-NEXT: .LBB51_31: # %entry
+; RV32-NEXT: lui a4, 524288
+; RV32-NEXT: bltu a7, a1, .LBB51_16
+; RV32-NEXT: .LBB51_32: # %entry
+; RV32-NEXT: lui a1, 524288
+; RV32-NEXT: bne a2, a6, .LBB51_17
+; RV32-NEXT: j .LBB51_18
+;
+; RV64-LABEL: stest_f16i64_mm:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: call __gnu_h2f_ieee at plt
+; RV64-NEXT: call __fixsfti at plt
+; RV64-NEXT: li a2, -1
+; RV64-NEXT: srli a4, a2, 1
+; RV64-NEXT: mv a3, a0
+; RV64-NEXT: bgez a1, .LBB51_10
+; RV64-NEXT: # %bb.1: # %entry
+; RV64-NEXT: bgeu a0, a4, .LBB51_11
+; RV64-NEXT: .LBB51_2: # %entry
+; RV64-NEXT: bnez a1, .LBB51_12
+; RV64-NEXT: .LBB51_3: # %entry
+; RV64-NEXT: bltz a1, .LBB51_5
+; RV64-NEXT: .LBB51_4: # %entry
+; RV64-NEXT: li a1, 0
+; RV64-NEXT: .LBB51_5: # %entry
+; RV64-NEXT: slli a4, a2, 63
+; RV64-NEXT: mv a3, a0
+; RV64-NEXT: bltz a1, .LBB51_13
+; RV64-NEXT: # %bb.6: # %entry
+; RV64-NEXT: bgeu a4, a0, .LBB51_14
+; RV64-NEXT: .LBB51_7: # %entry
+; RV64-NEXT: beq a1, a2, .LBB51_9
+; RV64-NEXT: .LBB51_8: # %entry
+; RV64-NEXT: mv a0, a3
+; RV64-NEXT: .LBB51_9: # %entry
+; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
+; RV64-NEXT: .LBB51_10: # %entry
+; RV64-NEXT: mv a3, a4
+; RV64-NEXT: bltu a0, a4, .LBB51_2
+; RV64-NEXT: .LBB51_11: # %entry
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: beqz a1, .LBB51_3
+; RV64-NEXT: .LBB51_12: # %entry
+; RV64-NEXT: mv a0, a3
+; RV64-NEXT: bgez a1, .LBB51_4
+; RV64-NEXT: j .LBB51_5
+; RV64-NEXT: .LBB51_13: # %entry
+; RV64-NEXT: mv a3, a4
+; RV64-NEXT: bltu a4, a0, .LBB51_7
+; RV64-NEXT: .LBB51_14: # %entry
+; RV64-NEXT: mv a0, a4
+; RV64-NEXT: bne a1, a2, .LBB51_8
+; RV64-NEXT: j .LBB51_9
+entry:
+ %conv = fptosi half %x to i128
+ %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 9223372036854775807)
+ %spec.store.select7 = call i128 @llvm.smax.i128(i128 %spec.store.select, i128 -9223372036854775808)
+ %conv6 = trunc i128 %spec.store.select7 to i64
+ ret i64 %conv6
+}
+
+; Unsigned saturating half -> i64: half is promoted to float
+; (__gnu_h2f_ieee), converted to unsigned i128 (__fixunssfti) and clamped
+; with llvm.umin before truncation.
+; NOTE(review): the umin bound is 18446744073709551616 (2^64), one above
+; u64 max -- same suspect constant as utest_f32i64_mm; verify intent.
+; CHECK lines are autogenerated by update_llc_test_checks.py -- regenerate,
+; never hand-edit.
+define i64 @utesth_f16i64_mm(half %x) {
+; RV32-LABEL: utesth_f16i64_mm:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: addi sp, sp, -32
+; RV32-NEXT: .cfi_def_cfa_offset 32
+; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: call __gnu_h2f_ieee at plt
+; RV32-NEXT: mv a1, a0
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: call __fixunssfti at plt
+; RV32-NEXT: lw a0, 20(sp)
+; RV32-NEXT: lw a3, 16(sp)
+; RV32-NEXT: li a1, 0
+; RV32-NEXT: beqz a0, .LBB52_3
+; RV32-NEXT: # %bb.1: # %entry
+; RV32-NEXT: mv a2, a1
+; RV32-NEXT: beq a2, a1, .LBB52_4
+; RV32-NEXT: .LBB52_2:
+; RV32-NEXT: lw a4, 8(sp)
+; RV32-NEXT: j .LBB52_5
+; RV32-NEXT: .LBB52_3:
+; RV32-NEXT: seqz a2, a3
+; RV32-NEXT: bne a2, a1, .LBB52_2
+; RV32-NEXT: .LBB52_4: # %entry
+; RV32-NEXT: mv a4, a1
+; RV32-NEXT: .LBB52_5: # %entry
+; RV32-NEXT: xori a3, a3, 1
+; RV32-NEXT: or a3, a3, a0
+; RV32-NEXT: mv a0, a1
+; RV32-NEXT: beq a3, a1, .LBB52_7
+; RV32-NEXT: # %bb.6: # %entry
+; RV32-NEXT: mv a0, a4
+; RV32-NEXT: .LBB52_7: # %entry
+; RV32-NEXT: bne a2, a1, .LBB52_9
+; RV32-NEXT: # %bb.8: # %entry
+; RV32-NEXT: mv a2, a1
+; RV32-NEXT: bne a3, a1, .LBB52_10
+; RV32-NEXT: j .LBB52_11
+; RV32-NEXT: .LBB52_9:
+; RV32-NEXT: lw a2, 12(sp)
+; RV32-NEXT: beq a3, a1, .LBB52_11
+; RV32-NEXT: .LBB52_10: # %entry
+; RV32-NEXT: mv a1, a2
+; RV32-NEXT: .LBB52_11: # %entry
+; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: ret
+;
+; RV64-LABEL: utesth_f16i64_mm:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: call __gnu_h2f_ieee at plt
+; RV64-NEXT: call __fixunssfti at plt
+; RV64-NEXT: mv a2, a0
+; RV64-NEXT: li a0, 0
+; RV64-NEXT: beqz a1, .LBB52_2
+; RV64-NEXT: # %bb.1: # %entry
+; RV64-NEXT: mv a2, a0
+; RV64-NEXT: .LBB52_2: # %entry
+; RV64-NEXT: li a3, 1
+; RV64-NEXT: beq a1, a3, .LBB52_4
+; RV64-NEXT: # %bb.3: # %entry
+; RV64-NEXT: mv a0, a2
+; RV64-NEXT: .LBB52_4: # %entry
+; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
+entry:
+ %conv = fptoui half %x to i128
+ %spec.store.select = call i128 @llvm.umin.i128(i128 %conv, i128 18446744073709551616)
+ %conv6 = trunc i128 %spec.store.select to i64
+ ret i64 %conv6
+}
+
+; Unsigned-range saturating *signed* half -> i64: half is promoted to float
+; (__gnu_h2f_ieee), signed-converted to i128 (__fixsfti), clamped above with
+; llvm.smin and below at zero with llvm.smax, then truncated.
+; NOTE(review): the smin bound is 18446744073709551616 (2^64), one above
+; u64 max -- same suspect constant as the other ustest/utest cases; verify.
+; CHECK lines are autogenerated by update_llc_test_checks.py -- regenerate,
+; never hand-edit.
+define i64 @ustest_f16i64_mm(half %x) {
+; RV32-LABEL: ustest_f16i64_mm:
+; RV32: # %bb.0: # %entry
+; RV32-NEXT: addi sp, sp, -32
+; RV32-NEXT: .cfi_def_cfa_offset 32
+; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: call __gnu_h2f_ieee at plt
+; RV32-NEXT: mv a1, a0
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: call __fixsfti at plt
+; RV32-NEXT: lw a0, 16(sp)
+; RV32-NEXT: lw a2, 20(sp)
+; RV32-NEXT: li a1, 1
+; RV32-NEXT: mv a4, a0
+; RV32-NEXT: bgez a2, .LBB53_5
+; RV32-NEXT: # %bb.1: # %entry
+; RV32-NEXT: mv a3, a0
+; RV32-NEXT: bgeu a0, a1, .LBB53_6
+; RV32-NEXT: .LBB53_2: # %entry
+; RV32-NEXT: beqz a2, .LBB53_7
+; RV32-NEXT: .LBB53_3: # %entry
+; RV32-NEXT: slti a1, a2, 0
+; RV32-NEXT: mv a3, a4
+; RV32-NEXT: beqz a1, .LBB53_8
+; RV32-NEXT: .LBB53_4:
+; RV32-NEXT: lw a5, 8(sp)
+; RV32-NEXT: j .LBB53_9
+; RV32-NEXT: .LBB53_5: # %entry
+; RV32-NEXT: li a4, 1
+; RV32-NEXT: mv a3, a0
+; RV32-NEXT: bltu a0, a1, .LBB53_2
+; RV32-NEXT: .LBB53_6: # %entry
+; RV32-NEXT: li a3, 1
+; RV32-NEXT: bnez a2, .LBB53_3
+; RV32-NEXT: .LBB53_7:
+; RV32-NEXT: seqz a1, a0
+; RV32-NEXT: bnez a1, .LBB53_4
+; RV32-NEXT: .LBB53_8: # %entry
+; RV32-NEXT: li a5, 0
+; RV32-NEXT: .LBB53_9: # %entry
+; RV32-NEXT: xori a0, a0, 1
+; RV32-NEXT: or a0, a0, a2
+; RV32-NEXT: li a4, 0
+; RV32-NEXT: beqz a0, .LBB53_11
+; RV32-NEXT: # %bb.10: # %entry
+; RV32-NEXT: mv a4, a5
+; RV32-NEXT: .LBB53_11: # %entry
+; RV32-NEXT: bnez a1, .LBB53_13
+; RV32-NEXT: # %bb.12: # %entry
+; RV32-NEXT: li a5, 0
+; RV32-NEXT: li a1, 0
+; RV32-NEXT: bnez a0, .LBB53_14
+; RV32-NEXT: j .LBB53_15
+; RV32-NEXT: .LBB53_13:
+; RV32-NEXT: lw a5, 12(sp)
+; RV32-NEXT: li a1, 0
+; RV32-NEXT: beqz a0, .LBB53_15
+; RV32-NEXT: .LBB53_14: # %entry
+; RV32-NEXT: mv a1, a5
+; RV32-NEXT: .LBB53_15: # %entry
+; RV32-NEXT: bgez a2, .LBB53_20
+; RV32-NEXT: # %bb.16: # %entry
+; RV32-NEXT: mv a5, a4
+; RV32-NEXT: beqz a1, .LBB53_21
+; RV32-NEXT: .LBB53_17: # %entry
+; RV32-NEXT: mv a0, a4
+; RV32-NEXT: bnez a1, .LBB53_22
+; RV32-NEXT: .LBB53_18: # %entry
+; RV32-NEXT: beqz a2, .LBB53_23
+; RV32-NEXT: .LBB53_19: # %entry
+; RV32-NEXT: sgtz a5, a2
+; RV32-NEXT: beqz a5, .LBB53_24
+; RV32-NEXT: j .LBB53_25
+; RV32-NEXT: .LBB53_20: # %entry
+; RV32-NEXT: li a2, 0
+; RV32-NEXT: mv a5, a4
+; RV32-NEXT: bnez a1, .LBB53_17
+; RV32-NEXT: .LBB53_21: # %entry
+; RV32-NEXT: li a5, 0
+; RV32-NEXT: mv a0, a4
+; RV32-NEXT: beqz a1, .LBB53_18
+; RV32-NEXT: .LBB53_22: # %entry
+; RV32-NEXT: mv a0, a5
+; RV32-NEXT: bnez a2, .LBB53_19
+; RV32-NEXT: .LBB53_23:
+; RV32-NEXT: snez a5, a3
+; RV32-NEXT: bnez a5, .LBB53_25
+; RV32-NEXT: .LBB53_24: # %entry
+; RV32-NEXT: li a4, 0
+; RV32-NEXT: .LBB53_25: # %entry
+; RV32-NEXT: or a2, a3, a2
+; RV32-NEXT: bnez a2, .LBB53_30
+; RV32-NEXT: # %bb.26: # %entry
+; RV32-NEXT: mv a3, a1
+; RV32-NEXT: beqz a5, .LBB53_31
+; RV32-NEXT: .LBB53_27: # %entry
+; RV32-NEXT: beqz a2, .LBB53_29
+; RV32-NEXT: .LBB53_28: # %entry
+; RV32-NEXT: mv a1, a3
+; RV32-NEXT: .LBB53_29: # %entry
+; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: ret
+; RV32-NEXT: .LBB53_30: # %entry
+; RV32-NEXT: mv a0, a4
+; RV32-NEXT: mv a3, a1
+; RV32-NEXT: bnez a5, .LBB53_27
+; RV32-NEXT: .LBB53_31: # %entry
+; RV32-NEXT: li a3, 0
+; RV32-NEXT: bnez a2, .LBB53_28
+; RV32-NEXT: j .LBB53_29
+;
+; RV64-LABEL: ustest_f16i64_mm:
+; RV64: # %bb.0: # %entry
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: call __gnu_h2f_ieee at plt
+; RV64-NEXT: call __fixsfti at plt
+; RV64-NEXT: mv a2, a0
+; RV64-NEXT: li a4, 1
+; RV64-NEXT: mv a3, a1
+; RV64-NEXT: bgtz a1, .LBB53_6
+; RV64-NEXT: # %bb.1: # %entry
+; RV64-NEXT: li a0, 0
+; RV64-NEXT: bne a1, a4, .LBB53_7
+; RV64-NEXT: .LBB53_2: # %entry
+; RV64-NEXT: mv a1, a0
+; RV64-NEXT: blez a3, .LBB53_8
+; RV64-NEXT: .LBB53_3: # %entry
+; RV64-NEXT: beqz a3, .LBB53_5
+; RV64-NEXT: .LBB53_4: # %entry
+; RV64-NEXT: mv a0, a1
+; RV64-NEXT: .LBB53_5: # %entry
+; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
+; RV64-NEXT: .LBB53_6: # %entry
+; RV64-NEXT: li a2, 0
+; RV64-NEXT: li a3, 1
+; RV64-NEXT: li a0, 0
+; RV64-NEXT: beq a1, a4, .LBB53_2
+; RV64-NEXT: .LBB53_7: # %entry
+; RV64-NEXT: mv a0, a2
+; RV64-NEXT: mv a1, a0
+; RV64-NEXT: bgtz a3, .LBB53_3
+; RV64-NEXT: .LBB53_8: # %entry
+; RV64-NEXT: li a1, 0
+; RV64-NEXT: bnez a3, .LBB53_4
+; RV64-NEXT: j .LBB53_5
+entry:
+ %conv = fptosi half %x to i128
+ %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 18446744073709551616)
+ %spec.store.select7 = call i128 @llvm.smax.i128(i128 %spec.store.select, i128 0)
+ %conv6 = trunc i128 %spec.store.select7 to i64
+ ret i64 %conv6
+}
+
+; Declarations of the llvm.smin/llvm.smax/llvm.umin clamping intrinsics used
+; by the tests in this file (i32/i64 variants by the scalar tests, i128
+; variants by the *_mm i64 tests above).
+declare i32 @llvm.smin.i32(i32, i32)
+declare i32 @llvm.smax.i32(i32, i32)
+declare i32 @llvm.umin.i32(i32, i32)
+declare i64 @llvm.smin.i64(i64, i64)
+declare i64 @llvm.smax.i64(i64, i64)
+declare i64 @llvm.umin.i64(i64, i64)
+declare i128 @llvm.smin.i128(i128, i128)
+declare i128 @llvm.smax.i128(i128, i128)
+declare i128 @llvm.umin.i128(i128, i128)
diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll
new file mode 100644
index 0000000000000..53285d759864f
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll
@@ -0,0 +1,4272 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+f,+d,+experimental-v -verify-machineinstrs < %s | FileCheck %s
+
+; i32 saturate
+
+define <2 x i32> @stest_f64i32(<2 x double> %x) {
+; CHECK-LABEL: stest_f64i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fmv.d.x ft0, a1
+; CHECK-NEXT: fmv.d.x ft1, a0
+; CHECK-NEXT: fcvt.l.d a1, ft0, rtz
+; CHECK-NEXT: lui a2, 524288
+; CHECK-NEXT: addiw a3, a2, -1
+; CHECK-NEXT: fcvt.l.d a0, ft1, rtz
+; CHECK-NEXT: bge a1, a3, .LBB0_5
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: bge a0, a3, .LBB0_6
+; CHECK-NEXT: .LBB0_2: # %entry
+; CHECK-NEXT: bge a2, a0, .LBB0_7
+; CHECK-NEXT: .LBB0_3: # %entry
+; CHECK-NEXT: bge a2, a1, .LBB0_8
+; CHECK-NEXT: .LBB0_4: # %entry
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB0_5: # %entry
+; CHECK-NEXT: mv a1, a3
+; CHECK-NEXT: blt a0, a3, .LBB0_2
+; CHECK-NEXT: .LBB0_6: # %entry
+; CHECK-NEXT: mv a0, a3
+; CHECK-NEXT: blt a2, a0, .LBB0_3
+; CHECK-NEXT: .LBB0_7: # %entry
+; CHECK-NEXT: lui a0, 524288
+; CHECK-NEXT: blt a2, a1, .LBB0_4
+; CHECK-NEXT: .LBB0_8: # %entry
+; CHECK-NEXT: lui a1, 524288
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi <2 x double> %x to <2 x i64>
+ %0 = icmp slt <2 x i64> %conv, <i64 2147483647, i64 2147483647>
+ %spec.store.select = select <2 x i1> %0, <2 x i64> %conv, <2 x i64> <i64 2147483647, i64 2147483647>
+ %1 = icmp sgt <2 x i64> %spec.store.select, <i64 -2147483648, i64 -2147483648>
+ %spec.store.select7 = select <2 x i1> %1, <2 x i64> %spec.store.select, <2 x i64> <i64 -2147483648, i64 -2147483648>
+ %conv6 = trunc <2 x i64> %spec.store.select7 to <2 x i32>
+ ret <2 x i32> %conv6
+}
+
+define <2 x i32> @utest_f64i32(<2 x double> %x) {
+; CHECK-LABEL: utest_f64i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fmv.d.x ft0, a0
+; CHECK-NEXT: fmv.d.x ft1, a1
+; CHECK-NEXT: fcvt.lu.d a0, ft0, rtz
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: srli a2, a1, 32
+; CHECK-NEXT: fcvt.lu.d a1, ft1, rtz
+; CHECK-NEXT: bgeu a0, a2, .LBB1_3
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: bgeu a1, a2, .LBB1_4
+; CHECK-NEXT: .LBB1_2: # %entry
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB1_3: # %entry
+; CHECK-NEXT: mv a0, a2
+; CHECK-NEXT: bltu a1, a2, .LBB1_2
+; CHECK-NEXT: .LBB1_4: # %entry
+; CHECK-NEXT: mv a1, a2
+; CHECK-NEXT: ret
+entry:
+ %conv = fptoui <2 x double> %x to <2 x i64>
+ %0 = icmp ult <2 x i64> %conv, <i64 4294967295, i64 4294967295>
+ %spec.store.select = select <2 x i1> %0, <2 x i64> %conv, <2 x i64> <i64 4294967295, i64 4294967295>
+ %conv6 = trunc <2 x i64> %spec.store.select to <2 x i32>
+ ret <2 x i32> %conv6
+}
+
+define <2 x i32> @ustest_f64i32(<2 x double> %x) {
+; CHECK-LABEL: ustest_f64i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fmv.d.x ft0, a1
+; CHECK-NEXT: fmv.d.x ft1, a0
+; CHECK-NEXT: fcvt.l.d a1, ft0, rtz
+; CHECK-NEXT: li a0, -1
+; CHECK-NEXT: srli a2, a0, 32
+; CHECK-NEXT: fcvt.l.d a0, ft1, rtz
+; CHECK-NEXT: bge a1, a2, .LBB2_5
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: bge a0, a2, .LBB2_6
+; CHECK-NEXT: .LBB2_2: # %entry
+; CHECK-NEXT: blez a0, .LBB2_7
+; CHECK-NEXT: .LBB2_3: # %entry
+; CHECK-NEXT: blez a1, .LBB2_8
+; CHECK-NEXT: .LBB2_4: # %entry
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB2_5: # %entry
+; CHECK-NEXT: mv a1, a2
+; CHECK-NEXT: blt a0, a2, .LBB2_2
+; CHECK-NEXT: .LBB2_6: # %entry
+; CHECK-NEXT: mv a0, a2
+; CHECK-NEXT: bgtz a0, .LBB2_3
+; CHECK-NEXT: .LBB2_7: # %entry
+; CHECK-NEXT: li a0, 0
+; CHECK-NEXT: bgtz a1, .LBB2_4
+; CHECK-NEXT: .LBB2_8: # %entry
+; CHECK-NEXT: li a1, 0
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi <2 x double> %x to <2 x i64>
+ %0 = icmp slt <2 x i64> %conv, <i64 4294967295, i64 4294967295>
+ %spec.store.select = select <2 x i1> %0, <2 x i64> %conv, <2 x i64> <i64 4294967295, i64 4294967295>
+ %1 = icmp sgt <2 x i64> %spec.store.select, zeroinitializer
+ %spec.store.select7 = select <2 x i1> %1, <2 x i64> %spec.store.select, <2 x i64> zeroinitializer
+ %conv6 = trunc <2 x i64> %spec.store.select7 to <2 x i32>
+ ret <2 x i32> %conv6
+}
+
+define <4 x i32> @stest_f32i32(<4 x float> %x) {
+; CHECK-LABEL: stest_f32i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fmv.w.x ft1, a4
+; CHECK-NEXT: fmv.w.x ft2, a3
+; CHECK-NEXT: fmv.w.x ft0, a2
+; CHECK-NEXT: fcvt.l.s a2, ft1, rtz
+; CHECK-NEXT: lui a6, 524288
+; CHECK-NEXT: addiw a5, a6, -1
+; CHECK-NEXT: fcvt.l.s a3, ft2, rtz
+; CHECK-NEXT: blt a2, a5, .LBB3_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: mv a2, a5
+; CHECK-NEXT: .LBB3_2: # %entry
+; CHECK-NEXT: fmv.w.x ft1, a1
+; CHECK-NEXT: fcvt.l.s a1, ft0, rtz
+; CHECK-NEXT: bge a3, a5, .LBB3_11
+; CHECK-NEXT: # %bb.3: # %entry
+; CHECK-NEXT: fcvt.l.s a4, ft1, rtz
+; CHECK-NEXT: bge a1, a5, .LBB3_12
+; CHECK-NEXT: .LBB3_4: # %entry
+; CHECK-NEXT: bge a4, a5, .LBB3_13
+; CHECK-NEXT: .LBB3_5: # %entry
+; CHECK-NEXT: bge a6, a4, .LBB3_14
+; CHECK-NEXT: .LBB3_6: # %entry
+; CHECK-NEXT: bge a6, a1, .LBB3_15
+; CHECK-NEXT: .LBB3_7: # %entry
+; CHECK-NEXT: bge a6, a3, .LBB3_16
+; CHECK-NEXT: .LBB3_8: # %entry
+; CHECK-NEXT: blt a6, a2, .LBB3_10
+; CHECK-NEXT: .LBB3_9: # %entry
+; CHECK-NEXT: lui a2, 524288
+; CHECK-NEXT: .LBB3_10: # %entry
+; CHECK-NEXT: sw a2, 12(a0)
+; CHECK-NEXT: sw a3, 8(a0)
+; CHECK-NEXT: sw a1, 4(a0)
+; CHECK-NEXT: sw a4, 0(a0)
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB3_11: # %entry
+; CHECK-NEXT: mv a3, a5
+; CHECK-NEXT: fcvt.l.s a4, ft1, rtz
+; CHECK-NEXT: blt a1, a5, .LBB3_4
+; CHECK-NEXT: .LBB3_12: # %entry
+; CHECK-NEXT: mv a1, a5
+; CHECK-NEXT: blt a4, a5, .LBB3_5
+; CHECK-NEXT: .LBB3_13: # %entry
+; CHECK-NEXT: mv a4, a5
+; CHECK-NEXT: blt a6, a4, .LBB3_6
+; CHECK-NEXT: .LBB3_14: # %entry
+; CHECK-NEXT: lui a4, 524288
+; CHECK-NEXT: blt a6, a1, .LBB3_7
+; CHECK-NEXT: .LBB3_15: # %entry
+; CHECK-NEXT: lui a1, 524288
+; CHECK-NEXT: blt a6, a3, .LBB3_8
+; CHECK-NEXT: .LBB3_16: # %entry
+; CHECK-NEXT: lui a3, 524288
+; CHECK-NEXT: bge a6, a2, .LBB3_9
+; CHECK-NEXT: j .LBB3_10
+entry:
+ %conv = fptosi <4 x float> %x to <4 x i64>
+ %0 = icmp slt <4 x i64> %conv, <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>
+ %spec.store.select = select <4 x i1> %0, <4 x i64> %conv, <4 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>
+ %1 = icmp sgt <4 x i64> %spec.store.select, <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648>
+ %spec.store.select7 = select <4 x i1> %1, <4 x i64> %spec.store.select, <4 x i64> <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648>
+ %conv6 = trunc <4 x i64> %spec.store.select7 to <4 x i32>
+ ret <4 x i32> %conv6
+}
+
+define <4 x i32> @utest_f32i32(<4 x float> %x) {
+; CHECK-LABEL: utest_f32i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fmv.w.x ft1, a1
+; CHECK-NEXT: fmv.w.x ft2, a2
+; CHECK-NEXT: fmv.w.x ft0, a3
+; CHECK-NEXT: fcvt.lu.s a1, ft1, rtz
+; CHECK-NEXT: li a2, -1
+; CHECK-NEXT: srli a3, a2, 32
+; CHECK-NEXT: fcvt.lu.s a2, ft2, rtz
+; CHECK-NEXT: bltu a1, a3, .LBB4_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: mv a1, a3
+; CHECK-NEXT: .LBB4_2: # %entry
+; CHECK-NEXT: fmv.w.x ft1, a4
+; CHECK-NEXT: fcvt.lu.s a4, ft0, rtz
+; CHECK-NEXT: bgeu a2, a3, .LBB4_7
+; CHECK-NEXT: # %bb.3: # %entry
+; CHECK-NEXT: fcvt.lu.s a5, ft1, rtz
+; CHECK-NEXT: bgeu a4, a3, .LBB4_8
+; CHECK-NEXT: .LBB4_4: # %entry
+; CHECK-NEXT: bltu a5, a3, .LBB4_6
+; CHECK-NEXT: .LBB4_5: # %entry
+; CHECK-NEXT: mv a5, a3
+; CHECK-NEXT: .LBB4_6: # %entry
+; CHECK-NEXT: sw a5, 12(a0)
+; CHECK-NEXT: sw a4, 8(a0)
+; CHECK-NEXT: sw a2, 4(a0)
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB4_7: # %entry
+; CHECK-NEXT: mv a2, a3
+; CHECK-NEXT: fcvt.lu.s a5, ft1, rtz
+; CHECK-NEXT: bltu a4, a3, .LBB4_4
+; CHECK-NEXT: .LBB4_8: # %entry
+; CHECK-NEXT: mv a4, a3
+; CHECK-NEXT: bgeu a5, a3, .LBB4_5
+; CHECK-NEXT: j .LBB4_6
+entry:
+ %conv = fptoui <4 x float> %x to <4 x i64>
+ %0 = icmp ult <4 x i64> %conv, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
+ %spec.store.select = select <4 x i1> %0, <4 x i64> %conv, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
+ %conv6 = trunc <4 x i64> %spec.store.select to <4 x i32>
+ ret <4 x i32> %conv6
+}
+
+define <4 x i32> @ustest_f32i32(<4 x float> %x) {
+; CHECK-LABEL: ustest_f32i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fmv.w.x ft1, a4
+; CHECK-NEXT: fmv.w.x ft2, a3
+; CHECK-NEXT: fmv.w.x ft0, a2
+; CHECK-NEXT: fcvt.l.s a2, ft1, rtz
+; CHECK-NEXT: li a3, -1
+; CHECK-NEXT: srli a5, a3, 32
+; CHECK-NEXT: fcvt.l.s a3, ft2, rtz
+; CHECK-NEXT: blt a2, a5, .LBB5_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: mv a2, a5
+; CHECK-NEXT: .LBB5_2: # %entry
+; CHECK-NEXT: fmv.w.x ft1, a1
+; CHECK-NEXT: fcvt.l.s a1, ft0, rtz
+; CHECK-NEXT: bge a3, a5, .LBB5_11
+; CHECK-NEXT: # %bb.3: # %entry
+; CHECK-NEXT: fcvt.l.s a4, ft1, rtz
+; CHECK-NEXT: bge a1, a5, .LBB5_12
+; CHECK-NEXT: .LBB5_4: # %entry
+; CHECK-NEXT: bge a4, a5, .LBB5_13
+; CHECK-NEXT: .LBB5_5: # %entry
+; CHECK-NEXT: blez a4, .LBB5_14
+; CHECK-NEXT: .LBB5_6: # %entry
+; CHECK-NEXT: blez a1, .LBB5_15
+; CHECK-NEXT: .LBB5_7: # %entry
+; CHECK-NEXT: blez a3, .LBB5_16
+; CHECK-NEXT: .LBB5_8: # %entry
+; CHECK-NEXT: bgtz a2, .LBB5_10
+; CHECK-NEXT: .LBB5_9: # %entry
+; CHECK-NEXT: li a2, 0
+; CHECK-NEXT: .LBB5_10: # %entry
+; CHECK-NEXT: sw a2, 12(a0)
+; CHECK-NEXT: sw a3, 8(a0)
+; CHECK-NEXT: sw a1, 4(a0)
+; CHECK-NEXT: sw a4, 0(a0)
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB5_11: # %entry
+; CHECK-NEXT: mv a3, a5
+; CHECK-NEXT: fcvt.l.s a4, ft1, rtz
+; CHECK-NEXT: blt a1, a5, .LBB5_4
+; CHECK-NEXT: .LBB5_12: # %entry
+; CHECK-NEXT: mv a1, a5
+; CHECK-NEXT: blt a4, a5, .LBB5_5
+; CHECK-NEXT: .LBB5_13: # %entry
+; CHECK-NEXT: mv a4, a5
+; CHECK-NEXT: bgtz a4, .LBB5_6
+; CHECK-NEXT: .LBB5_14: # %entry
+; CHECK-NEXT: li a4, 0
+; CHECK-NEXT: bgtz a1, .LBB5_7
+; CHECK-NEXT: .LBB5_15: # %entry
+; CHECK-NEXT: li a1, 0
+; CHECK-NEXT: bgtz a3, .LBB5_8
+; CHECK-NEXT: .LBB5_16: # %entry
+; CHECK-NEXT: li a3, 0
+; CHECK-NEXT: blez a2, .LBB5_9
+; CHECK-NEXT: j .LBB5_10
+entry:
+ %conv = fptosi <4 x float> %x to <4 x i64>
+ %0 = icmp slt <4 x i64> %conv, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
+ %spec.store.select = select <4 x i1> %0, <4 x i64> %conv, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
+ %1 = icmp sgt <4 x i64> %spec.store.select, zeroinitializer
+ %spec.store.select7 = select <4 x i1> %1, <4 x i64> %spec.store.select, <4 x i64> zeroinitializer
+ %conv6 = trunc <4 x i64> %spec.store.select7 to <4 x i32>
+ ret <4 x i32> %conv6
+}
+
+define <4 x i32> @stest_f16i32(<4 x half> %x) {
+; CHECK-LABEL: stest_f16i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s2, 32(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s3, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s4, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset ra, -8
+; CHECK-NEXT: .cfi_offset s0, -16
+; CHECK-NEXT: .cfi_offset s1, -24
+; CHECK-NEXT: .cfi_offset s2, -32
+; CHECK-NEXT: .cfi_offset s3, -40
+; CHECK-NEXT: .cfi_offset s4, -48
+; CHECK-NEXT: lhu s2, 24(a1)
+; CHECK-NEXT: lhu s4, 0(a1)
+; CHECK-NEXT: lhu s0, 8(a1)
+; CHECK-NEXT: lhu a1, 16(a1)
+; CHECK-NEXT: mv s3, a0
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: call __gnu_h2f_ieee@plt
+; CHECK-NEXT: mv s1, a0
+; CHECK-NEXT: mv a0, s0
+; CHECK-NEXT: call __gnu_h2f_ieee@plt
+; CHECK-NEXT: mv s0, a0
+; CHECK-NEXT: mv a0, s4
+; CHECK-NEXT: call __gnu_h2f_ieee@plt
+; CHECK-NEXT: mv s4, a0
+; CHECK-NEXT: fmv.w.x ft0, s0
+; CHECK-NEXT: fsw ft0, 12(sp) # 4-byte Folded Spill
+; CHECK-NEXT: fmv.w.x ft0, s1
+; CHECK-NEXT: fcvt.l.s s0, ft0, rtz
+; CHECK-NEXT: mv a0, s2
+; CHECK-NEXT: call __gnu_h2f_ieee@plt
+; CHECK-NEXT: fmv.w.x ft0, a0
+; CHECK-NEXT: fcvt.l.s a0, ft0, rtz
+; CHECK-NEXT: lui a1, 524288
+; CHECK-NEXT: addiw a4, a1, -1
+; CHECK-NEXT: blt a0, a4, .LBB6_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: mv a0, a4
+; CHECK-NEXT: .LBB6_2: # %entry
+; CHECK-NEXT: fmv.w.x ft0, s4
+; CHECK-NEXT: flw ft1, 12(sp) # 4-byte Folded Reload
+; CHECK-NEXT: fcvt.l.s a2, ft1, rtz
+; CHECK-NEXT: bge s0, a4, .LBB6_11
+; CHECK-NEXT: # %bb.3: # %entry
+; CHECK-NEXT: fcvt.l.s a3, ft0, rtz
+; CHECK-NEXT: bge a2, a4, .LBB6_12
+; CHECK-NEXT: .LBB6_4: # %entry
+; CHECK-NEXT: bge a3, a4, .LBB6_13
+; CHECK-NEXT: .LBB6_5: # %entry
+; CHECK-NEXT: bge a1, a3, .LBB6_14
+; CHECK-NEXT: .LBB6_6: # %entry
+; CHECK-NEXT: bge a1, a2, .LBB6_15
+; CHECK-NEXT: .LBB6_7: # %entry
+; CHECK-NEXT: bge a1, s0, .LBB6_16
+; CHECK-NEXT: .LBB6_8: # %entry
+; CHECK-NEXT: blt a1, a0, .LBB6_10
+; CHECK-NEXT: .LBB6_9: # %entry
+; CHECK-NEXT: lui a0, 524288
+; CHECK-NEXT: .LBB6_10: # %entry
+; CHECK-NEXT: sw a0, 12(s3)
+; CHECK-NEXT: sw s0, 8(s3)
+; CHECK-NEXT: sw a2, 4(s3)
+; CHECK-NEXT: sw a3, 0(s3)
+; CHECK-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s2, 32(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s3, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s4, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 64
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB6_11: # %entry
+; CHECK-NEXT: mv s0, a4
+; CHECK-NEXT: fcvt.l.s a3, ft0, rtz
+; CHECK-NEXT: blt a2, a4, .LBB6_4
+; CHECK-NEXT: .LBB6_12: # %entry
+; CHECK-NEXT: mv a2, a4
+; CHECK-NEXT: blt a3, a4, .LBB6_5
+; CHECK-NEXT: .LBB6_13: # %entry
+; CHECK-NEXT: mv a3, a4
+; CHECK-NEXT: blt a1, a3, .LBB6_6
+; CHECK-NEXT: .LBB6_14: # %entry
+; CHECK-NEXT: lui a3, 524288
+; CHECK-NEXT: blt a1, a2, .LBB6_7
+; CHECK-NEXT: .LBB6_15: # %entry
+; CHECK-NEXT: lui a2, 524288
+; CHECK-NEXT: blt a1, s0, .LBB6_8
+; CHECK-NEXT: .LBB6_16: # %entry
+; CHECK-NEXT: lui s0, 524288
+; CHECK-NEXT: bge a1, a0, .LBB6_9
+; CHECK-NEXT: j .LBB6_10
+entry:
+ %conv = fptosi <4 x half> %x to <4 x i64>
+ %0 = icmp slt <4 x i64> %conv, <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>
+ %spec.store.select = select <4 x i1> %0, <4 x i64> %conv, <4 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>
+ %1 = icmp sgt <4 x i64> %spec.store.select, <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648>
+ %spec.store.select7 = select <4 x i1> %1, <4 x i64> %spec.store.select, <4 x i64> <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648>
+ %conv6 = trunc <4 x i64> %spec.store.select7 to <4 x i32>
+ ret <4 x i32> %conv6
+}
+
+define <4 x i32> @utesth_f16i32(<4 x half> %x) {
+; CHECK-LABEL: utesth_f16i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s2, 32(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s3, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s4, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset ra, -8
+; CHECK-NEXT: .cfi_offset s0, -16
+; CHECK-NEXT: .cfi_offset s1, -24
+; CHECK-NEXT: .cfi_offset s2, -32
+; CHECK-NEXT: .cfi_offset s3, -40
+; CHECK-NEXT: .cfi_offset s4, -48
+; CHECK-NEXT: lhu s2, 0(a1)
+; CHECK-NEXT: lhu s3, 24(a1)
+; CHECK-NEXT: lhu s1, 16(a1)
+; CHECK-NEXT: lhu a1, 8(a1)
+; CHECK-NEXT: mv s0, a0
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: call __gnu_h2f_ieee@plt
+; CHECK-NEXT: mv s4, a0
+; CHECK-NEXT: mv a0, s1
+; CHECK-NEXT: call __gnu_h2f_ieee@plt
+; CHECK-NEXT: mv s1, a0
+; CHECK-NEXT: mv a0, s3
+; CHECK-NEXT: call __gnu_h2f_ieee@plt
+; CHECK-NEXT: mv s3, a0
+; CHECK-NEXT: fmv.w.x ft0, s1
+; CHECK-NEXT: fsw ft0, 12(sp) # 4-byte Folded Spill
+; CHECK-NEXT: fmv.w.x ft0, s4
+; CHECK-NEXT: fcvt.lu.s s1, ft0, rtz
+; CHECK-NEXT: mv a0, s2
+; CHECK-NEXT: call __gnu_h2f_ieee@plt
+; CHECK-NEXT: fmv.w.x ft0, a0
+; CHECK-NEXT: fcvt.lu.s a0, ft0, rtz
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: srli a1, a1, 32
+; CHECK-NEXT: bltu a0, a1, .LBB7_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB7_2: # %entry
+; CHECK-NEXT: fmv.w.x ft0, s3
+; CHECK-NEXT: flw ft1, 12(sp) # 4-byte Folded Reload
+; CHECK-NEXT: fcvt.lu.s a2, ft1, rtz
+; CHECK-NEXT: bgeu s1, a1, .LBB7_7
+; CHECK-NEXT: # %bb.3: # %entry
+; CHECK-NEXT: fcvt.lu.s a3, ft0, rtz
+; CHECK-NEXT: bgeu a2, a1, .LBB7_8
+; CHECK-NEXT: .LBB7_4: # %entry
+; CHECK-NEXT: bltu a3, a1, .LBB7_6
+; CHECK-NEXT: .LBB7_5: # %entry
+; CHECK-NEXT: mv a3, a1
+; CHECK-NEXT: .LBB7_6: # %entry
+; CHECK-NEXT: sw a3, 12(s0)
+; CHECK-NEXT: sw a2, 8(s0)
+; CHECK-NEXT: sw s1, 4(s0)
+; CHECK-NEXT: sw a0, 0(s0)
+; CHECK-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s2, 32(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s3, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s4, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 64
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB7_7: # %entry
+; CHECK-NEXT: mv s1, a1
+; CHECK-NEXT: fcvt.lu.s a3, ft0, rtz
+; CHECK-NEXT: bltu a2, a1, .LBB7_4
+; CHECK-NEXT: .LBB7_8: # %entry
+; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: bgeu a3, a1, .LBB7_5
+; CHECK-NEXT: j .LBB7_6
+entry:
+ %conv = fptoui <4 x half> %x to <4 x i64>
+ %0 = icmp ult <4 x i64> %conv, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
+ %spec.store.select = select <4 x i1> %0, <4 x i64> %conv, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
+ %conv6 = trunc <4 x i64> %spec.store.select to <4 x i32>
+ ret <4 x i32> %conv6
+}
+
+define <4 x i32> @ustest_f16i32(<4 x half> %x) {
+; CHECK-LABEL: ustest_f16i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s2, 32(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s3, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s4, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset ra, -8
+; CHECK-NEXT: .cfi_offset s0, -16
+; CHECK-NEXT: .cfi_offset s1, -24
+; CHECK-NEXT: .cfi_offset s2, -32
+; CHECK-NEXT: .cfi_offset s3, -40
+; CHECK-NEXT: .cfi_offset s4, -48
+; CHECK-NEXT: lhu s2, 24(a1)
+; CHECK-NEXT: lhu s4, 0(a1)
+; CHECK-NEXT: lhu s0, 8(a1)
+; CHECK-NEXT: lhu a1, 16(a1)
+; CHECK-NEXT: mv s3, a0
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: call __gnu_h2f_ieee@plt
+; CHECK-NEXT: mv s1, a0
+; CHECK-NEXT: mv a0, s0
+; CHECK-NEXT: call __gnu_h2f_ieee@plt
+; CHECK-NEXT: mv s0, a0
+; CHECK-NEXT: mv a0, s4
+; CHECK-NEXT: call __gnu_h2f_ieee@plt
+; CHECK-NEXT: mv s4, a0
+; CHECK-NEXT: fmv.w.x ft0, s0
+; CHECK-NEXT: fsw ft0, 12(sp) # 4-byte Folded Spill
+; CHECK-NEXT: fmv.w.x ft0, s1
+; CHECK-NEXT: fcvt.l.s s0, ft0, rtz
+; CHECK-NEXT: mv a0, s2
+; CHECK-NEXT: call __gnu_h2f_ieee@plt
+; CHECK-NEXT: fmv.w.x ft0, a0
+; CHECK-NEXT: fcvt.l.s a0, ft0, rtz
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: srli a3, a1, 32
+; CHECK-NEXT: blt a0, a3, .LBB8_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: mv a0, a3
+; CHECK-NEXT: .LBB8_2: # %entry
+; CHECK-NEXT: fmv.w.x ft0, s4
+; CHECK-NEXT: flw ft1, 12(sp) # 4-byte Folded Reload
+; CHECK-NEXT: fcvt.l.s a1, ft1, rtz
+; CHECK-NEXT: bge s0, a3, .LBB8_11
+; CHECK-NEXT: # %bb.3: # %entry
+; CHECK-NEXT: fcvt.l.s a2, ft0, rtz
+; CHECK-NEXT: bge a1, a3, .LBB8_12
+; CHECK-NEXT: .LBB8_4: # %entry
+; CHECK-NEXT: bge a2, a3, .LBB8_13
+; CHECK-NEXT: .LBB8_5: # %entry
+; CHECK-NEXT: blez a2, .LBB8_14
+; CHECK-NEXT: .LBB8_6: # %entry
+; CHECK-NEXT: blez a1, .LBB8_15
+; CHECK-NEXT: .LBB8_7: # %entry
+; CHECK-NEXT: blez s0, .LBB8_16
+; CHECK-NEXT: .LBB8_8: # %entry
+; CHECK-NEXT: bgtz a0, .LBB8_10
+; CHECK-NEXT: .LBB8_9: # %entry
+; CHECK-NEXT: li a0, 0
+; CHECK-NEXT: .LBB8_10: # %entry
+; CHECK-NEXT: sw a0, 12(s3)
+; CHECK-NEXT: sw s0, 8(s3)
+; CHECK-NEXT: sw a1, 4(s3)
+; CHECK-NEXT: sw a2, 0(s3)
+; CHECK-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s2, 32(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s3, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s4, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 64
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB8_11: # %entry
+; CHECK-NEXT: mv s0, a3
+; CHECK-NEXT: fcvt.l.s a2, ft0, rtz
+; CHECK-NEXT: blt a1, a3, .LBB8_4
+; CHECK-NEXT: .LBB8_12: # %entry
+; CHECK-NEXT: mv a1, a3
+; CHECK-NEXT: blt a2, a3, .LBB8_5
+; CHECK-NEXT: .LBB8_13: # %entry
+; CHECK-NEXT: mv a2, a3
+; CHECK-NEXT: bgtz a2, .LBB8_6
+; CHECK-NEXT: .LBB8_14: # %entry
+; CHECK-NEXT: li a2, 0
+; CHECK-NEXT: bgtz a1, .LBB8_7
+; CHECK-NEXT: .LBB8_15: # %entry
+; CHECK-NEXT: li a1, 0
+; CHECK-NEXT: bgtz s0, .LBB8_8
+; CHECK-NEXT: .LBB8_16: # %entry
+; CHECK-NEXT: li s0, 0
+; CHECK-NEXT: blez a0, .LBB8_9
+; CHECK-NEXT: j .LBB8_10
+entry:
+ %conv = fptosi <4 x half> %x to <4 x i64>
+ %0 = icmp slt <4 x i64> %conv, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
+ %spec.store.select = select <4 x i1> %0, <4 x i64> %conv, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
+ %1 = icmp sgt <4 x i64> %spec.store.select, zeroinitializer
+ %spec.store.select7 = select <4 x i1> %1, <4 x i64> %spec.store.select, <4 x i64> zeroinitializer
+ %conv6 = trunc <4 x i64> %spec.store.select7 to <4 x i32>
+ ret <4 x i32> %conv6
+}
+
+; i16 saturate
+
+define <2 x i16> @stest_f64i16(<2 x double> %x) {
+; CHECK-LABEL: stest_f64i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fmv.d.x ft0, a1
+; CHECK-NEXT: fmv.d.x ft1, a0
+; CHECK-NEXT: fcvt.w.d a1, ft0, rtz
+; CHECK-NEXT: lui a0, 8
+; CHECK-NEXT: addiw a2, a0, -1
+; CHECK-NEXT: fcvt.w.d a0, ft1, rtz
+; CHECK-NEXT: bge a1, a2, .LBB9_5
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: bge a0, a2, .LBB9_6
+; CHECK-NEXT: .LBB9_2: # %entry
+; CHECK-NEXT: lui a2, 1048568
+; CHECK-NEXT: bge a2, a0, .LBB9_7
+; CHECK-NEXT: .LBB9_3: # %entry
+; CHECK-NEXT: bge a2, a1, .LBB9_8
+; CHECK-NEXT: .LBB9_4: # %entry
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB9_5: # %entry
+; CHECK-NEXT: mv a1, a2
+; CHECK-NEXT: blt a0, a2, .LBB9_2
+; CHECK-NEXT: .LBB9_6: # %entry
+; CHECK-NEXT: mv a0, a2
+; CHECK-NEXT: lui a2, 1048568
+; CHECK-NEXT: blt a2, a0, .LBB9_3
+; CHECK-NEXT: .LBB9_7: # %entry
+; CHECK-NEXT: lui a0, 1048568
+; CHECK-NEXT: blt a2, a1, .LBB9_4
+; CHECK-NEXT: .LBB9_8: # %entry
+; CHECK-NEXT: lui a1, 1048568
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi <2 x double> %x to <2 x i32>
+ %0 = icmp slt <2 x i32> %conv, <i32 32767, i32 32767>
+ %spec.store.select = select <2 x i1> %0, <2 x i32> %conv, <2 x i32> <i32 32767, i32 32767>
+ %1 = icmp sgt <2 x i32> %spec.store.select, <i32 -32768, i32 -32768>
+ %spec.store.select7 = select <2 x i1> %1, <2 x i32> %spec.store.select, <2 x i32> <i32 -32768, i32 -32768>
+ %conv6 = trunc <2 x i32> %spec.store.select7 to <2 x i16>
+ ret <2 x i16> %conv6
+}
+
+define <2 x i16> @utest_f64i16(<2 x double> %x) {
+; CHECK-LABEL: utest_f64i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fmv.d.x ft0, a0
+; CHECK-NEXT: fmv.d.x ft1, a1
+; CHECK-NEXT: fcvt.wu.d a0, ft0, rtz
+; CHECK-NEXT: lui a1, 16
+; CHECK-NEXT: addiw a2, a1, -1
+; CHECK-NEXT: fcvt.wu.d a1, ft1, rtz
+; CHECK-NEXT: bgeu a0, a2, .LBB10_3
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: bgeu a1, a2, .LBB10_4
+; CHECK-NEXT: .LBB10_2: # %entry
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB10_3: # %entry
+; CHECK-NEXT: mv a0, a2
+; CHECK-NEXT: bltu a1, a2, .LBB10_2
+; CHECK-NEXT: .LBB10_4: # %entry
+; CHECK-NEXT: mv a1, a2
+; CHECK-NEXT: ret
+entry:
+ %conv = fptoui <2 x double> %x to <2 x i32>
+ %0 = icmp ult <2 x i32> %conv, <i32 65535, i32 65535>
+ %spec.store.select = select <2 x i1> %0, <2 x i32> %conv, <2 x i32> <i32 65535, i32 65535>
+ %conv6 = trunc <2 x i32> %spec.store.select to <2 x i16>
+ ret <2 x i16> %conv6
+}
+
+define <2 x i16> @ustest_f64i16(<2 x double> %x) {
+; CHECK-LABEL: ustest_f64i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fmv.d.x ft0, a1
+; CHECK-NEXT: fmv.d.x ft1, a0
+; CHECK-NEXT: fcvt.w.d a1, ft0, rtz
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addiw a2, a0, -1
+; CHECK-NEXT: fcvt.w.d a0, ft1, rtz
+; CHECK-NEXT: bge a1, a2, .LBB11_5
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: bge a0, a2, .LBB11_6
+; CHECK-NEXT: .LBB11_2: # %entry
+; CHECK-NEXT: blez a0, .LBB11_7
+; CHECK-NEXT: .LBB11_3: # %entry
+; CHECK-NEXT: blez a1, .LBB11_8
+; CHECK-NEXT: .LBB11_4: # %entry
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB11_5: # %entry
+; CHECK-NEXT: mv a1, a2
+; CHECK-NEXT: blt a0, a2, .LBB11_2
+; CHECK-NEXT: .LBB11_6: # %entry
+; CHECK-NEXT: mv a0, a2
+; CHECK-NEXT: bgtz a0, .LBB11_3
+; CHECK-NEXT: .LBB11_7: # %entry
+; CHECK-NEXT: li a0, 0
+; CHECK-NEXT: bgtz a1, .LBB11_4
+; CHECK-NEXT: .LBB11_8: # %entry
+; CHECK-NEXT: li a1, 0
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi <2 x double> %x to <2 x i32>
+ %0 = icmp slt <2 x i32> %conv, <i32 65535, i32 65535>
+ %spec.store.select = select <2 x i1> %0, <2 x i32> %conv, <2 x i32> <i32 65535, i32 65535>
+ %1 = icmp sgt <2 x i32> %spec.store.select, zeroinitializer
+ %spec.store.select7 = select <2 x i1> %1, <2 x i32> %spec.store.select, <2 x i32> zeroinitializer
+ %conv6 = trunc <2 x i32> %spec.store.select7 to <2 x i16>
+ ret <2 x i16> %conv6
+}
+
+define <4 x i16> @stest_f32i16(<4 x float> %x) {
+; CHECK-LABEL: stest_f32i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fmv.w.x ft1, a4
+; CHECK-NEXT: fmv.w.x ft2, a3
+; CHECK-NEXT: fmv.w.x ft0, a2
+; CHECK-NEXT: fcvt.w.s a2, ft1, rtz
+; CHECK-NEXT: lui a3, 8
+; CHECK-NEXT: addiw a5, a3, -1
+; CHECK-NEXT: fcvt.w.s a3, ft2, rtz
+; CHECK-NEXT: blt a2, a5, .LBB12_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: mv a2, a5
+; CHECK-NEXT: .LBB12_2: # %entry
+; CHECK-NEXT: fmv.w.x ft1, a1
+; CHECK-NEXT: fcvt.w.s a1, ft0, rtz
+; CHECK-NEXT: bge a3, a5, .LBB12_11
+; CHECK-NEXT: # %bb.3: # %entry
+; CHECK-NEXT: fcvt.w.s a4, ft1, rtz
+; CHECK-NEXT: bge a1, a5, .LBB12_12
+; CHECK-NEXT: .LBB12_4: # %entry
+; CHECK-NEXT: bge a4, a5, .LBB12_13
+; CHECK-NEXT: .LBB12_5: # %entry
+; CHECK-NEXT: lui a5, 1048568
+; CHECK-NEXT: bge a5, a4, .LBB12_14
+; CHECK-NEXT: .LBB12_6: # %entry
+; CHECK-NEXT: bge a5, a1, .LBB12_15
+; CHECK-NEXT: .LBB12_7: # %entry
+; CHECK-NEXT: bge a5, a3, .LBB12_16
+; CHECK-NEXT: .LBB12_8: # %entry
+; CHECK-NEXT: blt a5, a2, .LBB12_10
+; CHECK-NEXT: .LBB12_9: # %entry
+; CHECK-NEXT: lui a2, 1048568
+; CHECK-NEXT: .LBB12_10: # %entry
+; CHECK-NEXT: sh a2, 6(a0)
+; CHECK-NEXT: sh a3, 4(a0)
+; CHECK-NEXT: sh a1, 2(a0)
+; CHECK-NEXT: sh a4, 0(a0)
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB12_11: # %entry
+; CHECK-NEXT: mv a3, a5
+; CHECK-NEXT: fcvt.w.s a4, ft1, rtz
+; CHECK-NEXT: blt a1, a5, .LBB12_4
+; CHECK-NEXT: .LBB12_12: # %entry
+; CHECK-NEXT: mv a1, a5
+; CHECK-NEXT: blt a4, a5, .LBB12_5
+; CHECK-NEXT: .LBB12_13: # %entry
+; CHECK-NEXT: mv a4, a5
+; CHECK-NEXT: lui a5, 1048568
+; CHECK-NEXT: blt a5, a4, .LBB12_6
+; CHECK-NEXT: .LBB12_14: # %entry
+; CHECK-NEXT: lui a4, 1048568
+; CHECK-NEXT: blt a5, a1, .LBB12_7
+; CHECK-NEXT: .LBB12_15: # %entry
+; CHECK-NEXT: lui a1, 1048568
+; CHECK-NEXT: blt a5, a3, .LBB12_8
+; CHECK-NEXT: .LBB12_16: # %entry
+; CHECK-NEXT: lui a3, 1048568
+; CHECK-NEXT: bge a5, a2, .LBB12_9
+; CHECK-NEXT: j .LBB12_10
+entry:
+ %conv = fptosi <4 x float> %x to <4 x i32>
+ %0 = icmp slt <4 x i32> %conv, <i32 32767, i32 32767, i32 32767, i32 32767>
+ %spec.store.select = select <4 x i1> %0, <4 x i32> %conv, <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767>
+ %1 = icmp sgt <4 x i32> %spec.store.select, <i32 -32768, i32 -32768, i32 -32768, i32 -32768>
+ %spec.store.select7 = select <4 x i1> %1, <4 x i32> %spec.store.select, <4 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768>
+ %conv6 = trunc <4 x i32> %spec.store.select7 to <4 x i16>
+ ret <4 x i16> %conv6
+}
+
+define <4 x i16> @utest_f32i16(<4 x float> %x) {
+; CHECK-LABEL: utest_f32i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fmv.w.x ft1, a1
+; CHECK-NEXT: fmv.w.x ft2, a2
+; CHECK-NEXT: fmv.w.x ft0, a3
+; CHECK-NEXT: fcvt.wu.s a1, ft1, rtz
+; CHECK-NEXT: lui a2, 16
+; CHECK-NEXT: addiw a3, a2, -1
+; CHECK-NEXT: fcvt.wu.s a2, ft2, rtz
+; CHECK-NEXT: bltu a1, a3, .LBB13_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: mv a1, a3
+; CHECK-NEXT: .LBB13_2: # %entry
+; CHECK-NEXT: fmv.w.x ft1, a4
+; CHECK-NEXT: fcvt.wu.s a4, ft0, rtz
+; CHECK-NEXT: bgeu a2, a3, .LBB13_7
+; CHECK-NEXT: # %bb.3: # %entry
+; CHECK-NEXT: fcvt.wu.s a5, ft1, rtz
+; CHECK-NEXT: bgeu a4, a3, .LBB13_8
+; CHECK-NEXT: .LBB13_4: # %entry
+; CHECK-NEXT: bltu a5, a3, .LBB13_6
+; CHECK-NEXT: .LBB13_5: # %entry
+; CHECK-NEXT: mv a5, a3
+; CHECK-NEXT: .LBB13_6: # %entry
+; CHECK-NEXT: sh a5, 6(a0)
+; CHECK-NEXT: sh a4, 4(a0)
+; CHECK-NEXT: sh a2, 2(a0)
+; CHECK-NEXT: sh a1, 0(a0)
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB13_7: # %entry
+; CHECK-NEXT: mv a2, a3
+; CHECK-NEXT: fcvt.wu.s a5, ft1, rtz
+; CHECK-NEXT: bltu a4, a3, .LBB13_4
+; CHECK-NEXT: .LBB13_8: # %entry
+; CHECK-NEXT: mv a4, a3
+; CHECK-NEXT: bgeu a5, a3, .LBB13_5
+; CHECK-NEXT: j .LBB13_6
+entry:
+ %conv = fptoui <4 x float> %x to <4 x i32>
+ %0 = icmp ult <4 x i32> %conv, <i32 65535, i32 65535, i32 65535, i32 65535>
+ %spec.store.select = select <4 x i1> %0, <4 x i32> %conv, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>
+ %conv6 = trunc <4 x i32> %spec.store.select to <4 x i16>
+ ret <4 x i16> %conv6
+}
+
+define <4 x i16> @ustest_f32i16(<4 x float> %x) {
+; CHECK-LABEL: ustest_f32i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fmv.w.x ft1, a4
+; CHECK-NEXT: fmv.w.x ft2, a3
+; CHECK-NEXT: fmv.w.x ft0, a2
+; CHECK-NEXT: fcvt.w.s a2, ft1, rtz
+; CHECK-NEXT: lui a3, 16
+; CHECK-NEXT: addiw a5, a3, -1
+; CHECK-NEXT: fcvt.w.s a3, ft2, rtz
+; CHECK-NEXT: blt a2, a5, .LBB14_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: mv a2, a5
+; CHECK-NEXT: .LBB14_2: # %entry
+; CHECK-NEXT: fmv.w.x ft1, a1
+; CHECK-NEXT: fcvt.w.s a1, ft0, rtz
+; CHECK-NEXT: bge a3, a5, .LBB14_11
+; CHECK-NEXT: # %bb.3: # %entry
+; CHECK-NEXT: fcvt.w.s a4, ft1, rtz
+; CHECK-NEXT: bge a1, a5, .LBB14_12
+; CHECK-NEXT: .LBB14_4: # %entry
+; CHECK-NEXT: bge a4, a5, .LBB14_13
+; CHECK-NEXT: .LBB14_5: # %entry
+; CHECK-NEXT: blez a4, .LBB14_14
+; CHECK-NEXT: .LBB14_6: # %entry
+; CHECK-NEXT: blez a1, .LBB14_15
+; CHECK-NEXT: .LBB14_7: # %entry
+; CHECK-NEXT: blez a3, .LBB14_16
+; CHECK-NEXT: .LBB14_8: # %entry
+; CHECK-NEXT: bgtz a2, .LBB14_10
+; CHECK-NEXT: .LBB14_9: # %entry
+; CHECK-NEXT: li a2, 0
+; CHECK-NEXT: .LBB14_10: # %entry
+; CHECK-NEXT: sh a2, 6(a0)
+; CHECK-NEXT: sh a3, 4(a0)
+; CHECK-NEXT: sh a1, 2(a0)
+; CHECK-NEXT: sh a4, 0(a0)
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB14_11: # %entry
+; CHECK-NEXT: mv a3, a5
+; CHECK-NEXT: fcvt.w.s a4, ft1, rtz
+; CHECK-NEXT: blt a1, a5, .LBB14_4
+; CHECK-NEXT: .LBB14_12: # %entry
+; CHECK-NEXT: mv a1, a5
+; CHECK-NEXT: blt a4, a5, .LBB14_5
+; CHECK-NEXT: .LBB14_13: # %entry
+; CHECK-NEXT: mv a4, a5
+; CHECK-NEXT: bgtz a4, .LBB14_6
+; CHECK-NEXT: .LBB14_14: # %entry
+; CHECK-NEXT: li a4, 0
+; CHECK-NEXT: bgtz a1, .LBB14_7
+; CHECK-NEXT: .LBB14_15: # %entry
+; CHECK-NEXT: li a1, 0
+; CHECK-NEXT: bgtz a3, .LBB14_8
+; CHECK-NEXT: .LBB14_16: # %entry
+; CHECK-NEXT: li a3, 0
+; CHECK-NEXT: blez a2, .LBB14_9
+; CHECK-NEXT: j .LBB14_10
+entry:
+ %conv = fptosi <4 x float> %x to <4 x i32>
+ %0 = icmp slt <4 x i32> %conv, <i32 65535, i32 65535, i32 65535, i32 65535>
+ %spec.store.select = select <4 x i1> %0, <4 x i32> %conv, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>
+ %1 = icmp sgt <4 x i32> %spec.store.select, zeroinitializer
+ %spec.store.select7 = select <4 x i1> %1, <4 x i32> %spec.store.select, <4 x i32> zeroinitializer
+ %conv6 = trunc <4 x i32> %spec.store.select7 to <4 x i16>
+ ret <4 x i16> %conv6
+}
+
+define <8 x i16> @stest_f16i16(<8 x half> %x) {
+; CHECK-LABEL: stest_f16i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -96
+; CHECK-NEXT: .cfi_def_cfa_offset 96
+; CHECK-NEXT: sd ra, 88(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 80(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s1, 72(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s2, 64(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s3, 56(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s4, 48(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s5, 40(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s6, 32(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s7, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s8, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s9, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset ra, -8
+; CHECK-NEXT: .cfi_offset s0, -16
+; CHECK-NEXT: .cfi_offset s1, -24
+; CHECK-NEXT: .cfi_offset s2, -32
+; CHECK-NEXT: .cfi_offset s3, -40
+; CHECK-NEXT: .cfi_offset s4, -48
+; CHECK-NEXT: .cfi_offset s5, -56
+; CHECK-NEXT: .cfi_offset s6, -64
+; CHECK-NEXT: .cfi_offset s7, -72
+; CHECK-NEXT: .cfi_offset s8, -80
+; CHECK-NEXT: .cfi_offset s9, -88
+; CHECK-NEXT: lhu s6, 56(a1)
+; CHECK-NEXT: lhu s2, 0(a1)
+; CHECK-NEXT: lhu s3, 8(a1)
+; CHECK-NEXT: lhu s4, 16(a1)
+; CHECK-NEXT: lhu s5, 24(a1)
+; CHECK-NEXT: lhu s1, 32(a1)
+; CHECK-NEXT: lhu s0, 40(a1)
+; CHECK-NEXT: lhu a1, 48(a1)
+; CHECK-NEXT: mv s8, a0
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: call __gnu_h2f_ieee at plt
+; CHECK-NEXT: mv s9, a0
+; CHECK-NEXT: mv a0, s0
+; CHECK-NEXT: call __gnu_h2f_ieee at plt
+; CHECK-NEXT: mv s0, a0
+; CHECK-NEXT: mv a0, s1
+; CHECK-NEXT: call __gnu_h2f_ieee at plt
+; CHECK-NEXT: mv s7, a0
+; CHECK-NEXT: mv a0, s5
+; CHECK-NEXT: call __gnu_h2f_ieee at plt
+; CHECK-NEXT: mv s5, a0
+; CHECK-NEXT: mv a0, s4
+; CHECK-NEXT: call __gnu_h2f_ieee at plt
+; CHECK-NEXT: mv s4, a0
+; CHECK-NEXT: mv a0, s3
+; CHECK-NEXT: call __gnu_h2f_ieee at plt
+; CHECK-NEXT: mv s3, a0
+; CHECK-NEXT: mv a0, s2
+; CHECK-NEXT: call __gnu_h2f_ieee at plt
+; CHECK-NEXT: mv s2, a0
+; CHECK-NEXT: fmv.w.x ft0, s0
+; CHECK-NEXT: fsw ft0, 4(sp) # 4-byte Folded Spill
+; CHECK-NEXT: fmv.w.x ft0, s9
+; CHECK-NEXT: fcvt.l.s s9, ft0, rtz
+; CHECK-NEXT: mv a0, s6
+; CHECK-NEXT: call __gnu_h2f_ieee at plt
+; CHECK-NEXT: fmv.w.x ft0, a0
+; CHECK-NEXT: fcvt.l.s a0, ft0, rtz
+; CHECK-NEXT: lui a1, 8
+; CHECK-NEXT: addiw s1, a1, -1
+; CHECK-NEXT: blt a0, s1, .LBB15_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: mv a0, s1
+; CHECK-NEXT: .LBB15_2: # %entry
+; CHECK-NEXT: fmv.w.x ft1, s7
+; CHECK-NEXT: flw ft0, 4(sp) # 4-byte Folded Reload
+; CHECK-NEXT: fcvt.l.s a1, ft0, rtz
+; CHECK-NEXT: blt s9, s1, .LBB15_4
+; CHECK-NEXT: # %bb.3: # %entry
+; CHECK-NEXT: mv s9, s1
+; CHECK-NEXT: .LBB15_4: # %entry
+; CHECK-NEXT: fmv.w.x ft0, s5
+; CHECK-NEXT: fcvt.l.s a2, ft1, rtz
+; CHECK-NEXT: blt a1, s1, .LBB15_6
+; CHECK-NEXT: # %bb.5: # %entry
+; CHECK-NEXT: mv a1, s1
+; CHECK-NEXT: .LBB15_6: # %entry
+; CHECK-NEXT: fmv.w.x ft1, s4
+; CHECK-NEXT: fcvt.l.s a3, ft0, rtz
+; CHECK-NEXT: blt a2, s1, .LBB15_8
+; CHECK-NEXT: # %bb.7: # %entry
+; CHECK-NEXT: mv a2, s1
+; CHECK-NEXT: .LBB15_8: # %entry
+; CHECK-NEXT: fmv.w.x ft0, s3
+; CHECK-NEXT: fcvt.l.s a4, ft1, rtz
+; CHECK-NEXT: blt a3, s1, .LBB15_10
+; CHECK-NEXT: # %bb.9: # %entry
+; CHECK-NEXT: mv a3, s1
+; CHECK-NEXT: .LBB15_10: # %entry
+; CHECK-NEXT: fmv.w.x ft1, s2
+; CHECK-NEXT: fcvt.l.s a5, ft0, rtz
+; CHECK-NEXT: bge a4, s1, .LBB15_23
+; CHECK-NEXT: # %bb.11: # %entry
+; CHECK-NEXT: fcvt.l.s s0, ft1, rtz
+; CHECK-NEXT: bge a5, s1, .LBB15_24
+; CHECK-NEXT: .LBB15_12: # %entry
+; CHECK-NEXT: bge s0, s1, .LBB15_25
+; CHECK-NEXT: .LBB15_13: # %entry
+; CHECK-NEXT: lui s1, 1048568
+; CHECK-NEXT: bge s1, s0, .LBB15_26
+; CHECK-NEXT: .LBB15_14: # %entry
+; CHECK-NEXT: bge s1, a5, .LBB15_27
+; CHECK-NEXT: .LBB15_15: # %entry
+; CHECK-NEXT: bge s1, a4, .LBB15_28
+; CHECK-NEXT: .LBB15_16: # %entry
+; CHECK-NEXT: bge s1, a3, .LBB15_29
+; CHECK-NEXT: .LBB15_17: # %entry
+; CHECK-NEXT: bge s1, a2, .LBB15_30
+; CHECK-NEXT: .LBB15_18: # %entry
+; CHECK-NEXT: bge s1, a1, .LBB15_31
+; CHECK-NEXT: .LBB15_19: # %entry
+; CHECK-NEXT: bge s1, s9, .LBB15_32
+; CHECK-NEXT: .LBB15_20: # %entry
+; CHECK-NEXT: blt s1, a0, .LBB15_22
+; CHECK-NEXT: .LBB15_21: # %entry
+; CHECK-NEXT: lui a0, 1048568
+; CHECK-NEXT: .LBB15_22: # %entry
+; CHECK-NEXT: sh a0, 14(s8)
+; CHECK-NEXT: sh s9, 12(s8)
+; CHECK-NEXT: sh a1, 10(s8)
+; CHECK-NEXT: sh a2, 8(s8)
+; CHECK-NEXT: sh a3, 6(s8)
+; CHECK-NEXT: sh a4, 4(s8)
+; CHECK-NEXT: sh a5, 2(s8)
+; CHECK-NEXT: sh s0, 0(s8)
+; CHECK-NEXT: ld ra, 88(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 80(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s1, 72(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s2, 64(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s3, 56(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s4, 48(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s5, 40(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s6, 32(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s7, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s8, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s9, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 96
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB15_23: # %entry
+; CHECK-NEXT: mv a4, s1
+; CHECK-NEXT: fcvt.l.s s0, ft1, rtz
+; CHECK-NEXT: blt a5, s1, .LBB15_12
+; CHECK-NEXT: .LBB15_24: # %entry
+; CHECK-NEXT: mv a5, s1
+; CHECK-NEXT: blt s0, s1, .LBB15_13
+; CHECK-NEXT: .LBB15_25: # %entry
+; CHECK-NEXT: mv s0, s1
+; CHECK-NEXT: lui s1, 1048568
+; CHECK-NEXT: blt s1, s0, .LBB15_14
+; CHECK-NEXT: .LBB15_26: # %entry
+; CHECK-NEXT: lui s0, 1048568
+; CHECK-NEXT: blt s1, a5, .LBB15_15
+; CHECK-NEXT: .LBB15_27: # %entry
+; CHECK-NEXT: lui a5, 1048568
+; CHECK-NEXT: blt s1, a4, .LBB15_16
+; CHECK-NEXT: .LBB15_28: # %entry
+; CHECK-NEXT: lui a4, 1048568
+; CHECK-NEXT: blt s1, a3, .LBB15_17
+; CHECK-NEXT: .LBB15_29: # %entry
+; CHECK-NEXT: lui a3, 1048568
+; CHECK-NEXT: blt s1, a2, .LBB15_18
+; CHECK-NEXT: .LBB15_30: # %entry
+; CHECK-NEXT: lui a2, 1048568
+; CHECK-NEXT: blt s1, a1, .LBB15_19
+; CHECK-NEXT: .LBB15_31: # %entry
+; CHECK-NEXT: lui a1, 1048568
+; CHECK-NEXT: blt s1, s9, .LBB15_20
+; CHECK-NEXT: .LBB15_32: # %entry
+; CHECK-NEXT: lui s9, 1048568
+; CHECK-NEXT: bge s1, a0, .LBB15_21
+; CHECK-NEXT: j .LBB15_22
+; Signed i16 saturation test: fptosi <8 x half> -> <8 x i32>, then clamp each
+; lane to [-32768, 32767] (smin with 32767, smax with -32768) and trunc to
+; <8 x i16>. The expectation lines above are autogenerated by
+; update_llc_test_checks.py — regenerate rather than hand-edit.
+entry:
+ %conv = fptosi <8 x half> %x to <8 x i32>
+ %0 = icmp slt <8 x i32> %conv, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
+ %spec.store.select = select <8 x i1> %0, <8 x i32> %conv, <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
+ %1 = icmp sgt <8 x i32> %spec.store.select, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
+ %spec.store.select7 = select <8 x i1> %1, <8 x i32> %spec.store.select, <8 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
+ %conv6 = trunc <8 x i32> %spec.store.select7 to <8 x i16>
+ ret <8 x i16> %conv6
+}
+
+define <8 x i16> @utesth_f16i16(<8 x half> %x) {
+; CHECK-LABEL: utesth_f16i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -96
+; CHECK-NEXT: .cfi_def_cfa_offset 96
+; CHECK-NEXT: sd ra, 88(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 80(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s1, 72(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s2, 64(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s3, 56(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s4, 48(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s5, 40(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s6, 32(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s7, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s8, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s9, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset ra, -8
+; CHECK-NEXT: .cfi_offset s0, -16
+; CHECK-NEXT: .cfi_offset s1, -24
+; CHECK-NEXT: .cfi_offset s2, -32
+; CHECK-NEXT: .cfi_offset s3, -40
+; CHECK-NEXT: .cfi_offset s4, -48
+; CHECK-NEXT: .cfi_offset s5, -56
+; CHECK-NEXT: .cfi_offset s6, -64
+; CHECK-NEXT: .cfi_offset s7, -72
+; CHECK-NEXT: .cfi_offset s8, -80
+; CHECK-NEXT: .cfi_offset s9, -88
+; CHECK-NEXT: lhu s5, 0(a1)
+; CHECK-NEXT: lhu s2, 56(a1)
+; CHECK-NEXT: lhu s3, 48(a1)
+; CHECK-NEXT: lhu s4, 40(a1)
+; CHECK-NEXT: lhu s6, 32(a1)
+; CHECK-NEXT: lhu s7, 24(a1)
+; CHECK-NEXT: lhu s1, 16(a1)
+; CHECK-NEXT: lhu a1, 8(a1)
+; CHECK-NEXT: mv s0, a0
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: call __gnu_h2f_ieee at plt
+; CHECK-NEXT: mv s8, a0
+; CHECK-NEXT: mv a0, s1
+; CHECK-NEXT: call __gnu_h2f_ieee at plt
+; CHECK-NEXT: mv s9, a0
+; CHECK-NEXT: mv a0, s7
+; CHECK-NEXT: call __gnu_h2f_ieee at plt
+; CHECK-NEXT: mv s1, a0
+; CHECK-NEXT: mv a0, s6
+; CHECK-NEXT: call __gnu_h2f_ieee at plt
+; CHECK-NEXT: mv s6, a0
+; CHECK-NEXT: mv a0, s4
+; CHECK-NEXT: call __gnu_h2f_ieee at plt
+; CHECK-NEXT: mv s4, a0
+; CHECK-NEXT: mv a0, s3
+; CHECK-NEXT: call __gnu_h2f_ieee at plt
+; CHECK-NEXT: mv s3, a0
+; CHECK-NEXT: mv a0, s2
+; CHECK-NEXT: call __gnu_h2f_ieee at plt
+; CHECK-NEXT: mv s2, a0
+; CHECK-NEXT: fmv.w.x ft0, s1
+; CHECK-NEXT: fsw ft0, 4(sp) # 4-byte Folded Spill
+; CHECK-NEXT: fmv.w.x ft0, s9
+; CHECK-NEXT: fcvt.lu.s s7, ft0, rtz
+; CHECK-NEXT: fmv.w.x ft0, s8
+; CHECK-NEXT: fcvt.lu.s s8, ft0, rtz
+; CHECK-NEXT: sext.w s1, s8
+; CHECK-NEXT: mv a0, s5
+; CHECK-NEXT: call __gnu_h2f_ieee at plt
+; CHECK-NEXT: fmv.w.x ft0, a0
+; CHECK-NEXT: fcvt.lu.s a6, ft0, rtz
+; CHECK-NEXT: sext.w a0, a6
+; CHECK-NEXT: lui a1, 16
+; CHECK-NEXT: addiw a1, a1, -1
+; CHECK-NEXT: bltu a0, a1, .LBB16_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: mv a6, a1
+; CHECK-NEXT: .LBB16_2: # %entry
+; CHECK-NEXT: fmv.w.x ft0, s6
+; CHECK-NEXT: flw ft1, 4(sp) # 4-byte Folded Reload
+; CHECK-NEXT: fcvt.lu.s a7, ft1, rtz
+; CHECK-NEXT: sext.w a4, s7
+; CHECK-NEXT: bltu s1, a1, .LBB16_4
+; CHECK-NEXT: # %bb.3: # %entry
+; CHECK-NEXT: mv s8, a1
+; CHECK-NEXT: .LBB16_4: # %entry
+; CHECK-NEXT: fmv.w.x ft1, s4
+; CHECK-NEXT: fcvt.lu.s a3, ft0, rtz
+; CHECK-NEXT: sext.w a5, a7
+; CHECK-NEXT: bltu a4, a1, .LBB16_6
+; CHECK-NEXT: # %bb.5: # %entry
+; CHECK-NEXT: mv s7, a1
+; CHECK-NEXT: .LBB16_6: # %entry
+; CHECK-NEXT: fmv.w.x ft0, s3
+; CHECK-NEXT: fcvt.lu.s a4, ft1, rtz
+; CHECK-NEXT: sext.w s1, a3
+; CHECK-NEXT: bltu a5, a1, .LBB16_8
+; CHECK-NEXT: # %bb.7: # %entry
+; CHECK-NEXT: mv a7, a1
+; CHECK-NEXT: .LBB16_8: # %entry
+; CHECK-NEXT: fmv.w.x ft1, s2
+; CHECK-NEXT: fcvt.lu.s a5, ft0, rtz
+; CHECK-NEXT: sext.w a0, a4
+; CHECK-NEXT: bltu s1, a1, .LBB16_10
+; CHECK-NEXT: # %bb.9: # %entry
+; CHECK-NEXT: mv a3, a1
+; CHECK-NEXT: .LBB16_10: # %entry
+; CHECK-NEXT: fcvt.lu.s s1, ft1, rtz
+; CHECK-NEXT: sext.w a2, a5
+; CHECK-NEXT: bgeu a0, a1, .LBB16_15
+; CHECK-NEXT: # %bb.11: # %entry
+; CHECK-NEXT: sext.w a0, s1
+; CHECK-NEXT: bgeu a2, a1, .LBB16_16
+; CHECK-NEXT: .LBB16_12: # %entry
+; CHECK-NEXT: bltu a0, a1, .LBB16_14
+; CHECK-NEXT: .LBB16_13: # %entry
+; CHECK-NEXT: mv s1, a1
+; CHECK-NEXT: .LBB16_14: # %entry
+; CHECK-NEXT: sh s1, 14(s0)
+; CHECK-NEXT: sh a5, 12(s0)
+; CHECK-NEXT: sh a4, 10(s0)
+; CHECK-NEXT: sh a3, 8(s0)
+; CHECK-NEXT: sh a7, 6(s0)
+; CHECK-NEXT: sh s7, 4(s0)
+; CHECK-NEXT: sh s8, 2(s0)
+; CHECK-NEXT: sh a6, 0(s0)
+; CHECK-NEXT: ld ra, 88(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 80(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s1, 72(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s2, 64(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s3, 56(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s4, 48(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s5, 40(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s6, 32(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s7, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s8, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s9, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 96
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB16_15: # %entry
+; CHECK-NEXT: mv a4, a1
+; CHECK-NEXT: sext.w a0, s1
+; CHECK-NEXT: bltu a2, a1, .LBB16_12
+; CHECK-NEXT: .LBB16_16: # %entry
+; CHECK-NEXT: mv a5, a1
+; CHECK-NEXT: bgeu a0, a1, .LBB16_13
+; CHECK-NEXT: j .LBB16_14
+; Unsigned i16 saturation test: fptoui <8 x half> -> <8 x i32>, then clamp
+; each lane with an unsigned min against 65535 (u16 max) and trunc to
+; <8 x i16>. No lower clamp is needed for an unsigned conversion. The
+; expectation lines above are autogenerated by update_llc_test_checks.py.
+entry:
+ %conv = fptoui <8 x half> %x to <8 x i32>
+ %0 = icmp ult <8 x i32> %conv, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
+ %spec.store.select = select <8 x i1> %0, <8 x i32> %conv, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
+ %conv6 = trunc <8 x i32> %spec.store.select to <8 x i16>
+ ret <8 x i16> %conv6
+}
+
+define <8 x i16> @ustest_f16i16(<8 x half> %x) {
+; CHECK-LABEL: ustest_f16i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -96
+; CHECK-NEXT: .cfi_def_cfa_offset 96
+; CHECK-NEXT: sd ra, 88(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 80(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s1, 72(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s2, 64(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s3, 56(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s4, 48(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s5, 40(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s6, 32(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s7, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s8, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s9, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset ra, -8
+; CHECK-NEXT: .cfi_offset s0, -16
+; CHECK-NEXT: .cfi_offset s1, -24
+; CHECK-NEXT: .cfi_offset s2, -32
+; CHECK-NEXT: .cfi_offset s3, -40
+; CHECK-NEXT: .cfi_offset s4, -48
+; CHECK-NEXT: .cfi_offset s5, -56
+; CHECK-NEXT: .cfi_offset s6, -64
+; CHECK-NEXT: .cfi_offset s7, -72
+; CHECK-NEXT: .cfi_offset s8, -80
+; CHECK-NEXT: .cfi_offset s9, -88
+; CHECK-NEXT: lhu s6, 56(a1)
+; CHECK-NEXT: lhu s2, 0(a1)
+; CHECK-NEXT: lhu s3, 8(a1)
+; CHECK-NEXT: lhu s4, 16(a1)
+; CHECK-NEXT: lhu s5, 24(a1)
+; CHECK-NEXT: lhu s1, 32(a1)
+; CHECK-NEXT: lhu s0, 40(a1)
+; CHECK-NEXT: lhu a1, 48(a1)
+; CHECK-NEXT: mv s8, a0
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: call __gnu_h2f_ieee at plt
+; CHECK-NEXT: mv s9, a0
+; CHECK-NEXT: mv a0, s0
+; CHECK-NEXT: call __gnu_h2f_ieee at plt
+; CHECK-NEXT: mv s0, a0
+; CHECK-NEXT: mv a0, s1
+; CHECK-NEXT: call __gnu_h2f_ieee at plt
+; CHECK-NEXT: mv s7, a0
+; CHECK-NEXT: mv a0, s5
+; CHECK-NEXT: call __gnu_h2f_ieee at plt
+; CHECK-NEXT: mv s5, a0
+; CHECK-NEXT: mv a0, s4
+; CHECK-NEXT: call __gnu_h2f_ieee at plt
+; CHECK-NEXT: mv s4, a0
+; CHECK-NEXT: mv a0, s3
+; CHECK-NEXT: call __gnu_h2f_ieee at plt
+; CHECK-NEXT: mv s3, a0
+; CHECK-NEXT: mv a0, s2
+; CHECK-NEXT: call __gnu_h2f_ieee at plt
+; CHECK-NEXT: mv s2, a0
+; CHECK-NEXT: fmv.w.x ft0, s0
+; CHECK-NEXT: fsw ft0, 4(sp) # 4-byte Folded Spill
+; CHECK-NEXT: fmv.w.x ft0, s9
+; CHECK-NEXT: fcvt.l.s s9, ft0, rtz
+; CHECK-NEXT: mv a0, s6
+; CHECK-NEXT: call __gnu_h2f_ieee at plt
+; CHECK-NEXT: fmv.w.x ft0, a0
+; CHECK-NEXT: fcvt.l.s a0, ft0, rtz
+; CHECK-NEXT: lui a1, 16
+; CHECK-NEXT: addiw s1, a1, -1
+; CHECK-NEXT: blt a0, s1, .LBB17_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: mv a0, s1
+; CHECK-NEXT: .LBB17_2: # %entry
+; CHECK-NEXT: fmv.w.x ft1, s7
+; CHECK-NEXT: flw ft0, 4(sp) # 4-byte Folded Reload
+; CHECK-NEXT: fcvt.l.s a1, ft0, rtz
+; CHECK-NEXT: blt s9, s1, .LBB17_4
+; CHECK-NEXT: # %bb.3: # %entry
+; CHECK-NEXT: mv s9, s1
+; CHECK-NEXT: .LBB17_4: # %entry
+; CHECK-NEXT: fmv.w.x ft0, s5
+; CHECK-NEXT: fcvt.l.s a2, ft1, rtz
+; CHECK-NEXT: blt a1, s1, .LBB17_6
+; CHECK-NEXT: # %bb.5: # %entry
+; CHECK-NEXT: mv a1, s1
+; CHECK-NEXT: .LBB17_6: # %entry
+; CHECK-NEXT: fmv.w.x ft1, s4
+; CHECK-NEXT: fcvt.l.s a3, ft0, rtz
+; CHECK-NEXT: blt a2, s1, .LBB17_8
+; CHECK-NEXT: # %bb.7: # %entry
+; CHECK-NEXT: mv a2, s1
+; CHECK-NEXT: .LBB17_8: # %entry
+; CHECK-NEXT: fmv.w.x ft0, s3
+; CHECK-NEXT: fcvt.l.s a4, ft1, rtz
+; CHECK-NEXT: blt a3, s1, .LBB17_10
+; CHECK-NEXT: # %bb.9: # %entry
+; CHECK-NEXT: mv a3, s1
+; CHECK-NEXT: .LBB17_10: # %entry
+; CHECK-NEXT: fmv.w.x ft1, s2
+; CHECK-NEXT: fcvt.l.s a5, ft0, rtz
+; CHECK-NEXT: bge a4, s1, .LBB17_23
+; CHECK-NEXT: # %bb.11: # %entry
+; CHECK-NEXT: fcvt.l.s s0, ft1, rtz
+; CHECK-NEXT: bge a5, s1, .LBB17_24
+; CHECK-NEXT: .LBB17_12: # %entry
+; CHECK-NEXT: bge s0, s1, .LBB17_25
+; CHECK-NEXT: .LBB17_13: # %entry
+; CHECK-NEXT: blez s0, .LBB17_26
+; CHECK-NEXT: .LBB17_14: # %entry
+; CHECK-NEXT: blez a5, .LBB17_27
+; CHECK-NEXT: .LBB17_15: # %entry
+; CHECK-NEXT: blez a4, .LBB17_28
+; CHECK-NEXT: .LBB17_16: # %entry
+; CHECK-NEXT: blez a3, .LBB17_29
+; CHECK-NEXT: .LBB17_17: # %entry
+; CHECK-NEXT: blez a2, .LBB17_30
+; CHECK-NEXT: .LBB17_18: # %entry
+; CHECK-NEXT: blez a1, .LBB17_31
+; CHECK-NEXT: .LBB17_19: # %entry
+; CHECK-NEXT: blez s9, .LBB17_32
+; CHECK-NEXT: .LBB17_20: # %entry
+; CHECK-NEXT: bgtz a0, .LBB17_22
+; CHECK-NEXT: .LBB17_21: # %entry
+; CHECK-NEXT: li a0, 0
+; CHECK-NEXT: .LBB17_22: # %entry
+; CHECK-NEXT: sh a0, 14(s8)
+; CHECK-NEXT: sh s9, 12(s8)
+; CHECK-NEXT: sh a1, 10(s8)
+; CHECK-NEXT: sh a2, 8(s8)
+; CHECK-NEXT: sh a3, 6(s8)
+; CHECK-NEXT: sh a4, 4(s8)
+; CHECK-NEXT: sh a5, 2(s8)
+; CHECK-NEXT: sh s0, 0(s8)
+; CHECK-NEXT: ld ra, 88(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 80(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s1, 72(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s2, 64(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s3, 56(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s4, 48(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s5, 40(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s6, 32(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s7, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s8, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s9, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 96
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB17_23: # %entry
+; CHECK-NEXT: mv a4, s1
+; CHECK-NEXT: fcvt.l.s s0, ft1, rtz
+; CHECK-NEXT: blt a5, s1, .LBB17_12
+; CHECK-NEXT: .LBB17_24: # %entry
+; CHECK-NEXT: mv a5, s1
+; CHECK-NEXT: blt s0, s1, .LBB17_13
+; CHECK-NEXT: .LBB17_25: # %entry
+; CHECK-NEXT: mv s0, s1
+; CHECK-NEXT: bgtz s0, .LBB17_14
+; CHECK-NEXT: .LBB17_26: # %entry
+; CHECK-NEXT: li s0, 0
+; CHECK-NEXT: bgtz a5, .LBB17_15
+; CHECK-NEXT: .LBB17_27: # %entry
+; CHECK-NEXT: li a5, 0
+; CHECK-NEXT: bgtz a4, .LBB17_16
+; CHECK-NEXT: .LBB17_28: # %entry
+; CHECK-NEXT: li a4, 0
+; CHECK-NEXT: bgtz a3, .LBB17_17
+; CHECK-NEXT: .LBB17_29: # %entry
+; CHECK-NEXT: li a3, 0
+; CHECK-NEXT: bgtz a2, .LBB17_18
+; CHECK-NEXT: .LBB17_30: # %entry
+; CHECK-NEXT: li a2, 0
+; CHECK-NEXT: bgtz a1, .LBB17_19
+; CHECK-NEXT: .LBB17_31: # %entry
+; CHECK-NEXT: li a1, 0
+; CHECK-NEXT: bgtz s9, .LBB17_20
+; CHECK-NEXT: .LBB17_32: # %entry
+; CHECK-NEXT: li s9, 0
+; CHECK-NEXT: blez a0, .LBB17_21
+; CHECK-NEXT: j .LBB17_22
+; Unsigned-range saturation of a signed conversion: fptosi <8 x half> ->
+; <8 x i32>, clamp each lane to [0, 65535] (smin with 65535, smax with 0),
+; then trunc to <8 x i16>. Expectation lines are autogenerated by
+; update_llc_test_checks.py.
+entry:
+ %conv = fptosi <8 x half> %x to <8 x i32>
+ %0 = icmp slt <8 x i32> %conv, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
+ %spec.store.select = select <8 x i1> %0, <8 x i32> %conv, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
+ %1 = icmp sgt <8 x i32> %spec.store.select, zeroinitializer
+ %spec.store.select7 = select <8 x i1> %1, <8 x i32> %spec.store.select, <8 x i32> zeroinitializer
+ %conv6 = trunc <8 x i32> %spec.store.select7 to <8 x i16>
+ ret <8 x i16> %conv6
+}
+
+; i64 saturate
+
+define <2 x i64> @stest_f64i64(<2 x double> %x) {
+; CHECK-LABEL: stest_f64i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -32
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset ra, -8
+; CHECK-NEXT: .cfi_offset s0, -16
+; CHECK-NEXT: .cfi_offset s1, -24
+; CHECK-NEXT: .cfi_offset s2, -32
+; CHECK-NEXT: mv s0, a1
+; CHECK-NEXT: call __fixdfti at plt
+; CHECK-NEXT: mv s2, a0
+; CHECK-NEXT: mv s1, a1
+; CHECK-NEXT: mv a0, s0
+; CHECK-NEXT: call __fixdfti at plt
+; CHECK-NEXT: mv a2, a0
+; CHECK-NEXT: li a0, -1
+; CHECK-NEXT: srli a3, a0, 1
+; CHECK-NEXT: beqz a1, .LBB18_3
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: slti a4, a1, 0
+; CHECK-NEXT: bnez s1, .LBB18_4
+; CHECK-NEXT: .LBB18_2:
+; CHECK-NEXT: sltu a5, s2, a3
+; CHECK-NEXT: beqz a5, .LBB18_5
+; CHECK-NEXT: j .LBB18_6
+; CHECK-NEXT: .LBB18_3:
+; CHECK-NEXT: sltu a4, a2, a3
+; CHECK-NEXT: beqz s1, .LBB18_2
+; CHECK-NEXT: .LBB18_4: # %entry
+; CHECK-NEXT: slti a5, s1, 0
+; CHECK-NEXT: bnez a5, .LBB18_6
+; CHECK-NEXT: .LBB18_5: # %entry
+; CHECK-NEXT: li s1, 0
+; CHECK-NEXT: mv s2, a3
+; CHECK-NEXT: .LBB18_6: # %entry
+; CHECK-NEXT: beqz a4, .LBB18_10
+; CHECK-NEXT: # %bb.7: # %entry
+; CHECK-NEXT: slli a3, a0, 63
+; CHECK-NEXT: beq a1, a0, .LBB18_11
+; CHECK-NEXT: .LBB18_8: # %entry
+; CHECK-NEXT: slt a1, a0, a1
+; CHECK-NEXT: bne s1, a0, .LBB18_12
+; CHECK-NEXT: .LBB18_9:
+; CHECK-NEXT: sltu a0, a3, s2
+; CHECK-NEXT: beqz a0, .LBB18_13
+; CHECK-NEXT: j .LBB18_14
+; CHECK-NEXT: .LBB18_10: # %entry
+; CHECK-NEXT: li a1, 0
+; CHECK-NEXT: mv a2, a3
+; CHECK-NEXT: slli a3, a0, 63
+; CHECK-NEXT: bne a1, a0, .LBB18_8
+; CHECK-NEXT: .LBB18_11:
+; CHECK-NEXT: sltu a1, a3, a2
+; CHECK-NEXT: beq s1, a0, .LBB18_9
+; CHECK-NEXT: .LBB18_12: # %entry
+; CHECK-NEXT: slt a0, a0, s1
+; CHECK-NEXT: bnez a0, .LBB18_14
+; CHECK-NEXT: .LBB18_13: # %entry
+; CHECK-NEXT: mv s2, a3
+; CHECK-NEXT: .LBB18_14: # %entry
+; CHECK-NEXT: bnez a1, .LBB18_16
+; CHECK-NEXT: # %bb.15: # %entry
+; CHECK-NEXT: mv a2, a3
+; CHECK-NEXT: .LBB18_16: # %entry
+; CHECK-NEXT: mv a0, s2
+; CHECK-NEXT: mv a1, a2
+; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 32
+; CHECK-NEXT: ret
+; Signed i64 saturation via i128: fptosi <2 x double> -> <2 x i128>
+; (lowered as two __fixdfti libcalls), clamp each lane to
+; [INT64_MIN, INT64_MAX], then trunc to <2 x i64>. Expectation lines are
+; autogenerated by update_llc_test_checks.py.
+entry:
+ %conv = fptosi <2 x double> %x to <2 x i128>
+ %0 = icmp slt <2 x i128> %conv, <i128 9223372036854775807, i128 9223372036854775807>
+ %spec.store.select = select <2 x i1> %0, <2 x i128> %conv, <2 x i128> <i128 9223372036854775807, i128 9223372036854775807>
+ %1 = icmp sgt <2 x i128> %spec.store.select, <i128 -9223372036854775808, i128 -9223372036854775808>
+ %spec.store.select7 = select <2 x i1> %1, <2 x i128> %spec.store.select, <2 x i128> <i128 -9223372036854775808, i128 -9223372036854775808>
+ %conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+define <2 x i64> @utest_f64i64(<2 x double> %x) {
+; CHECK-LABEL: utest_f64i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -32
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset ra, -8
+; CHECK-NEXT: .cfi_offset s0, -16
+; CHECK-NEXT: .cfi_offset s1, -24
+; CHECK-NEXT: .cfi_offset s2, -32
+; CHECK-NEXT: mv s1, a0
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: call __fixunsdfti at plt
+; CHECK-NEXT: mv s0, a0
+; CHECK-NEXT: mv s2, a1
+; CHECK-NEXT: mv a0, s1
+; CHECK-NEXT: call __fixunsdfti at plt
+; CHECK-NEXT: beqz a1, .LBB19_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: li a0, 0
+; CHECK-NEXT: .LBB19_2: # %entry
+; CHECK-NEXT: beqz s2, .LBB19_4
+; CHECK-NEXT: # %bb.3: # %entry
+; CHECK-NEXT: li s0, 0
+; CHECK-NEXT: .LBB19_4: # %entry
+; CHECK-NEXT: mv a1, s0
+; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 32
+; CHECK-NEXT: ret
+; Unsigned i64 saturation via i128: fptoui <2 x double> -> <2 x i128>
+; (two __fixunsdfti libcalls), then an unsigned min against
+; 18446744073709551616 (i.e. 2^64 = UINT64_MAX + 1) before trunc to
+; <2 x i64>. Expectation lines are autogenerated by
+; update_llc_test_checks.py.
+entry:
+ %conv = fptoui <2 x double> %x to <2 x i128>
+ %0 = icmp ult <2 x i128> %conv, <i128 18446744073709551616, i128 18446744073709551616>
+ %spec.store.select = select <2 x i1> %0, <2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>
+ %conv6 = trunc <2 x i128> %spec.store.select to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+define <2 x i64> @ustest_f64i64(<2 x double> %x) {
+; CHECK-LABEL: ustest_f64i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -32
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset ra, -8
+; CHECK-NEXT: .cfi_offset s0, -16
+; CHECK-NEXT: .cfi_offset s1, -24
+; CHECK-NEXT: .cfi_offset s2, -32
+; CHECK-NEXT: mv s0, a0
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: call __fixdfti at plt
+; CHECK-NEXT: mv s2, a0
+; CHECK-NEXT: mv s1, a1
+; CHECK-NEXT: mv a0, s0
+; CHECK-NEXT: call __fixdfti at plt
+; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: bgtz a1, .LBB20_7
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: mv a3, s1
+; CHECK-NEXT: bgtz s1, .LBB20_8
+; CHECK-NEXT: .LBB20_2: # %entry
+; CHECK-NEXT: bgtz a1, .LBB20_9
+; CHECK-NEXT: .LBB20_3: # %entry
+; CHECK-NEXT: bgtz s1, .LBB20_10
+; CHECK-NEXT: .LBB20_4: # %entry
+; CHECK-NEXT: beqz a3, .LBB20_11
+; CHECK-NEXT: .LBB20_5: # %entry
+; CHECK-NEXT: sgtz a1, a3
+; CHECK-NEXT: bnez a2, .LBB20_12
+; CHECK-NEXT: .LBB20_6:
+; CHECK-NEXT: snez a2, a0
+; CHECK-NEXT: beqz a2, .LBB20_13
+; CHECK-NEXT: j .LBB20_14
+; CHECK-NEXT: .LBB20_7: # %entry
+; CHECK-NEXT: li a2, 1
+; CHECK-NEXT: mv a3, s1
+; CHECK-NEXT: blez s1, .LBB20_2
+; CHECK-NEXT: .LBB20_8: # %entry
+; CHECK-NEXT: li a3, 1
+; CHECK-NEXT: blez a1, .LBB20_3
+; CHECK-NEXT: .LBB20_9: # %entry
+; CHECK-NEXT: li a0, 0
+; CHECK-NEXT: blez s1, .LBB20_4
+; CHECK-NEXT: .LBB20_10: # %entry
+; CHECK-NEXT: li s2, 0
+; CHECK-NEXT: bnez a3, .LBB20_5
+; CHECK-NEXT: .LBB20_11:
+; CHECK-NEXT: snez a1, s2
+; CHECK-NEXT: beqz a2, .LBB20_6
+; CHECK-NEXT: .LBB20_12: # %entry
+; CHECK-NEXT: sgtz a2, a2
+; CHECK-NEXT: bnez a2, .LBB20_14
+; CHECK-NEXT: .LBB20_13: # %entry
+; CHECK-NEXT: li a0, 0
+; CHECK-NEXT: .LBB20_14: # %entry
+; CHECK-NEXT: bnez a1, .LBB20_16
+; CHECK-NEXT: # %bb.15: # %entry
+; CHECK-NEXT: li s2, 0
+; CHECK-NEXT: .LBB20_16: # %entry
+; CHECK-NEXT: mv a1, s2
+; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 32
+; CHECK-NEXT: ret
+; Unsigned-range saturation of a signed conversion: fptosi <2 x double> ->
+; <2 x i128> (two __fixdfti libcalls), signed clamp to
+; [0, 18446744073709551616] (2^64) per lane, then trunc to <2 x i64>.
+; Expectation lines are autogenerated by update_llc_test_checks.py.
+entry:
+ %conv = fptosi <2 x double> %x to <2 x i128>
+ %0 = icmp slt <2 x i128> %conv, <i128 18446744073709551616, i128 18446744073709551616>
+ %spec.store.select = select <2 x i1> %0, <2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>
+ %1 = icmp sgt <2 x i128> %spec.store.select, zeroinitializer
+ %spec.store.select7 = select <2 x i1> %1, <2 x i128> %spec.store.select, <2 x i128> zeroinitializer
+ %conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+define <2 x i64> @stest_f32i64(<2 x float> %x) {
+; CHECK-LABEL: stest_f32i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -32
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset ra, -8
+; CHECK-NEXT: .cfi_offset s0, -16
+; CHECK-NEXT: .cfi_offset s1, -24
+; CHECK-NEXT: .cfi_offset s2, -32
+; CHECK-NEXT: mv s0, a1
+; CHECK-NEXT: call __fixsfti at plt
+; CHECK-NEXT: mv s2, a0
+; CHECK-NEXT: mv s1, a1
+; CHECK-NEXT: mv a0, s0
+; CHECK-NEXT: call __fixsfti at plt
+; CHECK-NEXT: mv a2, a0
+; CHECK-NEXT: li a0, -1
+; CHECK-NEXT: srli a3, a0, 1
+; CHECK-NEXT: beqz a1, .LBB21_3
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: slti a4, a1, 0
+; CHECK-NEXT: bnez s1, .LBB21_4
+; CHECK-NEXT: .LBB21_2:
+; CHECK-NEXT: sltu a5, s2, a3
+; CHECK-NEXT: beqz a5, .LBB21_5
+; CHECK-NEXT: j .LBB21_6
+; CHECK-NEXT: .LBB21_3:
+; CHECK-NEXT: sltu a4, a2, a3
+; CHECK-NEXT: beqz s1, .LBB21_2
+; CHECK-NEXT: .LBB21_4: # %entry
+; CHECK-NEXT: slti a5, s1, 0
+; CHECK-NEXT: bnez a5, .LBB21_6
+; CHECK-NEXT: .LBB21_5: # %entry
+; CHECK-NEXT: li s1, 0
+; CHECK-NEXT: mv s2, a3
+; CHECK-NEXT: .LBB21_6: # %entry
+; CHECK-NEXT: beqz a4, .LBB21_10
+; CHECK-NEXT: # %bb.7: # %entry
+; CHECK-NEXT: slli a3, a0, 63
+; CHECK-NEXT: beq a1, a0, .LBB21_11
+; CHECK-NEXT: .LBB21_8: # %entry
+; CHECK-NEXT: slt a1, a0, a1
+; CHECK-NEXT: bne s1, a0, .LBB21_12
+; CHECK-NEXT: .LBB21_9:
+; CHECK-NEXT: sltu a0, a3, s2
+; CHECK-NEXT: beqz a0, .LBB21_13
+; CHECK-NEXT: j .LBB21_14
+; CHECK-NEXT: .LBB21_10: # %entry
+; CHECK-NEXT: li a1, 0
+; CHECK-NEXT: mv a2, a3
+; CHECK-NEXT: slli a3, a0, 63
+; CHECK-NEXT: bne a1, a0, .LBB21_8
+; CHECK-NEXT: .LBB21_11:
+; CHECK-NEXT: sltu a1, a3, a2
+; CHECK-NEXT: beq s1, a0, .LBB21_9
+; CHECK-NEXT: .LBB21_12: # %entry
+; CHECK-NEXT: slt a0, a0, s1
+; CHECK-NEXT: bnez a0, .LBB21_14
+; CHECK-NEXT: .LBB21_13: # %entry
+; CHECK-NEXT: mv s2, a3
+; CHECK-NEXT: .LBB21_14: # %entry
+; CHECK-NEXT: bnez a1, .LBB21_16
+; CHECK-NEXT: # %bb.15: # %entry
+; CHECK-NEXT: mv a2, a3
+; CHECK-NEXT: .LBB21_16: # %entry
+; CHECK-NEXT: mv a0, s2
+; CHECK-NEXT: mv a1, a2
+; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 32
+; CHECK-NEXT: ret
+; Float variant of stest_f64i64: fptosi <2 x float> -> <2 x i128>
+; (two __fixsfti libcalls), clamp each lane to [INT64_MIN, INT64_MAX],
+; then trunc to <2 x i64>. Expectation lines are autogenerated by
+; update_llc_test_checks.py.
+entry:
+ %conv = fptosi <2 x float> %x to <2 x i128>
+ %0 = icmp slt <2 x i128> %conv, <i128 9223372036854775807, i128 9223372036854775807>
+ %spec.store.select = select <2 x i1> %0, <2 x i128> %conv, <2 x i128> <i128 9223372036854775807, i128 9223372036854775807>
+ %1 = icmp sgt <2 x i128> %spec.store.select, <i128 -9223372036854775808, i128 -9223372036854775808>
+ %spec.store.select7 = select <2 x i1> %1, <2 x i128> %spec.store.select, <2 x i128> <i128 -9223372036854775808, i128 -9223372036854775808>
+ %conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+define <2 x i64> @utest_f32i64(<2 x float> %x) {
+; CHECK-LABEL: utest_f32i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -32
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset ra, -8
+; CHECK-NEXT: .cfi_offset s0, -16
+; CHECK-NEXT: .cfi_offset s1, -24
+; CHECK-NEXT: .cfi_offset s2, -32
+; CHECK-NEXT: mv s1, a0
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: call __fixunssfti at plt
+; CHECK-NEXT: mv s0, a0
+; CHECK-NEXT: mv s2, a1
+; CHECK-NEXT: mv a0, s1
+; CHECK-NEXT: call __fixunssfti at plt
+; CHECK-NEXT: beqz a1, .LBB22_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: li a0, 0
+; CHECK-NEXT: .LBB22_2: # %entry
+; CHECK-NEXT: beqz s2, .LBB22_4
+; CHECK-NEXT: # %bb.3: # %entry
+; CHECK-NEXT: li s0, 0
+; CHECK-NEXT: .LBB22_4: # %entry
+; CHECK-NEXT: mv a1, s0
+; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 32
+; CHECK-NEXT: ret
+; Float variant of utest_f64i64: fptoui <2 x float> -> <2 x i128>
+; (two __fixunssfti libcalls), unsigned min against 18446744073709551616
+; (2^64 = UINT64_MAX + 1), then trunc to <2 x i64>. Expectation lines are
+; autogenerated by update_llc_test_checks.py.
+entry:
+ %conv = fptoui <2 x float> %x to <2 x i128>
+ %0 = icmp ult <2 x i128> %conv, <i128 18446744073709551616, i128 18446744073709551616>
+ %spec.store.select = select <2 x i1> %0, <2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>
+ %conv6 = trunc <2 x i128> %spec.store.select to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+; ustest_f32i64: fptosi <2 x float> to <2 x i128>, clamped with signed compares
+; to [0, 2^64) and truncated to <2 x i64>. On RV64 the i128 conversion goes
+; through the __fixsfti libcall (note: archive munged "@plt" into " at plt";
+; restored so the CHECK lines match real llc output).
+define <2 x i64> @ustest_f32i64(<2 x float> %x) {
+; CHECK-LABEL: ustest_f32i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -32
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset ra, -8
+; CHECK-NEXT: .cfi_offset s0, -16
+; CHECK-NEXT: .cfi_offset s1, -24
+; CHECK-NEXT: .cfi_offset s2, -32
+; CHECK-NEXT: mv s0, a0
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: call __fixsfti@plt
+; CHECK-NEXT: mv s2, a0
+; CHECK-NEXT: mv s1, a1
+; CHECK-NEXT: mv a0, s0
+; CHECK-NEXT: call __fixsfti@plt
+; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: bgtz a1, .LBB23_7
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: mv a3, s1
+; CHECK-NEXT: bgtz s1, .LBB23_8
+; CHECK-NEXT: .LBB23_2: # %entry
+; CHECK-NEXT: bgtz a1, .LBB23_9
+; CHECK-NEXT: .LBB23_3: # %entry
+; CHECK-NEXT: bgtz s1, .LBB23_10
+; CHECK-NEXT: .LBB23_4: # %entry
+; CHECK-NEXT: beqz a3, .LBB23_11
+; CHECK-NEXT: .LBB23_5: # %entry
+; CHECK-NEXT: sgtz a1, a3
+; CHECK-NEXT: bnez a2, .LBB23_12
+; CHECK-NEXT: .LBB23_6:
+; CHECK-NEXT: snez a2, a0
+; CHECK-NEXT: beqz a2, .LBB23_13
+; CHECK-NEXT: j .LBB23_14
+; CHECK-NEXT: .LBB23_7: # %entry
+; CHECK-NEXT: li a2, 1
+; CHECK-NEXT: mv a3, s1
+; CHECK-NEXT: blez s1, .LBB23_2
+; CHECK-NEXT: .LBB23_8: # %entry
+; CHECK-NEXT: li a3, 1
+; CHECK-NEXT: blez a1, .LBB23_3
+; CHECK-NEXT: .LBB23_9: # %entry
+; CHECK-NEXT: li a0, 0
+; CHECK-NEXT: blez s1, .LBB23_4
+; CHECK-NEXT: .LBB23_10: # %entry
+; CHECK-NEXT: li s2, 0
+; CHECK-NEXT: bnez a3, .LBB23_5
+; CHECK-NEXT: .LBB23_11:
+; CHECK-NEXT: snez a1, s2
+; CHECK-NEXT: beqz a2, .LBB23_6
+; CHECK-NEXT: .LBB23_12: # %entry
+; CHECK-NEXT: sgtz a2, a2
+; CHECK-NEXT: bnez a2, .LBB23_14
+; CHECK-NEXT: .LBB23_13: # %entry
+; CHECK-NEXT: li a0, 0
+; CHECK-NEXT: .LBB23_14: # %entry
+; CHECK-NEXT: bnez a1, .LBB23_16
+; CHECK-NEXT: # %bb.15: # %entry
+; CHECK-NEXT: li s2, 0
+; CHECK-NEXT: .LBB23_16: # %entry
+; CHECK-NEXT: mv a1, s2
+; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 32
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi <2 x float> %x to <2 x i128>
+ %0 = icmp slt <2 x i128> %conv, <i128 18446744073709551616, i128 18446744073709551616>
+ %spec.store.select = select <2 x i1> %0, <2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>
+ %1 = icmp sgt <2 x i128> %spec.store.select, zeroinitializer
+ %spec.store.select7 = select <2 x i1> %1, <2 x i128> %spec.store.select, <2 x i128> zeroinitializer
+ %conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+; stest_f16i64: fptosi <2 x half> to <2 x i128>, clamped with signed compares
+; to [INT64_MIN, INT64_MAX] and truncated to <2 x i64>. Each lane widens via
+; __gnu_h2f_ieee then converts via __fixsfti (archive munged "@plt" into
+; " at plt"; restored so the CHECK lines match real llc output).
+define <2 x i64> @stest_f16i64(<2 x half> %x) {
+; CHECK-LABEL: stest_f16i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -32
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset ra, -8
+; CHECK-NEXT: .cfi_offset s0, -16
+; CHECK-NEXT: .cfi_offset s1, -24
+; CHECK-NEXT: .cfi_offset s2, -32
+; CHECK-NEXT: mv s0, a1
+; CHECK-NEXT: call __gnu_h2f_ieee@plt
+; CHECK-NEXT: call __fixsfti@plt
+; CHECK-NEXT: mv s2, a0
+; CHECK-NEXT: mv s1, a1
+; CHECK-NEXT: mv a0, s0
+; CHECK-NEXT: call __gnu_h2f_ieee@plt
+; CHECK-NEXT: call __fixsfti@plt
+; CHECK-NEXT: mv a2, a0
+; CHECK-NEXT: li a0, -1
+; CHECK-NEXT: srli a3, a0, 1
+; CHECK-NEXT: beqz a1, .LBB24_3
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: slti a4, a1, 0
+; CHECK-NEXT: bnez s1, .LBB24_4
+; CHECK-NEXT: .LBB24_2:
+; CHECK-NEXT: sltu a5, s2, a3
+; CHECK-NEXT: beqz a5, .LBB24_5
+; CHECK-NEXT: j .LBB24_6
+; CHECK-NEXT: .LBB24_3:
+; CHECK-NEXT: sltu a4, a2, a3
+; CHECK-NEXT: beqz s1, .LBB24_2
+; CHECK-NEXT: .LBB24_4: # %entry
+; CHECK-NEXT: slti a5, s1, 0
+; CHECK-NEXT: bnez a5, .LBB24_6
+; CHECK-NEXT: .LBB24_5: # %entry
+; CHECK-NEXT: li s1, 0
+; CHECK-NEXT: mv s2, a3
+; CHECK-NEXT: .LBB24_6: # %entry
+; CHECK-NEXT: beqz a4, .LBB24_10
+; CHECK-NEXT: # %bb.7: # %entry
+; CHECK-NEXT: slli a3, a0, 63
+; CHECK-NEXT: beq a1, a0, .LBB24_11
+; CHECK-NEXT: .LBB24_8: # %entry
+; CHECK-NEXT: slt a1, a0, a1
+; CHECK-NEXT: bne s1, a0, .LBB24_12
+; CHECK-NEXT: .LBB24_9:
+; CHECK-NEXT: sltu a0, a3, s2
+; CHECK-NEXT: beqz a0, .LBB24_13
+; CHECK-NEXT: j .LBB24_14
+; CHECK-NEXT: .LBB24_10: # %entry
+; CHECK-NEXT: li a1, 0
+; CHECK-NEXT: mv a2, a3
+; CHECK-NEXT: slli a3, a0, 63
+; CHECK-NEXT: bne a1, a0, .LBB24_8
+; CHECK-NEXT: .LBB24_11:
+; CHECK-NEXT: sltu a1, a3, a2
+; CHECK-NEXT: beq s1, a0, .LBB24_9
+; CHECK-NEXT: .LBB24_12: # %entry
+; CHECK-NEXT: slt a0, a0, s1
+; CHECK-NEXT: bnez a0, .LBB24_14
+; CHECK-NEXT: .LBB24_13: # %entry
+; CHECK-NEXT: mv s2, a3
+; CHECK-NEXT: .LBB24_14: # %entry
+; CHECK-NEXT: bnez a1, .LBB24_16
+; CHECK-NEXT: # %bb.15: # %entry
+; CHECK-NEXT: mv a2, a3
+; CHECK-NEXT: .LBB24_16: # %entry
+; CHECK-NEXT: mv a0, s2
+; CHECK-NEXT: mv a1, a2
+; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 32
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi <2 x half> %x to <2 x i128>
+ %0 = icmp slt <2 x i128> %conv, <i128 9223372036854775807, i128 9223372036854775807>
+ %spec.store.select = select <2 x i1> %0, <2 x i128> %conv, <2 x i128> <i128 9223372036854775807, i128 9223372036854775807>
+ %1 = icmp sgt <2 x i128> %spec.store.select, <i128 -9223372036854775808, i128 -9223372036854775808>
+ %spec.store.select7 = select <2 x i1> %1, <2 x i128> %spec.store.select, <2 x i128> <i128 -9223372036854775808, i128 -9223372036854775808>
+ %conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+; utesth_f16i64: fptoui <2 x half> to <2 x i128>, clamped below 2^64 and
+; truncated to <2 x i64>; widens via __gnu_h2f_ieee, converts via __fixunssfti
+; (archive munged "@plt" into " at plt"; restored so the CHECK lines match
+; real llc output).
+define <2 x i64> @utesth_f16i64(<2 x half> %x) {
+; CHECK-LABEL: utesth_f16i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -32
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset ra, -8
+; CHECK-NEXT: .cfi_offset s0, -16
+; CHECK-NEXT: .cfi_offset s1, -24
+; CHECK-NEXT: .cfi_offset s2, -32
+; CHECK-NEXT: mv s1, a0
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: call __gnu_h2f_ieee@plt
+; CHECK-NEXT: call __fixunssfti@plt
+; CHECK-NEXT: mv s0, a0
+; CHECK-NEXT: mv s2, a1
+; CHECK-NEXT: mv a0, s1
+; CHECK-NEXT: call __gnu_h2f_ieee@plt
+; CHECK-NEXT: call __fixunssfti@plt
+; CHECK-NEXT: beqz a1, .LBB25_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: li a0, 0
+; CHECK-NEXT: .LBB25_2: # %entry
+; CHECK-NEXT: beqz s2, .LBB25_4
+; CHECK-NEXT: # %bb.3: # %entry
+; CHECK-NEXT: li s0, 0
+; CHECK-NEXT: .LBB25_4: # %entry
+; CHECK-NEXT: mv a1, s0
+; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 32
+; CHECK-NEXT: ret
+entry:
+ %conv = fptoui <2 x half> %x to <2 x i128>
+ %0 = icmp ult <2 x i128> %conv, <i128 18446744073709551616, i128 18446744073709551616>
+ %spec.store.select = select <2 x i1> %0, <2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>
+ %conv6 = trunc <2 x i128> %spec.store.select to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+; ustest_f16i64: fptosi <2 x half> to <2 x i128>, clamped with signed compares
+; to [0, 2^64) and truncated to <2 x i64>; widens via __gnu_h2f_ieee, converts
+; via __fixsfti (archive munged "@plt" into " at plt"; restored so the CHECK
+; lines match real llc output).
+define <2 x i64> @ustest_f16i64(<2 x half> %x) {
+; CHECK-LABEL: ustest_f16i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -32
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset ra, -8
+; CHECK-NEXT: .cfi_offset s0, -16
+; CHECK-NEXT: .cfi_offset s1, -24
+; CHECK-NEXT: .cfi_offset s2, -32
+; CHECK-NEXT: mv s0, a0
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: call __gnu_h2f_ieee@plt
+; CHECK-NEXT: call __fixsfti@plt
+; CHECK-NEXT: mv s2, a0
+; CHECK-NEXT: mv s1, a1
+; CHECK-NEXT: mv a0, s0
+; CHECK-NEXT: call __gnu_h2f_ieee@plt
+; CHECK-NEXT: call __fixsfti@plt
+; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: bgtz a1, .LBB26_7
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: mv a3, s1
+; CHECK-NEXT: bgtz s1, .LBB26_8
+; CHECK-NEXT: .LBB26_2: # %entry
+; CHECK-NEXT: bgtz a1, .LBB26_9
+; CHECK-NEXT: .LBB26_3: # %entry
+; CHECK-NEXT: bgtz s1, .LBB26_10
+; CHECK-NEXT: .LBB26_4: # %entry
+; CHECK-NEXT: beqz a3, .LBB26_11
+; CHECK-NEXT: .LBB26_5: # %entry
+; CHECK-NEXT: sgtz a1, a3
+; CHECK-NEXT: bnez a2, .LBB26_12
+; CHECK-NEXT: .LBB26_6:
+; CHECK-NEXT: snez a2, a0
+; CHECK-NEXT: beqz a2, .LBB26_13
+; CHECK-NEXT: j .LBB26_14
+; CHECK-NEXT: .LBB26_7: # %entry
+; CHECK-NEXT: li a2, 1
+; CHECK-NEXT: mv a3, s1
+; CHECK-NEXT: blez s1, .LBB26_2
+; CHECK-NEXT: .LBB26_8: # %entry
+; CHECK-NEXT: li a3, 1
+; CHECK-NEXT: blez a1, .LBB26_3
+; CHECK-NEXT: .LBB26_9: # %entry
+; CHECK-NEXT: li a0, 0
+; CHECK-NEXT: blez s1, .LBB26_4
+; CHECK-NEXT: .LBB26_10: # %entry
+; CHECK-NEXT: li s2, 0
+; CHECK-NEXT: bnez a3, .LBB26_5
+; CHECK-NEXT: .LBB26_11:
+; CHECK-NEXT: snez a1, s2
+; CHECK-NEXT: beqz a2, .LBB26_6
+; CHECK-NEXT: .LBB26_12: # %entry
+; CHECK-NEXT: sgtz a2, a2
+; CHECK-NEXT: bnez a2, .LBB26_14
+; CHECK-NEXT: .LBB26_13: # %entry
+; CHECK-NEXT: li a0, 0
+; CHECK-NEXT: .LBB26_14: # %entry
+; CHECK-NEXT: bnez a1, .LBB26_16
+; CHECK-NEXT: # %bb.15: # %entry
+; CHECK-NEXT: li s2, 0
+; CHECK-NEXT: .LBB26_16: # %entry
+; CHECK-NEXT: mv a1, s2
+; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 32
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi <2 x half> %x to <2 x i128>
+ %0 = icmp slt <2 x i128> %conv, <i128 18446744073709551616, i128 18446744073709551616>
+ %spec.store.select = select <2 x i1> %0, <2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>
+ %1 = icmp sgt <2 x i128> %spec.store.select, zeroinitializer
+ %spec.store.select7 = select <2 x i1> %1, <2 x i128> %spec.store.select, <2 x i128> zeroinitializer
+ %conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+
+
+; i32 saturate
+
+; stest_f64i32_mm: llvm.smin/llvm.smax intrinsic form of the signed
+; i32-saturating conversion of <2 x double>; clamps the fptosi result to
+; [INT32_MIN, INT32_MAX] before truncating to <2 x i32>.
+define <2 x i32> @stest_f64i32_mm(<2 x double> %x) {
+; CHECK-LABEL: stest_f64i32_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fmv.d.x ft0, a1
+; CHECK-NEXT: fmv.d.x ft1, a0
+; CHECK-NEXT: fcvt.l.d a1, ft0, rtz
+; CHECK-NEXT: lui a2, 524288
+; CHECK-NEXT: addiw a3, a2, -1
+; CHECK-NEXT: fcvt.l.d a0, ft1, rtz
+; CHECK-NEXT: bge a1, a3, .LBB27_5
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: bge a0, a3, .LBB27_6
+; CHECK-NEXT: .LBB27_2: # %entry
+; CHECK-NEXT: bge a2, a0, .LBB27_7
+; CHECK-NEXT: .LBB27_3: # %entry
+; CHECK-NEXT: bge a2, a1, .LBB27_8
+; CHECK-NEXT: .LBB27_4: # %entry
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB27_5: # %entry
+; CHECK-NEXT: mv a1, a3
+; CHECK-NEXT: blt a0, a3, .LBB27_2
+; CHECK-NEXT: .LBB27_6: # %entry
+; CHECK-NEXT: mv a0, a3
+; CHECK-NEXT: blt a2, a0, .LBB27_3
+; CHECK-NEXT: .LBB27_7: # %entry
+; CHECK-NEXT: lui a0, 524288
+; CHECK-NEXT: blt a2, a1, .LBB27_4
+; CHECK-NEXT: .LBB27_8: # %entry
+; CHECK-NEXT: lui a1, 524288
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi <2 x double> %x to <2 x i64>
+ %spec.store.select = call <2 x i64> @llvm.smin.v2i64(<2 x i64> %conv, <2 x i64> <i64 2147483647, i64 2147483647>)
+ %spec.store.select7 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> %spec.store.select, <2 x i64> <i64 -2147483648, i64 -2147483648>)
+ %conv6 = trunc <2 x i64> %spec.store.select7 to <2 x i32>
+ ret <2 x i32> %conv6
+}
+
+; utest_f64i32_mm: llvm.umin intrinsic form of the unsigned i32-saturating
+; conversion of <2 x double>; clamps the fptoui result to UINT32_MAX before
+; truncating to <2 x i32>.
+define <2 x i32> @utest_f64i32_mm(<2 x double> %x) {
+; CHECK-LABEL: utest_f64i32_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fmv.d.x ft0, a0
+; CHECK-NEXT: fmv.d.x ft1, a1
+; CHECK-NEXT: fcvt.lu.d a0, ft0, rtz
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: srli a2, a1, 32
+; CHECK-NEXT: fcvt.lu.d a1, ft1, rtz
+; CHECK-NEXT: bgeu a0, a2, .LBB28_3
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: bgeu a1, a2, .LBB28_4
+; CHECK-NEXT: .LBB28_2: # %entry
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB28_3: # %entry
+; CHECK-NEXT: mv a0, a2
+; CHECK-NEXT: bltu a1, a2, .LBB28_2
+; CHECK-NEXT: .LBB28_4: # %entry
+; CHECK-NEXT: mv a1, a2
+; CHECK-NEXT: ret
+entry:
+ %conv = fptoui <2 x double> %x to <2 x i64>
+ %spec.store.select = call <2 x i64> @llvm.umin.v2i64(<2 x i64> %conv, <2 x i64> <i64 4294967295, i64 4294967295>)
+ %conv6 = trunc <2 x i64> %spec.store.select to <2 x i32>
+ ret <2 x i32> %conv6
+}
+
+; ustest_f64i32_mm: llvm.smin/llvm.smax intrinsic form; signed conversion of
+; <2 x double> clamped to [0, UINT32_MAX] and truncated to <2 x i32>.
+define <2 x i32> @ustest_f64i32_mm(<2 x double> %x) {
+; CHECK-LABEL: ustest_f64i32_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fmv.d.x ft0, a1
+; CHECK-NEXT: fmv.d.x ft1, a0
+; CHECK-NEXT: fcvt.l.d a1, ft0, rtz
+; CHECK-NEXT: li a0, -1
+; CHECK-NEXT: srli a2, a0, 32
+; CHECK-NEXT: fcvt.l.d a0, ft1, rtz
+; CHECK-NEXT: bge a1, a2, .LBB29_5
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: bge a0, a2, .LBB29_6
+; CHECK-NEXT: .LBB29_2: # %entry
+; CHECK-NEXT: blez a0, .LBB29_7
+; CHECK-NEXT: .LBB29_3: # %entry
+; CHECK-NEXT: blez a1, .LBB29_8
+; CHECK-NEXT: .LBB29_4: # %entry
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB29_5: # %entry
+; CHECK-NEXT: mv a1, a2
+; CHECK-NEXT: blt a0, a2, .LBB29_2
+; CHECK-NEXT: .LBB29_6: # %entry
+; CHECK-NEXT: mv a0, a2
+; CHECK-NEXT: bgtz a0, .LBB29_3
+; CHECK-NEXT: .LBB29_7: # %entry
+; CHECK-NEXT: li a0, 0
+; CHECK-NEXT: bgtz a1, .LBB29_4
+; CHECK-NEXT: .LBB29_8: # %entry
+; CHECK-NEXT: li a1, 0
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi <2 x double> %x to <2 x i64>
+ %spec.store.select = call <2 x i64> @llvm.smin.v2i64(<2 x i64> %conv, <2 x i64> <i64 4294967295, i64 4294967295>)
+ %spec.store.select7 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> %spec.store.select, <2 x i64> zeroinitializer)
+ %conv6 = trunc <2 x i64> %spec.store.select7 to <2 x i32>
+ ret <2 x i32> %conv6
+}
+
+; stest_f32i32_mm: llvm.smin/llvm.smax intrinsic form; signed conversion of
+; <4 x float> clamped to [INT32_MIN, INT32_MAX] and truncated to <4 x i32>.
+define <4 x i32> @stest_f32i32_mm(<4 x float> %x) {
+; CHECK-LABEL: stest_f32i32_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fmv.w.x ft1, a4
+; CHECK-NEXT: fmv.w.x ft2, a3
+; CHECK-NEXT: fmv.w.x ft0, a2
+; CHECK-NEXT: fcvt.l.s a2, ft1, rtz
+; CHECK-NEXT: lui a6, 524288
+; CHECK-NEXT: addiw a5, a6, -1
+; CHECK-NEXT: fcvt.l.s a3, ft2, rtz
+; CHECK-NEXT: blt a2, a5, .LBB30_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: mv a2, a5
+; CHECK-NEXT: .LBB30_2: # %entry
+; CHECK-NEXT: fmv.w.x ft1, a1
+; CHECK-NEXT: fcvt.l.s a1, ft0, rtz
+; CHECK-NEXT: bge a3, a5, .LBB30_11
+; CHECK-NEXT: # %bb.3: # %entry
+; CHECK-NEXT: fcvt.l.s a4, ft1, rtz
+; CHECK-NEXT: bge a1, a5, .LBB30_12
+; CHECK-NEXT: .LBB30_4: # %entry
+; CHECK-NEXT: bge a4, a5, .LBB30_13
+; CHECK-NEXT: .LBB30_5: # %entry
+; CHECK-NEXT: bge a6, a4, .LBB30_14
+; CHECK-NEXT: .LBB30_6: # %entry
+; CHECK-NEXT: bge a6, a1, .LBB30_15
+; CHECK-NEXT: .LBB30_7: # %entry
+; CHECK-NEXT: bge a6, a3, .LBB30_16
+; CHECK-NEXT: .LBB30_8: # %entry
+; CHECK-NEXT: blt a6, a2, .LBB30_10
+; CHECK-NEXT: .LBB30_9: # %entry
+; CHECK-NEXT: lui a2, 524288
+; CHECK-NEXT: .LBB30_10: # %entry
+; CHECK-NEXT: sw a2, 12(a0)
+; CHECK-NEXT: sw a3, 8(a0)
+; CHECK-NEXT: sw a1, 4(a0)
+; CHECK-NEXT: sw a4, 0(a0)
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB30_11: # %entry
+; CHECK-NEXT: mv a3, a5
+; CHECK-NEXT: fcvt.l.s a4, ft1, rtz
+; CHECK-NEXT: blt a1, a5, .LBB30_4
+; CHECK-NEXT: .LBB30_12: # %entry
+; CHECK-NEXT: mv a1, a5
+; CHECK-NEXT: blt a4, a5, .LBB30_5
+; CHECK-NEXT: .LBB30_13: # %entry
+; CHECK-NEXT: mv a4, a5
+; CHECK-NEXT: blt a6, a4, .LBB30_6
+; CHECK-NEXT: .LBB30_14: # %entry
+; CHECK-NEXT: lui a4, 524288
+; CHECK-NEXT: blt a6, a1, .LBB30_7
+; CHECK-NEXT: .LBB30_15: # %entry
+; CHECK-NEXT: lui a1, 524288
+; CHECK-NEXT: blt a6, a3, .LBB30_8
+; CHECK-NEXT: .LBB30_16: # %entry
+; CHECK-NEXT: lui a3, 524288
+; CHECK-NEXT: bge a6, a2, .LBB30_9
+; CHECK-NEXT: j .LBB30_10
+entry:
+ %conv = fptosi <4 x float> %x to <4 x i64>
+ %spec.store.select = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>)
+ %spec.store.select7 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> %spec.store.select, <4 x i64> <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648>)
+ %conv6 = trunc <4 x i64> %spec.store.select7 to <4 x i32>
+ ret <4 x i32> %conv6
+}
+
+; utest_f32i32_mm: llvm.umin intrinsic form; unsigned conversion of
+; <4 x float> clamped to UINT32_MAX and truncated to <4 x i32>.
+define <4 x i32> @utest_f32i32_mm(<4 x float> %x) {
+; CHECK-LABEL: utest_f32i32_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fmv.w.x ft1, a1
+; CHECK-NEXT: fmv.w.x ft2, a2
+; CHECK-NEXT: fmv.w.x ft0, a3
+; CHECK-NEXT: fcvt.lu.s a1, ft1, rtz
+; CHECK-NEXT: li a2, -1
+; CHECK-NEXT: srli a3, a2, 32
+; CHECK-NEXT: fcvt.lu.s a2, ft2, rtz
+; CHECK-NEXT: bltu a1, a3, .LBB31_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: mv a1, a3
+; CHECK-NEXT: .LBB31_2: # %entry
+; CHECK-NEXT: fmv.w.x ft1, a4
+; CHECK-NEXT: fcvt.lu.s a4, ft0, rtz
+; CHECK-NEXT: bgeu a2, a3, .LBB31_7
+; CHECK-NEXT: # %bb.3: # %entry
+; CHECK-NEXT: fcvt.lu.s a5, ft1, rtz
+; CHECK-NEXT: bgeu a4, a3, .LBB31_8
+; CHECK-NEXT: .LBB31_4: # %entry
+; CHECK-NEXT: bltu a5, a3, .LBB31_6
+; CHECK-NEXT: .LBB31_5: # %entry
+; CHECK-NEXT: mv a5, a3
+; CHECK-NEXT: .LBB31_6: # %entry
+; CHECK-NEXT: sw a5, 12(a0)
+; CHECK-NEXT: sw a4, 8(a0)
+; CHECK-NEXT: sw a2, 4(a0)
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB31_7: # %entry
+; CHECK-NEXT: mv a2, a3
+; CHECK-NEXT: fcvt.lu.s a5, ft1, rtz
+; CHECK-NEXT: bltu a4, a3, .LBB31_4
+; CHECK-NEXT: .LBB31_8: # %entry
+; CHECK-NEXT: mv a4, a3
+; CHECK-NEXT: bgeu a5, a3, .LBB31_5
+; CHECK-NEXT: j .LBB31_6
+entry:
+ %conv = fptoui <4 x float> %x to <4 x i64>
+ %spec.store.select = call <4 x i64> @llvm.umin.v4i64(<4 x i64> %conv, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
+ %conv6 = trunc <4 x i64> %spec.store.select to <4 x i32>
+ ret <4 x i32> %conv6
+}
+
+; ustest_f32i32_mm: llvm.smin/llvm.smax intrinsic form; signed conversion of
+; <4 x float> clamped to [0, UINT32_MAX] and truncated to <4 x i32>.
+define <4 x i32> @ustest_f32i32_mm(<4 x float> %x) {
+; CHECK-LABEL: ustest_f32i32_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fmv.w.x ft1, a4
+; CHECK-NEXT: fmv.w.x ft2, a3
+; CHECK-NEXT: fmv.w.x ft0, a2
+; CHECK-NEXT: fcvt.l.s a2, ft1, rtz
+; CHECK-NEXT: li a3, -1
+; CHECK-NEXT: srli a5, a3, 32
+; CHECK-NEXT: fcvt.l.s a3, ft2, rtz
+; CHECK-NEXT: blt a2, a5, .LBB32_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: mv a2, a5
+; CHECK-NEXT: .LBB32_2: # %entry
+; CHECK-NEXT: fmv.w.x ft1, a1
+; CHECK-NEXT: fcvt.l.s a1, ft0, rtz
+; CHECK-NEXT: bge a3, a5, .LBB32_11
+; CHECK-NEXT: # %bb.3: # %entry
+; CHECK-NEXT: fcvt.l.s a4, ft1, rtz
+; CHECK-NEXT: bge a1, a5, .LBB32_12
+; CHECK-NEXT: .LBB32_4: # %entry
+; CHECK-NEXT: bge a4, a5, .LBB32_13
+; CHECK-NEXT: .LBB32_5: # %entry
+; CHECK-NEXT: blez a4, .LBB32_14
+; CHECK-NEXT: .LBB32_6: # %entry
+; CHECK-NEXT: blez a1, .LBB32_15
+; CHECK-NEXT: .LBB32_7: # %entry
+; CHECK-NEXT: blez a3, .LBB32_16
+; CHECK-NEXT: .LBB32_8: # %entry
+; CHECK-NEXT: bgtz a2, .LBB32_10
+; CHECK-NEXT: .LBB32_9: # %entry
+; CHECK-NEXT: li a2, 0
+; CHECK-NEXT: .LBB32_10: # %entry
+; CHECK-NEXT: sw a2, 12(a0)
+; CHECK-NEXT: sw a3, 8(a0)
+; CHECK-NEXT: sw a1, 4(a0)
+; CHECK-NEXT: sw a4, 0(a0)
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB32_11: # %entry
+; CHECK-NEXT: mv a3, a5
+; CHECK-NEXT: fcvt.l.s a4, ft1, rtz
+; CHECK-NEXT: blt a1, a5, .LBB32_4
+; CHECK-NEXT: .LBB32_12: # %entry
+; CHECK-NEXT: mv a1, a5
+; CHECK-NEXT: blt a4, a5, .LBB32_5
+; CHECK-NEXT: .LBB32_13: # %entry
+; CHECK-NEXT: mv a4, a5
+; CHECK-NEXT: bgtz a4, .LBB32_6
+; CHECK-NEXT: .LBB32_14: # %entry
+; CHECK-NEXT: li a4, 0
+; CHECK-NEXT: bgtz a1, .LBB32_7
+; CHECK-NEXT: .LBB32_15: # %entry
+; CHECK-NEXT: li a1, 0
+; CHECK-NEXT: bgtz a3, .LBB32_8
+; CHECK-NEXT: .LBB32_16: # %entry
+; CHECK-NEXT: li a3, 0
+; CHECK-NEXT: blez a2, .LBB32_9
+; CHECK-NEXT: j .LBB32_10
+entry:
+ %conv = fptosi <4 x float> %x to <4 x i64>
+ %spec.store.select = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
+ %spec.store.select7 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> %spec.store.select, <4 x i64> zeroinitializer)
+ %conv6 = trunc <4 x i64> %spec.store.select7 to <4 x i32>
+ ret <4 x i32> %conv6
+}
+
+; stest_f16i32_mm: llvm.smin/llvm.smax intrinsic form; signed conversion of
+; <4 x half> clamped to [INT32_MIN, INT32_MAX], truncated to <4 x i32>. Lanes
+; widen via __gnu_h2f_ieee (archive munged "@plt" into " at plt"; restored so
+; the CHECK lines match real llc output).
+define <4 x i32> @stest_f16i32_mm(<4 x half> %x) {
+; CHECK-LABEL: stest_f16i32_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s2, 32(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s3, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s4, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset ra, -8
+; CHECK-NEXT: .cfi_offset s0, -16
+; CHECK-NEXT: .cfi_offset s1, -24
+; CHECK-NEXT: .cfi_offset s2, -32
+; CHECK-NEXT: .cfi_offset s3, -40
+; CHECK-NEXT: .cfi_offset s4, -48
+; CHECK-NEXT: lhu s2, 24(a1)
+; CHECK-NEXT: lhu s4, 0(a1)
+; CHECK-NEXT: lhu s0, 8(a1)
+; CHECK-NEXT: lhu a1, 16(a1)
+; CHECK-NEXT: mv s3, a0
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: call __gnu_h2f_ieee@plt
+; CHECK-NEXT: mv s1, a0
+; CHECK-NEXT: mv a0, s0
+; CHECK-NEXT: call __gnu_h2f_ieee@plt
+; CHECK-NEXT: mv s0, a0
+; CHECK-NEXT: mv a0, s4
+; CHECK-NEXT: call __gnu_h2f_ieee@plt
+; CHECK-NEXT: mv s4, a0
+; CHECK-NEXT: fmv.w.x ft0, s0
+; CHECK-NEXT: fsw ft0, 12(sp) # 4-byte Folded Spill
+; CHECK-NEXT: fmv.w.x ft0, s1
+; CHECK-NEXT: fcvt.l.s s0, ft0, rtz
+; CHECK-NEXT: mv a0, s2
+; CHECK-NEXT: call __gnu_h2f_ieee@plt
+; CHECK-NEXT: fmv.w.x ft0, a0
+; CHECK-NEXT: fcvt.l.s a0, ft0, rtz
+; CHECK-NEXT: lui a1, 524288
+; CHECK-NEXT: addiw a4, a1, -1
+; CHECK-NEXT: blt a0, a4, .LBB33_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: mv a0, a4
+; CHECK-NEXT: .LBB33_2: # %entry
+; CHECK-NEXT: fmv.w.x ft0, s4
+; CHECK-NEXT: flw ft1, 12(sp) # 4-byte Folded Reload
+; CHECK-NEXT: fcvt.l.s a2, ft1, rtz
+; CHECK-NEXT: bge s0, a4, .LBB33_11
+; CHECK-NEXT: # %bb.3: # %entry
+; CHECK-NEXT: fcvt.l.s a3, ft0, rtz
+; CHECK-NEXT: bge a2, a4, .LBB33_12
+; CHECK-NEXT: .LBB33_4: # %entry
+; CHECK-NEXT: bge a3, a4, .LBB33_13
+; CHECK-NEXT: .LBB33_5: # %entry
+; CHECK-NEXT: bge a1, a3, .LBB33_14
+; CHECK-NEXT: .LBB33_6: # %entry
+; CHECK-NEXT: bge a1, a2, .LBB33_15
+; CHECK-NEXT: .LBB33_7: # %entry
+; CHECK-NEXT: bge a1, s0, .LBB33_16
+; CHECK-NEXT: .LBB33_8: # %entry
+; CHECK-NEXT: blt a1, a0, .LBB33_10
+; CHECK-NEXT: .LBB33_9: # %entry
+; CHECK-NEXT: lui a0, 524288
+; CHECK-NEXT: .LBB33_10: # %entry
+; CHECK-NEXT: sw a0, 12(s3)
+; CHECK-NEXT: sw s0, 8(s3)
+; CHECK-NEXT: sw a2, 4(s3)
+; CHECK-NEXT: sw a3, 0(s3)
+; CHECK-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s2, 32(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s3, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s4, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 64
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB33_11: # %entry
+; CHECK-NEXT: mv s0, a4
+; CHECK-NEXT: fcvt.l.s a3, ft0, rtz
+; CHECK-NEXT: blt a2, a4, .LBB33_4
+; CHECK-NEXT: .LBB33_12: # %entry
+; CHECK-NEXT: mv a2, a4
+; CHECK-NEXT: blt a3, a4, .LBB33_5
+; CHECK-NEXT: .LBB33_13: # %entry
+; CHECK-NEXT: mv a3, a4
+; CHECK-NEXT: blt a1, a3, .LBB33_6
+; CHECK-NEXT: .LBB33_14: # %entry
+; CHECK-NEXT: lui a3, 524288
+; CHECK-NEXT: blt a1, a2, .LBB33_7
+; CHECK-NEXT: .LBB33_15: # %entry
+; CHECK-NEXT: lui a2, 524288
+; CHECK-NEXT: blt a1, s0, .LBB33_8
+; CHECK-NEXT: .LBB33_16: # %entry
+; CHECK-NEXT: lui s0, 524288
+; CHECK-NEXT: bge a1, a0, .LBB33_9
+; CHECK-NEXT: j .LBB33_10
+entry:
+ %conv = fptosi <4 x half> %x to <4 x i64>
+ %spec.store.select = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>)
+ %spec.store.select7 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> %spec.store.select, <4 x i64> <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648>)
+ %conv6 = trunc <4 x i64> %spec.store.select7 to <4 x i32>
+ ret <4 x i32> %conv6
+}
+
+; utesth_f16i32_mm: llvm.umin intrinsic form; unsigned conversion of
+; <4 x half> clamped to UINT32_MAX, truncated to <4 x i32>. Lanes widen via
+; __gnu_h2f_ieee (archive munged "@plt" into " at plt"; restored so the CHECK
+; lines match real llc output).
+define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
+; CHECK-LABEL: utesth_f16i32_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s2, 32(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s3, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s4, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset ra, -8
+; CHECK-NEXT: .cfi_offset s0, -16
+; CHECK-NEXT: .cfi_offset s1, -24
+; CHECK-NEXT: .cfi_offset s2, -32
+; CHECK-NEXT: .cfi_offset s3, -40
+; CHECK-NEXT: .cfi_offset s4, -48
+; CHECK-NEXT: lhu s2, 0(a1)
+; CHECK-NEXT: lhu s3, 24(a1)
+; CHECK-NEXT: lhu s1, 16(a1)
+; CHECK-NEXT: lhu a1, 8(a1)
+; CHECK-NEXT: mv s0, a0
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: call __gnu_h2f_ieee@plt
+; CHECK-NEXT: mv s4, a0
+; CHECK-NEXT: mv a0, s1
+; CHECK-NEXT: call __gnu_h2f_ieee@plt
+; CHECK-NEXT: mv s1, a0
+; CHECK-NEXT: mv a0, s3
+; CHECK-NEXT: call __gnu_h2f_ieee@plt
+; CHECK-NEXT: mv s3, a0
+; CHECK-NEXT: fmv.w.x ft0, s1
+; CHECK-NEXT: fsw ft0, 12(sp) # 4-byte Folded Spill
+; CHECK-NEXT: fmv.w.x ft0, s4
+; CHECK-NEXT: fcvt.lu.s s1, ft0, rtz
+; CHECK-NEXT: mv a0, s2
+; CHECK-NEXT: call __gnu_h2f_ieee@plt
+; CHECK-NEXT: fmv.w.x ft0, a0
+; CHECK-NEXT: fcvt.lu.s a0, ft0, rtz
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: srli a1, a1, 32
+; CHECK-NEXT: bltu a0, a1, .LBB34_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB34_2: # %entry
+; CHECK-NEXT: fmv.w.x ft0, s3
+; CHECK-NEXT: flw ft1, 12(sp) # 4-byte Folded Reload
+; CHECK-NEXT: fcvt.lu.s a2, ft1, rtz
+; CHECK-NEXT: bgeu s1, a1, .LBB34_7
+; CHECK-NEXT: # %bb.3: # %entry
+; CHECK-NEXT: fcvt.lu.s a3, ft0, rtz
+; CHECK-NEXT: bgeu a2, a1, .LBB34_8
+; CHECK-NEXT: .LBB34_4: # %entry
+; CHECK-NEXT: bltu a3, a1, .LBB34_6
+; CHECK-NEXT: .LBB34_5: # %entry
+; CHECK-NEXT: mv a3, a1
+; CHECK-NEXT: .LBB34_6: # %entry
+; CHECK-NEXT: sw a3, 12(s0)
+; CHECK-NEXT: sw a2, 8(s0)
+; CHECK-NEXT: sw s1, 4(s0)
+; CHECK-NEXT: sw a0, 0(s0)
+; CHECK-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s2, 32(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s3, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s4, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 64
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB34_7: # %entry
+; CHECK-NEXT: mv s1, a1
+; CHECK-NEXT: fcvt.lu.s a3, ft0, rtz
+; CHECK-NEXT: bltu a2, a1, .LBB34_4
+; CHECK-NEXT: .LBB34_8: # %entry
+; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: bgeu a3, a1, .LBB34_5
+; CHECK-NEXT: j .LBB34_6
+entry:
+ %conv = fptoui <4 x half> %x to <4 x i64>
+ %spec.store.select = call <4 x i64> @llvm.umin.v4i64(<4 x i64> %conv, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
+ %conv6 = trunc <4 x i64> %spec.store.select to <4 x i32>
+ ret <4 x i32> %conv6
+}
+
+; ustest_f16i32_mm: llvm.smin/llvm.smax intrinsic form; signed conversion of
+; <4 x half> clamped to [0, UINT32_MAX], truncated to <4 x i32>. Lanes widen
+; via __gnu_h2f_ieee (archive munged "@plt" into " at plt"; restored so the
+; CHECK lines match real llc output).
+define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) {
+; CHECK-LABEL: ustest_f16i32_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s2, 32(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s3, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s4, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset ra, -8
+; CHECK-NEXT: .cfi_offset s0, -16
+; CHECK-NEXT: .cfi_offset s1, -24
+; CHECK-NEXT: .cfi_offset s2, -32
+; CHECK-NEXT: .cfi_offset s3, -40
+; CHECK-NEXT: .cfi_offset s4, -48
+; CHECK-NEXT: lhu s2, 24(a1)
+; CHECK-NEXT: lhu s4, 0(a1)
+; CHECK-NEXT: lhu s0, 8(a1)
+; CHECK-NEXT: lhu a1, 16(a1)
+; CHECK-NEXT: mv s3, a0
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: call __gnu_h2f_ieee@plt
+; CHECK-NEXT: mv s1, a0
+; CHECK-NEXT: mv a0, s0
+; CHECK-NEXT: call __gnu_h2f_ieee@plt
+; CHECK-NEXT: mv s0, a0
+; CHECK-NEXT: mv a0, s4
+; CHECK-NEXT: call __gnu_h2f_ieee@plt
+; CHECK-NEXT: mv s4, a0
+; CHECK-NEXT: fmv.w.x ft0, s0
+; CHECK-NEXT: fsw ft0, 12(sp) # 4-byte Folded Spill
+; CHECK-NEXT: fmv.w.x ft0, s1
+; CHECK-NEXT: fcvt.l.s s0, ft0, rtz
+; CHECK-NEXT: mv a0, s2
+; CHECK-NEXT: call __gnu_h2f_ieee@plt
+; CHECK-NEXT: fmv.w.x ft0, a0
+; CHECK-NEXT: fcvt.l.s a0, ft0, rtz
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: srli a3, a1, 32
+; CHECK-NEXT: blt a0, a3, .LBB35_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: mv a0, a3
+; CHECK-NEXT: .LBB35_2: # %entry
+; CHECK-NEXT: fmv.w.x ft0, s4
+; CHECK-NEXT: flw ft1, 12(sp) # 4-byte Folded Reload
+; CHECK-NEXT: fcvt.l.s a1, ft1, rtz
+; CHECK-NEXT: bge s0, a3, .LBB35_11
+; CHECK-NEXT: # %bb.3: # %entry
+; CHECK-NEXT: fcvt.l.s a2, ft0, rtz
+; CHECK-NEXT: bge a1, a3, .LBB35_12
+; CHECK-NEXT: .LBB35_4: # %entry
+; CHECK-NEXT: bge a2, a3, .LBB35_13
+; CHECK-NEXT: .LBB35_5: # %entry
+; CHECK-NEXT: blez a2, .LBB35_14
+; CHECK-NEXT: .LBB35_6: # %entry
+; CHECK-NEXT: blez a1, .LBB35_15
+; CHECK-NEXT: .LBB35_7: # %entry
+; CHECK-NEXT: blez s0, .LBB35_16
+; CHECK-NEXT: .LBB35_8: # %entry
+; CHECK-NEXT: bgtz a0, .LBB35_10
+; CHECK-NEXT: .LBB35_9: # %entry
+; CHECK-NEXT: li a0, 0
+; CHECK-NEXT: .LBB35_10: # %entry
+; CHECK-NEXT: sw a0, 12(s3)
+; CHECK-NEXT: sw s0, 8(s3)
+; CHECK-NEXT: sw a1, 4(s3)
+; CHECK-NEXT: sw a2, 0(s3)
+; CHECK-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s2, 32(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s3, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s4, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 64
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB35_11: # %entry
+; CHECK-NEXT: mv s0, a3
+; CHECK-NEXT: fcvt.l.s a2, ft0, rtz
+; CHECK-NEXT: blt a1, a3, .LBB35_4
+; CHECK-NEXT: .LBB35_12: # %entry
+; CHECK-NEXT: mv a1, a3
+; CHECK-NEXT: blt a2, a3, .LBB35_5
+; CHECK-NEXT: .LBB35_13: # %entry
+; CHECK-NEXT: mv a2, a3
+; CHECK-NEXT: bgtz a2, .LBB35_6
+; CHECK-NEXT: .LBB35_14: # %entry
+; CHECK-NEXT: li a2, 0
+; CHECK-NEXT: bgtz a1, .LBB35_7
+; CHECK-NEXT: .LBB35_15: # %entry
+; CHECK-NEXT: li a1, 0
+; CHECK-NEXT: bgtz s0, .LBB35_8
+; CHECK-NEXT: .LBB35_16: # %entry
+; CHECK-NEXT: li s0, 0
+; CHECK-NEXT: blez a0, .LBB35_9
+; CHECK-NEXT: j .LBB35_10
+entry:
+ %conv = fptosi <4 x half> %x to <4 x i64>
+ %spec.store.select = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
+ %spec.store.select7 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> %spec.store.select, <4 x i64> zeroinitializer)
+ %conv6 = trunc <4 x i64> %spec.store.select7 to <4 x i32>
+ ret <4 x i32> %conv6
+}
+
+; i16 saturate
+
+; stest_f64i16_mm: llvm.smin/llvm.smax intrinsic form; signed conversion of
+; <2 x double> clamped to [INT16_MIN, INT16_MAX] and truncated to <2 x i16>.
+define <2 x i16> @stest_f64i16_mm(<2 x double> %x) {
+; CHECK-LABEL: stest_f64i16_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fmv.d.x ft0, a1
+; CHECK-NEXT: fmv.d.x ft1, a0
+; CHECK-NEXT: fcvt.w.d a1, ft0, rtz
+; CHECK-NEXT: lui a0, 8
+; CHECK-NEXT: addiw a2, a0, -1
+; CHECK-NEXT: fcvt.w.d a0, ft1, rtz
+; CHECK-NEXT: bge a1, a2, .LBB36_5
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: bge a0, a2, .LBB36_6
+; CHECK-NEXT: .LBB36_2: # %entry
+; CHECK-NEXT: lui a2, 1048568
+; CHECK-NEXT: bge a2, a0, .LBB36_7
+; CHECK-NEXT: .LBB36_3: # %entry
+; CHECK-NEXT: bge a2, a1, .LBB36_8
+; CHECK-NEXT: .LBB36_4: # %entry
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB36_5: # %entry
+; CHECK-NEXT: mv a1, a2
+; CHECK-NEXT: blt a0, a2, .LBB36_2
+; CHECK-NEXT: .LBB36_6: # %entry
+; CHECK-NEXT: mv a0, a2
+; CHECK-NEXT: lui a2, 1048568
+; CHECK-NEXT: blt a2, a0, .LBB36_3
+; CHECK-NEXT: .LBB36_7: # %entry
+; CHECK-NEXT: lui a0, 1048568
+; CHECK-NEXT: blt a2, a1, .LBB36_4
+; CHECK-NEXT: .LBB36_8: # %entry
+; CHECK-NEXT: lui a1, 1048568
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi <2 x double> %x to <2 x i32>
+ %spec.store.select = call <2 x i32> @llvm.smin.v2i32(<2 x i32> %conv, <2 x i32> <i32 32767, i32 32767>)
+ %spec.store.select7 = call <2 x i32> @llvm.smax.v2i32(<2 x i32> %spec.store.select, <2 x i32> <i32 -32768, i32 -32768>)
+ %conv6 = trunc <2 x i32> %spec.store.select7 to <2 x i16>
+ ret <2 x i16> %conv6
+}
+
+; Unsigned i16 saturation via min intrinsic: fptoui <2 x double> -> <2 x i32>,
+; clamp to [0, 65535] with llvm.umin, then trunc to <2 x i16>.
+; CHECK lines are autogenerated by utils/update_llc_test_checks.py.
+define <2 x i16> @utest_f64i16_mm(<2 x double> %x) {
+; CHECK-LABEL: utest_f64i16_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fmv.d.x ft0, a0
+; CHECK-NEXT: fmv.d.x ft1, a1
+; CHECK-NEXT: fcvt.wu.d a0, ft0, rtz
+; CHECK-NEXT: lui a1, 16
+; CHECK-NEXT: addiw a2, a1, -1
+; CHECK-NEXT: fcvt.wu.d a1, ft1, rtz
+; CHECK-NEXT: bgeu a0, a2, .LBB37_3
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: bgeu a1, a2, .LBB37_4
+; CHECK-NEXT: .LBB37_2: # %entry
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB37_3: # %entry
+; CHECK-NEXT: mv a0, a2
+; CHECK-NEXT: bltu a1, a2, .LBB37_2
+; CHECK-NEXT: .LBB37_4: # %entry
+; CHECK-NEXT: mv a1, a2
+; CHECK-NEXT: ret
+entry:
+ %conv = fptoui <2 x double> %x to <2 x i32>
+ %spec.store.select = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %conv, <2 x i32> <i32 65535, i32 65535>)
+ %conv6 = trunc <2 x i32> %spec.store.select to <2 x i16>
+ ret <2 x i16> %conv6
+}
+
+; Unsigned-range saturation of a *signed* conversion: fptosi <2 x double> ->
+; <2 x i32>, clamp to [0, 65535] with llvm.smin then llvm.smax-with-zero,
+; and trunc to <2 x i16>. CHECK lines autogenerated.
+define <2 x i16> @ustest_f64i16_mm(<2 x double> %x) {
+; CHECK-LABEL: ustest_f64i16_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fmv.d.x ft0, a1
+; CHECK-NEXT: fmv.d.x ft1, a0
+; CHECK-NEXT: fcvt.w.d a1, ft0, rtz
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addiw a2, a0, -1
+; CHECK-NEXT: fcvt.w.d a0, ft1, rtz
+; CHECK-NEXT: bge a1, a2, .LBB38_5
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: bge a0, a2, .LBB38_6
+; CHECK-NEXT: .LBB38_2: # %entry
+; CHECK-NEXT: blez a0, .LBB38_7
+; CHECK-NEXT: .LBB38_3: # %entry
+; CHECK-NEXT: blez a1, .LBB38_8
+; CHECK-NEXT: .LBB38_4: # %entry
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB38_5: # %entry
+; CHECK-NEXT: mv a1, a2
+; CHECK-NEXT: blt a0, a2, .LBB38_2
+; CHECK-NEXT: .LBB38_6: # %entry
+; CHECK-NEXT: mv a0, a2
+; CHECK-NEXT: bgtz a0, .LBB38_3
+; CHECK-NEXT: .LBB38_7: # %entry
+; CHECK-NEXT: li a0, 0
+; CHECK-NEXT: bgtz a1, .LBB38_4
+; CHECK-NEXT: .LBB38_8: # %entry
+; CHECK-NEXT: li a1, 0
+; CHECK-NEXT: ret
+entry:
+ %conv = fptosi <2 x double> %x to <2 x i32>
+ %spec.store.select = call <2 x i32> @llvm.smin.v2i32(<2 x i32> %conv, <2 x i32> <i32 65535, i32 65535>)
+ %spec.store.select7 = call <2 x i32> @llvm.smax.v2i32(<2 x i32> %spec.store.select, <2 x i32> zeroinitializer)
+ %conv6 = trunc <2 x i32> %spec.store.select7 to <2 x i16>
+ ret <2 x i16> %conv6
+}
+
+; 4-lane variant of the signed i16 clamp: fptosi <4 x float> -> <4 x i32>,
+; smin/smax to [-32768, 32767], trunc to <4 x i16>. Result is returned via
+; sret-style stores through a0 in the generated code. CHECK lines autogenerated.
+define <4 x i16> @stest_f32i16_mm(<4 x float> %x) {
+; CHECK-LABEL: stest_f32i16_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fmv.w.x ft1, a4
+; CHECK-NEXT: fmv.w.x ft2, a3
+; CHECK-NEXT: fmv.w.x ft0, a2
+; CHECK-NEXT: fcvt.w.s a2, ft1, rtz
+; CHECK-NEXT: lui a3, 8
+; CHECK-NEXT: addiw a5, a3, -1
+; CHECK-NEXT: fcvt.w.s a3, ft2, rtz
+; CHECK-NEXT: blt a2, a5, .LBB39_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: mv a2, a5
+; CHECK-NEXT: .LBB39_2: # %entry
+; CHECK-NEXT: fmv.w.x ft1, a1
+; CHECK-NEXT: fcvt.w.s a1, ft0, rtz
+; CHECK-NEXT: bge a3, a5, .LBB39_11
+; CHECK-NEXT: # %bb.3: # %entry
+; CHECK-NEXT: fcvt.w.s a4, ft1, rtz
+; CHECK-NEXT: bge a1, a5, .LBB39_12
+; CHECK-NEXT: .LBB39_4: # %entry
+; CHECK-NEXT: bge a4, a5, .LBB39_13
+; CHECK-NEXT: .LBB39_5: # %entry
+; CHECK-NEXT: lui a5, 1048568
+; CHECK-NEXT: bge a5, a4, .LBB39_14
+; CHECK-NEXT: .LBB39_6: # %entry
+; CHECK-NEXT: bge a5, a1, .LBB39_15
+; CHECK-NEXT: .LBB39_7: # %entry
+; CHECK-NEXT: bge a5, a3, .LBB39_16
+; CHECK-NEXT: .LBB39_8: # %entry
+; CHECK-NEXT: blt a5, a2, .LBB39_10
+; CHECK-NEXT: .LBB39_9: # %entry
+; CHECK-NEXT: lui a2, 1048568
+; CHECK-NEXT: .LBB39_10: # %entry
+; CHECK-NEXT: sh a2, 6(a0)
+; CHECK-NEXT: sh a3, 4(a0)
+; CHECK-NEXT: sh a1, 2(a0)
+; CHECK-NEXT: sh a4, 0(a0)
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB39_11: # %entry
+; CHECK-NEXT: mv a3, a5
+; CHECK-NEXT: fcvt.w.s a4, ft1, rtz
+; CHECK-NEXT: blt a1, a5, .LBB39_4
+; CHECK-NEXT: .LBB39_12: # %entry
+; CHECK-NEXT: mv a1, a5
+; CHECK-NEXT: blt a4, a5, .LBB39_5
+; CHECK-NEXT: .LBB39_13: # %entry
+; CHECK-NEXT: mv a4, a5
+; CHECK-NEXT: lui a5, 1048568
+; CHECK-NEXT: blt a5, a4, .LBB39_6
+; CHECK-NEXT: .LBB39_14: # %entry
+; CHECK-NEXT: lui a4, 1048568
+; CHECK-NEXT: blt a5, a1, .LBB39_7
+; CHECK-NEXT: .LBB39_15: # %entry
+; CHECK-NEXT: lui a1, 1048568
+; CHECK-NEXT: blt a5, a3, .LBB39_8
+; CHECK-NEXT: .LBB39_16: # %entry
+; CHECK-NEXT: lui a3, 1048568
+; CHECK-NEXT: bge a5, a2, .LBB39_9
+; CHECK-NEXT: j .LBB39_10
+entry:
+ %conv = fptosi <4 x float> %x to <4 x i32>
+ %spec.store.select = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %conv, <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767>)
+ %spec.store.select7 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %spec.store.select, <4 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768>)
+ %conv6 = trunc <4 x i32> %spec.store.select7 to <4 x i16>
+ ret <4 x i16> %conv6
+}
+
+; 4-lane unsigned i16 clamp: fptoui <4 x float> -> <4 x i32>, llvm.umin with
+; 65535 per lane, trunc to <4 x i16>. CHECK lines autogenerated.
+define <4 x i16> @utest_f32i16_mm(<4 x float> %x) {
+; CHECK-LABEL: utest_f32i16_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fmv.w.x ft1, a1
+; CHECK-NEXT: fmv.w.x ft2, a2
+; CHECK-NEXT: fmv.w.x ft0, a3
+; CHECK-NEXT: fcvt.wu.s a1, ft1, rtz
+; CHECK-NEXT: lui a2, 16
+; CHECK-NEXT: addiw a3, a2, -1
+; CHECK-NEXT: fcvt.wu.s a2, ft2, rtz
+; CHECK-NEXT: bltu a1, a3, .LBB40_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: mv a1, a3
+; CHECK-NEXT: .LBB40_2: # %entry
+; CHECK-NEXT: fmv.w.x ft1, a4
+; CHECK-NEXT: fcvt.wu.s a4, ft0, rtz
+; CHECK-NEXT: bgeu a2, a3, .LBB40_7
+; CHECK-NEXT: # %bb.3: # %entry
+; CHECK-NEXT: fcvt.wu.s a5, ft1, rtz
+; CHECK-NEXT: bgeu a4, a3, .LBB40_8
+; CHECK-NEXT: .LBB40_4: # %entry
+; CHECK-NEXT: bltu a5, a3, .LBB40_6
+; CHECK-NEXT: .LBB40_5: # %entry
+; CHECK-NEXT: mv a5, a3
+; CHECK-NEXT: .LBB40_6: # %entry
+; CHECK-NEXT: sh a5, 6(a0)
+; CHECK-NEXT: sh a4, 4(a0)
+; CHECK-NEXT: sh a2, 2(a0)
+; CHECK-NEXT: sh a1, 0(a0)
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB40_7: # %entry
+; CHECK-NEXT: mv a2, a3
+; CHECK-NEXT: fcvt.wu.s a5, ft1, rtz
+; CHECK-NEXT: bltu a4, a3, .LBB40_4
+; CHECK-NEXT: .LBB40_8: # %entry
+; CHECK-NEXT: mv a4, a3
+; CHECK-NEXT: bgeu a5, a3, .LBB40_5
+; CHECK-NEXT: j .LBB40_6
+entry:
+ %conv = fptoui <4 x float> %x to <4 x i32>
+ %spec.store.select = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %conv, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>)
+ %conv6 = trunc <4 x i32> %spec.store.select to <4 x i16>
+ ret <4 x i16> %conv6
+}
+
+; 4-lane [0, 65535] clamp of a signed conversion: fptosi <4 x float> ->
+; <4 x i32>, smin with 65535 then smax with zero, trunc to <4 x i16>.
+; CHECK lines autogenerated.
+define <4 x i16> @ustest_f32i16_mm(<4 x float> %x) {
+; CHECK-LABEL: ustest_f32i16_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fmv.w.x ft1, a4
+; CHECK-NEXT: fmv.w.x ft2, a3
+; CHECK-NEXT: fmv.w.x ft0, a2
+; CHECK-NEXT: fcvt.w.s a2, ft1, rtz
+; CHECK-NEXT: lui a3, 16
+; CHECK-NEXT: addiw a5, a3, -1
+; CHECK-NEXT: fcvt.w.s a3, ft2, rtz
+; CHECK-NEXT: blt a2, a5, .LBB41_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: mv a2, a5
+; CHECK-NEXT: .LBB41_2: # %entry
+; CHECK-NEXT: fmv.w.x ft1, a1
+; CHECK-NEXT: fcvt.w.s a1, ft0, rtz
+; CHECK-NEXT: bge a3, a5, .LBB41_11
+; CHECK-NEXT: # %bb.3: # %entry
+; CHECK-NEXT: fcvt.w.s a4, ft1, rtz
+; CHECK-NEXT: bge a1, a5, .LBB41_12
+; CHECK-NEXT: .LBB41_4: # %entry
+; CHECK-NEXT: bge a4, a5, .LBB41_13
+; CHECK-NEXT: .LBB41_5: # %entry
+; CHECK-NEXT: blez a4, .LBB41_14
+; CHECK-NEXT: .LBB41_6: # %entry
+; CHECK-NEXT: blez a1, .LBB41_15
+; CHECK-NEXT: .LBB41_7: # %entry
+; CHECK-NEXT: blez a3, .LBB41_16
+; CHECK-NEXT: .LBB41_8: # %entry
+; CHECK-NEXT: bgtz a2, .LBB41_10
+; CHECK-NEXT: .LBB41_9: # %entry
+; CHECK-NEXT: li a2, 0
+; CHECK-NEXT: .LBB41_10: # %entry
+; CHECK-NEXT: sh a2, 6(a0)
+; CHECK-NEXT: sh a3, 4(a0)
+; CHECK-NEXT: sh a1, 2(a0)
+; CHECK-NEXT: sh a4, 0(a0)
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB41_11: # %entry
+; CHECK-NEXT: mv a3, a5
+; CHECK-NEXT: fcvt.w.s a4, ft1, rtz
+; CHECK-NEXT: blt a1, a5, .LBB41_4
+; CHECK-NEXT: .LBB41_12: # %entry
+; CHECK-NEXT: mv a1, a5
+; CHECK-NEXT: blt a4, a5, .LBB41_5
+; CHECK-NEXT: .LBB41_13: # %entry
+; CHECK-NEXT: mv a4, a5
+; CHECK-NEXT: bgtz a4, .LBB41_6
+; CHECK-NEXT: .LBB41_14: # %entry
+; CHECK-NEXT: li a4, 0
+; CHECK-NEXT: bgtz a1, .LBB41_7
+; CHECK-NEXT: .LBB41_15: # %entry
+; CHECK-NEXT: li a1, 0
+; CHECK-NEXT: bgtz a3, .LBB41_8
+; CHECK-NEXT: .LBB41_16: # %entry
+; CHECK-NEXT: li a3, 0
+; CHECK-NEXT: blez a2, .LBB41_9
+; CHECK-NEXT: j .LBB41_10
+entry:
+ %conv = fptosi <4 x float> %x to <4 x i32>
+ %spec.store.select = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %conv, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>)
+ %spec.store.select7 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %spec.store.select, <4 x i32> zeroinitializer)
+ %conv6 = trunc <4 x i32> %spec.store.select7 to <4 x i16>
+ ret <4 x i16> %conv6
+}
+
+; 8-lane signed i16 clamp from half: each lane goes through the
+; __gnu_h2f_ieee libcall (no native fp16 on this target), then fcvt.l.s and a
+; scalar smin/smax clamp to [-32768, 32767] before trunc to <8 x i16>.
+; The " at plt" spelling below is the mailing-list rendering of "@plt".
+; CHECK lines autogenerated by utils/update_llc_test_checks.py.
+define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
+; CHECK-LABEL: stest_f16i16_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -96
+; CHECK-NEXT: .cfi_def_cfa_offset 96
+; CHECK-NEXT: sd ra, 88(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 80(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s1, 72(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s2, 64(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s3, 56(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s4, 48(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s5, 40(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s6, 32(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s7, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s8, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s9, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset ra, -8
+; CHECK-NEXT: .cfi_offset s0, -16
+; CHECK-NEXT: .cfi_offset s1, -24
+; CHECK-NEXT: .cfi_offset s2, -32
+; CHECK-NEXT: .cfi_offset s3, -40
+; CHECK-NEXT: .cfi_offset s4, -48
+; CHECK-NEXT: .cfi_offset s5, -56
+; CHECK-NEXT: .cfi_offset s6, -64
+; CHECK-NEXT: .cfi_offset s7, -72
+; CHECK-NEXT: .cfi_offset s8, -80
+; CHECK-NEXT: .cfi_offset s9, -88
+; CHECK-NEXT: lhu s6, 56(a1)
+; CHECK-NEXT: lhu s2, 0(a1)
+; CHECK-NEXT: lhu s3, 8(a1)
+; CHECK-NEXT: lhu s4, 16(a1)
+; CHECK-NEXT: lhu s5, 24(a1)
+; CHECK-NEXT: lhu s1, 32(a1)
+; CHECK-NEXT: lhu s0, 40(a1)
+; CHECK-NEXT: lhu a1, 48(a1)
+; CHECK-NEXT: mv s8, a0
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: call __gnu_h2f_ieee at plt
+; CHECK-NEXT: mv s9, a0
+; CHECK-NEXT: mv a0, s0
+; CHECK-NEXT: call __gnu_h2f_ieee at plt
+; CHECK-NEXT: mv s0, a0
+; CHECK-NEXT: mv a0, s1
+; CHECK-NEXT: call __gnu_h2f_ieee at plt
+; CHECK-NEXT: mv s7, a0
+; CHECK-NEXT: mv a0, s5
+; CHECK-NEXT: call __gnu_h2f_ieee at plt
+; CHECK-NEXT: mv s5, a0
+; CHECK-NEXT: mv a0, s4
+; CHECK-NEXT: call __gnu_h2f_ieee at plt
+; CHECK-NEXT: mv s4, a0
+; CHECK-NEXT: mv a0, s3
+; CHECK-NEXT: call __gnu_h2f_ieee at plt
+; CHECK-NEXT: mv s3, a0
+; CHECK-NEXT: mv a0, s2
+; CHECK-NEXT: call __gnu_h2f_ieee at plt
+; CHECK-NEXT: mv s2, a0
+; CHECK-NEXT: fmv.w.x ft0, s0
+; CHECK-NEXT: fsw ft0, 4(sp) # 4-byte Folded Spill
+; CHECK-NEXT: fmv.w.x ft0, s9
+; CHECK-NEXT: fcvt.l.s s9, ft0, rtz
+; CHECK-NEXT: mv a0, s6
+; CHECK-NEXT: call __gnu_h2f_ieee at plt
+; CHECK-NEXT: fmv.w.x ft0, a0
+; CHECK-NEXT: fcvt.l.s a0, ft0, rtz
+; CHECK-NEXT: lui a1, 8
+; CHECK-NEXT: addiw s1, a1, -1
+; CHECK-NEXT: blt a0, s1, .LBB42_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: mv a0, s1
+; CHECK-NEXT: .LBB42_2: # %entry
+; CHECK-NEXT: fmv.w.x ft1, s7
+; CHECK-NEXT: flw ft0, 4(sp) # 4-byte Folded Reload
+; CHECK-NEXT: fcvt.l.s a1, ft0, rtz
+; CHECK-NEXT: blt s9, s1, .LBB42_4
+; CHECK-NEXT: # %bb.3: # %entry
+; CHECK-NEXT: mv s9, s1
+; CHECK-NEXT: .LBB42_4: # %entry
+; CHECK-NEXT: fmv.w.x ft0, s5
+; CHECK-NEXT: fcvt.l.s a2, ft1, rtz
+; CHECK-NEXT: blt a1, s1, .LBB42_6
+; CHECK-NEXT: # %bb.5: # %entry
+; CHECK-NEXT: mv a1, s1
+; CHECK-NEXT: .LBB42_6: # %entry
+; CHECK-NEXT: fmv.w.x ft1, s4
+; CHECK-NEXT: fcvt.l.s a3, ft0, rtz
+; CHECK-NEXT: blt a2, s1, .LBB42_8
+; CHECK-NEXT: # %bb.7: # %entry
+; CHECK-NEXT: mv a2, s1
+; CHECK-NEXT: .LBB42_8: # %entry
+; CHECK-NEXT: fmv.w.x ft0, s3
+; CHECK-NEXT: fcvt.l.s a4, ft1, rtz
+; CHECK-NEXT: blt a3, s1, .LBB42_10
+; CHECK-NEXT: # %bb.9: # %entry
+; CHECK-NEXT: mv a3, s1
+; CHECK-NEXT: .LBB42_10: # %entry
+; CHECK-NEXT: fmv.w.x ft1, s2
+; CHECK-NEXT: fcvt.l.s a5, ft0, rtz
+; CHECK-NEXT: bge a4, s1, .LBB42_23
+; CHECK-NEXT: # %bb.11: # %entry
+; CHECK-NEXT: fcvt.l.s s0, ft1, rtz
+; CHECK-NEXT: bge a5, s1, .LBB42_24
+; CHECK-NEXT: .LBB42_12: # %entry
+; CHECK-NEXT: bge s0, s1, .LBB42_25
+; CHECK-NEXT: .LBB42_13: # %entry
+; CHECK-NEXT: lui s1, 1048568
+; CHECK-NEXT: bge s1, s0, .LBB42_26
+; CHECK-NEXT: .LBB42_14: # %entry
+; CHECK-NEXT: bge s1, a5, .LBB42_27
+; CHECK-NEXT: .LBB42_15: # %entry
+; CHECK-NEXT: bge s1, a4, .LBB42_28
+; CHECK-NEXT: .LBB42_16: # %entry
+; CHECK-NEXT: bge s1, a3, .LBB42_29
+; CHECK-NEXT: .LBB42_17: # %entry
+; CHECK-NEXT: bge s1, a2, .LBB42_30
+; CHECK-NEXT: .LBB42_18: # %entry
+; CHECK-NEXT: bge s1, a1, .LBB42_31
+; CHECK-NEXT: .LBB42_19: # %entry
+; CHECK-NEXT: bge s1, s9, .LBB42_32
+; CHECK-NEXT: .LBB42_20: # %entry
+; CHECK-NEXT: blt s1, a0, .LBB42_22
+; CHECK-NEXT: .LBB42_21: # %entry
+; CHECK-NEXT: lui a0, 1048568
+; CHECK-NEXT: .LBB42_22: # %entry
+; CHECK-NEXT: sh a0, 14(s8)
+; CHECK-NEXT: sh s9, 12(s8)
+; CHECK-NEXT: sh a1, 10(s8)
+; CHECK-NEXT: sh a2, 8(s8)
+; CHECK-NEXT: sh a3, 6(s8)
+; CHECK-NEXT: sh a4, 4(s8)
+; CHECK-NEXT: sh a5, 2(s8)
+; CHECK-NEXT: sh s0, 0(s8)
+; CHECK-NEXT: ld ra, 88(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 80(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s1, 72(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s2, 64(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s3, 56(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s4, 48(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s5, 40(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s6, 32(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s7, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s8, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s9, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 96
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB42_23: # %entry
+; CHECK-NEXT: mv a4, s1
+; CHECK-NEXT: fcvt.l.s s0, ft1, rtz
+; CHECK-NEXT: blt a5, s1, .LBB42_12
+; CHECK-NEXT: .LBB42_24: # %entry
+; CHECK-NEXT: mv a5, s1
+; CHECK-NEXT: blt s0, s1, .LBB42_13
+; CHECK-NEXT: .LBB42_25: # %entry
+; CHECK-NEXT: mv s0, s1
+; CHECK-NEXT: lui s1, 1048568
+; CHECK-NEXT: blt s1, s0, .LBB42_14
+; CHECK-NEXT: .LBB42_26: # %entry
+; CHECK-NEXT: lui s0, 1048568
+; CHECK-NEXT: blt s1, a5, .LBB42_15
+; CHECK-NEXT: .LBB42_27: # %entry
+; CHECK-NEXT: lui a5, 1048568
+; CHECK-NEXT: blt s1, a4, .LBB42_16
+; CHECK-NEXT: .LBB42_28: # %entry
+; CHECK-NEXT: lui a4, 1048568
+; CHECK-NEXT: blt s1, a3, .LBB42_17
+; CHECK-NEXT: .LBB42_29: # %entry
+; CHECK-NEXT: lui a3, 1048568
+; CHECK-NEXT: blt s1, a2, .LBB42_18
+; CHECK-NEXT: .LBB42_30: # %entry
+; CHECK-NEXT: lui a2, 1048568
+; CHECK-NEXT: blt s1, a1, .LBB42_19
+; CHECK-NEXT: .LBB42_31: # %entry
+; CHECK-NEXT: lui a1, 1048568
+; CHECK-NEXT: blt s1, s9, .LBB42_20
+; CHECK-NEXT: .LBB42_32: # %entry
+; CHECK-NEXT: lui s9, 1048568
+; CHECK-NEXT: bge s1, a0, .LBB42_21
+; CHECK-NEXT: j .LBB42_22
+entry:
+ %conv = fptosi <8 x half> %x to <8 x i32>
+ %spec.store.select = call <8 x i32> @llvm.smin.v8i32(<8 x i32> %conv, <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>)
+ %spec.store.select7 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> %spec.store.select, <8 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>)
+ %conv6 = trunc <8 x i32> %spec.store.select7 to <8 x i16>
+ ret <8 x i16> %conv6
+}
+
+; 8-lane unsigned i16 clamp from half: each lane is widened via the
+; __gnu_h2f_ieee libcall, converted with fcvt.lu.s, sign-extended to a 32-bit
+; view, and umin-clamped to 65535 before trunc to <8 x i16>.
+; CHECK lines autogenerated by utils/update_llc_test_checks.py.
+define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
+; CHECK-LABEL: utesth_f16i16_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -96
+; CHECK-NEXT: .cfi_def_cfa_offset 96
+; CHECK-NEXT: sd ra, 88(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 80(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s1, 72(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s2, 64(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s3, 56(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s4, 48(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s5, 40(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s6, 32(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s7, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s8, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s9, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset ra, -8
+; CHECK-NEXT: .cfi_offset s0, -16
+; CHECK-NEXT: .cfi_offset s1, -24
+; CHECK-NEXT: .cfi_offset s2, -32
+; CHECK-NEXT: .cfi_offset s3, -40
+; CHECK-NEXT: .cfi_offset s4, -48
+; CHECK-NEXT: .cfi_offset s5, -56
+; CHECK-NEXT: .cfi_offset s6, -64
+; CHECK-NEXT: .cfi_offset s7, -72
+; CHECK-NEXT: .cfi_offset s8, -80
+; CHECK-NEXT: .cfi_offset s9, -88
+; CHECK-NEXT: lhu s5, 0(a1)
+; CHECK-NEXT: lhu s2, 56(a1)
+; CHECK-NEXT: lhu s3, 48(a1)
+; CHECK-NEXT: lhu s4, 40(a1)
+; CHECK-NEXT: lhu s6, 32(a1)
+; CHECK-NEXT: lhu s7, 24(a1)
+; CHECK-NEXT: lhu s1, 16(a1)
+; CHECK-NEXT: lhu a1, 8(a1)
+; CHECK-NEXT: mv s0, a0
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: call __gnu_h2f_ieee at plt
+; CHECK-NEXT: mv s8, a0
+; CHECK-NEXT: mv a0, s1
+; CHECK-NEXT: call __gnu_h2f_ieee at plt
+; CHECK-NEXT: mv s9, a0
+; CHECK-NEXT: mv a0, s7
+; CHECK-NEXT: call __gnu_h2f_ieee at plt
+; CHECK-NEXT: mv s1, a0
+; CHECK-NEXT: mv a0, s6
+; CHECK-NEXT: call __gnu_h2f_ieee at plt
+; CHECK-NEXT: mv s6, a0
+; CHECK-NEXT: mv a0, s4
+; CHECK-NEXT: call __gnu_h2f_ieee at plt
+; CHECK-NEXT: mv s4, a0
+; CHECK-NEXT: mv a0, s3
+; CHECK-NEXT: call __gnu_h2f_ieee at plt
+; CHECK-NEXT: mv s3, a0
+; CHECK-NEXT: mv a0, s2
+; CHECK-NEXT: call __gnu_h2f_ieee at plt
+; CHECK-NEXT: mv s2, a0
+; CHECK-NEXT: fmv.w.x ft0, s1
+; CHECK-NEXT: fsw ft0, 4(sp) # 4-byte Folded Spill
+; CHECK-NEXT: fmv.w.x ft0, s9
+; CHECK-NEXT: fcvt.lu.s s1, ft0, rtz
+; CHECK-NEXT: fmv.w.x ft0, s8
+; CHECK-NEXT: fcvt.lu.s a0, ft0, rtz
+; CHECK-NEXT: sext.w s7, a0
+; CHECK-NEXT: mv a0, s5
+; CHECK-NEXT: call __gnu_h2f_ieee at plt
+; CHECK-NEXT: fmv.w.x ft0, a0
+; CHECK-NEXT: fcvt.lu.s a0, ft0, rtz
+; CHECK-NEXT: sext.w a6, a0
+; CHECK-NEXT: lui a0, 16
+; CHECK-NEXT: addiw a1, a0, -1
+; CHECK-NEXT: bltu a6, a1, .LBB43_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: mv a6, a1
+; CHECK-NEXT: .LBB43_2: # %entry
+; CHECK-NEXT: fmv.w.x ft0, s6
+; CHECK-NEXT: flw ft1, 4(sp) # 4-byte Folded Reload
+; CHECK-NEXT: fcvt.lu.s a0, ft1, rtz
+; CHECK-NEXT: sext.w a2, s1
+; CHECK-NEXT: bltu s7, a1, .LBB43_4
+; CHECK-NEXT: # %bb.3: # %entry
+; CHECK-NEXT: mv s7, a1
+; CHECK-NEXT: .LBB43_4: # %entry
+; CHECK-NEXT: fmv.w.x ft1, s4
+; CHECK-NEXT: fcvt.lu.s a4, ft0, rtz
+; CHECK-NEXT: sext.w a3, a0
+; CHECK-NEXT: bltu a2, a1, .LBB43_6
+; CHECK-NEXT: # %bb.5: # %entry
+; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: .LBB43_6: # %entry
+; CHECK-NEXT: fmv.w.x ft0, s3
+; CHECK-NEXT: fcvt.lu.s a0, ft1, rtz
+; CHECK-NEXT: sext.w a4, a4
+; CHECK-NEXT: bltu a3, a1, .LBB43_8
+; CHECK-NEXT: # %bb.7: # %entry
+; CHECK-NEXT: mv a3, a1
+; CHECK-NEXT: .LBB43_8: # %entry
+; CHECK-NEXT: fmv.w.x ft1, s2
+; CHECK-NEXT: fcvt.lu.s s1, ft0, rtz
+; CHECK-NEXT: sext.w a5, a0
+; CHECK-NEXT: bltu a4, a1, .LBB43_10
+; CHECK-NEXT: # %bb.9: # %entry
+; CHECK-NEXT: mv a4, a1
+; CHECK-NEXT: .LBB43_10: # %entry
+; CHECK-NEXT: fcvt.lu.s a0, ft1, rtz
+; CHECK-NEXT: sext.w s1, s1
+; CHECK-NEXT: bgeu a5, a1, .LBB43_15
+; CHECK-NEXT: # %bb.11: # %entry
+; CHECK-NEXT: sext.w a0, a0
+; CHECK-NEXT: bgeu s1, a1, .LBB43_16
+; CHECK-NEXT: .LBB43_12: # %entry
+; CHECK-NEXT: bltu a0, a1, .LBB43_14
+; CHECK-NEXT: .LBB43_13: # %entry
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB43_14: # %entry
+; CHECK-NEXT: sh a0, 14(s0)
+; CHECK-NEXT: sh s1, 12(s0)
+; CHECK-NEXT: sh a5, 10(s0)
+; CHECK-NEXT: sh a4, 8(s0)
+; CHECK-NEXT: sh a3, 6(s0)
+; CHECK-NEXT: sh a2, 4(s0)
+; CHECK-NEXT: sh s7, 2(s0)
+; CHECK-NEXT: sh a6, 0(s0)
+; CHECK-NEXT: ld ra, 88(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 80(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s1, 72(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s2, 64(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s3, 56(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s4, 48(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s5, 40(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s6, 32(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s7, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s8, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s9, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 96
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB43_15: # %entry
+; CHECK-NEXT: mv a5, a1
+; CHECK-NEXT: sext.w a0, a0
+; CHECK-NEXT: bltu s1, a1, .LBB43_12
+; CHECK-NEXT: .LBB43_16: # %entry
+; CHECK-NEXT: mv s1, a1
+; CHECK-NEXT: bgeu a0, a1, .LBB43_13
+; CHECK-NEXT: j .LBB43_14
+entry:
+ %conv = fptoui <8 x half> %x to <8 x i32>
+ %spec.store.select = call <8 x i32> @llvm.umin.v8i32(<8 x i32> %conv, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>)
+ %conv6 = trunc <8 x i32> %spec.store.select to <8 x i16>
+ ret <8 x i16> %conv6
+}
+
+; 8-lane [0, 65535] clamp of a signed conversion from half: lanes are widened
+; via __gnu_h2f_ieee libcalls, converted with fcvt.l.s, then smin with 65535
+; and smax with zero before trunc to <8 x i16>. CHECK lines autogenerated.
+define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
+; CHECK-LABEL: ustest_f16i16_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -96
+; CHECK-NEXT: .cfi_def_cfa_offset 96
+; CHECK-NEXT: sd ra, 88(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 80(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s1, 72(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s2, 64(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s3, 56(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s4, 48(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s5, 40(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s6, 32(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s7, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s8, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s9, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset ra, -8
+; CHECK-NEXT: .cfi_offset s0, -16
+; CHECK-NEXT: .cfi_offset s1, -24
+; CHECK-NEXT: .cfi_offset s2, -32
+; CHECK-NEXT: .cfi_offset s3, -40
+; CHECK-NEXT: .cfi_offset s4, -48
+; CHECK-NEXT: .cfi_offset s5, -56
+; CHECK-NEXT: .cfi_offset s6, -64
+; CHECK-NEXT: .cfi_offset s7, -72
+; CHECK-NEXT: .cfi_offset s8, -80
+; CHECK-NEXT: .cfi_offset s9, -88
+; CHECK-NEXT: lhu s6, 56(a1)
+; CHECK-NEXT: lhu s2, 0(a1)
+; CHECK-NEXT: lhu s3, 8(a1)
+; CHECK-NEXT: lhu s4, 16(a1)
+; CHECK-NEXT: lhu s5, 24(a1)
+; CHECK-NEXT: lhu s1, 32(a1)
+; CHECK-NEXT: lhu s0, 40(a1)
+; CHECK-NEXT: lhu a1, 48(a1)
+; CHECK-NEXT: mv s8, a0
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: call __gnu_h2f_ieee at plt
+; CHECK-NEXT: mv s9, a0
+; CHECK-NEXT: mv a0, s0
+; CHECK-NEXT: call __gnu_h2f_ieee at plt
+; CHECK-NEXT: mv s0, a0
+; CHECK-NEXT: mv a0, s1
+; CHECK-NEXT: call __gnu_h2f_ieee at plt
+; CHECK-NEXT: mv s7, a0
+; CHECK-NEXT: mv a0, s5
+; CHECK-NEXT: call __gnu_h2f_ieee at plt
+; CHECK-NEXT: mv s5, a0
+; CHECK-NEXT: mv a0, s4
+; CHECK-NEXT: call __gnu_h2f_ieee at plt
+; CHECK-NEXT: mv s4, a0
+; CHECK-NEXT: mv a0, s3
+; CHECK-NEXT: call __gnu_h2f_ieee at plt
+; CHECK-NEXT: mv s3, a0
+; CHECK-NEXT: mv a0, s2
+; CHECK-NEXT: call __gnu_h2f_ieee at plt
+; CHECK-NEXT: mv s2, a0
+; CHECK-NEXT: fmv.w.x ft0, s0
+; CHECK-NEXT: fsw ft0, 4(sp) # 4-byte Folded Spill
+; CHECK-NEXT: fmv.w.x ft0, s9
+; CHECK-NEXT: fcvt.l.s s9, ft0, rtz
+; CHECK-NEXT: mv a0, s6
+; CHECK-NEXT: call __gnu_h2f_ieee at plt
+; CHECK-NEXT: fmv.w.x ft0, a0
+; CHECK-NEXT: fcvt.l.s a0, ft0, rtz
+; CHECK-NEXT: lui a1, 16
+; CHECK-NEXT: addiw s1, a1, -1
+; CHECK-NEXT: blt a0, s1, .LBB44_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: mv a0, s1
+; CHECK-NEXT: .LBB44_2: # %entry
+; CHECK-NEXT: fmv.w.x ft1, s7
+; CHECK-NEXT: flw ft0, 4(sp) # 4-byte Folded Reload
+; CHECK-NEXT: fcvt.l.s a1, ft0, rtz
+; CHECK-NEXT: blt s9, s1, .LBB44_4
+; CHECK-NEXT: # %bb.3: # %entry
+; CHECK-NEXT: mv s9, s1
+; CHECK-NEXT: .LBB44_4: # %entry
+; CHECK-NEXT: fmv.w.x ft0, s5
+; CHECK-NEXT: fcvt.l.s a2, ft1, rtz
+; CHECK-NEXT: blt a1, s1, .LBB44_6
+; CHECK-NEXT: # %bb.5: # %entry
+; CHECK-NEXT: mv a1, s1
+; CHECK-NEXT: .LBB44_6: # %entry
+; CHECK-NEXT: fmv.w.x ft1, s4
+; CHECK-NEXT: fcvt.l.s a3, ft0, rtz
+; CHECK-NEXT: blt a2, s1, .LBB44_8
+; CHECK-NEXT: # %bb.7: # %entry
+; CHECK-NEXT: mv a2, s1
+; CHECK-NEXT: .LBB44_8: # %entry
+; CHECK-NEXT: fmv.w.x ft0, s3
+; CHECK-NEXT: fcvt.l.s a4, ft1, rtz
+; CHECK-NEXT: blt a3, s1, .LBB44_10
+; CHECK-NEXT: # %bb.9: # %entry
+; CHECK-NEXT: mv a3, s1
+; CHECK-NEXT: .LBB44_10: # %entry
+; CHECK-NEXT: fmv.w.x ft1, s2
+; CHECK-NEXT: fcvt.l.s a5, ft0, rtz
+; CHECK-NEXT: bge a4, s1, .LBB44_23
+; CHECK-NEXT: # %bb.11: # %entry
+; CHECK-NEXT: fcvt.l.s s0, ft1, rtz
+; CHECK-NEXT: bge a5, s1, .LBB44_24
+; CHECK-NEXT: .LBB44_12: # %entry
+; CHECK-NEXT: bge s0, s1, .LBB44_25
+; CHECK-NEXT: .LBB44_13: # %entry
+; CHECK-NEXT: blez s0, .LBB44_26
+; CHECK-NEXT: .LBB44_14: # %entry
+; CHECK-NEXT: blez a5, .LBB44_27
+; CHECK-NEXT: .LBB44_15: # %entry
+; CHECK-NEXT: blez a4, .LBB44_28
+; CHECK-NEXT: .LBB44_16: # %entry
+; CHECK-NEXT: blez a3, .LBB44_29
+; CHECK-NEXT: .LBB44_17: # %entry
+; CHECK-NEXT: blez a2, .LBB44_30
+; CHECK-NEXT: .LBB44_18: # %entry
+; CHECK-NEXT: blez a1, .LBB44_31
+; CHECK-NEXT: .LBB44_19: # %entry
+; CHECK-NEXT: blez s9, .LBB44_32
+; CHECK-NEXT: .LBB44_20: # %entry
+; CHECK-NEXT: bgtz a0, .LBB44_22
+; CHECK-NEXT: .LBB44_21: # %entry
+; CHECK-NEXT: li a0, 0
+; CHECK-NEXT: .LBB44_22: # %entry
+; CHECK-NEXT: sh a0, 14(s8)
+; CHECK-NEXT: sh s9, 12(s8)
+; CHECK-NEXT: sh a1, 10(s8)
+; CHECK-NEXT: sh a2, 8(s8)
+; CHECK-NEXT: sh a3, 6(s8)
+; CHECK-NEXT: sh a4, 4(s8)
+; CHECK-NEXT: sh a5, 2(s8)
+; CHECK-NEXT: sh s0, 0(s8)
+; CHECK-NEXT: ld ra, 88(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 80(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s1, 72(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s2, 64(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s3, 56(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s4, 48(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s5, 40(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s6, 32(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s7, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s8, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s9, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 96
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB44_23: # %entry
+; CHECK-NEXT: mv a4, s1
+; CHECK-NEXT: fcvt.l.s s0, ft1, rtz
+; CHECK-NEXT: blt a5, s1, .LBB44_12
+; CHECK-NEXT: .LBB44_24: # %entry
+; CHECK-NEXT: mv a5, s1
+; CHECK-NEXT: blt s0, s1, .LBB44_13
+; CHECK-NEXT: .LBB44_25: # %entry
+; CHECK-NEXT: mv s0, s1
+; CHECK-NEXT: bgtz s0, .LBB44_14
+; CHECK-NEXT: .LBB44_26: # %entry
+; CHECK-NEXT: li s0, 0
+; CHECK-NEXT: bgtz a5, .LBB44_15
+; CHECK-NEXT: .LBB44_27: # %entry
+; CHECK-NEXT: li a5, 0
+; CHECK-NEXT: bgtz a4, .LBB44_16
+; CHECK-NEXT: .LBB44_28: # %entry
+; CHECK-NEXT: li a4, 0
+; CHECK-NEXT: bgtz a3, .LBB44_17
+; CHECK-NEXT: .LBB44_29: # %entry
+; CHECK-NEXT: li a3, 0
+; CHECK-NEXT: bgtz a2, .LBB44_18
+; CHECK-NEXT: .LBB44_30: # %entry
+; CHECK-NEXT: li a2, 0
+; CHECK-NEXT: bgtz a1, .LBB44_19
+; CHECK-NEXT: .LBB44_31: # %entry
+; CHECK-NEXT: li a1, 0
+; CHECK-NEXT: bgtz s9, .LBB44_20
+; CHECK-NEXT: .LBB44_32: # %entry
+; CHECK-NEXT: li s9, 0
+; CHECK-NEXT: blez a0, .LBB44_21
+; CHECK-NEXT: j .LBB44_22
+entry:
+ %conv = fptosi <8 x half> %x to <8 x i32>
+ %spec.store.select = call <8 x i32> @llvm.smin.v8i32(<8 x i32> %conv, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>)
+ %spec.store.select7 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> %spec.store.select, <8 x i32> zeroinitializer)
+ %conv6 = trunc <8 x i32> %spec.store.select7 to <8 x i16>
+ ret <8 x i16> %conv6
+}
+
+; i64 saturate
+
+; Signed i64 saturation through i128: each double goes through the __fixdfti
+; libcall (f64 -> i128 as an a0/a1 register pair), then smin/smax clamp to
+; [INT64_MIN, INT64_MAX] before trunc to <2 x i64>. CHECK lines autogenerated.
+define <2 x i64> @stest_f64i64_mm(<2 x double> %x) {
+; CHECK-LABEL: stest_f64i64_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -32
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset ra, -8
+; CHECK-NEXT: .cfi_offset s0, -16
+; CHECK-NEXT: .cfi_offset s1, -24
+; CHECK-NEXT: .cfi_offset s2, -32
+; CHECK-NEXT: mv s2, a1
+; CHECK-NEXT: call __fixdfti at plt
+; CHECK-NEXT: mv s0, a0
+; CHECK-NEXT: mv s1, a1
+; CHECK-NEXT: mv a0, s2
+; CHECK-NEXT: call __fixdfti at plt
+; CHECK-NEXT: mv a2, a0
+; CHECK-NEXT: li a0, -1
+; CHECK-NEXT: srli a3, a0, 1
+; CHECK-NEXT: mv a4, a2
+; CHECK-NEXT: bgez a1, .LBB45_17
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: bgeu a2, a3, .LBB45_18
+; CHECK-NEXT: .LBB45_2: # %entry
+; CHECK-NEXT: bnez a1, .LBB45_19
+; CHECK-NEXT: .LBB45_3: # %entry
+; CHECK-NEXT: mv a4, s0
+; CHECK-NEXT: bgez s1, .LBB45_20
+; CHECK-NEXT: .LBB45_4: # %entry
+; CHECK-NEXT: bgeu s0, a3, .LBB45_21
+; CHECK-NEXT: .LBB45_5: # %entry
+; CHECK-NEXT: bnez s1, .LBB45_22
+; CHECK-NEXT: .LBB45_6: # %entry
+; CHECK-NEXT: bgez a1, .LBB45_23
+; CHECK-NEXT: .LBB45_7: # %entry
+; CHECK-NEXT: bltz s1, .LBB45_9
+; CHECK-NEXT: .LBB45_8: # %entry
+; CHECK-NEXT: li s1, 0
+; CHECK-NEXT: .LBB45_9: # %entry
+; CHECK-NEXT: slli a3, a0, 63
+; CHECK-NEXT: mv a4, s0
+; CHECK-NEXT: bltz s1, .LBB45_24
+; CHECK-NEXT: # %bb.10: # %entry
+; CHECK-NEXT: bgeu a3, s0, .LBB45_25
+; CHECK-NEXT: .LBB45_11: # %entry
+; CHECK-NEXT: bne s1, a0, .LBB45_26
+; CHECK-NEXT: .LBB45_12: # %entry
+; CHECK-NEXT: mv a4, a2
+; CHECK-NEXT: bltz a1, .LBB45_27
+; CHECK-NEXT: .LBB45_13: # %entry
+; CHECK-NEXT: bgeu a3, a2, .LBB45_28
+; CHECK-NEXT: .LBB45_14: # %entry
+; CHECK-NEXT: beq a1, a0, .LBB45_16
+; CHECK-NEXT: .LBB45_15: # %entry
+; CHECK-NEXT: mv a2, a4
+; CHECK-NEXT: .LBB45_16: # %entry
+; CHECK-NEXT: mv a0, s0
+; CHECK-NEXT: mv a1, a2
+; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 32
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB45_17: # %entry
+; CHECK-NEXT: mv a4, a3
+; CHECK-NEXT: bltu a2, a3, .LBB45_2
+; CHECK-NEXT: .LBB45_18: # %entry
+; CHECK-NEXT: mv a2, a3
+; CHECK-NEXT: beqz a1, .LBB45_3
+; CHECK-NEXT: .LBB45_19: # %entry
+; CHECK-NEXT: mv a2, a4
+; CHECK-NEXT: mv a4, s0
+; CHECK-NEXT: bltz s1, .LBB45_4
+; CHECK-NEXT: .LBB45_20: # %entry
+; CHECK-NEXT: mv a4, a3
+; CHECK-NEXT: bltu s0, a3, .LBB45_5
+; CHECK-NEXT: .LBB45_21: # %entry
+; CHECK-NEXT: mv s0, a3
+; CHECK-NEXT: beqz s1, .LBB45_6
+; CHECK-NEXT: .LBB45_22: # %entry
+; CHECK-NEXT: mv s0, a4
+; CHECK-NEXT: bltz a1, .LBB45_7
+; CHECK-NEXT: .LBB45_23: # %entry
+; CHECK-NEXT: li a1, 0
+; CHECK-NEXT: bgez s1, .LBB45_8
+; CHECK-NEXT: j .LBB45_9
+; CHECK-NEXT: .LBB45_24: # %entry
+; CHECK-NEXT: mv a4, a3
+; CHECK-NEXT: bltu a3, s0, .LBB45_11
+; CHECK-NEXT: .LBB45_25: # %entry
+; CHECK-NEXT: mv s0, a3
+; CHECK-NEXT: beq s1, a0, .LBB45_12
+; CHECK-NEXT: .LBB45_26: # %entry
+; CHECK-NEXT: mv s0, a4
+; CHECK-NEXT: mv a4, a2
+; CHECK-NEXT: bgez a1, .LBB45_13
+; CHECK-NEXT: .LBB45_27: # %entry
+; CHECK-NEXT: mv a4, a3
+; CHECK-NEXT: bltu a3, a2, .LBB45_14
+; CHECK-NEXT: .LBB45_28: # %entry
+; CHECK-NEXT: mv a2, a3
+; CHECK-NEXT: bne a1, a0, .LBB45_15
+; CHECK-NEXT: j .LBB45_16
+entry:
+ %conv = fptosi <2 x double> %x to <2 x i128>
+ %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 9223372036854775807, i128 9223372036854775807>)
+ %spec.store.select7 = call <2 x i128> @llvm.smax.v2i128(<2 x i128> %spec.store.select, <2 x i128> <i128 -9223372036854775808, i128 -9223372036854775808>)
+ %conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+; Unsigned i64 saturation through i128: fptoui via the __fixunsdfti libcall,
+; then llvm.umin before trunc to <2 x i64>.
+; NOTE(review): the umin bound 18446744073709551616 is 2^64, one more than
+; UINT64_MAX (2^64-1 = 18446744073709551615); the generated code accordingly
+; compares the i128 high word against 1 rather than 0. Confirm whether the
+; off-by-one is intended; fixing the constant requires regenerating the CHECK
+; lines with utils/update_llc_test_checks.py.
+define <2 x i64> @utest_f64i64_mm(<2 x double> %x) {
+; CHECK-LABEL: utest_f64i64_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -32
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset ra, -8
+; CHECK-NEXT: .cfi_offset s0, -16
+; CHECK-NEXT: .cfi_offset s1, -24
+; CHECK-NEXT: .cfi_offset s2, -32
+; CHECK-NEXT: mv s0, a0
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: call __fixunsdfti at plt
+; CHECK-NEXT: mv s2, a0
+; CHECK-NEXT: mv s1, a1
+; CHECK-NEXT: mv a0, s0
+; CHECK-NEXT: call __fixunsdfti at plt
+; CHECK-NEXT: mv a2, a0
+; CHECK-NEXT: mv a3, a1
+; CHECK-NEXT: li a1, 0
+; CHECK-NEXT: beqz a3, .LBB46_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: .LBB46_2: # %entry
+; CHECK-NEXT: li a4, 1
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: bne a3, a4, .LBB46_7
+; CHECK-NEXT: # %bb.3: # %entry
+; CHECK-NEXT: bne s1, a1, .LBB46_8
+; CHECK-NEXT: .LBB46_4: # %entry
+; CHECK-NEXT: beq s1, a4, .LBB46_6
+; CHECK-NEXT: .LBB46_5: # %entry
+; CHECK-NEXT: mv a1, s2
+; CHECK-NEXT: .LBB46_6: # %entry
+; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 32
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB46_7: # %entry
+; CHECK-NEXT: mv a0, a2
+; CHECK-NEXT: beq s1, a1, .LBB46_4
+; CHECK-NEXT: .LBB46_8: # %entry
+; CHECK-NEXT: mv s2, a1
+; CHECK-NEXT: bne s1, a4, .LBB46_5
+; CHECK-NEXT: j .LBB46_6
+entry:
+ %conv = fptoui <2 x double> %x to <2 x i128>
+ %spec.store.select = call <2 x i128> @llvm.umin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)
+ %conv6 = trunc <2 x i128> %spec.store.select to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) {
+; CHECK-LABEL: ustest_f64i64_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -32
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset ra, -8
+; CHECK-NEXT: .cfi_offset s0, -16
+; CHECK-NEXT: .cfi_offset s1, -24
+; CHECK-NEXT: .cfi_offset s2, -32
+; CHECK-NEXT: mv s0, a1
+; CHECK-NEXT: call __fixdfti@plt
+; CHECK-NEXT: mv s2, a0
+; CHECK-NEXT: mv s1, a1
+; CHECK-NEXT: mv a0, s0
+; CHECK-NEXT: call __fixdfti@plt
+; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: li a5, 1
+; CHECK-NEXT: mv a3, a1
+; CHECK-NEXT: bgtz a1, .LBB47_12
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: mv a4, s1
+; CHECK-NEXT: bgtz s1, .LBB47_13
+; CHECK-NEXT: .LBB47_2: # %entry
+; CHECK-NEXT: bgtz a2, .LBB47_14
+; CHECK-NEXT: .LBB47_3: # %entry
+; CHECK-NEXT: li a1, 0
+; CHECK-NEXT: bne a2, a5, .LBB47_15
+; CHECK-NEXT: .LBB47_4: # %entry
+; CHECK-NEXT: bgtz s1, .LBB47_16
+; CHECK-NEXT: .LBB47_5: # %entry
+; CHECK-NEXT: li a0, 0
+; CHECK-NEXT: bne s1, a5, .LBB47_17
+; CHECK-NEXT: .LBB47_6: # %entry
+; CHECK-NEXT: mv a2, a0
+; CHECK-NEXT: blez a4, .LBB47_18
+; CHECK-NEXT: .LBB47_7: # %entry
+; CHECK-NEXT: bnez a4, .LBB47_19
+; CHECK-NEXT: .LBB47_8: # %entry
+; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: blez a3, .LBB47_20
+; CHECK-NEXT: .LBB47_9: # %entry
+; CHECK-NEXT: beqz a3, .LBB47_11
+; CHECK-NEXT: .LBB47_10: # %entry
+; CHECK-NEXT: mv a1, a2
+; CHECK-NEXT: .LBB47_11: # %entry
+; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 32
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB47_12: # %entry
+; CHECK-NEXT: li a3, 1
+; CHECK-NEXT: mv a4, s1
+; CHECK-NEXT: blez s1, .LBB47_2
+; CHECK-NEXT: .LBB47_13: # %entry
+; CHECK-NEXT: li a4, 1
+; CHECK-NEXT: blez a2, .LBB47_3
+; CHECK-NEXT: .LBB47_14: # %entry
+; CHECK-NEXT: li a0, 0
+; CHECK-NEXT: li a1, 0
+; CHECK-NEXT: beq a2, a5, .LBB47_4
+; CHECK-NEXT: .LBB47_15: # %entry
+; CHECK-NEXT: mv a1, a0
+; CHECK-NEXT: blez s1, .LBB47_5
+; CHECK-NEXT: .LBB47_16: # %entry
+; CHECK-NEXT: li s2, 0
+; CHECK-NEXT: li a0, 0
+; CHECK-NEXT: beq s1, a5, .LBB47_6
+; CHECK-NEXT: .LBB47_17: # %entry
+; CHECK-NEXT: mv a0, s2
+; CHECK-NEXT: mv a2, a0
+; CHECK-NEXT: bgtz a4, .LBB47_7
+; CHECK-NEXT: .LBB47_18: # %entry
+; CHECK-NEXT: li a2, 0
+; CHECK-NEXT: beqz a4, .LBB47_8
+; CHECK-NEXT: .LBB47_19: # %entry
+; CHECK-NEXT: mv a0, a2
+; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: bgtz a3, .LBB47_9
+; CHECK-NEXT: .LBB47_20: # %entry
+; CHECK-NEXT: li a2, 0
+; CHECK-NEXT: bnez a3, .LBB47_10
+; CHECK-NEXT: j .LBB47_11
+entry:
+ %conv = fptosi <2 x double> %x to <2 x i128>
+ %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)
+ %spec.store.select7 = call <2 x i128> @llvm.smax.v2i128(<2 x i128> %spec.store.select, <2 x i128> zeroinitializer)
+ %conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+define <2 x i64> @stest_f32i64_mm(<2 x float> %x) {
+; CHECK-LABEL: stest_f32i64_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -32
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset ra, -8
+; CHECK-NEXT: .cfi_offset s0, -16
+; CHECK-NEXT: .cfi_offset s1, -24
+; CHECK-NEXT: .cfi_offset s2, -32
+; CHECK-NEXT: mv s2, a1
+; CHECK-NEXT: call __fixsfti@plt
+; CHECK-NEXT: mv s0, a0
+; CHECK-NEXT: mv s1, a1
+; CHECK-NEXT: mv a0, s2
+; CHECK-NEXT: call __fixsfti@plt
+; CHECK-NEXT: mv a2, a0
+; CHECK-NEXT: li a0, -1
+; CHECK-NEXT: srli a3, a0, 1
+; CHECK-NEXT: mv a4, a2
+; CHECK-NEXT: bgez a1, .LBB48_17
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: bgeu a2, a3, .LBB48_18
+; CHECK-NEXT: .LBB48_2: # %entry
+; CHECK-NEXT: bnez a1, .LBB48_19
+; CHECK-NEXT: .LBB48_3: # %entry
+; CHECK-NEXT: mv a4, s0
+; CHECK-NEXT: bgez s1, .LBB48_20
+; CHECK-NEXT: .LBB48_4: # %entry
+; CHECK-NEXT: bgeu s0, a3, .LBB48_21
+; CHECK-NEXT: .LBB48_5: # %entry
+; CHECK-NEXT: bnez s1, .LBB48_22
+; CHECK-NEXT: .LBB48_6: # %entry
+; CHECK-NEXT: bgez a1, .LBB48_23
+; CHECK-NEXT: .LBB48_7: # %entry
+; CHECK-NEXT: bltz s1, .LBB48_9
+; CHECK-NEXT: .LBB48_8: # %entry
+; CHECK-NEXT: li s1, 0
+; CHECK-NEXT: .LBB48_9: # %entry
+; CHECK-NEXT: slli a3, a0, 63
+; CHECK-NEXT: mv a4, s0
+; CHECK-NEXT: bltz s1, .LBB48_24
+; CHECK-NEXT: # %bb.10: # %entry
+; CHECK-NEXT: bgeu a3, s0, .LBB48_25
+; CHECK-NEXT: .LBB48_11: # %entry
+; CHECK-NEXT: bne s1, a0, .LBB48_26
+; CHECK-NEXT: .LBB48_12: # %entry
+; CHECK-NEXT: mv a4, a2
+; CHECK-NEXT: bltz a1, .LBB48_27
+; CHECK-NEXT: .LBB48_13: # %entry
+; CHECK-NEXT: bgeu a3, a2, .LBB48_28
+; CHECK-NEXT: .LBB48_14: # %entry
+; CHECK-NEXT: beq a1, a0, .LBB48_16
+; CHECK-NEXT: .LBB48_15: # %entry
+; CHECK-NEXT: mv a2, a4
+; CHECK-NEXT: .LBB48_16: # %entry
+; CHECK-NEXT: mv a0, s0
+; CHECK-NEXT: mv a1, a2
+; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 32
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB48_17: # %entry
+; CHECK-NEXT: mv a4, a3
+; CHECK-NEXT: bltu a2, a3, .LBB48_2
+; CHECK-NEXT: .LBB48_18: # %entry
+; CHECK-NEXT: mv a2, a3
+; CHECK-NEXT: beqz a1, .LBB48_3
+; CHECK-NEXT: .LBB48_19: # %entry
+; CHECK-NEXT: mv a2, a4
+; CHECK-NEXT: mv a4, s0
+; CHECK-NEXT: bltz s1, .LBB48_4
+; CHECK-NEXT: .LBB48_20: # %entry
+; CHECK-NEXT: mv a4, a3
+; CHECK-NEXT: bltu s0, a3, .LBB48_5
+; CHECK-NEXT: .LBB48_21: # %entry
+; CHECK-NEXT: mv s0, a3
+; CHECK-NEXT: beqz s1, .LBB48_6
+; CHECK-NEXT: .LBB48_22: # %entry
+; CHECK-NEXT: mv s0, a4
+; CHECK-NEXT: bltz a1, .LBB48_7
+; CHECK-NEXT: .LBB48_23: # %entry
+; CHECK-NEXT: li a1, 0
+; CHECK-NEXT: bgez s1, .LBB48_8
+; CHECK-NEXT: j .LBB48_9
+; CHECK-NEXT: .LBB48_24: # %entry
+; CHECK-NEXT: mv a4, a3
+; CHECK-NEXT: bltu a3, s0, .LBB48_11
+; CHECK-NEXT: .LBB48_25: # %entry
+; CHECK-NEXT: mv s0, a3
+; CHECK-NEXT: beq s1, a0, .LBB48_12
+; CHECK-NEXT: .LBB48_26: # %entry
+; CHECK-NEXT: mv s0, a4
+; CHECK-NEXT: mv a4, a2
+; CHECK-NEXT: bgez a1, .LBB48_13
+; CHECK-NEXT: .LBB48_27: # %entry
+; CHECK-NEXT: mv a4, a3
+; CHECK-NEXT: bltu a3, a2, .LBB48_14
+; CHECK-NEXT: .LBB48_28: # %entry
+; CHECK-NEXT: mv a2, a3
+; CHECK-NEXT: bne a1, a0, .LBB48_15
+; CHECK-NEXT: j .LBB48_16
+entry:
+ %conv = fptosi <2 x float> %x to <2 x i128>
+ %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 9223372036854775807, i128 9223372036854775807>)
+ %spec.store.select7 = call <2 x i128> @llvm.smax.v2i128(<2 x i128> %spec.store.select, <2 x i128> <i128 -9223372036854775808, i128 -9223372036854775808>)
+ %conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+define <2 x i64> @utest_f32i64_mm(<2 x float> %x) {
+; CHECK-LABEL: utest_f32i64_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -32
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset ra, -8
+; CHECK-NEXT: .cfi_offset s0, -16
+; CHECK-NEXT: .cfi_offset s1, -24
+; CHECK-NEXT: .cfi_offset s2, -32
+; CHECK-NEXT: mv s0, a0
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: call __fixunssfti@plt
+; CHECK-NEXT: mv s2, a0
+; CHECK-NEXT: mv s1, a1
+; CHECK-NEXT: mv a0, s0
+; CHECK-NEXT: call __fixunssfti@plt
+; CHECK-NEXT: mv a2, a0
+; CHECK-NEXT: mv a3, a1
+; CHECK-NEXT: li a1, 0
+; CHECK-NEXT: beqz a3, .LBB49_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: .LBB49_2: # %entry
+; CHECK-NEXT: li a4, 1
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: bne a3, a4, .LBB49_7
+; CHECK-NEXT: # %bb.3: # %entry
+; CHECK-NEXT: bne s1, a1, .LBB49_8
+; CHECK-NEXT: .LBB49_4: # %entry
+; CHECK-NEXT: beq s1, a4, .LBB49_6
+; CHECK-NEXT: .LBB49_5: # %entry
+; CHECK-NEXT: mv a1, s2
+; CHECK-NEXT: .LBB49_6: # %entry
+; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 32
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB49_7: # %entry
+; CHECK-NEXT: mv a0, a2
+; CHECK-NEXT: beq s1, a1, .LBB49_4
+; CHECK-NEXT: .LBB49_8: # %entry
+; CHECK-NEXT: mv s2, a1
+; CHECK-NEXT: bne s1, a4, .LBB49_5
+; CHECK-NEXT: j .LBB49_6
+entry:
+ %conv = fptoui <2 x float> %x to <2 x i128>
+ %spec.store.select = call <2 x i128> @llvm.umin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)
+ %conv6 = trunc <2 x i128> %spec.store.select to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) {
+; CHECK-LABEL: ustest_f32i64_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -32
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset ra, -8
+; CHECK-NEXT: .cfi_offset s0, -16
+; CHECK-NEXT: .cfi_offset s1, -24
+; CHECK-NEXT: .cfi_offset s2, -32
+; CHECK-NEXT: mv s0, a1
+; CHECK-NEXT: call __fixsfti@plt
+; CHECK-NEXT: mv s2, a0
+; CHECK-NEXT: mv s1, a1
+; CHECK-NEXT: mv a0, s0
+; CHECK-NEXT: call __fixsfti@plt
+; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: li a5, 1
+; CHECK-NEXT: mv a3, a1
+; CHECK-NEXT: bgtz a1, .LBB50_12
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: mv a4, s1
+; CHECK-NEXT: bgtz s1, .LBB50_13
+; CHECK-NEXT: .LBB50_2: # %entry
+; CHECK-NEXT: bgtz a2, .LBB50_14
+; CHECK-NEXT: .LBB50_3: # %entry
+; CHECK-NEXT: li a1, 0
+; CHECK-NEXT: bne a2, a5, .LBB50_15
+; CHECK-NEXT: .LBB50_4: # %entry
+; CHECK-NEXT: bgtz s1, .LBB50_16
+; CHECK-NEXT: .LBB50_5: # %entry
+; CHECK-NEXT: li a0, 0
+; CHECK-NEXT: bne s1, a5, .LBB50_17
+; CHECK-NEXT: .LBB50_6: # %entry
+; CHECK-NEXT: mv a2, a0
+; CHECK-NEXT: blez a4, .LBB50_18
+; CHECK-NEXT: .LBB50_7: # %entry
+; CHECK-NEXT: bnez a4, .LBB50_19
+; CHECK-NEXT: .LBB50_8: # %entry
+; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: blez a3, .LBB50_20
+; CHECK-NEXT: .LBB50_9: # %entry
+; CHECK-NEXT: beqz a3, .LBB50_11
+; CHECK-NEXT: .LBB50_10: # %entry
+; CHECK-NEXT: mv a1, a2
+; CHECK-NEXT: .LBB50_11: # %entry
+; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 32
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB50_12: # %entry
+; CHECK-NEXT: li a3, 1
+; CHECK-NEXT: mv a4, s1
+; CHECK-NEXT: blez s1, .LBB50_2
+; CHECK-NEXT: .LBB50_13: # %entry
+; CHECK-NEXT: li a4, 1
+; CHECK-NEXT: blez a2, .LBB50_3
+; CHECK-NEXT: .LBB50_14: # %entry
+; CHECK-NEXT: li a0, 0
+; CHECK-NEXT: li a1, 0
+; CHECK-NEXT: beq a2, a5, .LBB50_4
+; CHECK-NEXT: .LBB50_15: # %entry
+; CHECK-NEXT: mv a1, a0
+; CHECK-NEXT: blez s1, .LBB50_5
+; CHECK-NEXT: .LBB50_16: # %entry
+; CHECK-NEXT: li s2, 0
+; CHECK-NEXT: li a0, 0
+; CHECK-NEXT: beq s1, a5, .LBB50_6
+; CHECK-NEXT: .LBB50_17: # %entry
+; CHECK-NEXT: mv a0, s2
+; CHECK-NEXT: mv a2, a0
+; CHECK-NEXT: bgtz a4, .LBB50_7
+; CHECK-NEXT: .LBB50_18: # %entry
+; CHECK-NEXT: li a2, 0
+; CHECK-NEXT: beqz a4, .LBB50_8
+; CHECK-NEXT: .LBB50_19: # %entry
+; CHECK-NEXT: mv a0, a2
+; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: bgtz a3, .LBB50_9
+; CHECK-NEXT: .LBB50_20: # %entry
+; CHECK-NEXT: li a2, 0
+; CHECK-NEXT: bnez a3, .LBB50_10
+; CHECK-NEXT: j .LBB50_11
+entry:
+ %conv = fptosi <2 x float> %x to <2 x i128>
+ %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)
+ %spec.store.select7 = call <2 x i128> @llvm.smax.v2i128(<2 x i128> %spec.store.select, <2 x i128> zeroinitializer)
+ %conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+define <2 x i64> @stest_f16i64_mm(<2 x half> %x) {
+; CHECK-LABEL: stest_f16i64_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -32
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset ra, -8
+; CHECK-NEXT: .cfi_offset s0, -16
+; CHECK-NEXT: .cfi_offset s1, -24
+; CHECK-NEXT: .cfi_offset s2, -32
+; CHECK-NEXT: mv s2, a1
+; CHECK-NEXT: call __gnu_h2f_ieee@plt
+; CHECK-NEXT: call __fixsfti@plt
+; CHECK-NEXT: mv s0, a0
+; CHECK-NEXT: mv s1, a1
+; CHECK-NEXT: mv a0, s2
+; CHECK-NEXT: call __gnu_h2f_ieee@plt
+; CHECK-NEXT: call __fixsfti@plt
+; CHECK-NEXT: mv a2, a0
+; CHECK-NEXT: li a0, -1
+; CHECK-NEXT: srli a3, a0, 1
+; CHECK-NEXT: mv a4, a2
+; CHECK-NEXT: bgez a1, .LBB51_17
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: bgeu a2, a3, .LBB51_18
+; CHECK-NEXT: .LBB51_2: # %entry
+; CHECK-NEXT: bnez a1, .LBB51_19
+; CHECK-NEXT: .LBB51_3: # %entry
+; CHECK-NEXT: mv a4, s0
+; CHECK-NEXT: bgez s1, .LBB51_20
+; CHECK-NEXT: .LBB51_4: # %entry
+; CHECK-NEXT: bgeu s0, a3, .LBB51_21
+; CHECK-NEXT: .LBB51_5: # %entry
+; CHECK-NEXT: bnez s1, .LBB51_22
+; CHECK-NEXT: .LBB51_6: # %entry
+; CHECK-NEXT: bgez a1, .LBB51_23
+; CHECK-NEXT: .LBB51_7: # %entry
+; CHECK-NEXT: bltz s1, .LBB51_9
+; CHECK-NEXT: .LBB51_8: # %entry
+; CHECK-NEXT: li s1, 0
+; CHECK-NEXT: .LBB51_9: # %entry
+; CHECK-NEXT: slli a3, a0, 63
+; CHECK-NEXT: mv a4, s0
+; CHECK-NEXT: bltz s1, .LBB51_24
+; CHECK-NEXT: # %bb.10: # %entry
+; CHECK-NEXT: bgeu a3, s0, .LBB51_25
+; CHECK-NEXT: .LBB51_11: # %entry
+; CHECK-NEXT: bne s1, a0, .LBB51_26
+; CHECK-NEXT: .LBB51_12: # %entry
+; CHECK-NEXT: mv a4, a2
+; CHECK-NEXT: bltz a1, .LBB51_27
+; CHECK-NEXT: .LBB51_13: # %entry
+; CHECK-NEXT: bgeu a3, a2, .LBB51_28
+; CHECK-NEXT: .LBB51_14: # %entry
+; CHECK-NEXT: beq a1, a0, .LBB51_16
+; CHECK-NEXT: .LBB51_15: # %entry
+; CHECK-NEXT: mv a2, a4
+; CHECK-NEXT: .LBB51_16: # %entry
+; CHECK-NEXT: mv a0, s0
+; CHECK-NEXT: mv a1, a2
+; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 32
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB51_17: # %entry
+; CHECK-NEXT: mv a4, a3
+; CHECK-NEXT: bltu a2, a3, .LBB51_2
+; CHECK-NEXT: .LBB51_18: # %entry
+; CHECK-NEXT: mv a2, a3
+; CHECK-NEXT: beqz a1, .LBB51_3
+; CHECK-NEXT: .LBB51_19: # %entry
+; CHECK-NEXT: mv a2, a4
+; CHECK-NEXT: mv a4, s0
+; CHECK-NEXT: bltz s1, .LBB51_4
+; CHECK-NEXT: .LBB51_20: # %entry
+; CHECK-NEXT: mv a4, a3
+; CHECK-NEXT: bltu s0, a3, .LBB51_5
+; CHECK-NEXT: .LBB51_21: # %entry
+; CHECK-NEXT: mv s0, a3
+; CHECK-NEXT: beqz s1, .LBB51_6
+; CHECK-NEXT: .LBB51_22: # %entry
+; CHECK-NEXT: mv s0, a4
+; CHECK-NEXT: bltz a1, .LBB51_7
+; CHECK-NEXT: .LBB51_23: # %entry
+; CHECK-NEXT: li a1, 0
+; CHECK-NEXT: bgez s1, .LBB51_8
+; CHECK-NEXT: j .LBB51_9
+; CHECK-NEXT: .LBB51_24: # %entry
+; CHECK-NEXT: mv a4, a3
+; CHECK-NEXT: bltu a3, s0, .LBB51_11
+; CHECK-NEXT: .LBB51_25: # %entry
+; CHECK-NEXT: mv s0, a3
+; CHECK-NEXT: beq s1, a0, .LBB51_12
+; CHECK-NEXT: .LBB51_26: # %entry
+; CHECK-NEXT: mv s0, a4
+; CHECK-NEXT: mv a4, a2
+; CHECK-NEXT: bgez a1, .LBB51_13
+; CHECK-NEXT: .LBB51_27: # %entry
+; CHECK-NEXT: mv a4, a3
+; CHECK-NEXT: bltu a3, a2, .LBB51_14
+; CHECK-NEXT: .LBB51_28: # %entry
+; CHECK-NEXT: mv a2, a3
+; CHECK-NEXT: bne a1, a0, .LBB51_15
+; CHECK-NEXT: j .LBB51_16
+entry:
+ %conv = fptosi <2 x half> %x to <2 x i128>
+ %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 9223372036854775807, i128 9223372036854775807>)
+ %spec.store.select7 = call <2 x i128> @llvm.smax.v2i128(<2 x i128> %spec.store.select, <2 x i128> <i128 -9223372036854775808, i128 -9223372036854775808>)
+ %conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
+; CHECK-LABEL: utesth_f16i64_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -32
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset ra, -8
+; CHECK-NEXT: .cfi_offset s0, -16
+; CHECK-NEXT: .cfi_offset s1, -24
+; CHECK-NEXT: .cfi_offset s2, -32
+; CHECK-NEXT: mv s0, a0
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: call __gnu_h2f_ieee@plt
+; CHECK-NEXT: call __fixunssfti@plt
+; CHECK-NEXT: mv s2, a0
+; CHECK-NEXT: mv s1, a1
+; CHECK-NEXT: mv a0, s0
+; CHECK-NEXT: call __gnu_h2f_ieee@plt
+; CHECK-NEXT: call __fixunssfti@plt
+; CHECK-NEXT: mv a2, a0
+; CHECK-NEXT: mv a3, a1
+; CHECK-NEXT: li a1, 0
+; CHECK-NEXT: beqz a3, .LBB52_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: .LBB52_2: # %entry
+; CHECK-NEXT: li a4, 1
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: bne a3, a4, .LBB52_7
+; CHECK-NEXT: # %bb.3: # %entry
+; CHECK-NEXT: bne s1, a1, .LBB52_8
+; CHECK-NEXT: .LBB52_4: # %entry
+; CHECK-NEXT: beq s1, a4, .LBB52_6
+; CHECK-NEXT: .LBB52_5: # %entry
+; CHECK-NEXT: mv a1, s2
+; CHECK-NEXT: .LBB52_6: # %entry
+; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 32
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB52_7: # %entry
+; CHECK-NEXT: mv a0, a2
+; CHECK-NEXT: beq s1, a1, .LBB52_4
+; CHECK-NEXT: .LBB52_8: # %entry
+; CHECK-NEXT: mv s2, a1
+; CHECK-NEXT: bne s1, a4, .LBB52_5
+; CHECK-NEXT: j .LBB52_6
+entry:
+ %conv = fptoui <2 x half> %x to <2 x i128>
+ %spec.store.select = call <2 x i128> @llvm.umin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)
+ %conv6 = trunc <2 x i128> %spec.store.select to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) {
+; CHECK-LABEL: ustest_f16i64_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -32
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset ra, -8
+; CHECK-NEXT: .cfi_offset s0, -16
+; CHECK-NEXT: .cfi_offset s1, -24
+; CHECK-NEXT: .cfi_offset s2, -32
+; CHECK-NEXT: mv s0, a1
+; CHECK-NEXT: call __gnu_h2f_ieee@plt
+; CHECK-NEXT: call __fixsfti@plt
+; CHECK-NEXT: mv s2, a0
+; CHECK-NEXT: mv s1, a1
+; CHECK-NEXT: mv a0, s0
+; CHECK-NEXT: call __gnu_h2f_ieee@plt
+; CHECK-NEXT: call __fixsfti@plt
+; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: li a5, 1
+; CHECK-NEXT: mv a3, a1
+; CHECK-NEXT: bgtz a1, .LBB53_12
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: mv a4, s1
+; CHECK-NEXT: bgtz s1, .LBB53_13
+; CHECK-NEXT: .LBB53_2: # %entry
+; CHECK-NEXT: bgtz a2, .LBB53_14
+; CHECK-NEXT: .LBB53_3: # %entry
+; CHECK-NEXT: li a1, 0
+; CHECK-NEXT: bne a2, a5, .LBB53_15
+; CHECK-NEXT: .LBB53_4: # %entry
+; CHECK-NEXT: bgtz s1, .LBB53_16
+; CHECK-NEXT: .LBB53_5: # %entry
+; CHECK-NEXT: li a0, 0
+; CHECK-NEXT: bne s1, a5, .LBB53_17
+; CHECK-NEXT: .LBB53_6: # %entry
+; CHECK-NEXT: mv a2, a0
+; CHECK-NEXT: blez a4, .LBB53_18
+; CHECK-NEXT: .LBB53_7: # %entry
+; CHECK-NEXT: bnez a4, .LBB53_19
+; CHECK-NEXT: .LBB53_8: # %entry
+; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: blez a3, .LBB53_20
+; CHECK-NEXT: .LBB53_9: # %entry
+; CHECK-NEXT: beqz a3, .LBB53_11
+; CHECK-NEXT: .LBB53_10: # %entry
+; CHECK-NEXT: mv a1, a2
+; CHECK-NEXT: .LBB53_11: # %entry
+; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 32
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB53_12: # %entry
+; CHECK-NEXT: li a3, 1
+; CHECK-NEXT: mv a4, s1
+; CHECK-NEXT: blez s1, .LBB53_2
+; CHECK-NEXT: .LBB53_13: # %entry
+; CHECK-NEXT: li a4, 1
+; CHECK-NEXT: blez a2, .LBB53_3
+; CHECK-NEXT: .LBB53_14: # %entry
+; CHECK-NEXT: li a0, 0
+; CHECK-NEXT: li a1, 0
+; CHECK-NEXT: beq a2, a5, .LBB53_4
+; CHECK-NEXT: .LBB53_15: # %entry
+; CHECK-NEXT: mv a1, a0
+; CHECK-NEXT: blez s1, .LBB53_5
+; CHECK-NEXT: .LBB53_16: # %entry
+; CHECK-NEXT: li s2, 0
+; CHECK-NEXT: li a0, 0
+; CHECK-NEXT: beq s1, a5, .LBB53_6
+; CHECK-NEXT: .LBB53_17: # %entry
+; CHECK-NEXT: mv a0, s2
+; CHECK-NEXT: mv a2, a0
+; CHECK-NEXT: bgtz a4, .LBB53_7
+; CHECK-NEXT: .LBB53_18: # %entry
+; CHECK-NEXT: li a2, 0
+; CHECK-NEXT: beqz a4, .LBB53_8
+; CHECK-NEXT: .LBB53_19: # %entry
+; CHECK-NEXT: mv a0, a2
+; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: bgtz a3, .LBB53_9
+; CHECK-NEXT: .LBB53_20: # %entry
+; CHECK-NEXT: li a2, 0
+; CHECK-NEXT: bnez a3, .LBB53_10
+; CHECK-NEXT: j .LBB53_11
+entry:
+ %conv = fptosi <2 x half> %x to <2 x i128>
+ %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)
+ %spec.store.select7 = call <2 x i128> @llvm.smax.v2i128(<2 x i128> %spec.store.select, <2 x i128> zeroinitializer)
+ %conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+declare <2 x i32> @llvm.smin.v2i32(<2 x i32>, <2 x i32>)
+declare <2 x i32> @llvm.smax.v2i32(<2 x i32>, <2 x i32>)
+declare <2 x i32> @llvm.umin.v2i32(<2 x i32>, <2 x i32>)
+declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.umin.v4i32(<4 x i32>, <4 x i32>)
+declare <8 x i32> @llvm.smin.v8i32(<8 x i32>, <8 x i32>)
+declare <8 x i32> @llvm.smax.v8i32(<8 x i32>, <8 x i32>)
+declare <8 x i32> @llvm.umin.v8i32(<8 x i32>, <8 x i32>)
+declare <2 x i64> @llvm.smin.v2i64(<2 x i64>, <2 x i64>)
+declare <2 x i64> @llvm.smax.v2i64(<2 x i64>, <2 x i64>)
+declare <2 x i64> @llvm.umin.v2i64(<2 x i64>, <2 x i64>)
+declare <4 x i64> @llvm.smin.v4i64(<4 x i64>, <4 x i64>)
+declare <4 x i64> @llvm.smax.v4i64(<4 x i64>, <4 x i64>)
+declare <4 x i64> @llvm.umin.v4i64(<4 x i64>, <4 x i64>)
+declare <2 x i128> @llvm.smin.v2i128(<2 x i128>, <2 x i128>)
+declare <2 x i128> @llvm.smax.v2i128(<2 x i128>, <2 x i128>)
+declare <2 x i128> @llvm.umin.v2i128(<2 x i128>, <2 x i128>)
diff --git a/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll b/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll
new file mode 100644
index 0000000000000..530c7582a26a9
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll
@@ -0,0 +1,2046 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=wasm32-unknown-unknown -mattr=+simd128,+nontrapping-fptoint | FileCheck %s
+
+; i32 saturate
+
+define i32 @stest_f64i32(double %x) {
+; CHECK-LABEL: stest_f64i32:
+; CHECK: .functype stest_f64i32 (f64) -> (i32)
+; CHECK-NEXT: .local i64
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i64.trunc_sat_f64_s
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i64.const 2147483647
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.const 2147483647
+; CHECK-NEXT: i64.lt_s
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i64.const -2147483648
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.const -2147483648
+; CHECK-NEXT: i64.gt_s
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i32.wrap_i64
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi double %x to i64
+ %0 = icmp slt i64 %conv, 2147483647
+ %spec.store.select = select i1 %0, i64 %conv, i64 2147483647
+ %1 = icmp sgt i64 %spec.store.select, -2147483648
+ %spec.store.select7 = select i1 %1, i64 %spec.store.select, i64 -2147483648
+ %conv6 = trunc i64 %spec.store.select7 to i32
+ ret i32 %conv6
+}
+
+define i32 @utest_f64i32(double %x) {
+; CHECK-LABEL: utest_f64i32:
+; CHECK: .functype utest_f64i32 (f64) -> (i32)
+; CHECK-NEXT: .local i64
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i64.trunc_sat_f64_u
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i64.const 4294967295
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.const 4294967295
+; CHECK-NEXT: i64.lt_u
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i32.wrap_i64
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptoui double %x to i64
+ %0 = icmp ult i64 %conv, 4294967295
+ %spec.store.select = select i1 %0, i64 %conv, i64 4294967295
+ %conv6 = trunc i64 %spec.store.select to i32
+ ret i32 %conv6
+}
+
+define i32 @ustest_f64i32(double %x) {
+; CHECK-LABEL: ustest_f64i32:
+; CHECK: .functype ustest_f64i32 (f64) -> (i32)
+; CHECK-NEXT: .local i64
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i64.trunc_sat_f64_s
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i64.const 4294967295
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.const 4294967295
+; CHECK-NEXT: i64.lt_s
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: i64.gt_s
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i32.wrap_i64
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi double %x to i64
+ %0 = icmp slt i64 %conv, 4294967295
+ %spec.store.select = select i1 %0, i64 %conv, i64 4294967295
+ %1 = icmp sgt i64 %spec.store.select, 0
+ %spec.store.select7 = select i1 %1, i64 %spec.store.select, i64 0
+ %conv6 = trunc i64 %spec.store.select7 to i32
+ ret i32 %conv6
+}
+
+define i32 @stest_f32i32(float %x) {
+; CHECK-LABEL: stest_f32i32:
+; CHECK: .functype stest_f32i32 (f32) -> (i32)
+; CHECK-NEXT: .local i64
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i64.trunc_sat_f32_s
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i64.const 2147483647
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.const 2147483647
+; CHECK-NEXT: i64.lt_s
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i64.const -2147483648
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.const -2147483648
+; CHECK-NEXT: i64.gt_s
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i32.wrap_i64
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi float %x to i64
+ %0 = icmp slt i64 %conv, 2147483647
+ %spec.store.select = select i1 %0, i64 %conv, i64 2147483647
+ %1 = icmp sgt i64 %spec.store.select, -2147483648
+ %spec.store.select7 = select i1 %1, i64 %spec.store.select, i64 -2147483648
+ %conv6 = trunc i64 %spec.store.select7 to i32
+ ret i32 %conv6
+}
+
+define i32 @utest_f32i32(float %x) {
+; CHECK-LABEL: utest_f32i32:
+; CHECK: .functype utest_f32i32 (f32) -> (i32)
+; CHECK-NEXT: .local i64
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i64.trunc_sat_f32_u
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i64.const 4294967295
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.const 4294967295
+; CHECK-NEXT: i64.lt_u
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i32.wrap_i64
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptoui float %x to i64
+ %0 = icmp ult i64 %conv, 4294967295
+ %spec.store.select = select i1 %0, i64 %conv, i64 4294967295
+ %conv6 = trunc i64 %spec.store.select to i32
+ ret i32 %conv6
+}
+
+define i32 @ustest_f32i32(float %x) {
+; CHECK-LABEL: ustest_f32i32:
+; CHECK: .functype ustest_f32i32 (f32) -> (i32)
+; CHECK-NEXT: .local i64
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i64.trunc_sat_f32_s
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i64.const 4294967295
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.const 4294967295
+; CHECK-NEXT: i64.lt_s
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: i64.gt_s
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i32.wrap_i64
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi float %x to i64
+ %0 = icmp slt i64 %conv, 4294967295
+ %spec.store.select = select i1 %0, i64 %conv, i64 4294967295
+ %1 = icmp sgt i64 %spec.store.select, 0
+ %spec.store.select7 = select i1 %1, i64 %spec.store.select, i64 0
+ %conv6 = trunc i64 %spec.store.select7 to i32
+ ret i32 %conv6
+}
+
+define i32 @stest_f16i32(half %x) {
+; CHECK-LABEL: stest_f16i32:
+; CHECK: .functype stest_f16i32 (f32) -> (i32)
+; CHECK-NEXT: .local i64
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: i64.trunc_sat_f32_s
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i64.const 2147483647
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.const 2147483647
+; CHECK-NEXT: i64.lt_s
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i64.const -2147483648
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.const -2147483648
+; CHECK-NEXT: i64.gt_s
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i32.wrap_i64
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi half %x to i64
+ %0 = icmp slt i64 %conv, 2147483647
+ %spec.store.select = select i1 %0, i64 %conv, i64 2147483647
+ %1 = icmp sgt i64 %spec.store.select, -2147483648
+ %spec.store.select7 = select i1 %1, i64 %spec.store.select, i64 -2147483648
+ %conv6 = trunc i64 %spec.store.select7 to i32
+ ret i32 %conv6
+}
+
+define i32 @utesth_f16i32(half %x) {
+; CHECK-LABEL: utesth_f16i32:
+; CHECK: .functype utesth_f16i32 (f32) -> (i32)
+; CHECK-NEXT: .local i64
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: i64.trunc_sat_f32_u
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i64.const 4294967295
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.const 4294967295
+; CHECK-NEXT: i64.lt_u
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i32.wrap_i64
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptoui half %x to i64
+ %0 = icmp ult i64 %conv, 4294967295
+ %spec.store.select = select i1 %0, i64 %conv, i64 4294967295
+ %conv6 = trunc i64 %spec.store.select to i32
+ ret i32 %conv6
+}
+
+define i32 @ustest_f16i32(half %x) {
+; CHECK-LABEL: ustest_f16i32:
+; CHECK: .functype ustest_f16i32 (f32) -> (i32)
+; CHECK-NEXT: .local i64
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: i64.trunc_sat_f32_s
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i64.const 4294967295
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.const 4294967295
+; CHECK-NEXT: i64.lt_s
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: i64.gt_s
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i32.wrap_i64
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi half %x to i64
+ %0 = icmp slt i64 %conv, 4294967295
+ %spec.store.select = select i1 %0, i64 %conv, i64 4294967295
+ %1 = icmp sgt i64 %spec.store.select, 0
+ %spec.store.select7 = select i1 %1, i64 %spec.store.select, i64 0
+ %conv6 = trunc i64 %spec.store.select7 to i32
+ ret i32 %conv6
+}
+
+; i16 saturate
+
+define i16 @stest_f64i16(double %x) {
+; CHECK-LABEL: stest_f64i16:
+; CHECK: .functype stest_f64i16 (f64) -> (i32)
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.trunc_sat_f64_s
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i32.const 32767
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 32767
+; CHECK-NEXT: i32.lt_s
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i32.const -32768
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const -32768
+; CHECK-NEXT: i32.gt_s
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi double %x to i32
+ %0 = icmp slt i32 %conv, 32767
+ %spec.store.select = select i1 %0, i32 %conv, i32 32767
+ %1 = icmp sgt i32 %spec.store.select, -32768
+ %spec.store.select7 = select i1 %1, i32 %spec.store.select, i32 -32768
+ %conv6 = trunc i32 %spec.store.select7 to i16
+ ret i16 %conv6
+}
+
+define i16 @utest_f64i16(double %x) {
+; CHECK-LABEL: utest_f64i16:
+; CHECK: .functype utest_f64i16 (f64) -> (i32)
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.trunc_sat_f64_u
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i32.const 65535
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 65535
+; CHECK-NEXT: i32.lt_u
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptoui double %x to i32
+ %0 = icmp ult i32 %conv, 65535
+ %spec.store.select = select i1 %0, i32 %conv, i32 65535
+ %conv6 = trunc i32 %spec.store.select to i16
+ ret i16 %conv6
+}
+
+define i16 @ustest_f64i16(double %x) {
+; CHECK-LABEL: ustest_f64i16:
+; CHECK: .functype ustest_f64i16 (f64) -> (i32)
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.trunc_sat_f64_s
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i32.const 65535
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 65535
+; CHECK-NEXT: i32.lt_s
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i32.const 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 0
+; CHECK-NEXT: i32.gt_s
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi double %x to i32
+ %0 = icmp slt i32 %conv, 65535
+ %spec.store.select = select i1 %0, i32 %conv, i32 65535
+ %1 = icmp sgt i32 %spec.store.select, 0
+ %spec.store.select7 = select i1 %1, i32 %spec.store.select, i32 0
+ %conv6 = trunc i32 %spec.store.select7 to i16
+ ret i16 %conv6
+}
+
+define i16 @stest_f32i16(float %x) {
+; CHECK-LABEL: stest_f32i16:
+; CHECK: .functype stest_f32i16 (f32) -> (i32)
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.trunc_sat_f32_s
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i32.const 32767
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 32767
+; CHECK-NEXT: i32.lt_s
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i32.const -32768
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const -32768
+; CHECK-NEXT: i32.gt_s
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi float %x to i32
+ %0 = icmp slt i32 %conv, 32767
+ %spec.store.select = select i1 %0, i32 %conv, i32 32767
+ %1 = icmp sgt i32 %spec.store.select, -32768
+ %spec.store.select7 = select i1 %1, i32 %spec.store.select, i32 -32768
+ %conv6 = trunc i32 %spec.store.select7 to i16
+ ret i16 %conv6
+}
+
+define i16 @utest_f32i16(float %x) {
+; CHECK-LABEL: utest_f32i16:
+; CHECK: .functype utest_f32i16 (f32) -> (i32)
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.trunc_sat_f32_u
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i32.const 65535
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 65535
+; CHECK-NEXT: i32.lt_u
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptoui float %x to i32
+ %0 = icmp ult i32 %conv, 65535
+ %spec.store.select = select i1 %0, i32 %conv, i32 65535
+ %conv6 = trunc i32 %spec.store.select to i16
+ ret i16 %conv6
+}
+
+define i16 @ustest_f32i16(float %x) {
+; CHECK-LABEL: ustest_f32i16:
+; CHECK: .functype ustest_f32i16 (f32) -> (i32)
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.trunc_sat_f32_s
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i32.const 65535
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 65535
+; CHECK-NEXT: i32.lt_s
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i32.const 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 0
+; CHECK-NEXT: i32.gt_s
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi float %x to i32
+ %0 = icmp slt i32 %conv, 65535
+ %spec.store.select = select i1 %0, i32 %conv, i32 65535
+ %1 = icmp sgt i32 %spec.store.select, 0
+ %spec.store.select7 = select i1 %1, i32 %spec.store.select, i32 0
+ %conv6 = trunc i32 %spec.store.select7 to i16
+ ret i16 %conv6
+}
+
+define i16 @stest_f16i16(half %x) {
+; CHECK-LABEL: stest_f16i16:
+; CHECK: .functype stest_f16i16 (f32) -> (i32)
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: i32.trunc_sat_f32_s
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i32.const 32767
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 32767
+; CHECK-NEXT: i32.lt_s
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i32.const -32768
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const -32768
+; CHECK-NEXT: i32.gt_s
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi half %x to i32
+ %0 = icmp slt i32 %conv, 32767
+ %spec.store.select = select i1 %0, i32 %conv, i32 32767
+ %1 = icmp sgt i32 %spec.store.select, -32768
+ %spec.store.select7 = select i1 %1, i32 %spec.store.select, i32 -32768
+ %conv6 = trunc i32 %spec.store.select7 to i16
+ ret i16 %conv6
+}
+
+define i16 @utesth_f16i16(half %x) {
+; CHECK-LABEL: utesth_f16i16:
+; CHECK: .functype utesth_f16i16 (f32) -> (i32)
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: i32.trunc_sat_f32_u
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i32.const 65535
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 65535
+; CHECK-NEXT: i32.lt_u
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptoui half %x to i32
+ %0 = icmp ult i32 %conv, 65535
+ %spec.store.select = select i1 %0, i32 %conv, i32 65535
+ %conv6 = trunc i32 %spec.store.select to i16
+ ret i16 %conv6
+}
+
+define i16 @ustest_f16i16(half %x) {
+; CHECK-LABEL: ustest_f16i16:
+; CHECK: .functype ustest_f16i16 (f32) -> (i32)
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: i32.trunc_sat_f32_s
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i32.const 65535
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 65535
+; CHECK-NEXT: i32.lt_s
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i32.const 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 0
+; CHECK-NEXT: i32.gt_s
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi half %x to i32
+ %0 = icmp slt i32 %conv, 65535
+ %spec.store.select = select i1 %0, i32 %conv, i32 65535
+ %1 = icmp sgt i32 %spec.store.select, 0
+ %spec.store.select7 = select i1 %1, i32 %spec.store.select, i32 0
+ %conv6 = trunc i32 %spec.store.select7 to i16
+ ret i16 %conv6
+}
+
+; i64 saturate
+
+define i64 @stest_f64i64(double %x) {
+; CHECK-LABEL: stest_f64i64:
+; CHECK: .functype stest_f64i64 (f64) -> (i64)
+; CHECK-NEXT: .local i32, i64, i64
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: call __fixdfti
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 8
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 2
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 3
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 9223372036854775807
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 9223372036854775807
+; CHECK-NEXT: i64.lt_u
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: i64.lt_s
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.eqz
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 3
+; CHECK-NEXT: i64.const -9223372036854775808
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const -9223372036854775808
+; CHECK-NEXT: i64.gt_u
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 2
+; CHECK-NEXT: i64.const -1
+; CHECK-NEXT: i64.gt_s
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const -1
+; CHECK-NEXT: i64.eq
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi double %x to i128
+ %0 = icmp slt i128 %conv, 9223372036854775807
+ %spec.store.select = select i1 %0, i128 %conv, i128 9223372036854775807
+ %1 = icmp sgt i128 %spec.store.select, -9223372036854775808
+ %spec.store.select7 = select i1 %1, i128 %spec.store.select, i128 -9223372036854775808
+ %conv6 = trunc i128 %spec.store.select7 to i64
+ ret i64 %conv6
+}
+
+define i64 @utest_f64i64(double %x) {
+; CHECK-LABEL: utest_f64i64:
+; CHECK: .functype utest_f64i64 (f64) -> (i64)
+; CHECK-NEXT: .local i32, i64, i64
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: call __fixunsdfti
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 8
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 2
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 3
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.eqz
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptoui double %x to i128
+ %0 = icmp ult i128 %conv, 18446744073709551616
+ %spec.store.select = select i1 %0, i128 %conv, i128 18446744073709551616
+ %conv6 = trunc i128 %spec.store.select to i64
+ ret i64 %conv6
+}
+
+define i64 @ustest_f64i64(double %x) {
+; CHECK-LABEL: ustest_f64i64:
+; CHECK: .functype ustest_f64i64 (f64) -> (i64)
+; CHECK-NEXT: .local i32, i64, i64
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: call __fixdfti
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 8
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 2
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 3
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: i64.lt_s
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 3
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: i64.ne
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 2
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: i64.gt_s
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.eqz
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi double %x to i128
+ %0 = icmp slt i128 %conv, 18446744073709551616
+ %spec.store.select = select i1 %0, i128 %conv, i128 18446744073709551616
+ %1 = icmp sgt i128 %spec.store.select, 0
+ %spec.store.select7 = select i1 %1, i128 %spec.store.select, i128 0
+ %conv6 = trunc i128 %spec.store.select7 to i64
+ ret i64 %conv6
+}
+
+define i64 @stest_f32i64(float %x) {
+; CHECK-LABEL: stest_f32i64:
+; CHECK: .functype stest_f32i64 (f32) -> (i64)
+; CHECK-NEXT: .local i32, i64, i64
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: call __fixsfti
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 8
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 2
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 3
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 9223372036854775807
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 9223372036854775807
+; CHECK-NEXT: i64.lt_u
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: i64.lt_s
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.eqz
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 3
+; CHECK-NEXT: i64.const -9223372036854775808
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const -9223372036854775808
+; CHECK-NEXT: i64.gt_u
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 2
+; CHECK-NEXT: i64.const -1
+; CHECK-NEXT: i64.gt_s
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const -1
+; CHECK-NEXT: i64.eq
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi float %x to i128
+ %0 = icmp slt i128 %conv, 9223372036854775807
+ %spec.store.select = select i1 %0, i128 %conv, i128 9223372036854775807
+ %1 = icmp sgt i128 %spec.store.select, -9223372036854775808
+ %spec.store.select7 = select i1 %1, i128 %spec.store.select, i128 -9223372036854775808
+ %conv6 = trunc i128 %spec.store.select7 to i64
+ ret i64 %conv6
+}
+
+define i64 @utest_f32i64(float %x) {
+; CHECK-LABEL: utest_f32i64:
+; CHECK: .functype utest_f32i64 (f32) -> (i64)
+; CHECK-NEXT: .local i32, i64, i64
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: call __fixunssfti
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 8
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 2
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 3
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.eqz
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptoui float %x to i128
+ %0 = icmp ult i128 %conv, 18446744073709551616
+ %spec.store.select = select i1 %0, i128 %conv, i128 18446744073709551616
+ %conv6 = trunc i128 %spec.store.select to i64
+ ret i64 %conv6
+}
+
+define i64 @ustest_f32i64(float %x) {
+; CHECK-LABEL: ustest_f32i64:
+; CHECK: .functype ustest_f32i64 (f32) -> (i64)
+; CHECK-NEXT: .local i32, i64, i64
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: call __fixsfti
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 8
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 2
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 3
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: i64.lt_s
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 3
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: i64.ne
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 2
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: i64.gt_s
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.eqz
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi float %x to i128
+ %0 = icmp slt i128 %conv, 18446744073709551616
+ %spec.store.select = select i1 %0, i128 %conv, i128 18446744073709551616
+ %1 = icmp sgt i128 %spec.store.select, 0
+ %spec.store.select7 = select i1 %1, i128 %spec.store.select, i128 0
+ %conv6 = trunc i128 %spec.store.select7 to i64
+ ret i64 %conv6
+}
+
+define i64 @stest_f16i64(half %x) {
+; CHECK-LABEL: stest_f16i64:
+; CHECK: .functype stest_f16i64 (f32) -> (i64)
+; CHECK-NEXT: .local i32, i64, i64
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: call __fixsfti
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 8
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 2
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 3
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 9223372036854775807
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 9223372036854775807
+; CHECK-NEXT: i64.lt_u
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: i64.lt_s
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.eqz
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 3
+; CHECK-NEXT: i64.const -9223372036854775808
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const -9223372036854775808
+; CHECK-NEXT: i64.gt_u
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 2
+; CHECK-NEXT: i64.const -1
+; CHECK-NEXT: i64.gt_s
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const -1
+; CHECK-NEXT: i64.eq
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi half %x to i128
+ %0 = icmp slt i128 %conv, 9223372036854775807
+ %spec.store.select = select i1 %0, i128 %conv, i128 9223372036854775807
+ %1 = icmp sgt i128 %spec.store.select, -9223372036854775808
+ %spec.store.select7 = select i1 %1, i128 %spec.store.select, i128 -9223372036854775808
+ %conv6 = trunc i128 %spec.store.select7 to i64
+ ret i64 %conv6
+}
+
+define i64 @utesth_f16i64(half %x) {
+; CHECK-LABEL: utesth_f16i64:
+; CHECK: .functype utesth_f16i64 (f32) -> (i64)
+; CHECK-NEXT: .local i32, i64, i64
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: call __fixunssfti
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 8
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 2
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 3
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.eqz
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptoui half %x to i128
+ %0 = icmp ult i128 %conv, 18446744073709551616
+ %spec.store.select = select i1 %0, i128 %conv, i128 18446744073709551616
+ %conv6 = trunc i128 %spec.store.select to i64
+ ret i64 %conv6
+}
+
+define i64 @ustest_f16i64(half %x) {
+; CHECK-LABEL: ustest_f16i64:
+; CHECK: .functype ustest_f16i64 (f32) -> (i64)
+; CHECK-NEXT: .local i32, i64, i64
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: call __fixsfti
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 8
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 2
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 3
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: i64.lt_s
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 3
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: i64.ne
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 2
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: i64.gt_s
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.eqz
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi half %x to i128
+ %0 = icmp slt i128 %conv, 18446744073709551616
+ %spec.store.select = select i1 %0, i128 %conv, i128 18446744073709551616
+ %1 = icmp sgt i128 %spec.store.select, 0
+ %spec.store.select7 = select i1 %1, i128 %spec.store.select, i128 0
+ %conv6 = trunc i128 %spec.store.select7 to i64
+ ret i64 %conv6
+}
+
+
+
+
+; i32 saturate
+
+define i32 @stest_f64i32_mm(double %x) {
+; CHECK-LABEL: stest_f64i32_mm:
+; CHECK: .functype stest_f64i32_mm (f64) -> (i32)
+; CHECK-NEXT: .local i64
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i64.trunc_sat_f64_s
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i64.const 2147483647
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.const 2147483647
+; CHECK-NEXT: i64.lt_s
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i64.const -2147483648
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.const -2147483648
+; CHECK-NEXT: i64.gt_s
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i32.wrap_i64
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi double %x to i64
+ %spec.store.select = call i64 @llvm.smin.i64(i64 %conv, i64 2147483647)
+ %spec.store.select7 = call i64 @llvm.smax.i64(i64 %spec.store.select, i64 -2147483648)
+ %conv6 = trunc i64 %spec.store.select7 to i32
+ ret i32 %conv6
+}
+
+define i32 @utest_f64i32_mm(double %x) {
+; CHECK-LABEL: utest_f64i32_mm:
+; CHECK: .functype utest_f64i32_mm (f64) -> (i32)
+; CHECK-NEXT: .local i64
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i64.trunc_sat_f64_u
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i64.const 4294967295
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.const 4294967295
+; CHECK-NEXT: i64.lt_u
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i32.wrap_i64
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptoui double %x to i64
+ %spec.store.select = call i64 @llvm.umin.i64(i64 %conv, i64 4294967295)
+ %conv6 = trunc i64 %spec.store.select to i32
+ ret i32 %conv6
+}
+
+define i32 @ustest_f64i32_mm(double %x) {
+; CHECK-LABEL: ustest_f64i32_mm:
+; CHECK: .functype ustest_f64i32_mm (f64) -> (i32)
+; CHECK-NEXT: .local i64
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i64.trunc_sat_f64_s
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i64.const 4294967295
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.const 4294967295
+; CHECK-NEXT: i64.lt_s
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: i64.gt_s
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i32.wrap_i64
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi double %x to i64
+ %spec.store.select = call i64 @llvm.smin.i64(i64 %conv, i64 4294967295)
+ %spec.store.select7 = call i64 @llvm.smax.i64(i64 %spec.store.select, i64 0)
+ %conv6 = trunc i64 %spec.store.select7 to i32
+ ret i32 %conv6
+}
+
+define i32 @stest_f32i32_mm(float %x) {
+; CHECK-LABEL: stest_f32i32_mm:
+; CHECK: .functype stest_f32i32_mm (f32) -> (i32)
+; CHECK-NEXT: .local i64
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i64.trunc_sat_f32_s
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i64.const 2147483647
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.const 2147483647
+; CHECK-NEXT: i64.lt_s
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i64.const -2147483648
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.const -2147483648
+; CHECK-NEXT: i64.gt_s
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i32.wrap_i64
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi float %x to i64
+ %spec.store.select = call i64 @llvm.smin.i64(i64 %conv, i64 2147483647)
+ %spec.store.select7 = call i64 @llvm.smax.i64(i64 %spec.store.select, i64 -2147483648)
+ %conv6 = trunc i64 %spec.store.select7 to i32
+ ret i32 %conv6
+}
+
+define i32 @utest_f32i32_mm(float %x) {
+; CHECK-LABEL: utest_f32i32_mm:
+; CHECK: .functype utest_f32i32_mm (f32) -> (i32)
+; CHECK-NEXT: .local i64
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i64.trunc_sat_f32_u
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i64.const 4294967295
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.const 4294967295
+; CHECK-NEXT: i64.lt_u
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i32.wrap_i64
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptoui float %x to i64
+ %spec.store.select = call i64 @llvm.umin.i64(i64 %conv, i64 4294967295)
+ %conv6 = trunc i64 %spec.store.select to i32
+ ret i32 %conv6
+}
+
+define i32 @ustest_f32i32_mm(float %x) {
+; CHECK-LABEL: ustest_f32i32_mm:
+; CHECK: .functype ustest_f32i32_mm (f32) -> (i32)
+; CHECK-NEXT: .local i64
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i64.trunc_sat_f32_s
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i64.const 4294967295
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.const 4294967295
+; CHECK-NEXT: i64.lt_s
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: i64.gt_s
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i32.wrap_i64
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi float %x to i64
+ %spec.store.select = call i64 @llvm.smin.i64(i64 %conv, i64 4294967295)
+ %spec.store.select7 = call i64 @llvm.smax.i64(i64 %spec.store.select, i64 0)
+ %conv6 = trunc i64 %spec.store.select7 to i32
+ ret i32 %conv6
+}
+
+define i32 @stest_f16i32_mm(half %x) {
+; CHECK-LABEL: stest_f16i32_mm:
+; CHECK: .functype stest_f16i32_mm (f32) -> (i32)
+; CHECK-NEXT: .local i64
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: i64.trunc_sat_f32_s
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i64.const 2147483647
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.const 2147483647
+; CHECK-NEXT: i64.lt_s
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i64.const -2147483648
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.const -2147483648
+; CHECK-NEXT: i64.gt_s
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i32.wrap_i64
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi half %x to i64
+ %spec.store.select = call i64 @llvm.smin.i64(i64 %conv, i64 2147483647)
+ %spec.store.select7 = call i64 @llvm.smax.i64(i64 %spec.store.select, i64 -2147483648)
+ %conv6 = trunc i64 %spec.store.select7 to i32
+ ret i32 %conv6
+}
+
+define i32 @utesth_f16i32_mm(half %x) {
+; CHECK-LABEL: utesth_f16i32_mm:
+; CHECK: .functype utesth_f16i32_mm (f32) -> (i32)
+; CHECK-NEXT: .local i64
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: i64.trunc_sat_f32_u
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i64.const 4294967295
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.const 4294967295
+; CHECK-NEXT: i64.lt_u
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i32.wrap_i64
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptoui half %x to i64
+ %spec.store.select = call i64 @llvm.umin.i64(i64 %conv, i64 4294967295)
+ %conv6 = trunc i64 %spec.store.select to i32
+ ret i32 %conv6
+}
+
+define i32 @ustest_f16i32_mm(half %x) {
+; CHECK-LABEL: ustest_f16i32_mm:
+; CHECK: .functype ustest_f16i32_mm (f32) -> (i32)
+; CHECK-NEXT: .local i64
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: i64.trunc_sat_f32_s
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i64.const 4294967295
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.const 4294967295
+; CHECK-NEXT: i64.lt_s
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: i64.gt_s
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i32.wrap_i64
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi half %x to i64
+ %spec.store.select = call i64 @llvm.smin.i64(i64 %conv, i64 4294967295)
+ %spec.store.select7 = call i64 @llvm.smax.i64(i64 %spec.store.select, i64 0)
+ %conv6 = trunc i64 %spec.store.select7 to i32
+ ret i32 %conv6
+}
+
+; i16 saturate
+
+; stest: fptosi double -> i32, clamp to the signed-i16 range [-32768, 32767]
+; via smin/smax intrinsics, then trunc to i16.
+define i16 @stest_f64i16_mm(double %x) {
+; CHECK-LABEL: stest_f64i16_mm:
+; CHECK: .functype stest_f64i16_mm (f64) -> (i32)
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.trunc_sat_f64_s
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i32.const 32767
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 32767
+; CHECK-NEXT: i32.lt_s
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i32.const -32768
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const -32768
+; CHECK-NEXT: i32.gt_s
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi double %x to i32
+ %spec.store.select = call i32 @llvm.smin.i32(i32 %conv, i32 32767)
+ %spec.store.select7 = call i32 @llvm.smax.i32(i32 %spec.store.select, i32 -32768)
+ %conv6 = trunc i32 %spec.store.select7 to i16
+ ret i16 %conv6
+}
+
+; utest: fptoui double -> i32, clamp to [0, 65535] via umin, then trunc to i16.
+define i16 @utest_f64i16_mm(double %x) {
+; CHECK-LABEL: utest_f64i16_mm:
+; CHECK: .functype utest_f64i16_mm (f64) -> (i32)
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.trunc_sat_f64_u
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i32.const 65535
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 65535
+; CHECK-NEXT: i32.lt_u
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptoui double %x to i32
+ %spec.store.select = call i32 @llvm.umin.i32(i32 %conv, i32 65535)
+ %conv6 = trunc i32 %spec.store.select to i16
+ ret i16 %conv6
+}
+
+; ustest: fptosi double -> i32, clamp to [0, 65535] via smin/smax, trunc to i16.
+define i16 @ustest_f64i16_mm(double %x) {
+; CHECK-LABEL: ustest_f64i16_mm:
+; CHECK: .functype ustest_f64i16_mm (f64) -> (i32)
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.trunc_sat_f64_s
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i32.const 65535
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 65535
+; CHECK-NEXT: i32.lt_s
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i32.const 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 0
+; CHECK-NEXT: i32.gt_s
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi double %x to i32
+ %spec.store.select = call i32 @llvm.smin.i32(i32 %conv, i32 65535)
+ %spec.store.select7 = call i32 @llvm.smax.i32(i32 %spec.store.select, i32 0)
+ %conv6 = trunc i32 %spec.store.select7 to i16
+ ret i16 %conv6
+}
+
+; stest: fptosi float -> i32, clamp to [-32768, 32767] via smin/smax, trunc to i16.
+define i16 @stest_f32i16_mm(float %x) {
+; CHECK-LABEL: stest_f32i16_mm:
+; CHECK: .functype stest_f32i16_mm (f32) -> (i32)
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.trunc_sat_f32_s
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i32.const 32767
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 32767
+; CHECK-NEXT: i32.lt_s
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i32.const -32768
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const -32768
+; CHECK-NEXT: i32.gt_s
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi float %x to i32
+ %spec.store.select = call i32 @llvm.smin.i32(i32 %conv, i32 32767)
+ %spec.store.select7 = call i32 @llvm.smax.i32(i32 %spec.store.select, i32 -32768)
+ %conv6 = trunc i32 %spec.store.select7 to i16
+ ret i16 %conv6
+}
+
+; utest: fptoui float -> i32, clamp to [0, 65535] via umin, then trunc to i16.
+define i16 @utest_f32i16_mm(float %x) {
+; CHECK-LABEL: utest_f32i16_mm:
+; CHECK: .functype utest_f32i16_mm (f32) -> (i32)
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.trunc_sat_f32_u
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i32.const 65535
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 65535
+; CHECK-NEXT: i32.lt_u
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptoui float %x to i32
+ %spec.store.select = call i32 @llvm.umin.i32(i32 %conv, i32 65535)
+ %conv6 = trunc i32 %spec.store.select to i16
+ ret i16 %conv6
+}
+
+; ustest: fptosi float -> i32, clamp to [0, 65535] via smin/smax, trunc to i16.
+define i16 @ustest_f32i16_mm(float %x) {
+; CHECK-LABEL: ustest_f32i16_mm:
+; CHECK: .functype ustest_f32i16_mm (f32) -> (i32)
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.trunc_sat_f32_s
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i32.const 65535
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 65535
+; CHECK-NEXT: i32.lt_s
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i32.const 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 0
+; CHECK-NEXT: i32.gt_s
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi float %x to i32
+ %spec.store.select = call i32 @llvm.smin.i32(i32 %conv, i32 65535)
+ %spec.store.select7 = call i32 @llvm.smax.i32(i32 %spec.store.select, i32 0)
+ %conv6 = trunc i32 %spec.store.select7 to i16
+ ret i16 %conv6
+}
+
+; stest: fptosi half -> i32 (via f16<->f32 libcalls), clamp to [-32768, 32767],
+; trunc to i16.
+define i16 @stest_f16i16_mm(half %x) {
+; CHECK-LABEL: stest_f16i16_mm:
+; CHECK: .functype stest_f16i16_mm (f32) -> (i32)
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: i32.trunc_sat_f32_s
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i32.const 32767
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 32767
+; CHECK-NEXT: i32.lt_s
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i32.const -32768
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const -32768
+; CHECK-NEXT: i32.gt_s
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi half %x to i32
+ %spec.store.select = call i32 @llvm.smin.i32(i32 %conv, i32 32767)
+ %spec.store.select7 = call i32 @llvm.smax.i32(i32 %spec.store.select, i32 -32768)
+ %conv6 = trunc i32 %spec.store.select7 to i16
+ ret i16 %conv6
+}
+
+; utest: fptoui half -> i32 (via f16<->f32 libcalls), clamp to [0, 65535] via
+; umin, trunc to i16.
+define i16 @utesth_f16i16_mm(half %x) {
+; CHECK-LABEL: utesth_f16i16_mm:
+; CHECK: .functype utesth_f16i16_mm (f32) -> (i32)
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: i32.trunc_sat_f32_u
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i32.const 65535
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 65535
+; CHECK-NEXT: i32.lt_u
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptoui half %x to i32
+ %spec.store.select = call i32 @llvm.umin.i32(i32 %conv, i32 65535)
+ %conv6 = trunc i32 %spec.store.select to i16
+ ret i16 %conv6
+}
+
+; ustest: fptosi half -> i32 (via f16<->f32 libcalls), clamp to [0, 65535] via
+; smin/smax, trunc to i16.
+define i16 @ustest_f16i16_mm(half %x) {
+; CHECK-LABEL: ustest_f16i16_mm:
+; CHECK: .functype ustest_f16i16_mm (f32) -> (i32)
+; CHECK-NEXT: .local i32
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: i32.trunc_sat_f32_s
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i32.const 65535
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 65535
+; CHECK-NEXT: i32.lt_s
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i32.const 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 0
+; CHECK-NEXT: i32.gt_s
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi half %x to i32
+ %spec.store.select = call i32 @llvm.smin.i32(i32 %conv, i32 65535)
+ %spec.store.select7 = call i32 @llvm.smax.i32(i32 %spec.store.select, i32 0)
+ %conv6 = trunc i32 %spec.store.select7 to i16
+ ret i16 %conv6
+}
+
+; i64 saturate
+
+; stest: fptosi double -> i128 (via the __fixdfti libcall, returned through a
+; 16-byte stack slot), clamp to [INT64_MIN, INT64_MAX] via i128 smin/smax,
+; trunc to i64. The i128 compares are expanded into hi/lo word selects.
+define i64 @stest_f64i64_mm(double %x) {
+; CHECK-LABEL: stest_f64i64_mm:
+; CHECK: .functype stest_f64i64_mm (f64) -> (i64)
+; CHECK-NEXT: .local i32, i64, i64
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: call __fixdfti
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 8
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 2
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 3
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 9223372036854775807
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 9223372036854775807
+; CHECK-NEXT: i64.lt_u
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 9223372036854775807
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: i64.lt_s
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.eqz
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 3
+; CHECK-NEXT: i64.const -9223372036854775808
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const -9223372036854775808
+; CHECK-NEXT: i64.gt_u
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const -9223372036854775808
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 63
+; CHECK-NEXT: i64.shr_s
+; CHECK-NEXT: i64.and
+; CHECK-NEXT: local.tee 2
+; CHECK-NEXT: i64.const -1
+; CHECK-NEXT: i64.gt_s
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const -1
+; CHECK-NEXT: i64.eq
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi double %x to i128
+ %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 9223372036854775807)
+ %spec.store.select7 = call i128 @llvm.smax.i128(i128 %spec.store.select, i128 -9223372036854775808)
+ %conv6 = trunc i128 %spec.store.select7 to i64
+ ret i64 %conv6
+}
+
+; utest: fptoui double -> i128 (via __fixunsdfti), umin against
+; 18446744073709551616 = 2^64 (one above UINT64_MAX), trunc to i64.
+; NOTE(review): the bound being 2^64 rather than 2^64-1 looks unintended for a
+; u64 clamp — confirm; the CHECK lines below were autogenerated from this IR as-is.
+define i64 @utest_f64i64_mm(double %x) {
+; CHECK-LABEL: utest_f64i64_mm:
+; CHECK: .functype utest_f64i64_mm (f64) -> (i64)
+; CHECK-NEXT: .local i32, i64, i64
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: call __fixunsdfti
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 8
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 2
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 3
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.eqz
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: i64.eq
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptoui double %x to i128
+ %spec.store.select = call i128 @llvm.umin.i128(i128 %conv, i128 18446744073709551616)
+ %conv6 = trunc i128 %spec.store.select to i64
+ ret i64 %conv6
+}
+
+; ustest: fptosi double -> i128 (via __fixdfti), smin against 2^64 then smax 0,
+; trunc to i64.
+; NOTE(review): the upper bound 18446744073709551616 is 2^64, one above
+; UINT64_MAX — confirm intent; CHECK lines were autogenerated from this IR as-is.
+define i64 @ustest_f64i64_mm(double %x) {
+; CHECK-LABEL: ustest_f64i64_mm:
+; CHECK: .functype ustest_f64i64_mm (f64) -> (i64)
+; CHECK-NEXT: .local i32, i64, i64
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: call __fixdfti
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 8
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 2
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 3
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: i64.lt_s
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: i64.eq
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 3
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 2
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: i64.gt_s
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.eqz
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi double %x to i128
+ %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 18446744073709551616)
+ %spec.store.select7 = call i128 @llvm.smax.i128(i128 %spec.store.select, i128 0)
+ %conv6 = trunc i128 %spec.store.select7 to i64
+ ret i64 %conv6
+}
+
+; stest: fptosi float -> i128 (via __fixsfti through a 16-byte stack slot),
+; clamp to [INT64_MIN, INT64_MAX] via i128 smin/smax, trunc to i64.
+define i64 @stest_f32i64_mm(float %x) {
+; CHECK-LABEL: stest_f32i64_mm:
+; CHECK: .functype stest_f32i64_mm (f32) -> (i64)
+; CHECK-NEXT: .local i32, i64, i64
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: call __fixsfti
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 8
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 2
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 3
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 9223372036854775807
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 9223372036854775807
+; CHECK-NEXT: i64.lt_u
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 9223372036854775807
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: i64.lt_s
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.eqz
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 3
+; CHECK-NEXT: i64.const -9223372036854775808
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const -9223372036854775808
+; CHECK-NEXT: i64.gt_u
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const -9223372036854775808
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 63
+; CHECK-NEXT: i64.shr_s
+; CHECK-NEXT: i64.and
+; CHECK-NEXT: local.tee 2
+; CHECK-NEXT: i64.const -1
+; CHECK-NEXT: i64.gt_s
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const -1
+; CHECK-NEXT: i64.eq
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi float %x to i128
+ %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 9223372036854775807)
+ %spec.store.select7 = call i128 @llvm.smax.i128(i128 %spec.store.select, i128 -9223372036854775808)
+ %conv6 = trunc i128 %spec.store.select7 to i64
+ ret i64 %conv6
+}
+
+; utest: fptoui float -> i128 (via __fixunssfti), umin against 2^64, trunc to i64.
+; NOTE(review): 18446744073709551616 is one above UINT64_MAX — confirm intent;
+; CHECK lines were autogenerated from this IR as-is.
+define i64 @utest_f32i64_mm(float %x) {
+; CHECK-LABEL: utest_f32i64_mm:
+; CHECK: .functype utest_f32i64_mm (f32) -> (i64)
+; CHECK-NEXT: .local i32, i64, i64
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: call __fixunssfti
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 8
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 2
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 3
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.eqz
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: i64.eq
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptoui float %x to i128
+ %spec.store.select = call i128 @llvm.umin.i128(i128 %conv, i128 18446744073709551616)
+ %conv6 = trunc i128 %spec.store.select to i64
+ ret i64 %conv6
+}
+
+; ustest: fptosi float -> i128 (via __fixsfti), smin against 2^64 then smax 0,
+; trunc to i64.
+; NOTE(review): upper bound is 2^64, one above UINT64_MAX — confirm intent;
+; CHECK lines were autogenerated from this IR as-is.
+define i64 @ustest_f32i64_mm(float %x) {
+; CHECK-LABEL: ustest_f32i64_mm:
+; CHECK: .functype ustest_f32i64_mm (f32) -> (i64)
+; CHECK-NEXT: .local i32, i64, i64
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: call __fixsfti
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 8
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 2
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 3
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: i64.lt_s
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: i64.eq
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 3
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 2
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: i64.gt_s
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.eqz
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi float %x to i128
+ %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 18446744073709551616)
+ %spec.store.select7 = call i128 @llvm.smax.i128(i128 %spec.store.select, i128 0)
+ %conv6 = trunc i128 %spec.store.select7 to i64
+ ret i64 %conv6
+}
+
+; stest: half -> f32 via libcalls, then fptosi -> i128 (__fixsfti), clamp to
+; [INT64_MIN, INT64_MAX] via i128 smin/smax, trunc to i64.
+define i64 @stest_f16i64_mm(half %x) {
+; CHECK-LABEL: stest_f16i64_mm:
+; CHECK: .functype stest_f16i64_mm (f32) -> (i64)
+; CHECK-NEXT: .local i32, i64, i64
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: call __fixsfti
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 8
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 2
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 3
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 9223372036854775807
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 9223372036854775807
+; CHECK-NEXT: i64.lt_u
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 9223372036854775807
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: i64.lt_s
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.eqz
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 3
+; CHECK-NEXT: i64.const -9223372036854775808
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const -9223372036854775808
+; CHECK-NEXT: i64.gt_u
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const -9223372036854775808
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 63
+; CHECK-NEXT: i64.shr_s
+; CHECK-NEXT: i64.and
+; CHECK-NEXT: local.tee 2
+; CHECK-NEXT: i64.const -1
+; CHECK-NEXT: i64.gt_s
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const -1
+; CHECK-NEXT: i64.eq
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi half %x to i128
+ %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 9223372036854775807)
+ %spec.store.select7 = call i128 @llvm.smax.i128(i128 %spec.store.select, i128 -9223372036854775808)
+ %conv6 = trunc i128 %spec.store.select7 to i64
+ ret i64 %conv6
+}
+
+; utest: half -> f32 via libcalls, fptoui -> i128 (__fixunssfti), umin against
+; 2^64, trunc to i64.
+; NOTE(review): 18446744073709551616 is one above UINT64_MAX — confirm intent;
+; CHECK lines were autogenerated from this IR as-is.
+define i64 @utesth_f16i64_mm(half %x) {
+; CHECK-LABEL: utesth_f16i64_mm:
+; CHECK: .functype utesth_f16i64_mm (f32) -> (i64)
+; CHECK-NEXT: .local i32, i64, i64
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: call __fixunssfti
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 8
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 2
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 3
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.eqz
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: i64.eq
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptoui half %x to i128
+ %spec.store.select = call i128 @llvm.umin.i128(i128 %conv, i128 18446744073709551616)
+ %conv6 = trunc i128 %spec.store.select to i64
+ ret i64 %conv6
+}
+
+; ustest: half -> f32 via libcalls, fptosi -> i128 (__fixsfti), smin against
+; 2^64 then smax 0, trunc to i64.
+; NOTE(review): upper bound is 2^64, one above UINT64_MAX — confirm intent;
+; CHECK lines were autogenerated from this IR as-is.
+define i64 @ustest_f16i64_mm(half %x) {
+; CHECK-LABEL: ustest_f16i64_mm:
+; CHECK: .functype ustest_f16i64_mm (f32) -> (i64)
+; CHECK-NEXT: .local i32, i64, i64
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: call __fixsfti
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 8
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 2
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 3
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: i64.lt_s
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: i64.eq
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 3
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 2
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: i64.gt_s
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.eqz
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi half %x to i128
+ %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 18446744073709551616)
+ %spec.store.select7 = call i128 @llvm.smax.i128(i128 %spec.store.select, i128 0)
+ %conv6 = trunc i128 %spec.store.select7 to i64
+ ret i64 %conv6
+}
+
+; Declarations of the min/max intrinsics used by the _mm clamp tests above.
+declare i32 @llvm.smin.i32(i32, i32)
+declare i32 @llvm.smax.i32(i32, i32)
+declare i32 @llvm.umin.i32(i32, i32)
+declare i64 @llvm.smin.i64(i64, i64)
+declare i64 @llvm.smax.i64(i64, i64)
+declare i64 @llvm.umin.i64(i64, i64)
+declare i128 @llvm.smin.i128(i128, i128)
+declare i128 @llvm.smax.i128(i128, i128)
+declare i128 @llvm.umin.i128(i128, i128)
diff --git a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
new file mode 100644
index 0000000000000..a19023ba5c6ec
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
@@ -0,0 +1,3552 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=wasm32-unknown-unknown -mattr=+simd128,+nontrapping-fptoint | FileCheck %s
+
+; i32 saturate
+
+; Vector stest: lanewise fptosi <2 x double> -> <2 x i64> (scalarized through
+; i64.trunc_sat_f64_s), clamp to [INT32_MIN, INT32_MAX] with compare+bitselect,
+; narrow to <2 x i32> with a shuffle.
+define <2 x i32> @stest_f64i32(<2 x double> %x) {
+; CHECK-LABEL: stest_f64i32:
+; CHECK: .functype stest_f64i32 (v128) -> (v128)
+; CHECK-NEXT: .local v128
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f64x2.extract_lane 0
+; CHECK-NEXT: i64.trunc_sat_f64_s
+; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f64x2.extract_lane 1
+; CHECK-NEXT: i64.trunc_sat_f64_s
+; CHECK-NEXT: i64x2.replace_lane 1
+; CHECK-NEXT: local.tee 0
+; CHECK-NEXT: v128.const 2147483647, 2147483647
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64x2.lt_s
+; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: local.tee 0
+; CHECK-NEXT: v128.const -2147483648, -2147483648
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64x2.gt_s
+; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi <2 x double> %x to <2 x i64>
+ %0 = icmp slt <2 x i64> %conv, <i64 2147483647, i64 2147483647>
+ %spec.store.select = select <2 x i1> %0, <2 x i64> %conv, <2 x i64> <i64 2147483647, i64 2147483647>
+ %1 = icmp sgt <2 x i64> %spec.store.select, <i64 -2147483648, i64 -2147483648>
+ %spec.store.select7 = select <2 x i1> %1, <2 x i64> %spec.store.select, <2 x i64> <i64 -2147483648, i64 -2147483648>
+ %conv6 = trunc <2 x i64> %spec.store.select7 to <2 x i32>
+ ret <2 x i32> %conv6
+}
+
+; Vector utest: lanewise fptoui <2 x double> -> <2 x i64>, clamp to
+; [0, UINT32_MAX]; the ult mask is built from scalar i64 compares and splats,
+; then applied with v128.bitselect before narrowing to <2 x i32>.
+define <2 x i32> @utest_f64i32(<2 x double> %x) {
+; CHECK-LABEL: utest_f64i32:
+; CHECK: .functype utest_f64i32 (v128) -> (v128)
+; CHECK-NEXT: .local i64, i64
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f64x2.extract_lane 0
+; CHECK-NEXT: i64.trunc_sat_f64_u
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f64x2.extract_lane 1
+; CHECK-NEXT: i64.trunc_sat_f64_u
+; CHECK-NEXT: local.tee 2
+; CHECK-NEXT: i64x2.replace_lane 1
+; CHECK-NEXT: v128.const 4294967295, 4294967295
+; CHECK-NEXT: i64.const -1
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.const 4294967295
+; CHECK-NEXT: i64.lt_u
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: i64.const -1
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 4294967295
+; CHECK-NEXT: i64.lt_u
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i64x2.replace_lane 1
+; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptoui <2 x double> %x to <2 x i64>
+ %0 = icmp ult <2 x i64> %conv, <i64 4294967295, i64 4294967295>
+ %spec.store.select = select <2 x i1> %0, <2 x i64> %conv, <2 x i64> <i64 4294967295, i64 4294967295>
+ %conv6 = trunc <2 x i64> %spec.store.select to <2 x i32>
+ ret <2 x i32> %conv6
+}
+
+; Vector ustest: lanewise fptosi <2 x double> -> <2 x i64>, clamp to
+; [0, UINT32_MAX] with signed compare+bitselect, narrow to <2 x i32>.
+define <2 x i32> @ustest_f64i32(<2 x double> %x) {
+; CHECK-LABEL: ustest_f64i32:
+; CHECK: .functype ustest_f64i32 (v128) -> (v128)
+; CHECK-NEXT: .local v128
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f64x2.extract_lane 0
+; CHECK-NEXT: i64.trunc_sat_f64_s
+; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f64x2.extract_lane 1
+; CHECK-NEXT: i64.trunc_sat_f64_s
+; CHECK-NEXT: i64x2.replace_lane 1
+; CHECK-NEXT: local.tee 0
+; CHECK-NEXT: v128.const 4294967295, 4294967295
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64x2.lt_s
+; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: local.tee 0
+; CHECK-NEXT: v128.const 0, 0
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64x2.gt_s
+; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi <2 x double> %x to <2 x i64>
+ %0 = icmp slt <2 x i64> %conv, <i64 4294967295, i64 4294967295>
+ %spec.store.select = select <2 x i1> %0, <2 x i64> %conv, <2 x i64> <i64 4294967295, i64 4294967295>
+ %1 = icmp sgt <2 x i64> %spec.store.select, zeroinitializer
+ %spec.store.select7 = select <2 x i1> %1, <2 x i64> %spec.store.select, <2 x i64> zeroinitializer
+ %conv6 = trunc <2 x i64> %spec.store.select7 to <2 x i32>
+ ret <2 x i32> %conv6
+}
+
+; Vector stest: fptosi <4 x float> -> <4 x i64>, processed as two <2 x i64>
+; halves; each half is clamped to [INT32_MIN, INT32_MAX] with compare+bitselect,
+; then both halves are narrowed into one <4 x i32> by the final shuffle.
+define <4 x i32> @stest_f32i32(<4 x float> %x) {
+; CHECK-LABEL: stest_f32i32:
+; CHECK: .functype stest_f32i32 (v128) -> (v128)
+; CHECK-NEXT: .local v128, v128, v128
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f32x4.extract_lane 0
+; CHECK-NEXT: i64.trunc_sat_f32_s
+; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f32x4.extract_lane 1
+; CHECK-NEXT: i64.trunc_sat_f32_s
+; CHECK-NEXT: i64x2.replace_lane 1
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: v128.const 2147483647, 2147483647
+; CHECK-NEXT: local.tee 2
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64x2.lt_s
+; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: local.tee 3
+; CHECK-NEXT: v128.const -2147483648, -2147483648
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64x2.gt_s
+; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f32x4.extract_lane 2
+; CHECK-NEXT: i64.trunc_sat_f32_s
+; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f32x4.extract_lane 3
+; CHECK-NEXT: i64.trunc_sat_f32_s
+; CHECK-NEXT: i64x2.replace_lane 1
+; CHECK-NEXT: local.tee 0
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64x2.lt_s
+; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: local.tee 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64x2.gt_s
+; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi <4 x float> %x to <4 x i64>
+ %0 = icmp slt <4 x i64> %conv, <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>
+ %spec.store.select = select <4 x i1> %0, <4 x i64> %conv, <4 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>
+ %1 = icmp sgt <4 x i64> %spec.store.select, <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648>
+ %spec.store.select7 = select <4 x i1> %1, <4 x i64> %spec.store.select, <4 x i64> <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648>
+ %conv6 = trunc <4 x i64> %spec.store.select7 to <4 x i32>
+ ret <4 x i32> %conv6
+}
+
+; Vector utest: fptoui <4 x float> -> <4 x i64> in two <2 x i64> halves; each
+; half is clamped to [0, UINT32_MAX] with a scalar-built ult mask and
+; v128.bitselect, then both halves are narrowed into one <4 x i32>.
+define <4 x i32> @utest_f32i32(<4 x float> %x) {
+; CHECK-LABEL: utest_f32i32:
+; CHECK: .functype utest_f32i32 (v128) -> (v128)
+; CHECK-NEXT: .local i64, i64, v128
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f32x4.extract_lane 0
+; CHECK-NEXT: i64.trunc_sat_f32_u
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f32x4.extract_lane 1
+; CHECK-NEXT: i64.trunc_sat_f32_u
+; CHECK-NEXT: local.tee 2
+; CHECK-NEXT: i64x2.replace_lane 1
+; CHECK-NEXT: v128.const 4294967295, 4294967295
+; CHECK-NEXT: local.tee 3
+; CHECK-NEXT: i64.const -1
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.const 4294967295
+; CHECK-NEXT: i64.lt_u
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: i64.const -1
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 4294967295
+; CHECK-NEXT: i64.lt_u
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i64x2.replace_lane 1
+; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f32x4.extract_lane 2
+; CHECK-NEXT: i64.trunc_sat_f32_u
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f32x4.extract_lane 3
+; CHECK-NEXT: i64.trunc_sat_f32_u
+; CHECK-NEXT: local.tee 2
+; CHECK-NEXT: i64x2.replace_lane 1
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const -1
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.const 4294967295
+; CHECK-NEXT: i64.lt_u
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: i64.const -1
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 4294967295
+; CHECK-NEXT: i64.lt_u
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i64x2.replace_lane 1
+; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptoui <4 x float> %x to <4 x i64>
+ %0 = icmp ult <4 x i64> %conv, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
+ %spec.store.select = select <4 x i1> %0, <4 x i64> %conv, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
+ %conv6 = trunc <4 x i64> %spec.store.select to <4 x i32>
+ ret <4 x i32> %conv6
+}
+
+define <4 x i32> @ustest_f32i32(<4 x float> %x) {
+; CHECK-LABEL: ustest_f32i32:
+; CHECK: .functype ustest_f32i32 (v128) -> (v128)
+; CHECK-NEXT: .local v128, v128, v128
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f32x4.extract_lane 0
+; CHECK-NEXT: i64.trunc_sat_f32_s
+; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f32x4.extract_lane 1
+; CHECK-NEXT: i64.trunc_sat_f32_s
+; CHECK-NEXT: i64x2.replace_lane 1
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: v128.const 4294967295, 4294967295
+; CHECK-NEXT: local.tee 2
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64x2.lt_s
+; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: local.tee 3
+; CHECK-NEXT: v128.const 0, 0
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64x2.gt_s
+; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f32x4.extract_lane 2
+; CHECK-NEXT: i64.trunc_sat_f32_s
+; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f32x4.extract_lane 3
+; CHECK-NEXT: i64.trunc_sat_f32_s
+; CHECK-NEXT: i64x2.replace_lane 1
+; CHECK-NEXT: local.tee 0
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64x2.lt_s
+; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: local.tee 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64x2.gt_s
+; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi <4 x float> %x to <4 x i64>
+ %0 = icmp slt <4 x i64> %conv, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
+ %spec.store.select = select <4 x i1> %0, <4 x i64> %conv, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
+ %1 = icmp sgt <4 x i64> %spec.store.select, zeroinitializer
+ %spec.store.select7 = select <4 x i1> %1, <4 x i64> %spec.store.select, <4 x i64> zeroinitializer
+ %conv6 = trunc <4 x i64> %spec.store.select7 to <4 x i32>
+ ret <4 x i32> %conv6
+}
+
+define <4 x i32> @stest_f16i32(<4 x half> %x) {
+; CHECK-LABEL: stest_f16i32:
+; CHECK: .functype stest_f16i32 (f32, f32, f32, f32) -> (v128)
+; CHECK-NEXT: .local v128, v128, v128
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: local.set 3
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: local.set 2
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: local.set 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: i64.trunc_sat_f32_s
+; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.trunc_sat_f32_s
+; CHECK-NEXT: i64x2.replace_lane 1
+; CHECK-NEXT: local.tee 4
+; CHECK-NEXT: v128.const 2147483647, 2147483647
+; CHECK-NEXT: local.tee 5
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64x2.lt_s
+; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: local.tee 6
+; CHECK-NEXT: v128.const -2147483648, -2147483648
+; CHECK-NEXT: local.tee 4
+; CHECK-NEXT: local.get 6
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64x2.gt_s
+; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.trunc_sat_f32_s
+; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.trunc_sat_f32_s
+; CHECK-NEXT: i64x2.replace_lane 1
+; CHECK-NEXT: local.tee 6
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: local.get 6
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64x2.lt_s
+; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: local.tee 5
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64x2.gt_s
+; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi <4 x half> %x to <4 x i64>
+ %0 = icmp slt <4 x i64> %conv, <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>
+ %spec.store.select = select <4 x i1> %0, <4 x i64> %conv, <4 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>
+ %1 = icmp sgt <4 x i64> %spec.store.select, <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648>
+ %spec.store.select7 = select <4 x i1> %1, <4 x i64> %spec.store.select, <4 x i64> <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648>
+ %conv6 = trunc <4 x i64> %spec.store.select7 to <4 x i32>
+ ret <4 x i32> %conv6
+}
+
+define <4 x i32> @utesth_f16i32(<4 x half> %x) {
+; CHECK-LABEL: utesth_f16i32:
+; CHECK: .functype utesth_f16i32 (f32, f32, f32, f32) -> (v128)
+; CHECK-NEXT: .local i64, i64, v128
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: local.set 3
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: local.set 2
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: local.set 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: i64.trunc_sat_f32_u
+; CHECK-NEXT: local.tee 4
+; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.trunc_sat_f32_u
+; CHECK-NEXT: local.tee 5
+; CHECK-NEXT: i64x2.replace_lane 1
+; CHECK-NEXT: v128.const 4294967295, 4294967295
+; CHECK-NEXT: local.tee 6
+; CHECK-NEXT: i64.const -1
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.const 4294967295
+; CHECK-NEXT: i64.lt_u
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: i64.const -1
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64.const 4294967295
+; CHECK-NEXT: i64.lt_u
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i64x2.replace_lane 1
+; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.trunc_sat_f32_u
+; CHECK-NEXT: local.tee 4
+; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.trunc_sat_f32_u
+; CHECK-NEXT: local.tee 5
+; CHECK-NEXT: i64x2.replace_lane 1
+; CHECK-NEXT: local.get 6
+; CHECK-NEXT: i64.const -1
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.const 4294967295
+; CHECK-NEXT: i64.lt_u
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: i64.const -1
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64.const 4294967295
+; CHECK-NEXT: i64.lt_u
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i64x2.replace_lane 1
+; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptoui <4 x half> %x to <4 x i64>
+ %0 = icmp ult <4 x i64> %conv, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
+ %spec.store.select = select <4 x i1> %0, <4 x i64> %conv, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
+ %conv6 = trunc <4 x i64> %spec.store.select to <4 x i32>
+ ret <4 x i32> %conv6
+}
+
+define <4 x i32> @ustest_f16i32(<4 x half> %x) {
+; CHECK-LABEL: ustest_f16i32:
+; CHECK: .functype ustest_f16i32 (f32, f32, f32, f32) -> (v128)
+; CHECK-NEXT: .local v128, v128, v128
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: local.set 3
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: local.set 2
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: local.set 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: i64.trunc_sat_f32_s
+; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.trunc_sat_f32_s
+; CHECK-NEXT: i64x2.replace_lane 1
+; CHECK-NEXT: local.tee 4
+; CHECK-NEXT: v128.const 4294967295, 4294967295
+; CHECK-NEXT: local.tee 5
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64x2.lt_s
+; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: local.tee 6
+; CHECK-NEXT: v128.const 0, 0
+; CHECK-NEXT: local.tee 4
+; CHECK-NEXT: local.get 6
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64x2.gt_s
+; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.trunc_sat_f32_s
+; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.trunc_sat_f32_s
+; CHECK-NEXT: i64x2.replace_lane 1
+; CHECK-NEXT: local.tee 6
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: local.get 6
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64x2.lt_s
+; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: local.tee 5
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64x2.gt_s
+; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi <4 x half> %x to <4 x i64>
+ %0 = icmp slt <4 x i64> %conv, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
+ %spec.store.select = select <4 x i1> %0, <4 x i64> %conv, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
+ %1 = icmp sgt <4 x i64> %spec.store.select, zeroinitializer
+ %spec.store.select7 = select <4 x i1> %1, <4 x i64> %spec.store.select, <4 x i64> zeroinitializer
+ %conv6 = trunc <4 x i64> %spec.store.select7 to <4 x i32>
+ ret <4 x i32> %conv6
+}
+
+; i16 saturate
+
+define <2 x i16> @stest_f64i16(<2 x double> %x) {
+; CHECK-LABEL: stest_f64i16:
+; CHECK: .functype stest_f64i16 (v128) -> (v128)
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f64x2.extract_lane 0
+; CHECK-NEXT: i32.trunc_sat_f64_s
+; CHECK-NEXT: i32x4.splat
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f64x2.extract_lane 1
+; CHECK-NEXT: i32.trunc_sat_f64_s
+; CHECK-NEXT: i32x4.replace_lane 1
+; CHECK-NEXT: v128.const 32767, 32767, 0, 0
+; CHECK-NEXT: i32x4.min_s
+; CHECK-NEXT: v128.const -32768, -32768, 0, 0
+; CHECK-NEXT: i32x4.max_s
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi <2 x double> %x to <2 x i32>
+ %0 = icmp slt <2 x i32> %conv, <i32 32767, i32 32767>
+ %spec.store.select = select <2 x i1> %0, <2 x i32> %conv, <2 x i32> <i32 32767, i32 32767>
+ %1 = icmp sgt <2 x i32> %spec.store.select, <i32 -32768, i32 -32768>
+ %spec.store.select7 = select <2 x i1> %1, <2 x i32> %spec.store.select, <2 x i32> <i32 -32768, i32 -32768>
+ %conv6 = trunc <2 x i32> %spec.store.select7 to <2 x i16>
+ ret <2 x i16> %conv6
+}
+
+define <2 x i16> @utest_f64i16(<2 x double> %x) {
+; CHECK-LABEL: utest_f64i16:
+; CHECK: .functype utest_f64i16 (v128) -> (v128)
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f64x2.extract_lane 0
+; CHECK-NEXT: i32.trunc_sat_f64_u
+; CHECK-NEXT: i32x4.splat
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f64x2.extract_lane 1
+; CHECK-NEXT: i32.trunc_sat_f64_u
+; CHECK-NEXT: i32x4.replace_lane 1
+; CHECK-NEXT: v128.const 65535, 65535, 0, 0
+; CHECK-NEXT: i32x4.min_u
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptoui <2 x double> %x to <2 x i32>
+ %0 = icmp ult <2 x i32> %conv, <i32 65535, i32 65535>
+ %spec.store.select = select <2 x i1> %0, <2 x i32> %conv, <2 x i32> <i32 65535, i32 65535>
+ %conv6 = trunc <2 x i32> %spec.store.select to <2 x i16>
+ ret <2 x i16> %conv6
+}
+
+define <2 x i16> @ustest_f64i16(<2 x double> %x) {
+; CHECK-LABEL: ustest_f64i16:
+; CHECK: .functype ustest_f64i16 (v128) -> (v128)
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f64x2.extract_lane 0
+; CHECK-NEXT: i32.trunc_sat_f64_s
+; CHECK-NEXT: i32x4.splat
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f64x2.extract_lane 1
+; CHECK-NEXT: i32.trunc_sat_f64_s
+; CHECK-NEXT: i32x4.replace_lane 1
+; CHECK-NEXT: v128.const 65535, 65535, 0, 0
+; CHECK-NEXT: i32x4.min_s
+; CHECK-NEXT: v128.const 0, 0, 0, 0
+; CHECK-NEXT: i32x4.max_s
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi <2 x double> %x to <2 x i32>
+ %0 = icmp slt <2 x i32> %conv, <i32 65535, i32 65535>
+ %spec.store.select = select <2 x i1> %0, <2 x i32> %conv, <2 x i32> <i32 65535, i32 65535>
+ %1 = icmp sgt <2 x i32> %spec.store.select, zeroinitializer
+ %spec.store.select7 = select <2 x i1> %1, <2 x i32> %spec.store.select, <2 x i32> zeroinitializer
+ %conv6 = trunc <2 x i32> %spec.store.select7 to <2 x i16>
+ ret <2 x i16> %conv6
+}
+
+define <4 x i16> @stest_f32i16(<4 x float> %x) {
+; CHECK-LABEL: stest_f32i16:
+; CHECK: .functype stest_f32i16 (v128) -> (v128)
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32x4.trunc_sat_f32x4_s
+; CHECK-NEXT: v128.const 32767, 32767, 32767, 32767
+; CHECK-NEXT: i32x4.min_s
+; CHECK-NEXT: v128.const -32768, -32768, -32768, -32768
+; CHECK-NEXT: i32x4.max_s
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 8, 9, 12, 13, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi <4 x float> %x to <4 x i32>
+ %0 = icmp slt <4 x i32> %conv, <i32 32767, i32 32767, i32 32767, i32 32767>
+ %spec.store.select = select <4 x i1> %0, <4 x i32> %conv, <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767>
+ %1 = icmp sgt <4 x i32> %spec.store.select, <i32 -32768, i32 -32768, i32 -32768, i32 -32768>
+ %spec.store.select7 = select <4 x i1> %1, <4 x i32> %spec.store.select, <4 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768>
+ %conv6 = trunc <4 x i32> %spec.store.select7 to <4 x i16>
+ ret <4 x i16> %conv6
+}
+
+define <4 x i16> @utest_f32i16(<4 x float> %x) {
+; CHECK-LABEL: utest_f32i16:
+; CHECK: .functype utest_f32i16 (v128) -> (v128)
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32x4.trunc_sat_f32x4_u
+; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535
+; CHECK-NEXT: i32x4.min_u
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 8, 9, 12, 13, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptoui <4 x float> %x to <4 x i32>
+ %0 = icmp ult <4 x i32> %conv, <i32 65535, i32 65535, i32 65535, i32 65535>
+ %spec.store.select = select <4 x i1> %0, <4 x i32> %conv, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>
+ %conv6 = trunc <4 x i32> %spec.store.select to <4 x i16>
+ ret <4 x i16> %conv6
+}
+
+define <4 x i16> @ustest_f32i16(<4 x float> %x) {
+; CHECK-LABEL: ustest_f32i16:
+; CHECK: .functype ustest_f32i16 (v128) -> (v128)
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32x4.trunc_sat_f32x4_s
+; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535
+; CHECK-NEXT: i32x4.min_s
+; CHECK-NEXT: v128.const 0, 0, 0, 0
+; CHECK-NEXT: i32x4.max_s
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 8, 9, 12, 13, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi <4 x float> %x to <4 x i32>
+ %0 = icmp slt <4 x i32> %conv, <i32 65535, i32 65535, i32 65535, i32 65535>
+ %spec.store.select = select <4 x i1> %0, <4 x i32> %conv, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>
+ %1 = icmp sgt <4 x i32> %spec.store.select, zeroinitializer
+ %spec.store.select7 = select <4 x i1> %1, <4 x i32> %spec.store.select, <4 x i32> zeroinitializer
+ %conv6 = trunc <4 x i32> %spec.store.select7 to <4 x i16>
+ ret <4 x i16> %conv6
+}
+
+define <8 x i16> @stest_f16i16(<8 x half> %x) {
+; CHECK-LABEL: stest_f16i16:
+; CHECK: .functype stest_f16i16 (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128)
+; CHECK-NEXT: .local v128, v128
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: local.set 5
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: local.set 4
+; CHECK-NEXT: local.get 6
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: local.set 6
+; CHECK-NEXT: local.get 7
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: local.set 7
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: local.set 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: i32.trunc_sat_f32_s
+; CHECK-NEXT: i32x4.splat
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.trunc_sat_f32_s
+; CHECK-NEXT: i32x4.replace_lane 1
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: i32.trunc_sat_f32_s
+; CHECK-NEXT: i32x4.replace_lane 2
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: i32.trunc_sat_f32_s
+; CHECK-NEXT: i32x4.replace_lane 3
+; CHECK-NEXT: v128.const 32767, 32767, 32767, 32767
+; CHECK-NEXT: local.tee 8
+; CHECK-NEXT: i32x4.min_s
+; CHECK-NEXT: v128.const -32768, -32768, -32768, -32768
+; CHECK-NEXT: local.tee 9
+; CHECK-NEXT: i32x4.max_s
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i32.trunc_sat_f32_s
+; CHECK-NEXT: i32x4.splat
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i32.trunc_sat_f32_s
+; CHECK-NEXT: i32x4.replace_lane 1
+; CHECK-NEXT: local.get 6
+; CHECK-NEXT: i32.trunc_sat_f32_s
+; CHECK-NEXT: i32x4.replace_lane 2
+; CHECK-NEXT: local.get 7
+; CHECK-NEXT: i32.trunc_sat_f32_s
+; CHECK-NEXT: i32x4.replace_lane 3
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: i32x4.min_s
+; CHECK-NEXT: local.get 9
+; CHECK-NEXT: i32x4.max_s
+; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi <8 x half> %x to <8 x i32>
+ %0 = icmp slt <8 x i32> %conv, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
+ %spec.store.select = select <8 x i1> %0, <8 x i32> %conv, <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
+ %1 = icmp sgt <8 x i32> %spec.store.select, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
+ %spec.store.select7 = select <8 x i1> %1, <8 x i32> %spec.store.select, <8 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
+ %conv6 = trunc <8 x i32> %spec.store.select7 to <8 x i16>
+ ret <8 x i16> %conv6
+}
+
+define <8 x i16> @utesth_f16i16(<8 x half> %x) {
+; CHECK-LABEL: utesth_f16i16:
+; CHECK: .functype utesth_f16i16 (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128)
+; CHECK-NEXT: .local v128
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: local.set 5
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: local.set 4
+; CHECK-NEXT: local.get 6
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: local.set 6
+; CHECK-NEXT: local.get 7
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: local.set 7
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: local.set 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: i32.trunc_sat_f32_u
+; CHECK-NEXT: i32x4.splat
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.trunc_sat_f32_u
+; CHECK-NEXT: i32x4.replace_lane 1
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: i32.trunc_sat_f32_u
+; CHECK-NEXT: i32x4.replace_lane 2
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: i32.trunc_sat_f32_u
+; CHECK-NEXT: i32x4.replace_lane 3
+; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535
+; CHECK-NEXT: local.tee 8
+; CHECK-NEXT: i32x4.min_u
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i32.trunc_sat_f32_u
+; CHECK-NEXT: i32x4.splat
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i32.trunc_sat_f32_u
+; CHECK-NEXT: i32x4.replace_lane 1
+; CHECK-NEXT: local.get 6
+; CHECK-NEXT: i32.trunc_sat_f32_u
+; CHECK-NEXT: i32x4.replace_lane 2
+; CHECK-NEXT: local.get 7
+; CHECK-NEXT: i32.trunc_sat_f32_u
+; CHECK-NEXT: i32x4.replace_lane 3
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: i32x4.min_u
+; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptoui <8 x half> %x to <8 x i32>
+ %0 = icmp ult <8 x i32> %conv, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
+ %spec.store.select = select <8 x i1> %0, <8 x i32> %conv, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
+ %conv6 = trunc <8 x i32> %spec.store.select to <8 x i16>
+ ret <8 x i16> %conv6
+}
+
+define <8 x i16> @ustest_f16i16(<8 x half> %x) {
+; CHECK-LABEL: ustest_f16i16:
+; CHECK: .functype ustest_f16i16 (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128)
+; CHECK-NEXT: .local v128, v128
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: local.set 5
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: local.set 4
+; CHECK-NEXT: local.get 6
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: local.set 6
+; CHECK-NEXT: local.get 7
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: local.set 7
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: local.set 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: i32.trunc_sat_f32_s
+; CHECK-NEXT: i32x4.splat
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.trunc_sat_f32_s
+; CHECK-NEXT: i32x4.replace_lane 1
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: i32.trunc_sat_f32_s
+; CHECK-NEXT: i32x4.replace_lane 2
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: i32.trunc_sat_f32_s
+; CHECK-NEXT: i32x4.replace_lane 3
+; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535
+; CHECK-NEXT: local.tee 8
+; CHECK-NEXT: i32x4.min_s
+; CHECK-NEXT: v128.const 0, 0, 0, 0
+; CHECK-NEXT: local.tee 9
+; CHECK-NEXT: i32x4.max_s
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i32.trunc_sat_f32_s
+; CHECK-NEXT: i32x4.splat
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i32.trunc_sat_f32_s
+; CHECK-NEXT: i32x4.replace_lane 1
+; CHECK-NEXT: local.get 6
+; CHECK-NEXT: i32.trunc_sat_f32_s
+; CHECK-NEXT: i32x4.replace_lane 2
+; CHECK-NEXT: local.get 7
+; CHECK-NEXT: i32.trunc_sat_f32_s
+; CHECK-NEXT: i32x4.replace_lane 3
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: i32x4.min_s
+; CHECK-NEXT: local.get 9
+; CHECK-NEXT: i32x4.max_s
+; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi <8 x half> %x to <8 x i32>
+ %0 = icmp slt <8 x i32> %conv, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
+ %spec.store.select = select <8 x i1> %0, <8 x i32> %conv, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
+ %1 = icmp sgt <8 x i32> %spec.store.select, zeroinitializer
+ %spec.store.select7 = select <8 x i1> %1, <8 x i32> %spec.store.select, <8 x i32> zeroinitializer
+ %conv6 = trunc <8 x i32> %spec.store.select7 to <8 x i16>
+ ret <8 x i16> %conv6
+}
+
+; i64 saturate
+
+define <2 x i64> @stest_f64i64(<2 x double> %x) {
+; CHECK-LABEL: stest_f64i64:
+; CHECK: .functype stest_f64i64 (v128) -> (v128)
+; CHECK-NEXT: .local i32, i64, i64, i64, i64
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 32
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f64x2.extract_lane 1
+; CHECK-NEXT: call __fixdfti
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f64x2.extract_lane 0
+; CHECK-NEXT: call __fixdfti
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i32.const 8
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 2
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.load 16
+; CHECK-NEXT: local.set 3
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 8
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 4
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 5
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 32
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64.const 9223372036854775807
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64.const 9223372036854775807
+; CHECK-NEXT: i64.lt_u
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: i64.lt_s
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.eqz
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 5
+; CHECK-NEXT: i64.const -9223372036854775808
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64.const -9223372036854775808
+; CHECK-NEXT: i64.gt_u
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 4
+; CHECK-NEXT: i64.const -1
+; CHECK-NEXT: i64.gt_s
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.const -1
+; CHECK-NEXT: i64.eq
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 9223372036854775807
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 9223372036854775807
+; CHECK-NEXT: i64.lt_u
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: i64.lt_s
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.eqz
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 4
+; CHECK-NEXT: i64.const -9223372036854775808
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.const -9223372036854775808
+; CHECK-NEXT: i64.gt_u
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 2
+; CHECK-NEXT: i64.const -1
+; CHECK-NEXT: i64.gt_s
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const -1
+; CHECK-NEXT: i64.eq
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i64x2.replace_lane 1
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi <2 x double> %x to <2 x i128>
+ %0 = icmp slt <2 x i128> %conv, <i128 9223372036854775807, i128 9223372036854775807>
+ %spec.store.select = select <2 x i1> %0, <2 x i128> %conv, <2 x i128> <i128 9223372036854775807, i128 9223372036854775807>
+ %1 = icmp sgt <2 x i128> %spec.store.select, <i128 -9223372036854775808, i128 -9223372036854775808>
+ %spec.store.select7 = select <2 x i1> %1, <2 x i128> %spec.store.select, <2 x i128> <i128 -9223372036854775808, i128 -9223372036854775808>
+ %conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+define <2 x i64> @utest_f64i64(<2 x double> %x) {
+; CHECK-LABEL: utest_f64i64:
+; CHECK: .functype utest_f64i64 (v128) -> (v128)
+; CHECK-NEXT: .local i32, i64, i64, i64, i64
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 32
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f64x2.extract_lane 1
+; CHECK-NEXT: call __fixunsdfti
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f64x2.extract_lane 0
+; CHECK-NEXT: call __fixunsdfti
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i32.const 8
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 2
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.load 16
+; CHECK-NEXT: local.set 3
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 8
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 4
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 5
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 32
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.eqz
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.eqz
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i64x2.replace_lane 1
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptoui <2 x double> %x to <2 x i128>
+ %0 = icmp ult <2 x i128> %conv, <i128 18446744073709551616, i128 18446744073709551616>
+ %spec.store.select = select <2 x i1> %0, <2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>
+ %conv6 = trunc <2 x i128> %spec.store.select to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+define <2 x i64> @ustest_f64i64(<2 x double> %x) {
+; CHECK-LABEL: ustest_f64i64:
+; CHECK: .functype ustest_f64i64 (v128) -> (v128)
+; CHECK-NEXT: .local i32, i64, i64, i64, i64
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 32
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f64x2.extract_lane 1
+; CHECK-NEXT: call __fixdfti
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f64x2.extract_lane 0
+; CHECK-NEXT: call __fixdfti
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i32.const 8
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 2
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.load 16
+; CHECK-NEXT: local.set 3
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 8
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 4
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 5
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 32
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: i64.lt_s
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 5
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: i64.ne
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 4
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: i64.gt_s
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.eqz
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: i64.lt_s
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 4
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: i64.ne
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 2
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: i64.gt_s
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.eqz
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i64x2.replace_lane 1
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi <2 x double> %x to <2 x i128>
+ %0 = icmp slt <2 x i128> %conv, <i128 18446744073709551616, i128 18446744073709551616>
+ %spec.store.select = select <2 x i1> %0, <2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>
+ %1 = icmp sgt <2 x i128> %spec.store.select, zeroinitializer
+ %spec.store.select7 = select <2 x i1> %1, <2 x i128> %spec.store.select, <2 x i128> zeroinitializer
+ %conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+define <2 x i64> @stest_f32i64(<2 x float> %x) {
+; CHECK-LABEL: stest_f32i64:
+; CHECK: .functype stest_f32i64 (v128) -> (v128)
+; CHECK-NEXT: .local i32, i64, i64, i64, i64
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 32
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f32x4.extract_lane 1
+; CHECK-NEXT: call __fixsfti
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f32x4.extract_lane 0
+; CHECK-NEXT: call __fixsfti
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i32.const 8
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 2
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.load 16
+; CHECK-NEXT: local.set 3
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 8
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 4
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 5
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 32
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64.const 9223372036854775807
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64.const 9223372036854775807
+; CHECK-NEXT: i64.lt_u
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: i64.lt_s
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.eqz
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 5
+; CHECK-NEXT: i64.const -9223372036854775808
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64.const -9223372036854775808
+; CHECK-NEXT: i64.gt_u
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 4
+; CHECK-NEXT: i64.const -1
+; CHECK-NEXT: i64.gt_s
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.const -1
+; CHECK-NEXT: i64.eq
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 9223372036854775807
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 9223372036854775807
+; CHECK-NEXT: i64.lt_u
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: i64.lt_s
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.eqz
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 4
+; CHECK-NEXT: i64.const -9223372036854775808
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.const -9223372036854775808
+; CHECK-NEXT: i64.gt_u
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 2
+; CHECK-NEXT: i64.const -1
+; CHECK-NEXT: i64.gt_s
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const -1
+; CHECK-NEXT: i64.eq
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i64x2.replace_lane 1
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi <2 x float> %x to <2 x i128>
+ %0 = icmp slt <2 x i128> %conv, <i128 9223372036854775807, i128 9223372036854775807>
+ %spec.store.select = select <2 x i1> %0, <2 x i128> %conv, <2 x i128> <i128 9223372036854775807, i128 9223372036854775807>
+ %1 = icmp sgt <2 x i128> %spec.store.select, <i128 -9223372036854775808, i128 -9223372036854775808>
+ %spec.store.select7 = select <2 x i1> %1, <2 x i128> %spec.store.select, <2 x i128> <i128 -9223372036854775808, i128 -9223372036854775808>
+ %conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+define <2 x i64> @utest_f32i64(<2 x float> %x) {
+; CHECK-LABEL: utest_f32i64:
+; CHECK: .functype utest_f32i64 (v128) -> (v128)
+; CHECK-NEXT: .local i32, i64, i64, i64, i64
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 32
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f32x4.extract_lane 1
+; CHECK-NEXT: call __fixunssfti
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f32x4.extract_lane 0
+; CHECK-NEXT: call __fixunssfti
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i32.const 8
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 2
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.load 16
+; CHECK-NEXT: local.set 3
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 8
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 4
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 5
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 32
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.eqz
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.eqz
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i64x2.replace_lane 1
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptoui <2 x float> %x to <2 x i128>
+ %0 = icmp ult <2 x i128> %conv, <i128 18446744073709551616, i128 18446744073709551616>
+ %spec.store.select = select <2 x i1> %0, <2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>
+ %conv6 = trunc <2 x i128> %spec.store.select to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+define <2 x i64> @ustest_f32i64(<2 x float> %x) {
+; CHECK-LABEL: ustest_f32i64:
+; CHECK: .functype ustest_f32i64 (v128) -> (v128)
+; CHECK-NEXT: .local i32, i64, i64, i64, i64
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 32
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f32x4.extract_lane 1
+; CHECK-NEXT: call __fixsfti
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f32x4.extract_lane 0
+; CHECK-NEXT: call __fixsfti
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i32.const 8
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 2
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.load 16
+; CHECK-NEXT: local.set 3
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 8
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 4
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 5
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 32
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: i64.lt_s
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 5
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: i64.ne
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 4
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: i64.gt_s
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.eqz
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: i64.lt_s
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 4
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: i64.ne
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 2
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: i64.gt_s
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.eqz
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i64x2.replace_lane 1
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi <2 x float> %x to <2 x i128>
+ %0 = icmp slt <2 x i128> %conv, <i128 18446744073709551616, i128 18446744073709551616>
+ %spec.store.select = select <2 x i1> %0, <2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>
+ %1 = icmp sgt <2 x i128> %spec.store.select, zeroinitializer
+ %spec.store.select7 = select <2 x i1> %1, <2 x i128> %spec.store.select, <2 x i128> zeroinitializer
+ %conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+define <2 x i64> @stest_f16i64(<2 x half> %x) {
+; CHECK-LABEL: stest_f16i64:
+; CHECK: .functype stest_f16i64 (f32, f32) -> (v128)
+; CHECK-NEXT: .local i32, i64, i64, i64, i64
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 32
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 2
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: call __fixsfti
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: call __fixsfti
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i32.const 8
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 3
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.load 16
+; CHECK-NEXT: local.set 4
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32.const 8
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 5
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 6
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32.const 32
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 6
+; CHECK-NEXT: i64.const 9223372036854775807
+; CHECK-NEXT: local.get 6
+; CHECK-NEXT: i64.const 9223372036854775807
+; CHECK-NEXT: i64.lt_u
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: i64.lt_s
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64.eqz
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: local.tee 2
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 6
+; CHECK-NEXT: i64.const -9223372036854775808
+; CHECK-NEXT: local.get 6
+; CHECK-NEXT: i64.const -9223372036854775808
+; CHECK-NEXT: i64.gt_u
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 5
+; CHECK-NEXT: i64.const -1
+; CHECK-NEXT: i64.gt_s
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64.const -1
+; CHECK-NEXT: i64.eq
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.const 9223372036854775807
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.const 9223372036854775807
+; CHECK-NEXT: i64.lt_u
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: i64.lt_s
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.eqz
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: local.tee 2
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 5
+; CHECK-NEXT: i64.const -9223372036854775808
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64.const -9223372036854775808
+; CHECK-NEXT: i64.gt_u
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 3
+; CHECK-NEXT: i64.const -1
+; CHECK-NEXT: i64.gt_s
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const -1
+; CHECK-NEXT: i64.eq
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i64x2.replace_lane 1
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi <2 x half> %x to <2 x i128>
+ %0 = icmp slt <2 x i128> %conv, <i128 9223372036854775807, i128 9223372036854775807>
+ %spec.store.select = select <2 x i1> %0, <2 x i128> %conv, <2 x i128> <i128 9223372036854775807, i128 9223372036854775807>
+ %1 = icmp sgt <2 x i128> %spec.store.select, <i128 -9223372036854775808, i128 -9223372036854775808>
+ %spec.store.select7 = select <2 x i1> %1, <2 x i128> %spec.store.select, <2 x i128> <i128 -9223372036854775808, i128 -9223372036854775808>
+ %conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+define <2 x i64> @utesth_f16i64(<2 x half> %x) {
+; CHECK-LABEL: utesth_f16i64:
+; CHECK: .functype utesth_f16i64 (f32, f32) -> (v128)
+; CHECK-NEXT: .local i32, i64, i64, i64, i64
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 32
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 2
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: call __fixunssfti
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: call __fixunssfti
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i32.const 8
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 3
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.load 16
+; CHECK-NEXT: local.set 4
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32.const 8
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 5
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 6
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32.const 32
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 6
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64.eqz
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.eqz
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i64x2.replace_lane 1
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptoui <2 x half> %x to <2 x i128>
+ %0 = icmp ult <2 x i128> %conv, <i128 18446744073709551616, i128 18446744073709551616>
+ %spec.store.select = select <2 x i1> %0, <2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>
+ %conv6 = trunc <2 x i128> %spec.store.select to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+define <2 x i64> @ustest_f16i64(<2 x half> %x) {
+; CHECK-LABEL: ustest_f16i64:
+; CHECK: .functype ustest_f16i64 (f32, f32) -> (v128)
+; CHECK-NEXT: .local i32, i64, i64, i64, i64
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 32
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 2
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: call __fixsfti
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: call __fixsfti
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i32.const 8
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 3
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.load 16
+; CHECK-NEXT: local.set 4
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32.const 8
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 5
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 6
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32.const 32
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 6
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: i64.lt_s
+; CHECK-NEXT: local.tee 2
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 6
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 6
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: i64.ne
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 5
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: i64.gt_s
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64.eqz
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: i64.lt_s
+; CHECK-NEXT: local.tee 2
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 5
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: i64.ne
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 3
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: i64.gt_s
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.eqz
+; CHECK-NEXT: i32.select
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i64x2.replace_lane 1
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi <2 x half> %x to <2 x i128>
+ %0 = icmp slt <2 x i128> %conv, <i128 18446744073709551616, i128 18446744073709551616>
+ %spec.store.select = select <2 x i1> %0, <2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>
+ %1 = icmp sgt <2 x i128> %spec.store.select, zeroinitializer
+ %spec.store.select7 = select <2 x i1> %1, <2 x i128> %spec.store.select, <2 x i128> zeroinitializer
+ %conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+
+
+; i32 saturate
+
+define <2 x i32> @stest_f64i32_mm(<2 x double> %x) {
+; CHECK-LABEL: stest_f64i32_mm:
+; CHECK: .functype stest_f64i32_mm (v128) -> (v128)
+; CHECK-NEXT: .local v128
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f64x2.extract_lane 0
+; CHECK-NEXT: i64.trunc_sat_f64_s
+; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f64x2.extract_lane 1
+; CHECK-NEXT: i64.trunc_sat_f64_s
+; CHECK-NEXT: i64x2.replace_lane 1
+; CHECK-NEXT: local.tee 0
+; CHECK-NEXT: v128.const 2147483647, 2147483647
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64x2.lt_s
+; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: local.tee 0
+; CHECK-NEXT: v128.const -2147483648, -2147483648
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64x2.gt_s
+; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi <2 x double> %x to <2 x i64>
+ %spec.store.select = call <2 x i64> @llvm.smin.v2i64(<2 x i64> %conv, <2 x i64> <i64 2147483647, i64 2147483647>)
+ %spec.store.select7 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> %spec.store.select, <2 x i64> <i64 -2147483648, i64 -2147483648>)
+ %conv6 = trunc <2 x i64> %spec.store.select7 to <2 x i32>
+ ret <2 x i32> %conv6
+}
+
+define <2 x i32> @utest_f64i32_mm(<2 x double> %x) {
+; CHECK-LABEL: utest_f64i32_mm:
+; CHECK: .functype utest_f64i32_mm (v128) -> (v128)
+; CHECK-NEXT: .local i64, i64
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f64x2.extract_lane 0
+; CHECK-NEXT: i64.trunc_sat_f64_u
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f64x2.extract_lane 1
+; CHECK-NEXT: i64.trunc_sat_f64_u
+; CHECK-NEXT: local.tee 2
+; CHECK-NEXT: i64x2.replace_lane 1
+; CHECK-NEXT: v128.const 4294967295, 4294967295
+; CHECK-NEXT: i64.const -1
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.const 4294967295
+; CHECK-NEXT: i64.lt_u
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: i64.const -1
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 4294967295
+; CHECK-NEXT: i64.lt_u
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i64x2.replace_lane 1
+; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptoui <2 x double> %x to <2 x i64>
+ %spec.store.select = call <2 x i64> @llvm.umin.v2i64(<2 x i64> %conv, <2 x i64> <i64 4294967295, i64 4294967295>)
+ %conv6 = trunc <2 x i64> %spec.store.select to <2 x i32>
+ ret <2 x i32> %conv6
+}
+
+define <2 x i32> @ustest_f64i32_mm(<2 x double> %x) {
+; CHECK-LABEL: ustest_f64i32_mm:
+; CHECK: .functype ustest_f64i32_mm (v128) -> (v128)
+; CHECK-NEXT: .local v128
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f64x2.extract_lane 0
+; CHECK-NEXT: i64.trunc_sat_f64_s
+; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f64x2.extract_lane 1
+; CHECK-NEXT: i64.trunc_sat_f64_s
+; CHECK-NEXT: i64x2.replace_lane 1
+; CHECK-NEXT: local.tee 0
+; CHECK-NEXT: v128.const 4294967295, 4294967295
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64x2.lt_s
+; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: local.tee 0
+; CHECK-NEXT: v128.const 0, 0
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64x2.gt_s
+; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi <2 x double> %x to <2 x i64>
+ %spec.store.select = call <2 x i64> @llvm.smin.v2i64(<2 x i64> %conv, <2 x i64> <i64 4294967295, i64 4294967295>)
+ %spec.store.select7 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> %spec.store.select, <2 x i64> zeroinitializer)
+ %conv6 = trunc <2 x i64> %spec.store.select7 to <2 x i32>
+ ret <2 x i32> %conv6
+}
+
+define <4 x i32> @stest_f32i32_mm(<4 x float> %x) {
+; CHECK-LABEL: stest_f32i32_mm:
+; CHECK: .functype stest_f32i32_mm (v128) -> (v128)
+; CHECK-NEXT: .local v128, v128, v128
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f32x4.extract_lane 0
+; CHECK-NEXT: i64.trunc_sat_f32_s
+; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f32x4.extract_lane 1
+; CHECK-NEXT: i64.trunc_sat_f32_s
+; CHECK-NEXT: i64x2.replace_lane 1
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: v128.const 2147483647, 2147483647
+; CHECK-NEXT: local.tee 2
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64x2.lt_s
+; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: local.tee 3
+; CHECK-NEXT: v128.const -2147483648, -2147483648
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64x2.gt_s
+; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f32x4.extract_lane 2
+; CHECK-NEXT: i64.trunc_sat_f32_s
+; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f32x4.extract_lane 3
+; CHECK-NEXT: i64.trunc_sat_f32_s
+; CHECK-NEXT: i64x2.replace_lane 1
+; CHECK-NEXT: local.tee 0
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64x2.lt_s
+; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: local.tee 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64x2.gt_s
+; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi <4 x float> %x to <4 x i64>
+ %spec.store.select = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>)
+ %spec.store.select7 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> %spec.store.select, <4 x i64> <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648>)
+ %conv6 = trunc <4 x i64> %spec.store.select7 to <4 x i32>
+ ret <4 x i32> %conv6
+}
+
+define <4 x i32> @utest_f32i32_mm(<4 x float> %x) {
+; CHECK-LABEL: utest_f32i32_mm:
+; CHECK: .functype utest_f32i32_mm (v128) -> (v128)
+; CHECK-NEXT: .local i64, i64, v128
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f32x4.extract_lane 0
+; CHECK-NEXT: i64.trunc_sat_f32_u
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f32x4.extract_lane 1
+; CHECK-NEXT: i64.trunc_sat_f32_u
+; CHECK-NEXT: local.tee 2
+; CHECK-NEXT: i64x2.replace_lane 1
+; CHECK-NEXT: v128.const 4294967295, 4294967295
+; CHECK-NEXT: local.tee 3
+; CHECK-NEXT: i64.const -1
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.const 4294967295
+; CHECK-NEXT: i64.lt_u
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: i64.const -1
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 4294967295
+; CHECK-NEXT: i64.lt_u
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i64x2.replace_lane 1
+; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f32x4.extract_lane 2
+; CHECK-NEXT: i64.trunc_sat_f32_u
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f32x4.extract_lane 3
+; CHECK-NEXT: i64.trunc_sat_f32_u
+; CHECK-NEXT: local.tee 2
+; CHECK-NEXT: i64x2.replace_lane 1
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const -1
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.const 4294967295
+; CHECK-NEXT: i64.lt_u
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: i64.const -1
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 4294967295
+; CHECK-NEXT: i64.lt_u
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i64x2.replace_lane 1
+; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptoui <4 x float> %x to <4 x i64>
+ %spec.store.select = call <4 x i64> @llvm.umin.v4i64(<4 x i64> %conv, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
+ %conv6 = trunc <4 x i64> %spec.store.select to <4 x i32>
+ ret <4 x i32> %conv6
+}
+
+define <4 x i32> @ustest_f32i32_mm(<4 x float> %x) {
+; CHECK-LABEL: ustest_f32i32_mm:
+; CHECK: .functype ustest_f32i32_mm (v128) -> (v128)
+; CHECK-NEXT: .local v128, v128, v128
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f32x4.extract_lane 0
+; CHECK-NEXT: i64.trunc_sat_f32_s
+; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f32x4.extract_lane 1
+; CHECK-NEXT: i64.trunc_sat_f32_s
+; CHECK-NEXT: i64x2.replace_lane 1
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: v128.const 4294967295, 4294967295
+; CHECK-NEXT: local.tee 2
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64x2.lt_s
+; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: local.tee 3
+; CHECK-NEXT: v128.const 0, 0
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64x2.gt_s
+; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f32x4.extract_lane 2
+; CHECK-NEXT: i64.trunc_sat_f32_s
+; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f32x4.extract_lane 3
+; CHECK-NEXT: i64.trunc_sat_f32_s
+; CHECK-NEXT: i64x2.replace_lane 1
+; CHECK-NEXT: local.tee 0
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64x2.lt_s
+; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: local.tee 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64x2.gt_s
+; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi <4 x float> %x to <4 x i64>
+ %spec.store.select = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
+ %spec.store.select7 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> %spec.store.select, <4 x i64> zeroinitializer)
+ %conv6 = trunc <4 x i64> %spec.store.select7 to <4 x i32>
+ ret <4 x i32> %conv6
+}
+
+define <4 x i32> @stest_f16i32_mm(<4 x half> %x) {
+; CHECK-LABEL: stest_f16i32_mm:
+; CHECK: .functype stest_f16i32_mm (f32, f32, f32, f32) -> (v128)
+; CHECK-NEXT: .local v128, v128, v128
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: local.set 3
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: local.set 2
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: local.set 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: i64.trunc_sat_f32_s
+; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.trunc_sat_f32_s
+; CHECK-NEXT: i64x2.replace_lane 1
+; CHECK-NEXT: local.tee 4
+; CHECK-NEXT: v128.const 2147483647, 2147483647
+; CHECK-NEXT: local.tee 5
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64x2.lt_s
+; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: local.tee 6
+; CHECK-NEXT: v128.const -2147483648, -2147483648
+; CHECK-NEXT: local.tee 4
+; CHECK-NEXT: local.get 6
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64x2.gt_s
+; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.trunc_sat_f32_s
+; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.trunc_sat_f32_s
+; CHECK-NEXT: i64x2.replace_lane 1
+; CHECK-NEXT: local.tee 6
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: local.get 6
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64x2.lt_s
+; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: local.tee 5
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64x2.gt_s
+; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi <4 x half> %x to <4 x i64>
+ %spec.store.select = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>)
+ %spec.store.select7 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> %spec.store.select, <4 x i64> <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648>)
+ %conv6 = trunc <4 x i64> %spec.store.select7 to <4 x i32>
+ ret <4 x i32> %conv6
+}
+
+define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
+; Unsigned sat: fptoui <4 x half> -> <4 x i64>, llvm.umin clamp to UINT32_MAX
+; (4294967295), trunc to <4 x i32>. CHECK lines autogenerated for wasm SIMD by
+; update_llc_test_checks.py — regenerate rather than hand-edit.
+; CHECK-LABEL: utesth_f16i32_mm:
+; CHECK: .functype utesth_f16i32_mm (f32, f32, f32, f32) -> (v128)
+; CHECK-NEXT: .local i64, i64, v128
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: local.set 3
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: local.set 2
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: local.set 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: i64.trunc_sat_f32_u
+; CHECK-NEXT: local.tee 4
+; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.trunc_sat_f32_u
+; CHECK-NEXT: local.tee 5
+; CHECK-NEXT: i64x2.replace_lane 1
+; CHECK-NEXT: v128.const 4294967295, 4294967295
+; CHECK-NEXT: local.tee 6
+; CHECK-NEXT: i64.const -1
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.const 4294967295
+; CHECK-NEXT: i64.lt_u
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: i64.const -1
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64.const 4294967295
+; CHECK-NEXT: i64.lt_u
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i64x2.replace_lane 1
+; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.trunc_sat_f32_u
+; CHECK-NEXT: local.tee 4
+; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.trunc_sat_f32_u
+; CHECK-NEXT: local.tee 5
+; CHECK-NEXT: i64x2.replace_lane 1
+; CHECK-NEXT: local.get 6
+; CHECK-NEXT: i64.const -1
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.const 4294967295
+; CHECK-NEXT: i64.lt_u
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: i64.const -1
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64.const 4294967295
+; CHECK-NEXT: i64.lt_u
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i64x2.replace_lane 1
+; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptoui <4 x half> %x to <4 x i64>
+ %spec.store.select = call <4 x i64> @llvm.umin.v4i64(<4 x i64> %conv, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
+ %conv6 = trunc <4 x i64> %spec.store.select to <4 x i32>
+ ret <4 x i32> %conv6
+}
+
+define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) {
+; Unsigned-range sat of a SIGNED convert: fptosi -> i64, smin(UINT32_MAX) then
+; smax(0), trunc to <4 x i32>. Autogenerated CHECK lines — regenerate, don't edit.
+; CHECK-LABEL: ustest_f16i32_mm:
+; CHECK: .functype ustest_f16i32_mm (f32, f32, f32, f32) -> (v128)
+; CHECK-NEXT: .local v128, v128, v128
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: local.set 3
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: local.set 2
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: local.set 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: i64.trunc_sat_f32_s
+; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.trunc_sat_f32_s
+; CHECK-NEXT: i64x2.replace_lane 1
+; CHECK-NEXT: local.tee 4
+; CHECK-NEXT: v128.const 4294967295, 4294967295
+; CHECK-NEXT: local.tee 5
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64x2.lt_s
+; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: local.tee 6
+; CHECK-NEXT: v128.const 0, 0
+; CHECK-NEXT: local.tee 4
+; CHECK-NEXT: local.get 6
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64x2.gt_s
+; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.trunc_sat_f32_s
+; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.trunc_sat_f32_s
+; CHECK-NEXT: i64x2.replace_lane 1
+; CHECK-NEXT: local.tee 6
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: local.get 6
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64x2.lt_s
+; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: local.tee 5
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64x2.gt_s
+; CHECK-NEXT: v128.bitselect
+; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi <4 x half> %x to <4 x i64>
+ %spec.store.select = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
+ %spec.store.select7 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> %spec.store.select, <4 x i64> zeroinitializer)
+ %conv6 = trunc <4 x i64> %spec.store.select7 to <4 x i32>
+ ret <4 x i32> %conv6
+}
+
+; i16 saturate
+
+define <2 x i16> @stest_f64i16_mm(<2 x double> %x) {
+; Signed sat: fptosi <2 x double> -> <2 x i32>, smin/smax clamp to [INT16_MIN,
+; INT16_MAX], trunc to <2 x i16>. Autogenerated CHECK lines — regenerate, don't edit.
+; CHECK-LABEL: stest_f64i16_mm:
+; CHECK: .functype stest_f64i16_mm (v128) -> (v128)
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f64x2.extract_lane 0
+; CHECK-NEXT: i32.trunc_sat_f64_s
+; CHECK-NEXT: i32x4.splat
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f64x2.extract_lane 1
+; CHECK-NEXT: i32.trunc_sat_f64_s
+; CHECK-NEXT: i32x4.replace_lane 1
+; CHECK-NEXT: v128.const 32767, 32767, 0, 0
+; CHECK-NEXT: i32x4.min_s
+; CHECK-NEXT: v128.const -32768, -32768, 0, 0
+; CHECK-NEXT: i32x4.max_s
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi <2 x double> %x to <2 x i32>
+ %spec.store.select = call <2 x i32> @llvm.smin.v2i32(<2 x i32> %conv, <2 x i32> <i32 32767, i32 32767>)
+ %spec.store.select7 = call <2 x i32> @llvm.smax.v2i32(<2 x i32> %spec.store.select, <2 x i32> <i32 -32768, i32 -32768>)
+ %conv6 = trunc <2 x i32> %spec.store.select7 to <2 x i16>
+ ret <2 x i16> %conv6
+}
+
+define <2 x i16> @utest_f64i16_mm(<2 x double> %x) {
+; Unsigned sat: fptoui <2 x double> -> <2 x i32>, umin clamp to UINT16_MAX
+; (65535), trunc to <2 x i16>. Autogenerated CHECK lines — regenerate, don't edit.
+; CHECK-LABEL: utest_f64i16_mm:
+; CHECK: .functype utest_f64i16_mm (v128) -> (v128)
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f64x2.extract_lane 0
+; CHECK-NEXT: i32.trunc_sat_f64_u
+; CHECK-NEXT: i32x4.splat
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f64x2.extract_lane 1
+; CHECK-NEXT: i32.trunc_sat_f64_u
+; CHECK-NEXT: i32x4.replace_lane 1
+; CHECK-NEXT: v128.const 65535, 65535, 0, 0
+; CHECK-NEXT: i32x4.min_u
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptoui <2 x double> %x to <2 x i32>
+ %spec.store.select = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %conv, <2 x i32> <i32 65535, i32 65535>)
+ %conv6 = trunc <2 x i32> %spec.store.select to <2 x i16>
+ ret <2 x i16> %conv6
+}
+
+define <2 x i16> @ustest_f64i16_mm(<2 x double> %x) {
+; Unsigned-range sat of a SIGNED convert: fptosi -> i32, smin(65535) then
+; smax(0), trunc to <2 x i16>. Autogenerated CHECK lines — regenerate, don't edit.
+; CHECK-LABEL: ustest_f64i16_mm:
+; CHECK: .functype ustest_f64i16_mm (v128) -> (v128)
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f64x2.extract_lane 0
+; CHECK-NEXT: i32.trunc_sat_f64_s
+; CHECK-NEXT: i32x4.splat
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f64x2.extract_lane 1
+; CHECK-NEXT: i32.trunc_sat_f64_s
+; CHECK-NEXT: i32x4.replace_lane 1
+; CHECK-NEXT: v128.const 65535, 65535, 0, 0
+; CHECK-NEXT: i32x4.min_s
+; CHECK-NEXT: v128.const 0, 0, 0, 0
+; CHECK-NEXT: i32x4.max_s
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi <2 x double> %x to <2 x i32>
+ %spec.store.select = call <2 x i32> @llvm.smin.v2i32(<2 x i32> %conv, <2 x i32> <i32 65535, i32 65535>)
+ %spec.store.select7 = call <2 x i32> @llvm.smax.v2i32(<2 x i32> %spec.store.select, <2 x i32> zeroinitializer)
+ %conv6 = trunc <2 x i32> %spec.store.select7 to <2 x i16>
+ ret <2 x i16> %conv6
+}
+
+define <4 x i16> @stest_f32i16_mm(<4 x float> %x) {
+; Signed sat: fptosi <4 x float> -> <4 x i32>, smin/smax clamp to [INT16_MIN,
+; INT16_MAX], trunc to <4 x i16>. Autogenerated CHECK lines — regenerate, don't edit.
+; CHECK-LABEL: stest_f32i16_mm:
+; CHECK: .functype stest_f32i16_mm (v128) -> (v128)
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32x4.trunc_sat_f32x4_s
+; CHECK-NEXT: v128.const 32767, 32767, 32767, 32767
+; CHECK-NEXT: i32x4.min_s
+; CHECK-NEXT: v128.const -32768, -32768, -32768, -32768
+; CHECK-NEXT: i32x4.max_s
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 8, 9, 12, 13, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi <4 x float> %x to <4 x i32>
+ %spec.store.select = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %conv, <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767>)
+ %spec.store.select7 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %spec.store.select, <4 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768>)
+ %conv6 = trunc <4 x i32> %spec.store.select7 to <4 x i16>
+ ret <4 x i16> %conv6
+}
+
+define <4 x i16> @utest_f32i16_mm(<4 x float> %x) {
+; Unsigned sat: fptoui <4 x float> -> <4 x i32>, umin clamp to UINT16_MAX
+; (65535), trunc to <4 x i16>. Autogenerated CHECK lines — regenerate, don't edit.
+; CHECK-LABEL: utest_f32i16_mm:
+; CHECK: .functype utest_f32i16_mm (v128) -> (v128)
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32x4.trunc_sat_f32x4_u
+; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535
+; CHECK-NEXT: i32x4.min_u
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 8, 9, 12, 13, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptoui <4 x float> %x to <4 x i32>
+ %spec.store.select = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %conv, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>)
+ %conv6 = trunc <4 x i32> %spec.store.select to <4 x i16>
+ ret <4 x i16> %conv6
+}
+
+define <4 x i16> @ustest_f32i16_mm(<4 x float> %x) {
+; Unsigned-range sat of a SIGNED convert: fptosi -> i32, smin(65535) then
+; smax(0), trunc to <4 x i16>. Autogenerated CHECK lines — regenerate, don't edit.
+; CHECK-LABEL: ustest_f32i16_mm:
+; CHECK: .functype ustest_f32i16_mm (v128) -> (v128)
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32x4.trunc_sat_f32x4_s
+; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535
+; CHECK-NEXT: i32x4.min_s
+; CHECK-NEXT: v128.const 0, 0, 0, 0
+; CHECK-NEXT: i32x4.max_s
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 8, 9, 12, 13, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi <4 x float> %x to <4 x i32>
+ %spec.store.select = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %conv, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>)
+ %spec.store.select7 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %spec.store.select, <4 x i32> zeroinitializer)
+ %conv6 = trunc <4 x i32> %spec.store.select7 to <4 x i16>
+ ret <4 x i16> %conv6
+}
+
+define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
+; Signed sat: fptosi <8 x half> -> <8 x i32>, smin/smax clamp to [INT16_MIN,
+; INT16_MAX], trunc to <8 x i16>. Autogenerated CHECK lines — regenerate, don't edit.
+; CHECK-LABEL: stest_f16i16_mm:
+; CHECK: .functype stest_f16i16_mm (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128)
+; CHECK-NEXT: .local v128, v128
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: local.set 5
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: local.set 4
+; CHECK-NEXT: local.get 6
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: local.set 6
+; CHECK-NEXT: local.get 7
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: local.set 7
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: local.set 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: i32.trunc_sat_f32_s
+; CHECK-NEXT: i32x4.splat
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.trunc_sat_f32_s
+; CHECK-NEXT: i32x4.replace_lane 1
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: i32.trunc_sat_f32_s
+; CHECK-NEXT: i32x4.replace_lane 2
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: i32.trunc_sat_f32_s
+; CHECK-NEXT: i32x4.replace_lane 3
+; CHECK-NEXT: v128.const 32767, 32767, 32767, 32767
+; CHECK-NEXT: local.tee 8
+; CHECK-NEXT: i32x4.min_s
+; CHECK-NEXT: v128.const -32768, -32768, -32768, -32768
+; CHECK-NEXT: local.tee 9
+; CHECK-NEXT: i32x4.max_s
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i32.trunc_sat_f32_s
+; CHECK-NEXT: i32x4.splat
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i32.trunc_sat_f32_s
+; CHECK-NEXT: i32x4.replace_lane 1
+; CHECK-NEXT: local.get 6
+; CHECK-NEXT: i32.trunc_sat_f32_s
+; CHECK-NEXT: i32x4.replace_lane 2
+; CHECK-NEXT: local.get 7
+; CHECK-NEXT: i32.trunc_sat_f32_s
+; CHECK-NEXT: i32x4.replace_lane 3
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: i32x4.min_s
+; CHECK-NEXT: local.get 9
+; CHECK-NEXT: i32x4.max_s
+; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi <8 x half> %x to <8 x i32>
+ %spec.store.select = call <8 x i32> @llvm.smin.v8i32(<8 x i32> %conv, <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>)
+ %spec.store.select7 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> %spec.store.select, <8 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>)
+ %conv6 = trunc <8 x i32> %spec.store.select7 to <8 x i16>
+ ret <8 x i16> %conv6
+}
+
+define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
+; Unsigned sat: fptoui <8 x half> -> <8 x i32>, umin clamp to UINT16_MAX
+; (65535), trunc to <8 x i16>. Autogenerated CHECK lines — regenerate, don't edit.
+; CHECK-LABEL: utesth_f16i16_mm:
+; CHECK: .functype utesth_f16i16_mm (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128)
+; CHECK-NEXT: .local v128
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: local.set 5
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: local.set 4
+; CHECK-NEXT: local.get 6
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: local.set 6
+; CHECK-NEXT: local.get 7
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: local.set 7
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: local.set 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: i32.trunc_sat_f32_u
+; CHECK-NEXT: i32x4.splat
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.trunc_sat_f32_u
+; CHECK-NEXT: i32x4.replace_lane 1
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: i32.trunc_sat_f32_u
+; CHECK-NEXT: i32x4.replace_lane 2
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: i32.trunc_sat_f32_u
+; CHECK-NEXT: i32x4.replace_lane 3
+; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535
+; CHECK-NEXT: local.tee 8
+; CHECK-NEXT: i32x4.min_u
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i32.trunc_sat_f32_u
+; CHECK-NEXT: i32x4.splat
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i32.trunc_sat_f32_u
+; CHECK-NEXT: i32x4.replace_lane 1
+; CHECK-NEXT: local.get 6
+; CHECK-NEXT: i32.trunc_sat_f32_u
+; CHECK-NEXT: i32x4.replace_lane 2
+; CHECK-NEXT: local.get 7
+; CHECK-NEXT: i32.trunc_sat_f32_u
+; CHECK-NEXT: i32x4.replace_lane 3
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: i32x4.min_u
+; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptoui <8 x half> %x to <8 x i32>
+ %spec.store.select = call <8 x i32> @llvm.umin.v8i32(<8 x i32> %conv, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>)
+ %conv6 = trunc <8 x i32> %spec.store.select to <8 x i16>
+ ret <8 x i16> %conv6
+}
+
+define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
+; Unsigned-range sat of a SIGNED convert: fptosi -> i32, smin(65535) then
+; smax(0), trunc to <8 x i16>. Autogenerated CHECK lines — regenerate, don't edit.
+; CHECK-LABEL: ustest_f16i16_mm:
+; CHECK: .functype ustest_f16i16_mm (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128)
+; CHECK-NEXT: .local v128, v128
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: local.set 5
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: local.set 4
+; CHECK-NEXT: local.get 6
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: local.set 6
+; CHECK-NEXT: local.get 7
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: local.set 7
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: local.set 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: i32.trunc_sat_f32_s
+; CHECK-NEXT: i32x4.splat
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.trunc_sat_f32_s
+; CHECK-NEXT: i32x4.replace_lane 1
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: i32.trunc_sat_f32_s
+; CHECK-NEXT: i32x4.replace_lane 2
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: i32.trunc_sat_f32_s
+; CHECK-NEXT: i32x4.replace_lane 3
+; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535
+; CHECK-NEXT: local.tee 8
+; CHECK-NEXT: i32x4.min_s
+; CHECK-NEXT: v128.const 0, 0, 0, 0
+; CHECK-NEXT: local.tee 9
+; CHECK-NEXT: i32x4.max_s
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i32.trunc_sat_f32_s
+; CHECK-NEXT: i32x4.splat
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i32.trunc_sat_f32_s
+; CHECK-NEXT: i32x4.replace_lane 1
+; CHECK-NEXT: local.get 6
+; CHECK-NEXT: i32.trunc_sat_f32_s
+; CHECK-NEXT: i32x4.replace_lane 2
+; CHECK-NEXT: local.get 7
+; CHECK-NEXT: i32.trunc_sat_f32_s
+; CHECK-NEXT: i32x4.replace_lane 3
+; CHECK-NEXT: local.get 8
+; CHECK-NEXT: i32x4.min_s
+; CHECK-NEXT: local.get 9
+; CHECK-NEXT: i32x4.max_s
+; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi <8 x half> %x to <8 x i32>
+ %spec.store.select = call <8 x i32> @llvm.smin.v8i32(<8 x i32> %conv, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>)
+ %spec.store.select7 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> %spec.store.select, <8 x i32> zeroinitializer)
+ %conv6 = trunc <8 x i32> %spec.store.select7 to <8 x i16>
+ ret <8 x i16> %conv6
+}
+
+; i64 saturate
+
+define <2 x i64> @stest_f64i64_mm(<2 x double> %x) {
+; Signed sat via i128: fptosi <2 x double> -> <2 x i128> (libcall __fixdfti per
+; lane through a stack temp), smin/smax clamp to [INT64_MIN, INT64_MAX], trunc
+; to <2 x i64>. Autogenerated CHECK lines — regenerate, don't edit.
+; CHECK-LABEL: stest_f64i64_mm:
+; CHECK: .functype stest_f64i64_mm (v128) -> (v128)
+; CHECK-NEXT: .local i32, i64, i64, i64, i64
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 32
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f64x2.extract_lane 1
+; CHECK-NEXT: call __fixdfti
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f64x2.extract_lane 0
+; CHECK-NEXT: call __fixdfti
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i32.const 8
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 2
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.load 16
+; CHECK-NEXT: local.set 3
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 8
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 4
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 5
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 32
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64.const 9223372036854775807
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64.const 9223372036854775807
+; CHECK-NEXT: i64.lt_u
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64.const 9223372036854775807
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: i64.lt_s
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.eqz
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 5
+; CHECK-NEXT: i64.const -9223372036854775808
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64.const -9223372036854775808
+; CHECK-NEXT: i64.gt_u
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64.const -9223372036854775808
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.const 63
+; CHECK-NEXT: i64.shr_s
+; CHECK-NEXT: i64.and
+; CHECK-NEXT: local.tee 4
+; CHECK-NEXT: i64.const -1
+; CHECK-NEXT: i64.gt_s
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.const -1
+; CHECK-NEXT: i64.eq
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 9223372036854775807
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 9223372036854775807
+; CHECK-NEXT: i64.lt_u
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 9223372036854775807
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: i64.lt_s
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.eqz
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 4
+; CHECK-NEXT: i64.const -9223372036854775808
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.const -9223372036854775808
+; CHECK-NEXT: i64.gt_u
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.const -9223372036854775808
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 63
+; CHECK-NEXT: i64.shr_s
+; CHECK-NEXT: i64.and
+; CHECK-NEXT: local.tee 2
+; CHECK-NEXT: i64.const -1
+; CHECK-NEXT: i64.gt_s
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const -1
+; CHECK-NEXT: i64.eq
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i64x2.replace_lane 1
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi <2 x double> %x to <2 x i128>
+ %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 9223372036854775807, i128 9223372036854775807>)
+ %spec.store.select7 = call <2 x i128> @llvm.smax.v2i128(<2 x i128> %spec.store.select, <2 x i128> <i128 -9223372036854775808, i128 -9223372036854775808>)
+ %conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+define <2 x i64> @utest_f64i64_mm(<2 x double> %x) {
+; Unsigned sat via i128: fptoui <2 x double> -> <2 x i128> (libcall
+; __fixunsdfti per lane), umin clamp, trunc to <2 x i64>.
+; NOTE(review): the umin constant 18446744073709551616 is 2^64 — one ABOVE
+; UINT64_MAX — so this is not an exact u64 clamp; confirm whether
+; 18446744073709551615 was intended (CHECK lines would need regeneration).
+; CHECK-LABEL: utest_f64i64_mm:
+; CHECK: .functype utest_f64i64_mm (v128) -> (v128)
+; CHECK-NEXT: .local i32, i64, i64, i64, i64
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 32
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f64x2.extract_lane 1
+; CHECK-NEXT: call __fixunsdfti
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f64x2.extract_lane 0
+; CHECK-NEXT: call __fixunsdfti
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i32.const 8
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 2
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.load 16
+; CHECK-NEXT: local.set 3
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 8
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 4
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 5
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 32
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.eqz
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: i64.eq
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.eqz
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: i64.eq
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i64x2.replace_lane 1
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptoui <2 x double> %x to <2 x i128>
+ %spec.store.select = call <2 x i128> @llvm.umin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)
+ %conv6 = trunc <2 x i128> %spec.store.select to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) {
+; Unsigned-range sat of a SIGNED i128 convert: fptosi -> i128 (__fixdfti per
+; lane), smin then smax(0), trunc to <2 x i64>.
+; NOTE(review): the smin constant 18446744073709551616 is 2^64 — one ABOVE
+; UINT64_MAX — confirm whether 18446744073709551615 was intended (CHECK lines
+; would need regeneration).
+; CHECK-LABEL: ustest_f64i64_mm:
+; CHECK: .functype ustest_f64i64_mm (v128) -> (v128)
+; CHECK-NEXT: .local i32, i64, i64, i64, i64
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 32
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f64x2.extract_lane 1
+; CHECK-NEXT: call __fixdfti
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f64x2.extract_lane 0
+; CHECK-NEXT: call __fixdfti
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i32.const 8
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 2
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.load 16
+; CHECK-NEXT: local.set 3
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 8
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 4
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 5
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 32
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: i64.lt_s
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: i64.eq
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 5
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 4
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: i64.gt_s
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.eqz
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: i64.lt_s
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: i64.eq
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 4
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 2
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: i64.gt_s
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.eqz
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i64x2.replace_lane 1
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi <2 x double> %x to <2 x i128>
+ %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)
+ %spec.store.select7 = call <2 x i128> @llvm.smax.v2i128(<2 x i128> %spec.store.select, <2 x i128> zeroinitializer)
+ %conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+define <2 x i64> @stest_f32i64_mm(<2 x float> %x) {
+; Signed sat via i128: fptosi <2 x float> -> <2 x i128> (libcall __fixsfti per
+; lane through a stack temp), smin/smax clamp to [INT64_MIN, INT64_MAX], trunc
+; to <2 x i64>. Autogenerated CHECK lines — regenerate, don't edit.
+; CHECK-LABEL: stest_f32i64_mm:
+; CHECK: .functype stest_f32i64_mm (v128) -> (v128)
+; CHECK-NEXT: .local i32, i64, i64, i64, i64
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 32
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f32x4.extract_lane 1
+; CHECK-NEXT: call __fixsfti
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f32x4.extract_lane 0
+; CHECK-NEXT: call __fixsfti
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i32.const 8
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 2
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.load 16
+; CHECK-NEXT: local.set 3
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 8
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 4
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 5
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 32
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64.const 9223372036854775807
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64.const 9223372036854775807
+; CHECK-NEXT: i64.lt_u
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64.const 9223372036854775807
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: i64.lt_s
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.eqz
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 5
+; CHECK-NEXT: i64.const -9223372036854775808
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64.const -9223372036854775808
+; CHECK-NEXT: i64.gt_u
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64.const -9223372036854775808
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.const 63
+; CHECK-NEXT: i64.shr_s
+; CHECK-NEXT: i64.and
+; CHECK-NEXT: local.tee 4
+; CHECK-NEXT: i64.const -1
+; CHECK-NEXT: i64.gt_s
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.const -1
+; CHECK-NEXT: i64.eq
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 9223372036854775807
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 9223372036854775807
+; CHECK-NEXT: i64.lt_u
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 9223372036854775807
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: i64.lt_s
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.eqz
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 4
+; CHECK-NEXT: i64.const -9223372036854775808
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.const -9223372036854775808
+; CHECK-NEXT: i64.gt_u
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.const -9223372036854775808
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 63
+; CHECK-NEXT: i64.shr_s
+; CHECK-NEXT: i64.and
+; CHECK-NEXT: local.tee 2
+; CHECK-NEXT: i64.const -1
+; CHECK-NEXT: i64.gt_s
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const -1
+; CHECK-NEXT: i64.eq
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i64x2.replace_lane 1
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi <2 x float> %x to <2 x i128>
+ %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 9223372036854775807, i128 9223372036854775807>)
+ %spec.store.select7 = call <2 x i128> @llvm.smax.v2i128(<2 x i128> %spec.store.select, <2 x i128> <i128 -9223372036854775808, i128 -9223372036854775808>)
+ %conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+define <2 x i64> @utest_f32i64_mm(<2 x float> %x) {
+; Unsigned sat via i128: fptoui <2 x float> -> <2 x i128> (libcall __fixunssfti
+; per lane), umin clamp, trunc to <2 x i64>.
+; NOTE(review): the umin constant 18446744073709551616 is 2^64 — one ABOVE
+; UINT64_MAX — so this is not an exact u64 clamp; confirm whether
+; 18446744073709551615 was intended (CHECK lines would need regeneration).
+; CHECK-LABEL: utest_f32i64_mm:
+; CHECK: .functype utest_f32i64_mm (v128) -> (v128)
+; CHECK-NEXT: .local i32, i64, i64, i64, i64
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 32
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f32x4.extract_lane 1
+; CHECK-NEXT: call __fixunssfti
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f32x4.extract_lane 0
+; CHECK-NEXT: call __fixunssfti
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i32.const 8
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 2
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.load 16
+; CHECK-NEXT: local.set 3
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 8
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 4
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 5
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 32
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.eqz
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: i64.eq
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.eqz
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: i64.eq
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i64x2.replace_lane 1
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptoui <2 x float> %x to <2 x i128>
+ %spec.store.select = call <2 x i128> @llvm.umin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)
+ %conv6 = trunc <2 x i128> %spec.store.select to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) {
+; CHECK-LABEL: ustest_f32i64_mm:
+; CHECK: .functype ustest_f32i64_mm (v128) -> (v128)
+; CHECK-NEXT: .local i32, i64, i64, i64, i64
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 32
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f32x4.extract_lane 1
+; CHECK-NEXT: call __fixsfti
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f32x4.extract_lane 0
+; CHECK-NEXT: call __fixsfti
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i32.const 8
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 2
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.load 16
+; CHECK-NEXT: local.set 3
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 8
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 4
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 5
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32.const 32
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: i64.lt_s
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: i64.eq
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 5
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 4
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: i64.gt_s
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.eqz
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: i64.lt_s
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: i64.eq
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 4
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 2
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: i64.gt_s
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.eqz
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i64x2.replace_lane 1
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi <2 x float> %x to <2 x i128>
+ %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)
+ %spec.store.select7 = call <2 x i128> @llvm.smax.v2i128(<2 x i128> %spec.store.select, <2 x i128> zeroinitializer)
+ %conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+define <2 x i64> @stest_f16i64_mm(<2 x half> %x) {
+; CHECK-LABEL: stest_f16i64_mm:
+; CHECK: .functype stest_f16i64_mm (f32, f32) -> (v128)
+; CHECK-NEXT: .local i32, i64, i64, i64, i64
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 32
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 2
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: call __fixsfti
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: call __fixsfti
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i32.const 8
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 3
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.load 16
+; CHECK-NEXT: local.set 4
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32.const 8
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 5
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 6
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32.const 32
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 6
+; CHECK-NEXT: i64.const 9223372036854775807
+; CHECK-NEXT: local.get 6
+; CHECK-NEXT: i64.const 9223372036854775807
+; CHECK-NEXT: i64.lt_u
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 6
+; CHECK-NEXT: i64.const 9223372036854775807
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: i64.lt_s
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64.eqz
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 6
+; CHECK-NEXT: i64.const -9223372036854775808
+; CHECK-NEXT: local.get 6
+; CHECK-NEXT: i64.const -9223372036854775808
+; CHECK-NEXT: i64.gt_u
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 6
+; CHECK-NEXT: i64.const -9223372036854775808
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64.const 63
+; CHECK-NEXT: i64.shr_s
+; CHECK-NEXT: i64.and
+; CHECK-NEXT: local.tee 5
+; CHECK-NEXT: i64.const -1
+; CHECK-NEXT: i64.gt_s
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64.const -1
+; CHECK-NEXT: i64.eq
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.const 9223372036854775807
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.const 9223372036854775807
+; CHECK-NEXT: i64.lt_u
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.const 9223372036854775807
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: i64.lt_s
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.eqz
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 5
+; CHECK-NEXT: i64.const -9223372036854775808
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64.const -9223372036854775808
+; CHECK-NEXT: i64.gt_u
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64.const -9223372036854775808
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 63
+; CHECK-NEXT: i64.shr_s
+; CHECK-NEXT: i64.and
+; CHECK-NEXT: local.tee 3
+; CHECK-NEXT: i64.const -1
+; CHECK-NEXT: i64.gt_s
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const -1
+; CHECK-NEXT: i64.eq
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i64x2.replace_lane 1
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi <2 x half> %x to <2 x i128>
+ %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 9223372036854775807, i128 9223372036854775807>)
+ %spec.store.select7 = call <2 x i128> @llvm.smax.v2i128(<2 x i128> %spec.store.select, <2 x i128> <i128 -9223372036854775808, i128 -9223372036854775808>)
+ %conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
+; CHECK-LABEL: utesth_f16i64_mm:
+; CHECK: .functype utesth_f16i64_mm (f32, f32) -> (v128)
+; CHECK-NEXT: .local i32, i64, i64, i64, i64
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 32
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 2
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: call __fixunssfti
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: call __fixunssfti
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i32.const 8
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 3
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.load 16
+; CHECK-NEXT: local.set 4
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32.const 8
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 5
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 6
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32.const 32
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 6
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64.eqz
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: i64.eq
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.eqz
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: i64.eq
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i64x2.replace_lane 1
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptoui <2 x half> %x to <2 x i128>
+ %spec.store.select = call <2 x i128> @llvm.umin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)
+ %conv6 = trunc <2 x i128> %spec.store.select to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) {
+; CHECK-LABEL: ustest_f16i64_mm:
+; CHECK: .functype ustest_f16i64_mm (f32, f32) -> (v128)
+; CHECK-NEXT: .local i32, i64, i64, i64, i64
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get __stack_pointer
+; CHECK-NEXT: i32.const 32
+; CHECK-NEXT: i32.sub
+; CHECK-NEXT: local.tee 2
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: call __fixsfti
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: call __truncsfhf2
+; CHECK-NEXT: call __extendhfsf2
+; CHECK-NEXT: call __fixsfti
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32.const 16
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i32.const 8
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 3
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.load 16
+; CHECK-NEXT: local.set 4
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32.const 8
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 5
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.load 0
+; CHECK-NEXT: local.set 6
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i32.const 32
+; CHECK-NEXT: i32.add
+; CHECK-NEXT: global.set __stack_pointer
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 6
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: i64.lt_s
+; CHECK-NEXT: local.tee 2
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: i64.eq
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 6
+; CHECK-NEXT: local.get 6
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 5
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: i64.gt_s
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64.eqz
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i64x2.splat
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 4
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: i64.lt_s
+; CHECK-NEXT: local.tee 2
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: i64.eq
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 5
+; CHECK-NEXT: local.get 5
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.const 1
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.tee 3
+; CHECK-NEXT: i64.const 0
+; CHECK-NEXT: i64.gt_s
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: local.get 3
+; CHECK-NEXT: i64.eqz
+; CHECK-NEXT: i64.select
+; CHECK-NEXT: i64x2.replace_lane 1
+; CHECK-NEXT: # fallthrough-return
+entry:
+ %conv = fptosi <2 x half> %x to <2 x i128>
+ %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)
+ %spec.store.select7 = call <2 x i128> @llvm.smax.v2i128(<2 x i128> %spec.store.select, <2 x i128> zeroinitializer)
+ %conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+declare <2 x i32> @llvm.smin.v2i32(<2 x i32>, <2 x i32>)
+declare <2 x i32> @llvm.smax.v2i32(<2 x i32>, <2 x i32>)
+declare <2 x i32> @llvm.umin.v2i32(<2 x i32>, <2 x i32>)
+declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.umin.v4i32(<4 x i32>, <4 x i32>)
+declare <8 x i32> @llvm.smin.v8i32(<8 x i32>, <8 x i32>)
+declare <8 x i32> @llvm.smax.v8i32(<8 x i32>, <8 x i32>)
+declare <8 x i32> @llvm.umin.v8i32(<8 x i32>, <8 x i32>)
+declare <2 x i64> @llvm.smin.v2i64(<2 x i64>, <2 x i64>)
+declare <2 x i64> @llvm.smax.v2i64(<2 x i64>, <2 x i64>)
+declare <2 x i64> @llvm.umin.v2i64(<2 x i64>, <2 x i64>)
+declare <4 x i64> @llvm.smin.v4i64(<4 x i64>, <4 x i64>)
+declare <4 x i64> @llvm.smax.v4i64(<4 x i64>, <4 x i64>)
+declare <4 x i64> @llvm.umin.v4i64(<4 x i64>, <4 x i64>)
+declare <2 x i128> @llvm.smin.v2i128(<2 x i128>, <2 x i128>)
+declare <2 x i128> @llvm.smax.v2i128(<2 x i128>, <2 x i128>)
+declare <2 x i128> @llvm.umin.v2i128(<2 x i128>, <2 x i128>)
diff --git a/llvm/test/CodeGen/X86/fpclamptosat.ll b/llvm/test/CodeGen/X86/fpclamptosat.ll
new file mode 100644
index 0000000000000..dd21c081f891a
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fpclamptosat.ll
@@ -0,0 +1,1335 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
+
+; i32 saturate
+
+define i32 @stest_f64i32(double %x) {
+; CHECK-LABEL: stest_f64i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cvttsd2si %xmm0, %rax
+; CHECK-NEXT: cmpq $2147483647, %rax # imm = 0x7FFFFFFF
+; CHECK-NEXT: movl $2147483647, %ecx # imm = 0x7FFFFFFF
+; CHECK-NEXT: cmovlq %rax, %rcx
+; CHECK-NEXT: cmpq $-2147483647, %rcx # imm = 0x80000001
+; CHECK-NEXT: movl $-2147483648, %eax # imm = 0x80000000
+; CHECK-NEXT: cmovgel %ecx, %eax
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi double %x to i64
+ %0 = icmp slt i64 %conv, 2147483647
+ %spec.store.select = select i1 %0, i64 %conv, i64 2147483647
+ %1 = icmp sgt i64 %spec.store.select, -2147483648
+ %spec.store.select7 = select i1 %1, i64 %spec.store.select, i64 -2147483648
+ %conv6 = trunc i64 %spec.store.select7 to i32
+ ret i32 %conv6
+}
+
+define i32 @utest_f64i32(double %x) {
+; CHECK-LABEL: utest_f64i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cvttsd2si %xmm0, %rax
+; CHECK-NEXT: movq %rax, %rcx
+; CHECK-NEXT: sarq $63, %rcx
+; CHECK-NEXT: subsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: cvttsd2si %xmm0, %rdx
+; CHECK-NEXT: andq %rcx, %rdx
+; CHECK-NEXT: orq %rax, %rdx
+; CHECK-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF
+; CHECK-NEXT: cmpq %rax, %rdx
+; CHECK-NEXT: movl $-1, %eax
+; CHECK-NEXT: cmovbl %edx, %eax
+; CHECK-NEXT: retq
+entry:
+ %conv = fptoui double %x to i64
+ %0 = icmp ult i64 %conv, 4294967295
+ %spec.store.select = select i1 %0, i64 %conv, i64 4294967295
+ %conv6 = trunc i64 %spec.store.select to i32
+ ret i32 %conv6
+}
+
+define i32 @ustest_f64i32(double %x) {
+; CHECK-LABEL: ustest_f64i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cvttsd2si %xmm0, %rcx
+; CHECK-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF
+; CHECK-NEXT: cmpq %rax, %rcx
+; CHECK-NEXT: cmovlq %rcx, %rax
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: testq %rax, %rax
+; CHECK-NEXT: cmovlel %ecx, %eax
+; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi double %x to i64
+ %0 = icmp slt i64 %conv, 4294967295
+ %spec.store.select = select i1 %0, i64 %conv, i64 4294967295
+ %1 = icmp sgt i64 %spec.store.select, 0
+ %spec.store.select7 = select i1 %1, i64 %spec.store.select, i64 0
+ %conv6 = trunc i64 %spec.store.select7 to i32
+ ret i32 %conv6
+}
+
+define i32 @stest_f32i32(float %x) {
+; CHECK-LABEL: stest_f32i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cvttss2si %xmm0, %rax
+; CHECK-NEXT: cmpq $2147483647, %rax # imm = 0x7FFFFFFF
+; CHECK-NEXT: movl $2147483647, %ecx # imm = 0x7FFFFFFF
+; CHECK-NEXT: cmovlq %rax, %rcx
+; CHECK-NEXT: cmpq $-2147483647, %rcx # imm = 0x80000001
+; CHECK-NEXT: movl $-2147483648, %eax # imm = 0x80000000
+; CHECK-NEXT: cmovgel %ecx, %eax
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi float %x to i64
+ %0 = icmp slt i64 %conv, 2147483647
+ %spec.store.select = select i1 %0, i64 %conv, i64 2147483647
+ %1 = icmp sgt i64 %spec.store.select, -2147483648
+ %spec.store.select7 = select i1 %1, i64 %spec.store.select, i64 -2147483648
+ %conv6 = trunc i64 %spec.store.select7 to i32
+ ret i32 %conv6
+}
+
+define i32 @utest_f32i32(float %x) {
+; CHECK-LABEL: utest_f32i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cvttss2si %xmm0, %rax
+; CHECK-NEXT: movq %rax, %rcx
+; CHECK-NEXT: sarq $63, %rcx
+; CHECK-NEXT: subss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %rdx
+; CHECK-NEXT: andq %rcx, %rdx
+; CHECK-NEXT: orq %rax, %rdx
+; CHECK-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF
+; CHECK-NEXT: cmpq %rax, %rdx
+; CHECK-NEXT: movl $-1, %eax
+; CHECK-NEXT: cmovbl %edx, %eax
+; CHECK-NEXT: retq
+entry:
+ %conv = fptoui float %x to i64
+ %0 = icmp ult i64 %conv, 4294967295
+ %spec.store.select = select i1 %0, i64 %conv, i64 4294967295
+ %conv6 = trunc i64 %spec.store.select to i32
+ ret i32 %conv6
+}
+
+define i32 @ustest_f32i32(float %x) {
+; CHECK-LABEL: ustest_f32i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cvttss2si %xmm0, %rcx
+; CHECK-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF
+; CHECK-NEXT: cmpq %rax, %rcx
+; CHECK-NEXT: cmovlq %rcx, %rax
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: testq %rax, %rax
+; CHECK-NEXT: cmovlel %ecx, %eax
+; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi float %x to i64
+ %0 = icmp slt i64 %conv, 4294967295
+ %spec.store.select = select i1 %0, i64 %conv, i64 4294967295
+ %1 = icmp sgt i64 %spec.store.select, 0
+ %spec.store.select7 = select i1 %1, i64 %spec.store.select, i64 0
+ %conv6 = trunc i64 %spec.store.select7 to i32
+ ret i32 %conv6
+}
+
+define i32 @stest_f16i32(half %x) {
+; CHECK-LABEL: stest_f16i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: movzwl %di, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %rax
+; CHECK-NEXT: cmpq $2147483647, %rax # imm = 0x7FFFFFFF
+; CHECK-NEXT: movl $2147483647, %ecx # imm = 0x7FFFFFFF
+; CHECK-NEXT: cmovlq %rax, %rcx
+; CHECK-NEXT: cmpq $-2147483647, %rcx # imm = 0x80000001
+; CHECK-NEXT: movl $-2147483648, %eax # imm = 0x80000000
+; CHECK-NEXT: cmovgel %ecx, %eax
+; CHECK-NEXT: popq %rcx
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi half %x to i64
+ %0 = icmp slt i64 %conv, 2147483647
+ %spec.store.select = select i1 %0, i64 %conv, i64 2147483647
+ %1 = icmp sgt i64 %spec.store.select, -2147483648
+ %spec.store.select7 = select i1 %1, i64 %spec.store.select, i64 -2147483648
+ %conv6 = trunc i64 %spec.store.select7 to i32
+ ret i32 %conv6
+}
+
+define i32 @utesth_f16i32(half %x) {
+; CHECK-LABEL: utesth_f16i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: movzwl %di, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %rax
+; CHECK-NEXT: movq %rax, %rcx
+; CHECK-NEXT: sarq $63, %rcx
+; CHECK-NEXT: subss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %rdx
+; CHECK-NEXT: andq %rcx, %rdx
+; CHECK-NEXT: orq %rax, %rdx
+; CHECK-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF
+; CHECK-NEXT: cmpq %rax, %rdx
+; CHECK-NEXT: movl $-1, %eax
+; CHECK-NEXT: cmovbl %edx, %eax
+; CHECK-NEXT: popq %rcx
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %conv = fptoui half %x to i64
+ %0 = icmp ult i64 %conv, 4294967295
+ %spec.store.select = select i1 %0, i64 %conv, i64 4294967295
+ %conv6 = trunc i64 %spec.store.select to i32
+ ret i32 %conv6
+}
+
+define i32 @ustest_f16i32(half %x) {
+; CHECK-LABEL: ustest_f16i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: movzwl %di, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %rcx
+; CHECK-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF
+; CHECK-NEXT: cmpq %rax, %rcx
+; CHECK-NEXT: cmovlq %rcx, %rax
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: testq %rax, %rax
+; CHECK-NEXT: cmovlel %ecx, %eax
+; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
+; CHECK-NEXT: popq %rcx
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi half %x to i64
+ %0 = icmp slt i64 %conv, 4294967295
+ %spec.store.select = select i1 %0, i64 %conv, i64 4294967295
+ %1 = icmp sgt i64 %spec.store.select, 0
+ %spec.store.select7 = select i1 %1, i64 %spec.store.select, i64 0
+ %conv6 = trunc i64 %spec.store.select7 to i32
+ ret i32 %conv6
+}
+
+; i16 saturate
+
+define i16 @stest_f64i16(double %x) {
+; CHECK-LABEL: stest_f64i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cvttsd2si %xmm0, %eax
+; CHECK-NEXT: cmpl $32767, %eax # imm = 0x7FFF
+; CHECK-NEXT: movl $32767, %ecx # imm = 0x7FFF
+; CHECK-NEXT: cmovll %eax, %ecx
+; CHECK-NEXT: cmpl $-32767, %ecx # imm = 0x8001
+; CHECK-NEXT: movl $32768, %eax # imm = 0x8000
+; CHECK-NEXT: cmovgel %ecx, %eax
+; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi double %x to i32
+ %0 = icmp slt i32 %conv, 32767
+ %spec.store.select = select i1 %0, i32 %conv, i32 32767
+ %1 = icmp sgt i32 %spec.store.select, -32768
+ %spec.store.select7 = select i1 %1, i32 %spec.store.select, i32 -32768
+ %conv6 = trunc i32 %spec.store.select7 to i16
+ ret i16 %conv6
+}
+
+define i16 @utest_f64i16(double %x) {
+; CHECK-LABEL: utest_f64i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cvttsd2si %xmm0, %rcx
+; CHECK-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
+; CHECK-NEXT: movl $65535, %eax # imm = 0xFFFF
+; CHECK-NEXT: cmovbl %ecx, %eax
+; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT: retq
+entry:
+ %conv = fptoui double %x to i32
+ %0 = icmp ult i32 %conv, 65535
+ %spec.store.select = select i1 %0, i32 %conv, i32 65535
+ %conv6 = trunc i32 %spec.store.select to i16
+ ret i16 %conv6
+}
+
+define i16 @ustest_f64i16(double %x) {
+; CHECK-LABEL: ustest_f64i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cvttsd2si %xmm0, %eax
+; CHECK-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; CHECK-NEXT: movl $65535, %ecx # imm = 0xFFFF
+; CHECK-NEXT: cmovll %eax, %ecx
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: testl %ecx, %ecx
+; CHECK-NEXT: cmovgl %ecx, %eax
+; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi double %x to i32
+ %0 = icmp slt i32 %conv, 65535
+ %spec.store.select = select i1 %0, i32 %conv, i32 65535
+ %1 = icmp sgt i32 %spec.store.select, 0
+ %spec.store.select7 = select i1 %1, i32 %spec.store.select, i32 0
+ %conv6 = trunc i32 %spec.store.select7 to i16
+ ret i16 %conv6
+}
+
+define i16 @stest_f32i16(float %x) {
+; CHECK-LABEL: stest_f32i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cvttss2si %xmm0, %eax
+; CHECK-NEXT: cmpl $32767, %eax # imm = 0x7FFF
+; CHECK-NEXT: movl $32767, %ecx # imm = 0x7FFF
+; CHECK-NEXT: cmovll %eax, %ecx
+; CHECK-NEXT: cmpl $-32767, %ecx # imm = 0x8001
+; CHECK-NEXT: movl $32768, %eax # imm = 0x8000
+; CHECK-NEXT: cmovgel %ecx, %eax
+; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi float %x to i32
+ %0 = icmp slt i32 %conv, 32767
+ %spec.store.select = select i1 %0, i32 %conv, i32 32767
+ %1 = icmp sgt i32 %spec.store.select, -32768
+ %spec.store.select7 = select i1 %1, i32 %spec.store.select, i32 -32768
+ %conv6 = trunc i32 %spec.store.select7 to i16
+ ret i16 %conv6
+}
+
+define i16 @utest_f32i16(float %x) {
+; CHECK-LABEL: utest_f32i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cvttss2si %xmm0, %rcx
+; CHECK-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
+; CHECK-NEXT: movl $65535, %eax # imm = 0xFFFF
+; CHECK-NEXT: cmovbl %ecx, %eax
+; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT: retq
+entry:
+ %conv = fptoui float %x to i32
+ %0 = icmp ult i32 %conv, 65535
+ %spec.store.select = select i1 %0, i32 %conv, i32 65535
+ %conv6 = trunc i32 %spec.store.select to i16
+ ret i16 %conv6
+}
+
+define i16 @ustest_f32i16(float %x) {
+; CHECK-LABEL: ustest_f32i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cvttss2si %xmm0, %eax
+; CHECK-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; CHECK-NEXT: movl $65535, %ecx # imm = 0xFFFF
+; CHECK-NEXT: cmovll %eax, %ecx
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: testl %ecx, %ecx
+; CHECK-NEXT: cmovgl %ecx, %eax
+; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi float %x to i32
+ %0 = icmp slt i32 %conv, 65535
+ %spec.store.select = select i1 %0, i32 %conv, i32 65535
+ %1 = icmp sgt i32 %spec.store.select, 0
+ %spec.store.select7 = select i1 %1, i32 %spec.store.select, i32 0
+ %conv6 = trunc i32 %spec.store.select7 to i16
+ ret i16 %conv6
+}
+
+define i16 @stest_f16i16(half %x) {
+; CHECK-LABEL: stest_f16i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: movzwl %di, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %eax
+; CHECK-NEXT: cmpl $32767, %eax # imm = 0x7FFF
+; CHECK-NEXT: movl $32767, %ecx # imm = 0x7FFF
+; CHECK-NEXT: cmovll %eax, %ecx
+; CHECK-NEXT: cmpl $-32767, %ecx # imm = 0x8001
+; CHECK-NEXT: movl $32768, %eax # imm = 0x8000
+; CHECK-NEXT: cmovgel %ecx, %eax
+; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT: popq %rcx
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi half %x to i32
+ %0 = icmp slt i32 %conv, 32767
+ %spec.store.select = select i1 %0, i32 %conv, i32 32767
+ %1 = icmp sgt i32 %spec.store.select, -32768
+ %spec.store.select7 = select i1 %1, i32 %spec.store.select, i32 -32768
+ %conv6 = trunc i32 %spec.store.select7 to i16
+ ret i16 %conv6
+}
+
+define i16 @utesth_f16i16(half %x) {
+; CHECK-LABEL: utesth_f16i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: movzwl %di, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %rcx
+; CHECK-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
+; CHECK-NEXT: movl $65535, %eax # imm = 0xFFFF
+; CHECK-NEXT: cmovbl %ecx, %eax
+; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT: popq %rcx
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %conv = fptoui half %x to i32
+ %0 = icmp ult i32 %conv, 65535
+ %spec.store.select = select i1 %0, i32 %conv, i32 65535
+ %conv6 = trunc i32 %spec.store.select to i16
+ ret i16 %conv6
+}
+
+define i16 @ustest_f16i16(half %x) {
+; CHECK-LABEL: ustest_f16i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: movzwl %di, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %eax
+; CHECK-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; CHECK-NEXT: movl $65535, %ecx # imm = 0xFFFF
+; CHECK-NEXT: cmovll %eax, %ecx
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: testl %ecx, %ecx
+; CHECK-NEXT: cmovgl %ecx, %eax
+; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT: popq %rcx
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi half %x to i32
+ %0 = icmp slt i32 %conv, 65535
+ %spec.store.select = select i1 %0, i32 %conv, i32 65535
+ %1 = icmp sgt i32 %spec.store.select, 0
+ %spec.store.select7 = select i1 %1, i32 %spec.store.select, i32 0
+ %conv6 = trunc i32 %spec.store.select7 to i16
+ ret i16 %conv6
+}
+
+; i64 saturate
+
+define i64 @stest_f64i64(double %x) {
+; CHECK-LABEL: stest_f64i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: callq __fixdfti@PLT
+; CHECK-NEXT: xorl %esi, %esi
+; CHECK-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF
+; CHECK-NEXT: cmpq %rcx, %rax
+; CHECK-NEXT: movq %rdx, %rdi
+; CHECK-NEXT: sbbq $0, %rdi
+; CHECK-NEXT: cmovlq %rdx, %rsi
+; CHECK-NEXT: cmovlq %rax, %rcx
+; CHECK-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
+; CHECK-NEXT: cmpq %rcx, %rax
+; CHECK-NEXT: movq $-1, %rdx
+; CHECK-NEXT: sbbq %rsi, %rdx
+; CHECK-NEXT: cmovgeq %rax, %rcx
+; CHECK-NEXT: movq %rcx, %rax
+; CHECK-NEXT: popq %rcx
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi double %x to i128
+ %0 = icmp slt i128 %conv, 9223372036854775807
+ %spec.store.select = select i1 %0, i128 %conv, i128 9223372036854775807
+ %1 = icmp sgt i128 %spec.store.select, -9223372036854775808
+ %spec.store.select7 = select i1 %1, i128 %spec.store.select, i128 -9223372036854775808
+ %conv6 = trunc i128 %spec.store.select7 to i64
+ ret i64 %conv6
+}
+
+define i64 @utest_f64i64(double %x) {
+; CHECK-LABEL: utest_f64i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: callq __fixunsdfti@PLT
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: testq %rdx, %rdx
+; CHECK-NEXT: cmovneq %rcx, %rax
+; CHECK-NEXT: popq %rcx
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %conv = fptoui double %x to i128
+ %0 = icmp ult i128 %conv, 18446744073709551616
+ %spec.store.select = select i1 %0, i128 %conv, i128 18446744073709551616
+ %conv6 = trunc i128 %spec.store.select to i64
+ ret i64 %conv6
+}
+
+define i64 @ustest_f64i64(double %x) {
+; CHECK-LABEL: ustest_f64i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: callq __fixdfti@PLT
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: testq %rdx, %rdx
+; CHECK-NEXT: movl $1, %esi
+; CHECK-NEXT: cmovleq %rdx, %rsi
+; CHECK-NEXT: cmovgq %rcx, %rax
+; CHECK-NEXT: movq %rax, %rdx
+; CHECK-NEXT: negq %rdx
+; CHECK-NEXT: movl $0, %edx
+; CHECK-NEXT: sbbq %rsi, %rdx
+; CHECK-NEXT: cmovgeq %rcx, %rax
+; CHECK-NEXT: popq %rcx
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi double %x to i128
+ %0 = icmp slt i128 %conv, 18446744073709551616
+ %spec.store.select = select i1 %0, i128 %conv, i128 18446744073709551616
+ %1 = icmp sgt i128 %spec.store.select, 0
+ %spec.store.select7 = select i1 %1, i128 %spec.store.select, i128 0
+ %conv6 = trunc i128 %spec.store.select7 to i64
+ ret i64 %conv6
+}
+
+define i64 @stest_f32i64(float %x) {
+; CHECK-LABEL: stest_f32i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: callq __fixsfti at PLT
+; CHECK-NEXT: xorl %esi, %esi
+; CHECK-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF
+; CHECK-NEXT: cmpq %rcx, %rax
+; CHECK-NEXT: movq %rdx, %rdi
+; CHECK-NEXT: sbbq $0, %rdi
+; CHECK-NEXT: cmovlq %rdx, %rsi
+; CHECK-NEXT: cmovlq %rax, %rcx
+; CHECK-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
+; CHECK-NEXT: cmpq %rcx, %rax
+; CHECK-NEXT: movq $-1, %rdx
+; CHECK-NEXT: sbbq %rsi, %rdx
+; CHECK-NEXT: cmovgeq %rax, %rcx
+; CHECK-NEXT: movq %rcx, %rax
+; CHECK-NEXT: popq %rcx
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi float %x to i128
+ %0 = icmp slt i128 %conv, 9223372036854775807
+ %spec.store.select = select i1 %0, i128 %conv, i128 9223372036854775807
+ %1 = icmp sgt i128 %spec.store.select, -9223372036854775808
+ %spec.store.select7 = select i1 %1, i128 %spec.store.select, i128 -9223372036854775808
+ %conv6 = trunc i128 %spec.store.select7 to i64
+ ret i64 %conv6
+}
+
+define i64 @utest_f32i64(float %x) {
+; CHECK-LABEL: utest_f32i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: callq __fixunssfti at PLT
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: testq %rdx, %rdx
+; CHECK-NEXT: cmovneq %rcx, %rax
+; CHECK-NEXT: popq %rcx
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %conv = fptoui float %x to i128
+ %0 = icmp ult i128 %conv, 18446744073709551616
+ %spec.store.select = select i1 %0, i128 %conv, i128 18446744073709551616
+ %conv6 = trunc i128 %spec.store.select to i64
+ ret i64 %conv6
+}
+
+define i64 @ustest_f32i64(float %x) {
+; CHECK-LABEL: ustest_f32i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: callq __fixsfti at PLT
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: testq %rdx, %rdx
+; CHECK-NEXT: movl $1, %esi
+; CHECK-NEXT: cmovleq %rdx, %rsi
+; CHECK-NEXT: cmovgq %rcx, %rax
+; CHECK-NEXT: movq %rax, %rdx
+; CHECK-NEXT: negq %rdx
+; CHECK-NEXT: movl $0, %edx
+; CHECK-NEXT: sbbq %rsi, %rdx
+; CHECK-NEXT: cmovgeq %rcx, %rax
+; CHECK-NEXT: popq %rcx
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi float %x to i128
+ %0 = icmp slt i128 %conv, 18446744073709551616
+ %spec.store.select = select i1 %0, i128 %conv, i128 18446744073709551616
+ %1 = icmp sgt i128 %spec.store.select, 0
+ %spec.store.select7 = select i1 %1, i128 %spec.store.select, i128 0
+ %conv6 = trunc i128 %spec.store.select7 to i64
+ ret i64 %conv6
+}
+
+define i64 @stest_f16i64(half %x) {
+; CHECK-LABEL: stest_f16i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: movzwl %di, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee at PLT
+; CHECK-NEXT: callq __fixsfti at PLT
+; CHECK-NEXT: xorl %esi, %esi
+; CHECK-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF
+; CHECK-NEXT: cmpq %rcx, %rax
+; CHECK-NEXT: movq %rdx, %rdi
+; CHECK-NEXT: sbbq $0, %rdi
+; CHECK-NEXT: cmovlq %rdx, %rsi
+; CHECK-NEXT: cmovlq %rax, %rcx
+; CHECK-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
+; CHECK-NEXT: cmpq %rcx, %rax
+; CHECK-NEXT: movq $-1, %rdx
+; CHECK-NEXT: sbbq %rsi, %rdx
+; CHECK-NEXT: cmovgeq %rax, %rcx
+; CHECK-NEXT: movq %rcx, %rax
+; CHECK-NEXT: popq %rcx
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi half %x to i128
+ %0 = icmp slt i128 %conv, 9223372036854775807
+ %spec.store.select = select i1 %0, i128 %conv, i128 9223372036854775807
+ %1 = icmp sgt i128 %spec.store.select, -9223372036854775808
+ %spec.store.select7 = select i1 %1, i128 %spec.store.select, i128 -9223372036854775808
+ %conv6 = trunc i128 %spec.store.select7 to i64
+ ret i64 %conv6
+}
+
+define i64 @utesth_f16i64(half %x) {
+; CHECK-LABEL: utesth_f16i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: movzwl %di, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee at PLT
+; CHECK-NEXT: callq __fixunssfti at PLT
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: testq %rdx, %rdx
+; CHECK-NEXT: cmovneq %rcx, %rax
+; CHECK-NEXT: popq %rcx
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %conv = fptoui half %x to i128
+ %0 = icmp ult i128 %conv, 18446744073709551616
+ %spec.store.select = select i1 %0, i128 %conv, i128 18446744073709551616
+ %conv6 = trunc i128 %spec.store.select to i64
+ ret i64 %conv6
+}
+
+define i64 @ustest_f16i64(half %x) {
+; CHECK-LABEL: ustest_f16i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: movzwl %di, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee at PLT
+; CHECK-NEXT: callq __fixsfti at PLT
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: testq %rdx, %rdx
+; CHECK-NEXT: movl $1, %esi
+; CHECK-NEXT: cmovleq %rdx, %rsi
+; CHECK-NEXT: cmovgq %rcx, %rax
+; CHECK-NEXT: movq %rax, %rdx
+; CHECK-NEXT: negq %rdx
+; CHECK-NEXT: movl $0, %edx
+; CHECK-NEXT: sbbq %rsi, %rdx
+; CHECK-NEXT: cmovgeq %rcx, %rax
+; CHECK-NEXT: popq %rcx
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi half %x to i128
+ %0 = icmp slt i128 %conv, 18446744073709551616
+ %spec.store.select = select i1 %0, i128 %conv, i128 18446744073709551616
+ %1 = icmp sgt i128 %spec.store.select, 0
+ %spec.store.select7 = select i1 %1, i128 %spec.store.select, i128 0
+ %conv6 = trunc i128 %spec.store.select7 to i64
+ ret i64 %conv6
+}
+
+
+
+
+; i32 saturate
+
+define i32 @stest_f64i32_mm(double %x) {
+; CHECK-LABEL: stest_f64i32_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cvttsd2si %xmm0, %rax
+; CHECK-NEXT: cmpq $2147483647, %rax # imm = 0x7FFFFFFF
+; CHECK-NEXT: movl $2147483647, %ecx # imm = 0x7FFFFFFF
+; CHECK-NEXT: cmovlq %rax, %rcx
+; CHECK-NEXT: cmpq $-2147483647, %rcx # imm = 0x80000001
+; CHECK-NEXT: movq $-2147483648, %rax # imm = 0x80000000
+; CHECK-NEXT: cmovgeq %rcx, %rax
+; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi double %x to i64
+ %spec.store.select = call i64 @llvm.smin.i64(i64 %conv, i64 2147483647)
+ %spec.store.select7 = call i64 @llvm.smax.i64(i64 %spec.store.select, i64 -2147483648)
+ %conv6 = trunc i64 %spec.store.select7 to i32
+ ret i32 %conv6
+}
+
+define i32 @utest_f64i32_mm(double %x) {
+; CHECK-LABEL: utest_f64i32_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cvttsd2si %xmm0, %rcx
+; CHECK-NEXT: movq %rcx, %rdx
+; CHECK-NEXT: sarq $63, %rdx
+; CHECK-NEXT: subsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: cvttsd2si %xmm0, %rax
+; CHECK-NEXT: andq %rdx, %rax
+; CHECK-NEXT: orq %rcx, %rax
+; CHECK-NEXT: movl $4294967295, %ecx # imm = 0xFFFFFFFF
+; CHECK-NEXT: cmpq %rcx, %rax
+; CHECK-NEXT: cmovaeq %rcx, %rax
+; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
+; CHECK-NEXT: retq
+entry:
+ %conv = fptoui double %x to i64
+ %spec.store.select = call i64 @llvm.umin.i64(i64 %conv, i64 4294967295)
+ %conv6 = trunc i64 %spec.store.select to i32
+ ret i32 %conv6
+}
+
+define i32 @ustest_f64i32_mm(double %x) {
+; CHECK-LABEL: ustest_f64i32_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cvttsd2si %xmm0, %rax
+; CHECK-NEXT: movl $4294967295, %ecx # imm = 0xFFFFFFFF
+; CHECK-NEXT: cmpq %rcx, %rax
+; CHECK-NEXT: cmovlq %rax, %rcx
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: testq %rcx, %rcx
+; CHECK-NEXT: cmovgq %rcx, %rax
+; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi double %x to i64
+ %spec.store.select = call i64 @llvm.smin.i64(i64 %conv, i64 4294967295)
+ %spec.store.select7 = call i64 @llvm.smax.i64(i64 %spec.store.select, i64 0)
+ %conv6 = trunc i64 %spec.store.select7 to i32
+ ret i32 %conv6
+}
+
+define i32 @stest_f32i32_mm(float %x) {
+; CHECK-LABEL: stest_f32i32_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cvttss2si %xmm0, %rax
+; CHECK-NEXT: cmpq $2147483647, %rax # imm = 0x7FFFFFFF
+; CHECK-NEXT: movl $2147483647, %ecx # imm = 0x7FFFFFFF
+; CHECK-NEXT: cmovlq %rax, %rcx
+; CHECK-NEXT: cmpq $-2147483647, %rcx # imm = 0x80000001
+; CHECK-NEXT: movq $-2147483648, %rax # imm = 0x80000000
+; CHECK-NEXT: cmovgeq %rcx, %rax
+; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi float %x to i64
+ %spec.store.select = call i64 @llvm.smin.i64(i64 %conv, i64 2147483647)
+ %spec.store.select7 = call i64 @llvm.smax.i64(i64 %spec.store.select, i64 -2147483648)
+ %conv6 = trunc i64 %spec.store.select7 to i32
+ ret i32 %conv6
+}
+
+define i32 @utest_f32i32_mm(float %x) {
+; CHECK-LABEL: utest_f32i32_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cvttss2si %xmm0, %rcx
+; CHECK-NEXT: movq %rcx, %rdx
+; CHECK-NEXT: sarq $63, %rdx
+; CHECK-NEXT: subss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %rax
+; CHECK-NEXT: andq %rdx, %rax
+; CHECK-NEXT: orq %rcx, %rax
+; CHECK-NEXT: movl $4294967295, %ecx # imm = 0xFFFFFFFF
+; CHECK-NEXT: cmpq %rcx, %rax
+; CHECK-NEXT: cmovaeq %rcx, %rax
+; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
+; CHECK-NEXT: retq
+entry:
+ %conv = fptoui float %x to i64
+ %spec.store.select = call i64 @llvm.umin.i64(i64 %conv, i64 4294967295)
+ %conv6 = trunc i64 %spec.store.select to i32
+ ret i32 %conv6
+}
+
+define i32 @ustest_f32i32_mm(float %x) {
+; CHECK-LABEL: ustest_f32i32_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cvttss2si %xmm0, %rax
+; CHECK-NEXT: movl $4294967295, %ecx # imm = 0xFFFFFFFF
+; CHECK-NEXT: cmpq %rcx, %rax
+; CHECK-NEXT: cmovlq %rax, %rcx
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: testq %rcx, %rcx
+; CHECK-NEXT: cmovgq %rcx, %rax
+; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi float %x to i64
+ %spec.store.select = call i64 @llvm.smin.i64(i64 %conv, i64 4294967295)
+ %spec.store.select7 = call i64 @llvm.smax.i64(i64 %spec.store.select, i64 0)
+ %conv6 = trunc i64 %spec.store.select7 to i32
+ ret i32 %conv6
+}
+
+define i32 @stest_f16i32_mm(half %x) {
+; CHECK-LABEL: stest_f16i32_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: movzwl %di, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee at PLT
+; CHECK-NEXT: cvttss2si %xmm0, %rax
+; CHECK-NEXT: cmpq $2147483647, %rax # imm = 0x7FFFFFFF
+; CHECK-NEXT: movl $2147483647, %ecx # imm = 0x7FFFFFFF
+; CHECK-NEXT: cmovlq %rax, %rcx
+; CHECK-NEXT: cmpq $-2147483647, %rcx # imm = 0x80000001
+; CHECK-NEXT: movq $-2147483648, %rax # imm = 0x80000000
+; CHECK-NEXT: cmovgeq %rcx, %rax
+; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
+; CHECK-NEXT: popq %rcx
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi half %x to i64
+ %spec.store.select = call i64 @llvm.smin.i64(i64 %conv, i64 2147483647)
+ %spec.store.select7 = call i64 @llvm.smax.i64(i64 %spec.store.select, i64 -2147483648)
+ %conv6 = trunc i64 %spec.store.select7 to i32
+ ret i32 %conv6
+}
+
+define i32 @utesth_f16i32_mm(half %x) {
+; CHECK-LABEL: utesth_f16i32_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: movzwl %di, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee at PLT
+; CHECK-NEXT: cvttss2si %xmm0, %rcx
+; CHECK-NEXT: movq %rcx, %rdx
+; CHECK-NEXT: sarq $63, %rdx
+; CHECK-NEXT: subss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %rax
+; CHECK-NEXT: andq %rdx, %rax
+; CHECK-NEXT: orq %rcx, %rax
+; CHECK-NEXT: movl $4294967295, %ecx # imm = 0xFFFFFFFF
+; CHECK-NEXT: cmpq %rcx, %rax
+; CHECK-NEXT: cmovaeq %rcx, %rax
+; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
+; CHECK-NEXT: popq %rcx
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %conv = fptoui half %x to i64
+ %spec.store.select = call i64 @llvm.umin.i64(i64 %conv, i64 4294967295)
+ %conv6 = trunc i64 %spec.store.select to i32
+ ret i32 %conv6
+}
+
+define i32 @ustest_f16i32_mm(half %x) {
+; CHECK-LABEL: ustest_f16i32_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: movzwl %di, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee at PLT
+; CHECK-NEXT: cvttss2si %xmm0, %rax
+; CHECK-NEXT: movl $4294967295, %ecx # imm = 0xFFFFFFFF
+; CHECK-NEXT: cmpq %rcx, %rax
+; CHECK-NEXT: cmovlq %rax, %rcx
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: testq %rcx, %rcx
+; CHECK-NEXT: cmovgq %rcx, %rax
+; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
+; CHECK-NEXT: popq %rcx
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi half %x to i64
+ %spec.store.select = call i64 @llvm.smin.i64(i64 %conv, i64 4294967295)
+ %spec.store.select7 = call i64 @llvm.smax.i64(i64 %spec.store.select, i64 0)
+ %conv6 = trunc i64 %spec.store.select7 to i32
+ ret i32 %conv6
+}
+
+; i16 saturate
+
+define i16 @stest_f64i16_mm(double %x) {
+; CHECK-LABEL: stest_f64i16_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cvttsd2si %xmm0, %eax
+; CHECK-NEXT: cmpl $32767, %eax # imm = 0x7FFF
+; CHECK-NEXT: movl $32767, %ecx # imm = 0x7FFF
+; CHECK-NEXT: cmovll %eax, %ecx
+; CHECK-NEXT: cmpl $-32767, %ecx # imm = 0x8001
+; CHECK-NEXT: movl $-32768, %eax # imm = 0x8000
+; CHECK-NEXT: cmovgel %ecx, %eax
+; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi double %x to i32
+ %spec.store.select = call i32 @llvm.smin.i32(i32 %conv, i32 32767)
+ %spec.store.select7 = call i32 @llvm.smax.i32(i32 %spec.store.select, i32 -32768)
+ %conv6 = trunc i32 %spec.store.select7 to i16
+ ret i16 %conv6
+}
+
+define i16 @utest_f64i16_mm(double %x) {
+; CHECK-LABEL: utest_f64i16_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cvttsd2si %xmm0, %rcx
+; CHECK-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
+; CHECK-NEXT: movl $65535, %eax # imm = 0xFFFF
+; CHECK-NEXT: cmovbl %ecx, %eax
+; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT: retq
+entry:
+ %conv = fptoui double %x to i32
+ %spec.store.select = call i32 @llvm.umin.i32(i32 %conv, i32 65535)
+ %conv6 = trunc i32 %spec.store.select to i16
+ ret i16 %conv6
+}
+
+define i16 @ustest_f64i16_mm(double %x) {
+; CHECK-LABEL: ustest_f64i16_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cvttsd2si %xmm0, %eax
+; CHECK-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; CHECK-NEXT: movl $65535, %ecx # imm = 0xFFFF
+; CHECK-NEXT: cmovll %eax, %ecx
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: testl %ecx, %ecx
+; CHECK-NEXT: cmovgl %ecx, %eax
+; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi double %x to i32
+ %spec.store.select = call i32 @llvm.smin.i32(i32 %conv, i32 65535)
+ %spec.store.select7 = call i32 @llvm.smax.i32(i32 %spec.store.select, i32 0)
+ %conv6 = trunc i32 %spec.store.select7 to i16
+ ret i16 %conv6
+}
+
+define i16 @stest_f32i16_mm(float %x) {
+; CHECK-LABEL: stest_f32i16_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cvttss2si %xmm0, %eax
+; CHECK-NEXT: cmpl $32767, %eax # imm = 0x7FFF
+; CHECK-NEXT: movl $32767, %ecx # imm = 0x7FFF
+; CHECK-NEXT: cmovll %eax, %ecx
+; CHECK-NEXT: cmpl $-32767, %ecx # imm = 0x8001
+; CHECK-NEXT: movl $-32768, %eax # imm = 0x8000
+; CHECK-NEXT: cmovgel %ecx, %eax
+; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi float %x to i32
+ %spec.store.select = call i32 @llvm.smin.i32(i32 %conv, i32 32767)
+ %spec.store.select7 = call i32 @llvm.smax.i32(i32 %spec.store.select, i32 -32768)
+ %conv6 = trunc i32 %spec.store.select7 to i16
+ ret i16 %conv6
+}
+
+define i16 @utest_f32i16_mm(float %x) {
+; CHECK-LABEL: utest_f32i16_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cvttss2si %xmm0, %rcx
+; CHECK-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
+; CHECK-NEXT: movl $65535, %eax # imm = 0xFFFF
+; CHECK-NEXT: cmovbl %ecx, %eax
+; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT: retq
+entry:
+ %conv = fptoui float %x to i32
+ %spec.store.select = call i32 @llvm.umin.i32(i32 %conv, i32 65535)
+ %conv6 = trunc i32 %spec.store.select to i16
+ ret i16 %conv6
+}
+
+define i16 @ustest_f32i16_mm(float %x) {
+; CHECK-LABEL: ustest_f32i16_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cvttss2si %xmm0, %eax
+; CHECK-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; CHECK-NEXT: movl $65535, %ecx # imm = 0xFFFF
+; CHECK-NEXT: cmovll %eax, %ecx
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: testl %ecx, %ecx
+; CHECK-NEXT: cmovgl %ecx, %eax
+; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi float %x to i32
+ %spec.store.select = call i32 @llvm.smin.i32(i32 %conv, i32 65535)
+ %spec.store.select7 = call i32 @llvm.smax.i32(i32 %spec.store.select, i32 0)
+ %conv6 = trunc i32 %spec.store.select7 to i16
+ ret i16 %conv6
+}
+
+define i16 @stest_f16i16_mm(half %x) {
+; CHECK-LABEL: stest_f16i16_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: movzwl %di, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee at PLT
+; CHECK-NEXT: cvttss2si %xmm0, %eax
+; CHECK-NEXT: cmpl $32767, %eax # imm = 0x7FFF
+; CHECK-NEXT: movl $32767, %ecx # imm = 0x7FFF
+; CHECK-NEXT: cmovll %eax, %ecx
+; CHECK-NEXT: cmpl $-32767, %ecx # imm = 0x8001
+; CHECK-NEXT: movl $-32768, %eax # imm = 0x8000
+; CHECK-NEXT: cmovgel %ecx, %eax
+; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT: popq %rcx
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi half %x to i32
+ %spec.store.select = call i32 @llvm.smin.i32(i32 %conv, i32 32767)
+ %spec.store.select7 = call i32 @llvm.smax.i32(i32 %spec.store.select, i32 -32768)
+ %conv6 = trunc i32 %spec.store.select7 to i16
+ ret i16 %conv6
+}
+
+define i16 @utesth_f16i16_mm(half %x) {
+; CHECK-LABEL: utesth_f16i16_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: movzwl %di, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee at PLT
+; CHECK-NEXT: cvttss2si %xmm0, %rcx
+; CHECK-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
+; CHECK-NEXT: movl $65535, %eax # imm = 0xFFFF
+; CHECK-NEXT: cmovbl %ecx, %eax
+; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT: popq %rcx
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %conv = fptoui half %x to i32
+ %spec.store.select = call i32 @llvm.umin.i32(i32 %conv, i32 65535)
+ %conv6 = trunc i32 %spec.store.select to i16
+ ret i16 %conv6
+}
+
+define i16 @ustest_f16i16_mm(half %x) {
+; CHECK-LABEL: ustest_f16i16_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: movzwl %di, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee at PLT
+; CHECK-NEXT: cvttss2si %xmm0, %eax
+; CHECK-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; CHECK-NEXT: movl $65535, %ecx # imm = 0xFFFF
+; CHECK-NEXT: cmovll %eax, %ecx
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: testl %ecx, %ecx
+; CHECK-NEXT: cmovgl %ecx, %eax
+; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT: popq %rcx
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi half %x to i32
+ %spec.store.select = call i32 @llvm.smin.i32(i32 %conv, i32 65535)
+ %spec.store.select7 = call i32 @llvm.smax.i32(i32 %spec.store.select, i32 0)
+ %conv6 = trunc i32 %spec.store.select7 to i16
+ ret i16 %conv6
+}
+
+; i64 saturate
+
+define i64 @stest_f64i64_mm(double %x) {
+; CHECK-LABEL: stest_f64i64_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: callq __fixdfti at PLT
+; CHECK-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF
+; CHECK-NEXT: cmpq %rcx, %rax
+; CHECK-NEXT: movq %rcx, %rsi
+; CHECK-NEXT: cmovbq %rax, %rsi
+; CHECK-NEXT: xorl %edi, %edi
+; CHECK-NEXT: testq %rdx, %rdx
+; CHECK-NEXT: cmovsq %rax, %rcx
+; CHECK-NEXT: cmoveq %rsi, %rcx
+; CHECK-NEXT: cmovsq %rdx, %rdi
+; CHECK-NEXT: testq %rdi, %rdi
+; CHECK-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
+; CHECK-NEXT: movq %rax, %rdx
+; CHECK-NEXT: cmovnsq %rcx, %rdx
+; CHECK-NEXT: cmpq %rax, %rcx
+; CHECK-NEXT: cmovbeq %rax, %rcx
+; CHECK-NEXT: cmpq $-1, %rdi
+; CHECK-NEXT: cmovneq %rdx, %rcx
+; CHECK-NEXT: movq %rcx, %rax
+; CHECK-NEXT: popq %rcx
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi double %x to i128
+ %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 9223372036854775807)
+ %spec.store.select7 = call i128 @llvm.smax.i128(i128 %spec.store.select, i128 -9223372036854775808)
+ %conv6 = trunc i128 %spec.store.select7 to i64
+ ret i64 %conv6
+}
+
+define i64 @utest_f64i64_mm(double %x) {
+; CHECK-LABEL: utest_f64i64_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: callq __fixunsdfti at PLT
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: testq %rdx, %rdx
+; CHECK-NEXT: cmovneq %rcx, %rax
+; CHECK-NEXT: cmpq $1, %rdx
+; CHECK-NEXT: cmoveq %rcx, %rax
+; CHECK-NEXT: popq %rcx
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %conv = fptoui double %x to i128
+ %spec.store.select = call i128 @llvm.umin.i128(i128 %conv, i128 18446744073709551616)
+ %conv6 = trunc i128 %spec.store.select to i64
+ ret i64 %conv6
+}
+
+define i64 @ustest_f64i64_mm(double %x) {
+; CHECK-LABEL: ustest_f64i64_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: callq __fixdfti at PLT
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: testq %rdx, %rdx
+; CHECK-NEXT: movl $1, %esi
+; CHECK-NEXT: cmovleq %rdx, %rsi
+; CHECK-NEXT: cmovgq %rcx, %rax
+; CHECK-NEXT: cmpq $1, %rdx
+; CHECK-NEXT: cmoveq %rcx, %rax
+; CHECK-NEXT: testq %rsi, %rsi
+; CHECK-NEXT: cmovsq %rcx, %rax
+; CHECK-NEXT: popq %rcx
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi double %x to i128
+ %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 18446744073709551616)
+ %spec.store.select7 = call i128 @llvm.smax.i128(i128 %spec.store.select, i128 0)
+ %conv6 = trunc i128 %spec.store.select7 to i64
+ ret i64 %conv6
+}
+
+define i64 @stest_f32i64_mm(float %x) {
+; CHECK-LABEL: stest_f32i64_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: callq __fixsfti at PLT
+; CHECK-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF
+; CHECK-NEXT: cmpq %rcx, %rax
+; CHECK-NEXT: movq %rcx, %rsi
+; CHECK-NEXT: cmovbq %rax, %rsi
+; CHECK-NEXT: xorl %edi, %edi
+; CHECK-NEXT: testq %rdx, %rdx
+; CHECK-NEXT: cmovsq %rax, %rcx
+; CHECK-NEXT: cmoveq %rsi, %rcx
+; CHECK-NEXT: cmovsq %rdx, %rdi
+; CHECK-NEXT: testq %rdi, %rdi
+; CHECK-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
+; CHECK-NEXT: movq %rax, %rdx
+; CHECK-NEXT: cmovnsq %rcx, %rdx
+; CHECK-NEXT: cmpq %rax, %rcx
+; CHECK-NEXT: cmovbeq %rax, %rcx
+; CHECK-NEXT: cmpq $-1, %rdi
+; CHECK-NEXT: cmovneq %rdx, %rcx
+; CHECK-NEXT: movq %rcx, %rax
+; CHECK-NEXT: popq %rcx
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi float %x to i128
+ %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 9223372036854775807)
+ %spec.store.select7 = call i128 @llvm.smax.i128(i128 %spec.store.select, i128 -9223372036854775808)
+ %conv6 = trunc i128 %spec.store.select7 to i64
+ ret i64 %conv6
+}
+
+define i64 @utest_f32i64_mm(float %x) {
+; CHECK-LABEL: utest_f32i64_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: callq __fixunssfti at PLT
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: testq %rdx, %rdx
+; CHECK-NEXT: cmovneq %rcx, %rax
+; CHECK-NEXT: cmpq $1, %rdx
+; CHECK-NEXT: cmoveq %rcx, %rax
+; CHECK-NEXT: popq %rcx
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %conv = fptoui float %x to i128
+ %spec.store.select = call i128 @llvm.umin.i128(i128 %conv, i128 18446744073709551616)
+ %conv6 = trunc i128 %spec.store.select to i64
+ ret i64 %conv6
+}
+
+define i64 @ustest_f32i64_mm(float %x) {
+; CHECK-LABEL: ustest_f32i64_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: callq __fixsfti at PLT
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: testq %rdx, %rdx
+; CHECK-NEXT: movl $1, %esi
+; CHECK-NEXT: cmovleq %rdx, %rsi
+; CHECK-NEXT: cmovgq %rcx, %rax
+; CHECK-NEXT: cmpq $1, %rdx
+; CHECK-NEXT: cmoveq %rcx, %rax
+; CHECK-NEXT: testq %rsi, %rsi
+; CHECK-NEXT: cmovsq %rcx, %rax
+; CHECK-NEXT: popq %rcx
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi float %x to i128
+ %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 18446744073709551616)
+ %spec.store.select7 = call i128 @llvm.smax.i128(i128 %spec.store.select, i128 0)
+ %conv6 = trunc i128 %spec.store.select7 to i64
+ ret i64 %conv6
+}
+
+define i64 @stest_f16i64_mm(half %x) {
+; CHECK-LABEL: stest_f16i64_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: movzwl %di, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee at PLT
+; CHECK-NEXT: callq __fixsfti at PLT
+; CHECK-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF
+; CHECK-NEXT: cmpq %rcx, %rax
+; CHECK-NEXT: movq %rcx, %rsi
+; CHECK-NEXT: cmovbq %rax, %rsi
+; CHECK-NEXT: xorl %edi, %edi
+; CHECK-NEXT: testq %rdx, %rdx
+; CHECK-NEXT: cmovsq %rax, %rcx
+; CHECK-NEXT: cmoveq %rsi, %rcx
+; CHECK-NEXT: cmovsq %rdx, %rdi
+; CHECK-NEXT: testq %rdi, %rdi
+; CHECK-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
+; CHECK-NEXT: movq %rax, %rdx
+; CHECK-NEXT: cmovnsq %rcx, %rdx
+; CHECK-NEXT: cmpq %rax, %rcx
+; CHECK-NEXT: cmovbeq %rax, %rcx
+; CHECK-NEXT: cmpq $-1, %rdi
+; CHECK-NEXT: cmovneq %rdx, %rcx
+; CHECK-NEXT: movq %rcx, %rax
+; CHECK-NEXT: popq %rcx
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi half %x to i128
+ %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 9223372036854775807)
+ %spec.store.select7 = call i128 @llvm.smax.i128(i128 %spec.store.select, i128 -9223372036854775808)
+ %conv6 = trunc i128 %spec.store.select7 to i64
+ ret i64 %conv6
+}
+
+define i64 @utesth_f16i64_mm(half %x) {
+; CHECK-LABEL: utesth_f16i64_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: movzwl %di, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee at PLT
+; CHECK-NEXT: callq __fixunssfti at PLT
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: testq %rdx, %rdx
+; CHECK-NEXT: cmovneq %rcx, %rax
+; CHECK-NEXT: cmpq $1, %rdx
+; CHECK-NEXT: cmoveq %rcx, %rax
+; CHECK-NEXT: popq %rcx
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %conv = fptoui half %x to i128
+ %spec.store.select = call i128 @llvm.umin.i128(i128 %conv, i128 18446744073709551616)
+ %conv6 = trunc i128 %spec.store.select to i64
+ ret i64 %conv6
+}
+
+define i64 @ustest_f16i64_mm(half %x) {
+; CHECK-LABEL: ustest_f16i64_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: movzwl %di, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee at PLT
+; CHECK-NEXT: callq __fixsfti at PLT
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: testq %rdx, %rdx
+; CHECK-NEXT: movl $1, %esi
+; CHECK-NEXT: cmovleq %rdx, %rsi
+; CHECK-NEXT: cmovgq %rcx, %rax
+; CHECK-NEXT: cmpq $1, %rdx
+; CHECK-NEXT: cmoveq %rcx, %rax
+; CHECK-NEXT: testq %rsi, %rsi
+; CHECK-NEXT: cmovsq %rcx, %rax
+; CHECK-NEXT: popq %rcx
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi half %x to i128
+ %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 18446744073709551616)
+ %spec.store.select7 = call i128 @llvm.smax.i128(i128 %spec.store.select, i128 0)
+ %conv6 = trunc i128 %spec.store.select7 to i64
+ ret i64 %conv6
+}
+
+declare i32 @llvm.smin.i32(i32, i32)
+declare i32 @llvm.smax.i32(i32, i32)
+declare i32 @llvm.umin.i32(i32, i32)
+declare i64 @llvm.smin.i64(i64, i64)
+declare i64 @llvm.smax.i64(i64, i64)
+declare i64 @llvm.umin.i64(i64, i64)
+declare i128 @llvm.smin.i128(i128, i128)
+declare i128 @llvm.smax.i128(i128, i128)
+declare i128 @llvm.umin.i128(i128, i128)
diff --git a/llvm/test/CodeGen/X86/fpclamptosat_vec.ll b/llvm/test/CodeGen/X86/fpclamptosat_vec.ll
new file mode 100644
index 0000000000000..312fad5302ef1
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fpclamptosat_vec.ll
@@ -0,0 +1,3473 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
+
+; i32 saturate
+
+define <2 x i32> @stest_f64i32(<2 x double> %x) {
+; CHECK-LABEL: stest_f64i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cvttsd2si %xmm0, %rax
+; CHECK-NEXT: movq %rax, %xmm1
+; CHECK-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT: cvttsd2si %xmm0, %rax
+; CHECK-NEXT: movq %rax, %xmm0
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648]
+; CHECK-NEXT: movdqa %xmm1, %xmm2
+; CHECK-NEXT: pxor %xmm0, %xmm2
+; CHECK-NEXT: movdqa {{.*#+}} xmm3 = [4294967295,4294967295]
+; CHECK-NEXT: movdqa %xmm3, %xmm4
+; CHECK-NEXT: pcmpgtd %xmm2, %xmm4
+; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
+; CHECK-NEXT: pcmpeqd %xmm3, %xmm2
+; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-NEXT: pand %xmm5, %xmm2
+; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
+; CHECK-NEXT: por %xmm2, %xmm3
+; CHECK-NEXT: pand %xmm3, %xmm1
+; CHECK-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
+; CHECK-NEXT: por %xmm1, %xmm3
+; CHECK-NEXT: pxor %xmm3, %xmm0
+; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [18446744069414584320,18446744069414584320]
+; CHECK-NEXT: movdqa %xmm0, %xmm2
+; CHECK-NEXT: pcmpgtd %xmm1, %xmm2
+; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
+; CHECK-NEXT: pcmpeqd %xmm1, %xmm0
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; CHECK-NEXT: pand %xmm4, %xmm0
+; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
+; CHECK-NEXT: por %xmm0, %xmm1
+; CHECK-NEXT: pand %xmm1, %xmm3
+; CHECK-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-NEXT: por %xmm3, %xmm1
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi <2 x double> %x to <2 x i64>
+ %0 = icmp slt <2 x i64> %conv, <i64 2147483647, i64 2147483647>
+ %spec.store.select = select <2 x i1> %0, <2 x i64> %conv, <2 x i64> <i64 2147483647, i64 2147483647>
+ %1 = icmp sgt <2 x i64> %spec.store.select, <i64 -2147483648, i64 -2147483648>
+ %spec.store.select7 = select <2 x i1> %1, <2 x i64> %spec.store.select, <2 x i64> <i64 -2147483648, i64 -2147483648>
+ %conv6 = trunc <2 x i64> %spec.store.select7 to <2 x i32>
+ ret <2 x i32> %conv6
+}
+
+define <2 x i32> @utest_f64i32(<2 x double> %x) {
+; CHECK-LABEL: utest_f64i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
+; CHECK-NEXT: movapd %xmm0, %xmm1
+; CHECK-NEXT: subsd %xmm2, %xmm1
+; CHECK-NEXT: cvttsd2si %xmm1, %rax
+; CHECK-NEXT: cvttsd2si %xmm0, %rcx
+; CHECK-NEXT: movq %rcx, %rdx
+; CHECK-NEXT: sarq $63, %rdx
+; CHECK-NEXT: andq %rax, %rdx
+; CHECK-NEXT: orq %rcx, %rdx
+; CHECK-NEXT: movq %rdx, %xmm1
+; CHECK-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT: cvttsd2si %xmm0, %rax
+; CHECK-NEXT: subsd %xmm2, %xmm0
+; CHECK-NEXT: cvttsd2si %xmm0, %rcx
+; CHECK-NEXT: movq %rax, %rdx
+; CHECK-NEXT: sarq $63, %rdx
+; CHECK-NEXT: andq %rcx, %rdx
+; CHECK-NEXT: orq %rax, %rdx
+; CHECK-NEXT: movq %rdx, %xmm0
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT: movdqa {{.*#+}} xmm0 = [9223372039002259456,9223372039002259456]
+; CHECK-NEXT: pxor %xmm1, %xmm0
+; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259455,9223372039002259455]
+; CHECK-NEXT: movdqa %xmm2, %xmm3
+; CHECK-NEXT: pcmpgtd %xmm0, %xmm3
+; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
+; CHECK-NEXT: pcmpeqd %xmm2, %xmm0
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; CHECK-NEXT: pand %xmm4, %xmm0
+; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
+; CHECK-NEXT: por %xmm0, %xmm2
+; CHECK-NEXT: pand %xmm2, %xmm1
+; CHECK-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; CHECK-NEXT: por %xmm1, %xmm2
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
+; CHECK-NEXT: retq
+entry:
+ %conv = fptoui <2 x double> %x to <2 x i64>
+ %0 = icmp ult <2 x i64> %conv, <i64 4294967295, i64 4294967295>
+ %spec.store.select = select <2 x i1> %0, <2 x i64> %conv, <2 x i64> <i64 4294967295, i64 4294967295>
+ %conv6 = trunc <2 x i64> %spec.store.select to <2 x i32>
+ ret <2 x i32> %conv6
+}
+
+define <2 x i32> @ustest_f64i32(<2 x double> %x) {
+; CHECK-LABEL: ustest_f64i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cvttsd2si %xmm0, %rax
+; CHECK-NEXT: movq %rax, %xmm1
+; CHECK-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT: cvttsd2si %xmm0, %rax
+; CHECK-NEXT: movq %rax, %xmm0
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648]
+; CHECK-NEXT: movdqa %xmm1, %xmm2
+; CHECK-NEXT: pxor %xmm0, %xmm2
+; CHECK-NEXT: movdqa {{.*#+}} xmm3 = [2147483647,2147483647]
+; CHECK-NEXT: movdqa %xmm3, %xmm4
+; CHECK-NEXT: pcmpgtd %xmm2, %xmm4
+; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
+; CHECK-NEXT: pcmpeqd %xmm3, %xmm2
+; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-NEXT: pand %xmm5, %xmm2
+; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
+; CHECK-NEXT: por %xmm2, %xmm3
+; CHECK-NEXT: pand %xmm3, %xmm1
+; CHECK-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
+; CHECK-NEXT: por %xmm1, %xmm3
+; CHECK-NEXT: movdqa %xmm3, %xmm1
+; CHECK-NEXT: pxor %xmm0, %xmm1
+; CHECK-NEXT: movdqa %xmm1, %xmm2
+; CHECK-NEXT: pcmpgtd %xmm0, %xmm2
+; CHECK-NEXT: pcmpeqd %xmm0, %xmm1
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
+; CHECK-NEXT: pand %xmm2, %xmm0
+; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
+; CHECK-NEXT: por %xmm0, %xmm1
+; CHECK-NEXT: pand %xmm3, %xmm1
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi <2 x double> %x to <2 x i64>
+ %0 = icmp slt <2 x i64> %conv, <i64 4294967295, i64 4294967295>
+ %spec.store.select = select <2 x i1> %0, <2 x i64> %conv, <2 x i64> <i64 4294967295, i64 4294967295>
+ %1 = icmp sgt <2 x i64> %spec.store.select, zeroinitializer
+ %spec.store.select7 = select <2 x i1> %1, <2 x i64> %spec.store.select, <2 x i64> zeroinitializer
+ %conv6 = trunc <2 x i64> %spec.store.select7 to <2 x i32>
+ ret <2 x i32> %conv6
+}
+
+define <4 x i32> @stest_f32i32(<4 x float> %x) {
+; CHECK-LABEL: stest_f32i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movaps %xmm0, %xmm1
+; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3],xmm0[3,3]
+; CHECK-NEXT: cvttss2si %xmm1, %rax
+; CHECK-NEXT: movq %rax, %xmm1
+; CHECK-NEXT: movaps %xmm0, %xmm2
+; CHECK-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
+; CHECK-NEXT: cvttss2si %xmm2, %rax
+; CHECK-NEXT: movq %rax, %xmm2
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
+; CHECK-NEXT: cvttss2si %xmm0, %rax
+; CHECK-NEXT: movq %rax, %xmm4
+; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; CHECK-NEXT: cvttss2si %xmm0, %rax
+; CHECK-NEXT: movq %rax, %xmm0
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm0[0]
+; CHECK-NEXT: movdqa {{.*#+}} xmm8 = [2147483647,2147483647]
+; CHECK-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648]
+; CHECK-NEXT: movdqa %xmm4, %xmm1
+; CHECK-NEXT: pxor %xmm0, %xmm1
+; CHECK-NEXT: movdqa {{.*#+}} xmm5 = [4294967295,4294967295]
+; CHECK-NEXT: movdqa %xmm5, %xmm6
+; CHECK-NEXT: pcmpgtd %xmm1, %xmm6
+; CHECK-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
+; CHECK-NEXT: pcmpeqd %xmm5, %xmm1
+; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
+; CHECK-NEXT: pand %xmm7, %xmm3
+; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm6[1,1,3,3]
+; CHECK-NEXT: por %xmm3, %xmm1
+; CHECK-NEXT: pand %xmm1, %xmm4
+; CHECK-NEXT: pandn %xmm8, %xmm1
+; CHECK-NEXT: por %xmm4, %xmm1
+; CHECK-NEXT: movdqa %xmm2, %xmm3
+; CHECK-NEXT: pxor %xmm0, %xmm3
+; CHECK-NEXT: movdqa %xmm5, %xmm4
+; CHECK-NEXT: pcmpgtd %xmm3, %xmm4
+; CHECK-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
+; CHECK-NEXT: pcmpeqd %xmm5, %xmm3
+; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
+; CHECK-NEXT: pand %xmm6, %xmm3
+; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
+; CHECK-NEXT: por %xmm3, %xmm4
+; CHECK-NEXT: pand %xmm4, %xmm2
+; CHECK-NEXT: pandn %xmm8, %xmm4
+; CHECK-NEXT: por %xmm2, %xmm4
+; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [18446744071562067968,18446744071562067968]
+; CHECK-NEXT: movdqa %xmm4, %xmm3
+; CHECK-NEXT: pxor %xmm0, %xmm3
+; CHECK-NEXT: movdqa {{.*#+}} xmm5 = [18446744069414584320,18446744069414584320]
+; CHECK-NEXT: movdqa %xmm3, %xmm6
+; CHECK-NEXT: pcmpgtd %xmm5, %xmm6
+; CHECK-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
+; CHECK-NEXT: pcmpeqd %xmm5, %xmm3
+; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
+; CHECK-NEXT: pand %xmm7, %xmm3
+; CHECK-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
+; CHECK-NEXT: por %xmm3, %xmm6
+; CHECK-NEXT: pand %xmm6, %xmm4
+; CHECK-NEXT: pandn %xmm2, %xmm6
+; CHECK-NEXT: por %xmm4, %xmm6
+; CHECK-NEXT: pxor %xmm1, %xmm0
+; CHECK-NEXT: movdqa %xmm0, %xmm3
+; CHECK-NEXT: pcmpgtd %xmm5, %xmm3
+; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
+; CHECK-NEXT: pcmpeqd %xmm5, %xmm0
+; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
+; CHECK-NEXT: pand %xmm4, %xmm5
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
+; CHECK-NEXT: por %xmm5, %xmm0
+; CHECK-NEXT: pand %xmm0, %xmm1
+; CHECK-NEXT: pandn %xmm2, %xmm0
+; CHECK-NEXT: por %xmm1, %xmm0
+; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm6[0,2]
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi <4 x float> %x to <4 x i64>
+ %0 = icmp slt <4 x i64> %conv, <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>
+ %spec.store.select = select <4 x i1> %0, <4 x i64> %conv, <4 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>
+ %1 = icmp sgt <4 x i64> %spec.store.select, <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648>
+ %spec.store.select7 = select <4 x i1> %1, <4 x i64> %spec.store.select, <4 x i64> <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648>
+ %conv6 = trunc <4 x i64> %spec.store.select7 to <4 x i32>
+ ret <4 x i32> %conv6
+}
+
+define <4 x i32> @utest_f32i32(<4 x float> %x) {
+; CHECK-LABEL: utest_f32i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; CHECK-NEXT: movaps %xmm0, %xmm1
+; CHECK-NEXT: subss %xmm2, %xmm1
+; CHECK-NEXT: cvttss2si %xmm1, %rax
+; CHECK-NEXT: cvttss2si %xmm0, %rcx
+; CHECK-NEXT: movq %rcx, %rdx
+; CHECK-NEXT: sarq $63, %rdx
+; CHECK-NEXT: andq %rax, %rdx
+; CHECK-NEXT: orq %rcx, %rdx
+; CHECK-NEXT: movq %rdx, %xmm1
+; CHECK-NEXT: movaps %xmm0, %xmm3
+; CHECK-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[1,1]
+; CHECK-NEXT: cvttss2si %xmm3, %rax
+; CHECK-NEXT: subss %xmm2, %xmm3
+; CHECK-NEXT: cvttss2si %xmm3, %rcx
+; CHECK-NEXT: movq %rax, %rdx
+; CHECK-NEXT: sarq $63, %rdx
+; CHECK-NEXT: andq %rcx, %rdx
+; CHECK-NEXT: orq %rax, %rdx
+; CHECK-NEXT: movq %rdx, %xmm3
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
+; CHECK-NEXT: movaps %xmm0, %xmm3
+; CHECK-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,3],xmm0[3,3]
+; CHECK-NEXT: cvttss2si %xmm3, %rax
+; CHECK-NEXT: subss %xmm2, %xmm3
+; CHECK-NEXT: cvttss2si %xmm3, %rcx
+; CHECK-NEXT: movq %rax, %rdx
+; CHECK-NEXT: sarq $63, %rdx
+; CHECK-NEXT: andq %rcx, %rdx
+; CHECK-NEXT: orq %rax, %rdx
+; CHECK-NEXT: movq %rdx, %xmm3
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT: cvttss2si %xmm0, %rax
+; CHECK-NEXT: subss %xmm2, %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %rcx
+; CHECK-NEXT: movq %rax, %rdx
+; CHECK-NEXT: sarq $63, %rdx
+; CHECK-NEXT: andq %rcx, %rdx
+; CHECK-NEXT: orq %rax, %rdx
+; CHECK-NEXT: movq %rdx, %xmm0
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
+; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,4294967295]
+; CHECK-NEXT: movdqa {{.*#+}} xmm3 = [9223372039002259456,9223372039002259456]
+; CHECK-NEXT: movdqa %xmm0, %xmm4
+; CHECK-NEXT: pxor %xmm3, %xmm4
+; CHECK-NEXT: movdqa {{.*#+}} xmm5 = [9223372039002259455,9223372039002259455]
+; CHECK-NEXT: movdqa %xmm5, %xmm6
+; CHECK-NEXT: pcmpgtd %xmm4, %xmm6
+; CHECK-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
+; CHECK-NEXT: pcmpeqd %xmm5, %xmm4
+; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
+; CHECK-NEXT: pand %xmm7, %xmm4
+; CHECK-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
+; CHECK-NEXT: por %xmm4, %xmm6
+; CHECK-NEXT: pand %xmm6, %xmm0
+; CHECK-NEXT: pandn %xmm2, %xmm6
+; CHECK-NEXT: por %xmm0, %xmm6
+; CHECK-NEXT: pxor %xmm1, %xmm3
+; CHECK-NEXT: movdqa %xmm5, %xmm0
+; CHECK-NEXT: pcmpgtd %xmm3, %xmm0
+; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm0[0,0,2,2]
+; CHECK-NEXT: pcmpeqd %xmm5, %xmm3
+; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
+; CHECK-NEXT: pand %xmm4, %xmm3
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; CHECK-NEXT: por %xmm3, %xmm0
+; CHECK-NEXT: pand %xmm0, %xmm1
+; CHECK-NEXT: pandn %xmm2, %xmm0
+; CHECK-NEXT: por %xmm1, %xmm0
+; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm6[0,2]
+; CHECK-NEXT: retq
+entry:
+ %conv = fptoui <4 x float> %x to <4 x i64>
+ %0 = icmp ult <4 x i64> %conv, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
+ %spec.store.select = select <4 x i1> %0, <4 x i64> %conv, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
+ %conv6 = trunc <4 x i64> %spec.store.select to <4 x i32>
+ ret <4 x i32> %conv6
+}
+
+define <4 x i32> @ustest_f32i32(<4 x float> %x) {
+; CHECK-LABEL: ustest_f32i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movaps %xmm0, %xmm1
+; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3],xmm0[3,3]
+; CHECK-NEXT: cvttss2si %xmm1, %rax
+; CHECK-NEXT: movq %rax, %xmm1
+; CHECK-NEXT: movaps %xmm0, %xmm2
+; CHECK-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
+; CHECK-NEXT: cvttss2si %xmm2, %rax
+; CHECK-NEXT: movq %rax, %xmm2
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
+; CHECK-NEXT: cvttss2si %xmm0, %rax
+; CHECK-NEXT: movq %rax, %xmm4
+; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; CHECK-NEXT: cvttss2si %xmm0, %rax
+; CHECK-NEXT: movq %rax, %xmm0
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm0[0]
+; CHECK-NEXT: movdqa {{.*#+}} xmm8 = [4294967295,4294967295]
+; CHECK-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648]
+; CHECK-NEXT: movdqa %xmm4, %xmm1
+; CHECK-NEXT: pxor %xmm0, %xmm1
+; CHECK-NEXT: movdqa {{.*#+}} xmm5 = [2147483647,2147483647]
+; CHECK-NEXT: movdqa %xmm5, %xmm6
+; CHECK-NEXT: pcmpgtd %xmm1, %xmm6
+; CHECK-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
+; CHECK-NEXT: pcmpeqd %xmm5, %xmm1
+; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
+; CHECK-NEXT: pand %xmm7, %xmm3
+; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm6[1,1,3,3]
+; CHECK-NEXT: por %xmm3, %xmm1
+; CHECK-NEXT: pand %xmm1, %xmm4
+; CHECK-NEXT: pandn %xmm8, %xmm1
+; CHECK-NEXT: por %xmm4, %xmm1
+; CHECK-NEXT: movdqa %xmm2, %xmm3
+; CHECK-NEXT: pxor %xmm0, %xmm3
+; CHECK-NEXT: movdqa %xmm5, %xmm4
+; CHECK-NEXT: pcmpgtd %xmm3, %xmm4
+; CHECK-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
+; CHECK-NEXT: pcmpeqd %xmm5, %xmm3
+; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
+; CHECK-NEXT: pand %xmm6, %xmm3
+; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
+; CHECK-NEXT: por %xmm3, %xmm4
+; CHECK-NEXT: pand %xmm4, %xmm2
+; CHECK-NEXT: pandn %xmm8, %xmm4
+; CHECK-NEXT: por %xmm2, %xmm4
+; CHECK-NEXT: movdqa %xmm4, %xmm2
+; CHECK-NEXT: pxor %xmm0, %xmm2
+; CHECK-NEXT: movdqa %xmm2, %xmm3
+; CHECK-NEXT: pcmpgtd %xmm0, %xmm3
+; CHECK-NEXT: pcmpeqd %xmm0, %xmm2
+; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-NEXT: pand %xmm3, %xmm2
+; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
+; CHECK-NEXT: por %xmm2, %xmm3
+; CHECK-NEXT: pand %xmm4, %xmm3
+; CHECK-NEXT: movdqa %xmm1, %xmm2
+; CHECK-NEXT: pxor %xmm0, %xmm2
+; CHECK-NEXT: movdqa %xmm2, %xmm4
+; CHECK-NEXT: pcmpgtd %xmm0, %xmm4
+; CHECK-NEXT: pcmpeqd %xmm0, %xmm2
+; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-NEXT: pand %xmm4, %xmm2
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
+; CHECK-NEXT: por %xmm2, %xmm0
+; CHECK-NEXT: pand %xmm1, %xmm0
+; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[0,2]
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi <4 x float> %x to <4 x i64>
+ %0 = icmp slt <4 x i64> %conv, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
+ %spec.store.select = select <4 x i1> %0, <4 x i64> %conv, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
+ %1 = icmp sgt <4 x i64> %spec.store.select, zeroinitializer
+ %spec.store.select7 = select <4 x i1> %1, <4 x i64> %spec.store.select, <4 x i64> zeroinitializer
+ %conv6 = trunc <4 x i64> %spec.store.select7 to <4 x i32>
+ ret <4 x i32> %conv6
+}
+
+define <4 x i32> @stest_f16i32(<4 x half> %x) {
+; CHECK-LABEL: stest_f16i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: subq $32, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: .cfi_offset %rbx, -32
+; CHECK-NEXT: .cfi_offset %r14, -24
+; CHECK-NEXT: .cfi_offset %rbp, -16
+; CHECK-NEXT: movl %edx, %ebp
+; CHECK-NEXT: movl %esi, %ebx
+; CHECK-NEXT: movl %edi, %r14d
+; CHECK-NEXT: movzwl %cx, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %rax
+; CHECK-NEXT: movq %rax, %xmm0
+; CHECK-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movzwl %bp, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %rax
+; CHECK-NEXT: movq %rax, %xmm0
+; CHECK-NEXT: punpcklqdq (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movzwl %bx, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %rax
+; CHECK-NEXT: movq %rax, %xmm0
+; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movzwl %r14w, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %rax
+; CHECK-NEXT: movq %rax, %xmm3
+; CHECK-NEXT: punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm3 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm3 = xmm3[0],mem[0]
+; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [2147483647,2147483647]
+; CHECK-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648]
+; CHECK-NEXT: movdqa %xmm3, %xmm1
+; CHECK-NEXT: pxor %xmm0, %xmm1
+; CHECK-NEXT: movdqa {{.*#+}} xmm4 = [4294967295,4294967295]
+; CHECK-NEXT: movdqa %xmm4, %xmm5
+; CHECK-NEXT: pcmpgtd %xmm1, %xmm5
+; CHECK-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
+; CHECK-NEXT: pcmpeqd %xmm4, %xmm1
+; CHECK-NEXT: pshufd {{.*#+}} xmm7 = xmm1[1,1,3,3]
+; CHECK-NEXT: pand %xmm6, %xmm7
+; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm5[1,1,3,3]
+; CHECK-NEXT: por %xmm7, %xmm1
+; CHECK-NEXT: pand %xmm1, %xmm3
+; CHECK-NEXT: pandn %xmm2, %xmm1
+; CHECK-NEXT: por %xmm3, %xmm1
+; CHECK-NEXT: movdqa (%rsp), %xmm7 # 16-byte Reload
+; CHECK-NEXT: movdqa %xmm7, %xmm3
+; CHECK-NEXT: pxor %xmm0, %xmm3
+; CHECK-NEXT: movdqa %xmm4, %xmm5
+; CHECK-NEXT: pcmpgtd %xmm3, %xmm5
+; CHECK-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
+; CHECK-NEXT: pcmpeqd %xmm4, %xmm3
+; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
+; CHECK-NEXT: pand %xmm6, %xmm3
+; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
+; CHECK-NEXT: por %xmm3, %xmm4
+; CHECK-NEXT: movdqa %xmm7, %xmm3
+; CHECK-NEXT: pand %xmm4, %xmm3
+; CHECK-NEXT: pandn %xmm2, %xmm4
+; CHECK-NEXT: por %xmm3, %xmm4
+; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [18446744071562067968,18446744071562067968]
+; CHECK-NEXT: movdqa %xmm4, %xmm3
+; CHECK-NEXT: pxor %xmm0, %xmm3
+; CHECK-NEXT: movdqa {{.*#+}} xmm5 = [18446744069414584320,18446744069414584320]
+; CHECK-NEXT: movdqa %xmm3, %xmm6
+; CHECK-NEXT: pcmpgtd %xmm5, %xmm6
+; CHECK-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
+; CHECK-NEXT: pcmpeqd %xmm5, %xmm3
+; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
+; CHECK-NEXT: pand %xmm7, %xmm3
+; CHECK-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
+; CHECK-NEXT: por %xmm3, %xmm6
+; CHECK-NEXT: pand %xmm6, %xmm4
+; CHECK-NEXT: pandn %xmm2, %xmm6
+; CHECK-NEXT: por %xmm4, %xmm6
+; CHECK-NEXT: pxor %xmm1, %xmm0
+; CHECK-NEXT: movdqa %xmm0, %xmm3
+; CHECK-NEXT: pcmpgtd %xmm5, %xmm3
+; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
+; CHECK-NEXT: pcmpeqd %xmm5, %xmm0
+; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
+; CHECK-NEXT: pand %xmm4, %xmm5
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
+; CHECK-NEXT: por %xmm5, %xmm0
+; CHECK-NEXT: pand %xmm0, %xmm1
+; CHECK-NEXT: pandn %xmm2, %xmm0
+; CHECK-NEXT: por %xmm1, %xmm0
+; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm6[0,2]
+; CHECK-NEXT: addq $32, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi <4 x half> %x to <4 x i64>
+ %0 = icmp slt <4 x i64> %conv, <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>
+ %spec.store.select = select <4 x i1> %0, <4 x i64> %conv, <4 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>
+ %1 = icmp sgt <4 x i64> %spec.store.select, <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648>
+ %spec.store.select7 = select <4 x i1> %1, <4 x i64> %spec.store.select, <4 x i64> <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648>
+ %conv6 = trunc <4 x i64> %spec.store.select7 to <4 x i32>
+ ret <4 x i32> %conv6
+}
+
+define <4 x i32> @utesth_f16i32(<4 x half> %x) {
+; CHECK-LABEL: utesth_f16i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: subq $32, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: .cfi_offset %rbx, -32
+; CHECK-NEXT: .cfi_offset %r14, -24
+; CHECK-NEXT: .cfi_offset %rbp, -16
+; CHECK-NEXT: movl %ecx, %ebp
+; CHECK-NEXT: movl %edx, %r14d
+; CHECK-NEXT: movl %edi, %ebx
+; CHECK-NEXT: movzwl %si, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %rax
+; CHECK-NEXT: subss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %rcx
+; CHECK-NEXT: movq %rax, %rdx
+; CHECK-NEXT: sarq $63, %rdx
+; CHECK-NEXT: andq %rcx, %rdx
+; CHECK-NEXT: orq %rax, %rdx
+; CHECK-NEXT: movq %rdx, %xmm0
+; CHECK-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movzwl %bx, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %rax
+; CHECK-NEXT: subss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %rcx
+; CHECK-NEXT: movq %rax, %rdx
+; CHECK-NEXT: sarq $63, %rdx
+; CHECK-NEXT: andq %rcx, %rdx
+; CHECK-NEXT: orq %rax, %rdx
+; CHECK-NEXT: movq %rdx, %xmm0
+; CHECK-NEXT: punpcklqdq (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movzwl %bp, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %rax
+; CHECK-NEXT: subss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %rcx
+; CHECK-NEXT: movq %rax, %rdx
+; CHECK-NEXT: sarq $63, %rdx
+; CHECK-NEXT: andq %rcx, %rdx
+; CHECK-NEXT: orq %rax, %rdx
+; CHECK-NEXT: movq %rdx, %xmm0
+; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movzwl %r14w, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %rax
+; CHECK-NEXT: subss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %rcx
+; CHECK-NEXT: movq %rax, %rdx
+; CHECK-NEXT: sarq $63, %rdx
+; CHECK-NEXT: andq %rcx, %rdx
+; CHECK-NEXT: orq %rax, %rdx
+; CHECK-NEXT: movq %rdx, %xmm0
+; CHECK-NEXT: punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [4294967295,4294967295]
+; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
+; CHECK-NEXT: movdqa %xmm0, %xmm3
+; CHECK-NEXT: pxor %xmm2, %xmm3
+; CHECK-NEXT: movdqa {{.*#+}} xmm4 = [9223372039002259455,9223372039002259455]
+; CHECK-NEXT: movdqa %xmm4, %xmm5
+; CHECK-NEXT: pcmpgtd %xmm3, %xmm5
+; CHECK-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
+; CHECK-NEXT: pcmpeqd %xmm4, %xmm3
+; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
+; CHECK-NEXT: pand %xmm6, %xmm3
+; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
+; CHECK-NEXT: por %xmm3, %xmm5
+; CHECK-NEXT: pand %xmm5, %xmm0
+; CHECK-NEXT: pandn %xmm1, %xmm5
+; CHECK-NEXT: por %xmm0, %xmm5
+; CHECK-NEXT: movdqa (%rsp), %xmm6 # 16-byte Reload
+; CHECK-NEXT: pxor %xmm6, %xmm2
+; CHECK-NEXT: movdqa %xmm4, %xmm0
+; CHECK-NEXT: pcmpgtd %xmm2, %xmm0
+; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,2,2]
+; CHECK-NEXT: pcmpeqd %xmm4, %xmm2
+; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-NEXT: pand %xmm3, %xmm2
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; CHECK-NEXT: por %xmm2, %xmm0
+; CHECK-NEXT: pand %xmm0, %xmm6
+; CHECK-NEXT: pandn %xmm1, %xmm0
+; CHECK-NEXT: por %xmm6, %xmm0
+; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm5[0,2]
+; CHECK-NEXT: addq $32, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %conv = fptoui <4 x half> %x to <4 x i64>
+ %0 = icmp ult <4 x i64> %conv, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
+ %spec.store.select = select <4 x i1> %0, <4 x i64> %conv, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
+ %conv6 = trunc <4 x i64> %spec.store.select to <4 x i32>
+ ret <4 x i32> %conv6
+}
+
+define <4 x i32> @ustest_f16i32(<4 x half> %x) {
+; CHECK-LABEL: ustest_f16i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: subq $32, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: .cfi_offset %rbx, -32
+; CHECK-NEXT: .cfi_offset %r14, -24
+; CHECK-NEXT: .cfi_offset %rbp, -16
+; CHECK-NEXT: movl %edx, %ebp
+; CHECK-NEXT: movl %esi, %ebx
+; CHECK-NEXT: movl %edi, %r14d
+; CHECK-NEXT: movzwl %cx, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %rax
+; CHECK-NEXT: movq %rax, %xmm0
+; CHECK-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movzwl %bp, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %rax
+; CHECK-NEXT: movq %rax, %xmm0
+; CHECK-NEXT: punpcklqdq (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movzwl %bx, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %rax
+; CHECK-NEXT: movq %rax, %xmm0
+; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movzwl %r14w, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %rax
+; CHECK-NEXT: movq %rax, %xmm3
+; CHECK-NEXT: punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm3 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm3 = xmm3[0],mem[0]
+; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,4294967295]
+; CHECK-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648]
+; CHECK-NEXT: movdqa %xmm3, %xmm1
+; CHECK-NEXT: pxor %xmm0, %xmm1
+; CHECK-NEXT: movdqa {{.*#+}} xmm4 = [2147483647,2147483647]
+; CHECK-NEXT: movdqa %xmm4, %xmm5
+; CHECK-NEXT: pcmpgtd %xmm1, %xmm5
+; CHECK-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
+; CHECK-NEXT: pcmpeqd %xmm4, %xmm1
+; CHECK-NEXT: pshufd {{.*#+}} xmm7 = xmm1[1,1,3,3]
+; CHECK-NEXT: pand %xmm6, %xmm7
+; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm5[1,1,3,3]
+; CHECK-NEXT: por %xmm7, %xmm1
+; CHECK-NEXT: pand %xmm1, %xmm3
+; CHECK-NEXT: pandn %xmm2, %xmm1
+; CHECK-NEXT: por %xmm3, %xmm1
+; CHECK-NEXT: movdqa (%rsp), %xmm7 # 16-byte Reload
+; CHECK-NEXT: movdqa %xmm7, %xmm3
+; CHECK-NEXT: pxor %xmm0, %xmm3
+; CHECK-NEXT: movdqa %xmm4, %xmm5
+; CHECK-NEXT: pcmpgtd %xmm3, %xmm5
+; CHECK-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
+; CHECK-NEXT: pcmpeqd %xmm4, %xmm3
+; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
+; CHECK-NEXT: pand %xmm6, %xmm3
+; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
+; CHECK-NEXT: por %xmm3, %xmm4
+; CHECK-NEXT: movdqa %xmm7, %xmm3
+; CHECK-NEXT: pand %xmm4, %xmm3
+; CHECK-NEXT: pandn %xmm2, %xmm4
+; CHECK-NEXT: por %xmm3, %xmm4
+; CHECK-NEXT: movdqa %xmm4, %xmm2
+; CHECK-NEXT: pxor %xmm0, %xmm2
+; CHECK-NEXT: movdqa %xmm2, %xmm3
+; CHECK-NEXT: pcmpgtd %xmm0, %xmm3
+; CHECK-NEXT: pcmpeqd %xmm0, %xmm2
+; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-NEXT: pand %xmm3, %xmm2
+; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
+; CHECK-NEXT: por %xmm2, %xmm3
+; CHECK-NEXT: pand %xmm4, %xmm3
+; CHECK-NEXT: movdqa %xmm1, %xmm2
+; CHECK-NEXT: pxor %xmm0, %xmm2
+; CHECK-NEXT: movdqa %xmm2, %xmm4
+; CHECK-NEXT: pcmpgtd %xmm0, %xmm4
+; CHECK-NEXT: pcmpeqd %xmm0, %xmm2
+; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-NEXT: pand %xmm4, %xmm2
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
+; CHECK-NEXT: por %xmm2, %xmm0
+; CHECK-NEXT: pand %xmm1, %xmm0
+; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[0,2]
+; CHECK-NEXT: addq $32, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi <4 x half> %x to <4 x i64>
+ %0 = icmp slt <4 x i64> %conv, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
+ %spec.store.select = select <4 x i1> %0, <4 x i64> %conv, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
+ %1 = icmp sgt <4 x i64> %spec.store.select, zeroinitializer
+ %spec.store.select7 = select <4 x i1> %1, <4 x i64> %spec.store.select, <4 x i64> zeroinitializer
+ %conv6 = trunc <4 x i64> %spec.store.select7 to <4 x i32>
+ ret <4 x i32> %conv6
+}
+
+; i16 saturate
+
+define <2 x i16> @stest_f64i16(<2 x double> %x) {
+; CHECK-LABEL: stest_f64i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cvttpd2dq %xmm0, %xmm0
+; CHECK-NEXT: movdqa {{.*#+}} xmm1 = <32767,32767,u,u>
+; CHECK-NEXT: movdqa %xmm1, %xmm2
+; CHECK-NEXT: pcmpgtd %xmm0, %xmm2
+; CHECK-NEXT: pand %xmm2, %xmm0
+; CHECK-NEXT: pandn %xmm1, %xmm2
+; CHECK-NEXT: por %xmm0, %xmm2
+; CHECK-NEXT: movdqa {{.*#+}} xmm0 = <4294934528,4294934528,u,u>
+; CHECK-NEXT: movdqa %xmm2, %xmm1
+; CHECK-NEXT: pcmpgtd %xmm0, %xmm1
+; CHECK-NEXT: pand %xmm1, %xmm2
+; CHECK-NEXT: pandn %xmm0, %xmm1
+; CHECK-NEXT: por %xmm2, %xmm1
+; CHECK-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi <2 x double> %x to <2 x i32>
+ %0 = icmp slt <2 x i32> %conv, <i32 32767, i32 32767>
+ %spec.store.select = select <2 x i1> %0, <2 x i32> %conv, <2 x i32> <i32 32767, i32 32767>
+ %1 = icmp sgt <2 x i32> %spec.store.select, <i32 -32768, i32 -32768>
+ %spec.store.select7 = select <2 x i1> %1, <2 x i32> %spec.store.select, <2 x i32> <i32 -32768, i32 -32768>
+ %conv6 = trunc <2 x i32> %spec.store.select7 to <2 x i16>
+ ret <2 x i16> %conv6
+}
+
+define <2 x i16> @utest_f64i16(<2 x double> %x) {
+; Unsigned saturating narrow: fptoui <2 x double> -> <2 x i32>, clamp to at
+; most 65535 (u16 max), then truncate to <2 x i16>.
+; CHECK-LABEL: utest_f64i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cvttpd2dq %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm2
+; CHECK-NEXT: psrad $31, %xmm2
+; CHECK-NEXT: addpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: cvttpd2dq %xmm0, %xmm0
+; CHECK-NEXT: andpd %xmm2, %xmm0
+; CHECK-NEXT: orpd %xmm1, %xmm0
+; CHECK-NEXT: movapd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
+; CHECK-NEXT: xorpd %xmm0, %xmm1
+; CHECK-NEXT: movdqa {{.*#+}} xmm2 = <2147549183,2147549183,u,u>
+; CHECK-NEXT: pcmpgtd %xmm1, %xmm2
+; CHECK-NEXT: andpd %xmm2, %xmm0
+; CHECK-NEXT: andnpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; CHECK-NEXT: orpd %xmm0, %xmm2
+; CHECK-NEXT: pshuflw {{.*#+}} xmm0 = xmm2[0,2,2,3,4,5,6,7]
+; CHECK-NEXT: retq
+entry:
+ %conv = fptoui <2 x double> %x to <2 x i32>
+ %0 = icmp ult <2 x i32> %conv, <i32 65535, i32 65535>
+ %spec.store.select = select <2 x i1> %0, <2 x i32> %conv, <2 x i32> <i32 65535, i32 65535>
+ %conv6 = trunc <2 x i32> %spec.store.select to <2 x i16>
+ ret <2 x i16> %conv6
+}
+
+define <2 x i16> @ustest_f64i16(<2 x double> %x) {
+; Signed convert, unsigned result range: fptosi <2 x double> -> <2 x i32>,
+; clamp to [0, 65535], then truncate to <2 x i16>.
+; CHECK-LABEL: ustest_f64i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cvttpd2dq %xmm0, %xmm0
+; CHECK-NEXT: movdqa {{.*#+}} xmm1 = <65535,65535,u,u>
+; CHECK-NEXT: movdqa %xmm1, %xmm2
+; CHECK-NEXT: pcmpgtd %xmm0, %xmm2
+; CHECK-NEXT: pand %xmm2, %xmm0
+; CHECK-NEXT: pandn %xmm1, %xmm2
+; CHECK-NEXT: por %xmm0, %xmm2
+; CHECK-NEXT: pxor %xmm0, %xmm0
+; CHECK-NEXT: movdqa %xmm2, %xmm1
+; CHECK-NEXT: pcmpgtd %xmm0, %xmm1
+; CHECK-NEXT: pand %xmm2, %xmm1
+; CHECK-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi <2 x double> %x to <2 x i32>
+ %0 = icmp slt <2 x i32> %conv, <i32 65535, i32 65535>
+ %spec.store.select = select <2 x i1> %0, <2 x i32> %conv, <2 x i32> <i32 65535, i32 65535>
+ %1 = icmp sgt <2 x i32> %spec.store.select, zeroinitializer
+ %spec.store.select7 = select <2 x i1> %1, <2 x i32> %spec.store.select, <2 x i32> zeroinitializer
+ %conv6 = trunc <2 x i32> %spec.store.select7 to <2 x i16>
+ ret <2 x i16> %conv6
+}
+
+define <4 x i16> @stest_f32i16(<4 x float> %x) {
+; Signed saturating narrow: fptosi <4 x float> -> <4 x i32>, clamp to the i16
+; range [-32768, 32767], truncate to <4 x i16>. Matches packssdw exactly, so
+; the whole clamp folds into one instruction.
+; CHECK-LABEL: stest_f32i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cvttps2dq %xmm0, %xmm0
+; CHECK-NEXT: packssdw %xmm0, %xmm0
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi <4 x float> %x to <4 x i32>
+ %0 = icmp slt <4 x i32> %conv, <i32 32767, i32 32767, i32 32767, i32 32767>
+ %spec.store.select = select <4 x i1> %0, <4 x i32> %conv, <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767>
+ %1 = icmp sgt <4 x i32> %spec.store.select, <i32 -32768, i32 -32768, i32 -32768, i32 -32768>
+ %spec.store.select7 = select <4 x i1> %1, <4 x i32> %spec.store.select, <4 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768>
+ %conv6 = trunc <4 x i32> %spec.store.select7 to <4 x i16>
+ ret <4 x i16> %conv6
+}
+
+define <4 x i16> @utest_f32i16(<4 x float> %x) {
+; Unsigned saturating narrow: fptoui <4 x float> -> <4 x i32>, clamp to at
+; most 65535 (u16 max), then truncate to <4 x i16>.
+; CHECK-LABEL: utest_f32i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cvttps2dq %xmm0, %xmm1
+; CHECK-NEXT: movdqa %xmm1, %xmm2
+; CHECK-NEXT: psrad $31, %xmm2
+; CHECK-NEXT: subps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: cvttps2dq %xmm0, %xmm0
+; CHECK-NEXT: pand %xmm2, %xmm0
+; CHECK-NEXT: por %xmm1, %xmm0
+; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
+; CHECK-NEXT: pxor %xmm0, %xmm1
+; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [2147549183,2147549183,2147549183,2147549183]
+; CHECK-NEXT: pcmpgtd %xmm1, %xmm2
+; CHECK-NEXT: pand %xmm2, %xmm0
+; CHECK-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; CHECK-NEXT: por %xmm0, %xmm2
+; CHECK-NEXT: pshuflw {{.*#+}} xmm0 = xmm2[0,2,2,3,4,5,6,7]
+; CHECK-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; CHECK-NEXT: retq
+entry:
+ %conv = fptoui <4 x float> %x to <4 x i32>
+ %0 = icmp ult <4 x i32> %conv, <i32 65535, i32 65535, i32 65535, i32 65535>
+ %spec.store.select = select <4 x i1> %0, <4 x i32> %conv, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>
+ %conv6 = trunc <4 x i32> %spec.store.select to <4 x i16>
+ ret <4 x i16> %conv6
+}
+
+define <4 x i16> @ustest_f32i16(<4 x float> %x) {
+; Signed convert, unsigned result range: fptosi <4 x float> -> <4 x i32>,
+; clamp to [0, 65535], then truncate to <4 x i16>.
+; CHECK-LABEL: ustest_f32i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cvttps2dq %xmm0, %xmm0
+; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
+; CHECK-NEXT: movdqa %xmm1, %xmm2
+; CHECK-NEXT: pcmpgtd %xmm0, %xmm2
+; CHECK-NEXT: pand %xmm2, %xmm0
+; CHECK-NEXT: pandn %xmm1, %xmm2
+; CHECK-NEXT: por %xmm0, %xmm2
+; CHECK-NEXT: pxor %xmm0, %xmm0
+; CHECK-NEXT: movdqa %xmm2, %xmm1
+; CHECK-NEXT: pcmpgtd %xmm0, %xmm1
+; CHECK-NEXT: pand %xmm2, %xmm1
+; CHECK-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]
+; CHECK-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi <4 x float> %x to <4 x i32>
+ %0 = icmp slt <4 x i32> %conv, <i32 65535, i32 65535, i32 65535, i32 65535>
+ %spec.store.select = select <4 x i1> %0, <4 x i32> %conv, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>
+ %1 = icmp sgt <4 x i32> %spec.store.select, zeroinitializer
+ %spec.store.select7 = select <4 x i1> %1, <4 x i32> %spec.store.select, <4 x i32> zeroinitializer
+ %conv6 = trunc <4 x i32> %spec.store.select7 to <4 x i16>
+ ret <4 x i16> %conv6
+}
+
+define <8 x i16> @stest_f16i16(<8 x half> %x) {
+; Signed saturating narrow from half: each f16 lane is widened to float via
+; the __gnu_h2f_ieee libcall, converted to i32, clamped to [-32768, 32767],
+; then truncated to i16. NOTE: the mailing-list archiver mangled the original
+; "callq __gnu_h2f_ieee@PLT" lines into "... at PLT"; restored below.
+; CHECK-LABEL: stest_f16i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: pushq %r15
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: pushq %r13
+; CHECK-NEXT: .cfi_def_cfa_offset 40
+; CHECK-NEXT: pushq %r12
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 56
+; CHECK-NEXT: subq $72, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 128
+; CHECK-NEXT: .cfi_offset %rbx, -56
+; CHECK-NEXT: .cfi_offset %r12, -48
+; CHECK-NEXT: .cfi_offset %r13, -40
+; CHECK-NEXT: .cfi_offset %r14, -32
+; CHECK-NEXT: .cfi_offset %r15, -24
+; CHECK-NEXT: .cfi_offset %rbp, -16
+; CHECK-NEXT: movl %r9d, %ebp
+; CHECK-NEXT: movl %r8d, %ebx
+; CHECK-NEXT: movl %ecx, %r13d
+; CHECK-NEXT: movl %edx, %r12d
+; CHECK-NEXT: movl %esi, %r15d
+; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %r14d
+; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %eax
+; CHECK-NEXT: movd %eax, %xmm0
+; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movl %r14d, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %eax
+; CHECK-NEXT: movd %eax, %xmm0
+; CHECK-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movzwl %bp, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %eax
+; CHECK-NEXT: movd %eax, %xmm0
+; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movzwl %bx, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %eax
+; CHECK-NEXT: movd %eax, %xmm0
+; CHECK-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; CHECK-NEXT: punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movzwl %r13w, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %eax
+; CHECK-NEXT: movd %eax, %xmm0
+; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movzwl %r12w, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %eax
+; CHECK-NEXT: movd %eax, %xmm0
+; CHECK-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movzwl %r15w, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %eax
+; CHECK-NEXT: movd %eax, %xmm0
+; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 2-byte Folded Reload
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %eax
+; CHECK-NEXT: movd %eax, %xmm0
+; CHECK-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; CHECK-NEXT: punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT: packssdw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: addq $72, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 56
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: popq %r12
+; CHECK-NEXT: .cfi_def_cfa_offset 40
+; CHECK-NEXT: popq %r13
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: popq %r15
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi <8 x half> %x to <8 x i32>
+ %0 = icmp slt <8 x i32> %conv, <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
+ %spec.store.select = select <8 x i1> %0, <8 x i32> %conv, <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
+ %1 = icmp sgt <8 x i32> %spec.store.select, <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
+ %spec.store.select7 = select <8 x i1> %1, <8 x i32> %spec.store.select, <8 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>
+ %conv6 = trunc <8 x i32> %spec.store.select7 to <8 x i16>
+ ret <8 x i16> %conv6
+}
+
+define <8 x i16> @utesth_f16i16(<8 x half> %x) {
+; Unsigned saturating narrow from half: each f16 lane is widened to float via
+; the __gnu_h2f_ieee libcall, converted unsigned to i32 (note the 64-bit
+; cvttss2si into %rax), clamped to at most 65535, then truncated to i16.
+; NOTE: mailing-list mangling of "@PLT" into " at PLT" restored below.
+; CHECK-LABEL: utesth_f16i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: pushq %r15
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: pushq %r13
+; CHECK-NEXT: .cfi_def_cfa_offset 40
+; CHECK-NEXT: pushq %r12
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 56
+; CHECK-NEXT: subq $72, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 128
+; CHECK-NEXT: .cfi_offset %rbx, -56
+; CHECK-NEXT: .cfi_offset %r12, -48
+; CHECK-NEXT: .cfi_offset %r13, -40
+; CHECK-NEXT: .cfi_offset %r14, -32
+; CHECK-NEXT: .cfi_offset %r15, -24
+; CHECK-NEXT: .cfi_offset %rbp, -16
+; CHECK-NEXT: movl %r9d, %ebp
+; CHECK-NEXT: movl %r8d, %ebx
+; CHECK-NEXT: movl %ecx, %r13d
+; CHECK-NEXT: movl %edx, %r12d
+; CHECK-NEXT: movl %esi, %r15d
+; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %r14d
+; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %rax
+; CHECK-NEXT: movd %eax, %xmm0
+; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movl %r14d, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %rax
+; CHECK-NEXT: movd %eax, %xmm0
+; CHECK-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movzwl %bp, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %rax
+; CHECK-NEXT: movd %eax, %xmm0
+; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movzwl %bx, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %rax
+; CHECK-NEXT: movd %eax, %xmm0
+; CHECK-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; CHECK-NEXT: punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movzwl %r13w, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %rax
+; CHECK-NEXT: movd %eax, %xmm0
+; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movzwl %r12w, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %rax
+; CHECK-NEXT: movd %eax, %xmm0
+; CHECK-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movzwl %r15w, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %rax
+; CHECK-NEXT: movd %eax, %xmm0
+; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 2-byte Folded Reload
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %rax
+; CHECK-NEXT: movd %eax, %xmm1
+; CHECK-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
+; CHECK-NEXT: punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
+; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; CHECK-NEXT: movdqa %xmm1, %xmm3
+; CHECK-NEXT: pxor %xmm2, %xmm3
+; CHECK-NEXT: movdqa {{.*#+}} xmm4 = [2147549183,2147549183,2147549183,2147549183]
+; CHECK-NEXT: movdqa %xmm4, %xmm0
+; CHECK-NEXT: pcmpgtd %xmm3, %xmm0
+; CHECK-NEXT: pand %xmm0, %xmm1
+; CHECK-NEXT: pcmpeqd %xmm3, %xmm3
+; CHECK-NEXT: pxor %xmm3, %xmm0
+; CHECK-NEXT: por %xmm1, %xmm0
+; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT: pxor %xmm1, %xmm2
+; CHECK-NEXT: pcmpgtd %xmm2, %xmm4
+; CHECK-NEXT: pand %xmm4, %xmm1
+; CHECK-NEXT: pxor %xmm3, %xmm4
+; CHECK-NEXT: por %xmm1, %xmm4
+; CHECK-NEXT: pslld $16, %xmm4
+; CHECK-NEXT: psrad $16, %xmm4
+; CHECK-NEXT: pslld $16, %xmm0
+; CHECK-NEXT: psrad $16, %xmm0
+; CHECK-NEXT: packssdw %xmm4, %xmm0
+; CHECK-NEXT: addq $72, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 56
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: popq %r12
+; CHECK-NEXT: .cfi_def_cfa_offset 40
+; CHECK-NEXT: popq %r13
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: popq %r15
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %conv = fptoui <8 x half> %x to <8 x i32>
+ %0 = icmp ult <8 x i32> %conv, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
+ %spec.store.select = select <8 x i1> %0, <8 x i32> %conv, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
+ %conv6 = trunc <8 x i32> %spec.store.select to <8 x i16>
+ ret <8 x i16> %conv6
+}
+
+define <8 x i16> @ustest_f16i16(<8 x half> %x) {
+; Signed convert, unsigned result range, from half: each f16 lane is widened
+; to float via the __gnu_h2f_ieee libcall, converted signed to i32, clamped
+; to [0, 65535], then truncated to i16.
+; NOTE: mailing-list mangling of "@PLT" into " at PLT" restored below.
+; CHECK-LABEL: ustest_f16i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: pushq %r15
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: pushq %r13
+; CHECK-NEXT: .cfi_def_cfa_offset 40
+; CHECK-NEXT: pushq %r12
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 56
+; CHECK-NEXT: subq $72, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 128
+; CHECK-NEXT: .cfi_offset %rbx, -56
+; CHECK-NEXT: .cfi_offset %r12, -48
+; CHECK-NEXT: .cfi_offset %r13, -40
+; CHECK-NEXT: .cfi_offset %r14, -32
+; CHECK-NEXT: .cfi_offset %r15, -24
+; CHECK-NEXT: .cfi_offset %rbp, -16
+; CHECK-NEXT: movl %r9d, %r15d
+; CHECK-NEXT: movl %r8d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT: movl %edx, %ebp
+; CHECK-NEXT: movl %esi, %r14d
+; CHECK-NEXT: movl %edi, %ebx
+; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %r12d
+; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %r13d
+; CHECK-NEXT: movzwl %cx, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %eax
+; CHECK-NEXT: movd %eax, %xmm0
+; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movzwl %bp, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %eax
+; CHECK-NEXT: movd %eax, %xmm0
+; CHECK-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movzwl %r14w, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %eax
+; CHECK-NEXT: movd %eax, %xmm0
+; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movzwl %bx, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %eax
+; CHECK-NEXT: movd %eax, %xmm0
+; CHECK-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; CHECK-NEXT: punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movl %r13d, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %eax
+; CHECK-NEXT: movd %eax, %xmm0
+; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movl %r12d, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %eax
+; CHECK-NEXT: movd %eax, %xmm0
+; CHECK-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movzwl %r15w, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %eax
+; CHECK-NEXT: movd %eax, %xmm0
+; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 2-byte Folded Reload
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %eax
+; CHECK-NEXT: movd %eax, %xmm0
+; CHECK-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; CHECK-NEXT: punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
+; CHECK-NEXT: movdqa %xmm1, %xmm2
+; CHECK-NEXT: pcmpgtd %xmm0, %xmm2
+; CHECK-NEXT: pand %xmm2, %xmm0
+; CHECK-NEXT: pandn %xmm1, %xmm2
+; CHECK-NEXT: por %xmm0, %xmm2
+; CHECK-NEXT: movdqa %xmm1, %xmm3
+; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT: pcmpgtd %xmm0, %xmm3
+; CHECK-NEXT: pand %xmm3, %xmm0
+; CHECK-NEXT: pandn %xmm1, %xmm3
+; CHECK-NEXT: por %xmm0, %xmm3
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: movdqa %xmm3, %xmm0
+; CHECK-NEXT: pcmpgtd %xmm1, %xmm0
+; CHECK-NEXT: pand %xmm3, %xmm0
+; CHECK-NEXT: movdqa %xmm2, %xmm3
+; CHECK-NEXT: pcmpgtd %xmm1, %xmm3
+; CHECK-NEXT: pand %xmm2, %xmm3
+; CHECK-NEXT: pslld $16, %xmm3
+; CHECK-NEXT: psrad $16, %xmm3
+; CHECK-NEXT: pslld $16, %xmm0
+; CHECK-NEXT: psrad $16, %xmm0
+; CHECK-NEXT: packssdw %xmm3, %xmm0
+; CHECK-NEXT: addq $72, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 56
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: popq %r12
+; CHECK-NEXT: .cfi_def_cfa_offset 40
+; CHECK-NEXT: popq %r13
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: popq %r15
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi <8 x half> %x to <8 x i32>
+ %0 = icmp slt <8 x i32> %conv, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
+ %spec.store.select = select <8 x i1> %0, <8 x i32> %conv, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
+ %1 = icmp sgt <8 x i32> %spec.store.select, zeroinitializer
+ %spec.store.select7 = select <8 x i1> %1, <8 x i32> %spec.store.select, <8 x i32> zeroinitializer
+ %conv6 = trunc <8 x i32> %spec.store.select7 to <8 x i16>
+ ret <8 x i16> %conv6
+}
+
+; i64 saturate
+
+define <2 x i64> @stest_f64i64(<2 x double> %x) {
+; Signed saturating narrow: fptosi <2 x double> -> <2 x i128> via the
+; __fixdfti libcall (i128 returned in rdx:rax), clamp to the i64 range
+; [INT64_MIN, INT64_MAX], then truncate to <2 x i64>.
+; NOTE: mailing-list mangling of "@PLT" into " at PLT" restored below.
+; CHECK-LABEL: stest_f64i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: .cfi_offset %rbx, -24
+; CHECK-NEXT: .cfi_offset %r14, -16
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT: callq __fixdfti@PLT
+; CHECK-NEXT: movq %rax, %r14
+; CHECK-NEXT: movq %rdx, %rbx
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: callq __fixdfti@PLT
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: movabsq $9223372036854775807, %rsi # imm = 0x7FFFFFFFFFFFFFFF
+; CHECK-NEXT: cmpq %rsi, %rax
+; CHECK-NEXT: movq %rdx, %rdi
+; CHECK-NEXT: sbbq $0, %rdi
+; CHECK-NEXT: cmovgeq %rcx, %rdx
+; CHECK-NEXT: cmovgeq %rsi, %rax
+; CHECK-NEXT: cmpq %rsi, %r14
+; CHECK-NEXT: movq %rbx, %rdi
+; CHECK-NEXT: sbbq $0, %rdi
+; CHECK-NEXT: cmovlq %rbx, %rcx
+; CHECK-NEXT: cmovlq %r14, %rsi
+; CHECK-NEXT: movabsq $-9223372036854775808, %r8 # imm = 0x8000000000000000
+; CHECK-NEXT: cmpq %rsi, %r8
+; CHECK-NEXT: movq $-1, %rbx
+; CHECK-NEXT: movq $-1, %rdi
+; CHECK-NEXT: sbbq %rcx, %rdi
+; CHECK-NEXT: cmovgeq %r8, %rsi
+; CHECK-NEXT: cmpq %rax, %r8
+; CHECK-NEXT: sbbq %rdx, %rbx
+; CHECK-NEXT: cmovgeq %r8, %rax
+; CHECK-NEXT: movq %rax, %xmm0
+; CHECK-NEXT: movq %rsi, %xmm1
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi <2 x double> %x to <2 x i128>
+ %0 = icmp slt <2 x i128> %conv, <i128 9223372036854775807, i128 9223372036854775807>
+ %spec.store.select = select <2 x i1> %0, <2 x i128> %conv, <2 x i128> <i128 9223372036854775807, i128 9223372036854775807>
+ %1 = icmp sgt <2 x i128> %spec.store.select, <i128 -9223372036854775808, i128 -9223372036854775808>
+ %spec.store.select7 = select <2 x i1> %1, <2 x i128> %spec.store.select, <2 x i128> <i128 -9223372036854775808, i128 -9223372036854775808>
+ %conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+define <2 x i64> @utest_f64i64(<2 x double> %x) {
+; Unsigned saturating narrow: fptoui <2 x double> -> <2 x i128> via the
+; __fixunsdfti libcall; results with any high-64 bits set (>= 2^64) saturate,
+; then truncate to <2 x i64>. Note the upper-bound constant is 2^64 itself.
+; NOTE: mailing-list mangling of "@PLT" into " at PLT" restored below.
+; CHECK-LABEL: utest_f64i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: .cfi_offset %rbx, -24
+; CHECK-NEXT: .cfi_offset %r14, -16
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: callq __fixunsdfti@PLT
+; CHECK-NEXT: movq %rax, %rbx
+; CHECK-NEXT: movq %rdx, %r14
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT: callq __fixunsdfti@PLT
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: testq %rdx, %rdx
+; CHECK-NEXT: cmovneq %rcx, %rax
+; CHECK-NEXT: testq %r14, %r14
+; CHECK-NEXT: cmovneq %rcx, %rbx
+; CHECK-NEXT: movq %rbx, %xmm0
+; CHECK-NEXT: movq %rax, %xmm1
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %conv = fptoui <2 x double> %x to <2 x i128>
+ %0 = icmp ult <2 x i128> %conv, <i128 18446744073709551616, i128 18446744073709551616>
+ %spec.store.select = select <2 x i1> %0, <2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>
+ %conv6 = trunc <2 x i128> %spec.store.select to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+define <2 x i64> @ustest_f64i64(<2 x double> %x) {
+; Signed convert, unsigned result range: fptosi <2 x double> -> <2 x i128>
+; via __fixdfti, clamp between 0 and 2^64 (upper-bound constant is 2^64
+; itself), then truncate to <2 x i64>.
+; NOTE: mailing-list mangling of "@PLT" into " at PLT" restored below.
+; CHECK-LABEL: ustest_f64i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: .cfi_offset %rbx, -24
+; CHECK-NEXT: .cfi_offset %r14, -16
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT: callq __fixdfti@PLT
+; CHECK-NEXT: movq %rax, %rbx
+; CHECK-NEXT: movq %rdx, %r14
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: callq __fixdfti@PLT
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: testq %rdx, %rdx
+; CHECK-NEXT: movl $1, %esi
+; CHECK-NEXT: cmovgq %rsi, %rdx
+; CHECK-NEXT: cmovgq %rcx, %rax
+; CHECK-NEXT: testq %r14, %r14
+; CHECK-NEXT: cmovleq %r14, %rsi
+; CHECK-NEXT: cmovgq %rcx, %rbx
+; CHECK-NEXT: movq %rbx, %rdi
+; CHECK-NEXT: negq %rdi
+; CHECK-NEXT: movl $0, %edi
+; CHECK-NEXT: sbbq %rsi, %rdi
+; CHECK-NEXT: cmovgeq %rcx, %rbx
+; CHECK-NEXT: movq %rax, %rsi
+; CHECK-NEXT: negq %rsi
+; CHECK-NEXT: movl $0, %esi
+; CHECK-NEXT: sbbq %rdx, %rsi
+; CHECK-NEXT: cmovgeq %rcx, %rax
+; CHECK-NEXT: movq %rax, %xmm0
+; CHECK-NEXT: movq %rbx, %xmm1
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi <2 x double> %x to <2 x i128>
+ %0 = icmp slt <2 x i128> %conv, <i128 18446744073709551616, i128 18446744073709551616>
+ %spec.store.select = select <2 x i1> %0, <2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>
+ %1 = icmp sgt <2 x i128> %spec.store.select, zeroinitializer
+ %spec.store.select7 = select <2 x i1> %1, <2 x i128> %spec.store.select, <2 x i128> zeroinitializer
+ %conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+define <2 x i64> @stest_f32i64(<2 x float> %x) {
+; Signed saturating narrow: fptosi <2 x float> -> <2 x i128> via the
+; __fixsfti libcall, clamp to [INT64_MIN, INT64_MAX], truncate to <2 x i64>.
+; NOTE: mailing-list mangling of "@PLT" into " at PLT" restored below.
+; CHECK-LABEL: stest_f32i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: .cfi_offset %rbx, -24
+; CHECK-NEXT: .cfi_offset %r14, -16
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; CHECK-NEXT: callq __fixsfti@PLT
+; CHECK-NEXT: movq %rax, %r14
+; CHECK-NEXT: movq %rdx, %rbx
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: callq __fixsfti@PLT
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: movabsq $9223372036854775807, %rsi # imm = 0x7FFFFFFFFFFFFFFF
+; CHECK-NEXT: cmpq %rsi, %rax
+; CHECK-NEXT: movq %rdx, %rdi
+; CHECK-NEXT: sbbq $0, %rdi
+; CHECK-NEXT: cmovgeq %rcx, %rdx
+; CHECK-NEXT: cmovgeq %rsi, %rax
+; CHECK-NEXT: cmpq %rsi, %r14
+; CHECK-NEXT: movq %rbx, %rdi
+; CHECK-NEXT: sbbq $0, %rdi
+; CHECK-NEXT: cmovlq %rbx, %rcx
+; CHECK-NEXT: cmovlq %r14, %rsi
+; CHECK-NEXT: movabsq $-9223372036854775808, %r8 # imm = 0x8000000000000000
+; CHECK-NEXT: cmpq %rsi, %r8
+; CHECK-NEXT: movq $-1, %rbx
+; CHECK-NEXT: movq $-1, %rdi
+; CHECK-NEXT: sbbq %rcx, %rdi
+; CHECK-NEXT: cmovgeq %r8, %rsi
+; CHECK-NEXT: cmpq %rax, %r8
+; CHECK-NEXT: sbbq %rdx, %rbx
+; CHECK-NEXT: cmovgeq %r8, %rax
+; CHECK-NEXT: movq %rax, %xmm0
+; CHECK-NEXT: movq %rsi, %xmm1
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi <2 x float> %x to <2 x i128>
+ %0 = icmp slt <2 x i128> %conv, <i128 9223372036854775807, i128 9223372036854775807>
+ %spec.store.select = select <2 x i1> %0, <2 x i128> %conv, <2 x i128> <i128 9223372036854775807, i128 9223372036854775807>
+ %1 = icmp sgt <2 x i128> %spec.store.select, <i128 -9223372036854775808, i128 -9223372036854775808>
+ %spec.store.select7 = select <2 x i1> %1, <2 x i128> %spec.store.select, <2 x i128> <i128 -9223372036854775808, i128 -9223372036854775808>
+ %conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+define <2 x i64> @utest_f32i64(<2 x float> %x) {
+; Unsigned saturating narrow: fptoui <2 x float> -> <2 x i128> via the
+; __fixunssfti libcall; results >= 2^64 saturate, then truncate to <2 x i64>.
+; NOTE: mailing-list mangling of "@PLT" into " at PLT" restored below.
+; CHECK-LABEL: utest_f32i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: .cfi_offset %rbx, -24
+; CHECK-NEXT: .cfi_offset %r14, -16
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: callq __fixunssfti@PLT
+; CHECK-NEXT: movq %rax, %rbx
+; CHECK-NEXT: movq %rdx, %r14
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; CHECK-NEXT: callq __fixunssfti@PLT
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: testq %rdx, %rdx
+; CHECK-NEXT: cmovneq %rcx, %rax
+; CHECK-NEXT: testq %r14, %r14
+; CHECK-NEXT: cmovneq %rcx, %rbx
+; CHECK-NEXT: movq %rbx, %xmm0
+; CHECK-NEXT: movq %rax, %xmm1
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %conv = fptoui <2 x float> %x to <2 x i128>
+ %0 = icmp ult <2 x i128> %conv, <i128 18446744073709551616, i128 18446744073709551616>
+ %spec.store.select = select <2 x i1> %0, <2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>
+ %conv6 = trunc <2 x i128> %spec.store.select to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+define <2 x i64> @ustest_f32i64(<2 x float> %x) {
+; Signed convert, unsigned result range: fptosi <2 x float> -> <2 x i128>
+; via __fixsfti, clamp between 0 and 2^64 (upper-bound constant is 2^64
+; itself), then truncate to <2 x i64>.
+; NOTE: mailing-list mangling of "@PLT" into " at PLT" restored below.
+; CHECK-LABEL: ustest_f32i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: .cfi_offset %rbx, -24
+; CHECK-NEXT: .cfi_offset %r14, -16
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; CHECK-NEXT: callq __fixsfti@PLT
+; CHECK-NEXT: movq %rax, %rbx
+; CHECK-NEXT: movq %rdx, %r14
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: callq __fixsfti@PLT
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: testq %rdx, %rdx
+; CHECK-NEXT: movl $1, %esi
+; CHECK-NEXT: cmovgq %rsi, %rdx
+; CHECK-NEXT: cmovgq %rcx, %rax
+; CHECK-NEXT: testq %r14, %r14
+; CHECK-NEXT: cmovleq %r14, %rsi
+; CHECK-NEXT: cmovgq %rcx, %rbx
+; CHECK-NEXT: movq %rbx, %rdi
+; CHECK-NEXT: negq %rdi
+; CHECK-NEXT: movl $0, %edi
+; CHECK-NEXT: sbbq %rsi, %rdi
+; CHECK-NEXT: cmovgeq %rcx, %rbx
+; CHECK-NEXT: movq %rax, %rsi
+; CHECK-NEXT: negq %rsi
+; CHECK-NEXT: movl $0, %esi
+; CHECK-NEXT: sbbq %rdx, %rsi
+; CHECK-NEXT: cmovgeq %rcx, %rax
+; CHECK-NEXT: movq %rax, %xmm0
+; CHECK-NEXT: movq %rbx, %xmm1
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi <2 x float> %x to <2 x i128>
+ %0 = icmp slt <2 x i128> %conv, <i128 18446744073709551616, i128 18446744073709551616>
+ %spec.store.select = select <2 x i1> %0, <2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>
+ %1 = icmp sgt <2 x i128> %spec.store.select, zeroinitializer
+ %spec.store.select7 = select <2 x i1> %1, <2 x i128> %spec.store.select, <2 x i128> zeroinitializer
+ %conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+; stest: signed convert of v2f16 through v2i128, clamped to the signed i64
+; range [-2^63, 2^63-1] with select-based icmps, then truncated to v2i64.
+define <2 x i64> @stest_f16i64(<2 x half> %x) {
+; CHECK-LABEL: stest_f16i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: .cfi_offset %rbx, -32
+; CHECK-NEXT: .cfi_offset %r14, -24
+; CHECK-NEXT: .cfi_offset %rbp, -16
+; CHECK-NEXT: movl %esi, %r14d
+; CHECK-NEXT: movzwl %di, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee at PLT
+; CHECK-NEXT: callq __fixsfti at PLT
+; CHECK-NEXT: movq %rax, %rbx
+; CHECK-NEXT: movq %rdx, %rbp
+; CHECK-NEXT: movzwl %r14w, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee at PLT
+; CHECK-NEXT: callq __fixsfti at PLT
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: movabsq $9223372036854775807, %rsi # imm = 0x7FFFFFFFFFFFFFFF
+; CHECK-NEXT: cmpq %rsi, %rax
+; CHECK-NEXT: movq %rdx, %rdi
+; CHECK-NEXT: sbbq $0, %rdi
+; CHECK-NEXT: cmovgeq %rcx, %rdx
+; CHECK-NEXT: cmovgeq %rsi, %rax
+; CHECK-NEXT: cmpq %rsi, %rbx
+; CHECK-NEXT: movq %rbp, %rdi
+; CHECK-NEXT: sbbq $0, %rdi
+; CHECK-NEXT: cmovlq %rbp, %rcx
+; CHECK-NEXT: cmovlq %rbx, %rsi
+; CHECK-NEXT: movabsq $-9223372036854775808, %rdi # imm = 0x8000000000000000
+; CHECK-NEXT: cmpq %rsi, %rdi
+; CHECK-NEXT: movq $-1, %rbp
+; CHECK-NEXT: movq $-1, %rbx
+; CHECK-NEXT: sbbq %rcx, %rbx
+; CHECK-NEXT: cmovgeq %rdi, %rsi
+; CHECK-NEXT: cmpq %rax, %rdi
+; CHECK-NEXT: sbbq %rdx, %rbp
+; CHECK-NEXT: cmovgeq %rdi, %rax
+; CHECK-NEXT: movq %rax, %xmm1
+; CHECK-NEXT: movq %rsi, %xmm0
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi <2 x half> %x to <2 x i128>
+ %0 = icmp slt <2 x i128> %conv, <i128 9223372036854775807, i128 9223372036854775807>
+ %spec.store.select = select <2 x i1> %0, <2 x i128> %conv, <2 x i128> <i128 9223372036854775807, i128 9223372036854775807>
+ %1 = icmp sgt <2 x i128> %spec.store.select, <i128 -9223372036854775808, i128 -9223372036854775808>
+ %spec.store.select7 = select <2 x i1> %1, <2 x i128> %spec.store.select, <2 x i128> <i128 -9223372036854775808, i128 -9223372036854775808>
+ %conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+; utesth: unsigned convert of v2f16 through v2i128, clamped with a select-based
+; icmp ult against 2^64, then truncated to v2i64.
+; NOTE(review): 18446744073709551616 is 2^64, one above the u64 max — confirm
+; 2^64-1 was not intended as the bound.
+define <2 x i64> @utesth_f16i64(<2 x half> %x) {
+; CHECK-LABEL: utesth_f16i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: .cfi_offset %rbx, -32
+; CHECK-NEXT: .cfi_offset %r14, -24
+; CHECK-NEXT: .cfi_offset %rbp, -16
+; CHECK-NEXT: movl %edi, %ebp
+; CHECK-NEXT: movzwl %si, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee at PLT
+; CHECK-NEXT: callq __fixunssfti at PLT
+; CHECK-NEXT: movq %rax, %rbx
+; CHECK-NEXT: movq %rdx, %r14
+; CHECK-NEXT: movzwl %bp, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee at PLT
+; CHECK-NEXT: callq __fixunssfti at PLT
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: testq %rdx, %rdx
+; CHECK-NEXT: cmovneq %rcx, %rax
+; CHECK-NEXT: testq %r14, %r14
+; CHECK-NEXT: cmovneq %rcx, %rbx
+; CHECK-NEXT: movq %rbx, %xmm1
+; CHECK-NEXT: movq %rax, %xmm0
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %conv = fptoui <2 x half> %x to <2 x i128>
+ %0 = icmp ult <2 x i128> %conv, <i128 18446744073709551616, i128 18446744073709551616>
+ %spec.store.select = select <2 x i1> %0, <2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>
+ %conv6 = trunc <2 x i128> %spec.store.select to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+; ustest: signed convert of v2f16 through v2i128, clamped to [0, 2^64] with
+; select-based icmp slt/sgt, then truncated to v2i64.
+; NOTE(review): 18446744073709551616 is 2^64, one above the u64 max — confirm
+; 2^64-1 was not intended as the upper bound.
+define <2 x i64> @ustest_f16i64(<2 x half> %x) {
+; CHECK-LABEL: ustest_f16i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: .cfi_offset %rbx, -32
+; CHECK-NEXT: .cfi_offset %r14, -24
+; CHECK-NEXT: .cfi_offset %rbp, -16
+; CHECK-NEXT: movl %esi, %r14d
+; CHECK-NEXT: movzwl %di, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee at PLT
+; CHECK-NEXT: callq __fixsfti at PLT
+; CHECK-NEXT: movq %rax, %rbx
+; CHECK-NEXT: movq %rdx, %rbp
+; CHECK-NEXT: movzwl %r14w, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee at PLT
+; CHECK-NEXT: callq __fixsfti at PLT
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: testq %rdx, %rdx
+; CHECK-NEXT: movl $1, %esi
+; CHECK-NEXT: cmovgq %rsi, %rdx
+; CHECK-NEXT: cmovgq %rcx, %rax
+; CHECK-NEXT: testq %rbp, %rbp
+; CHECK-NEXT: cmovleq %rbp, %rsi
+; CHECK-NEXT: cmovgq %rcx, %rbx
+; CHECK-NEXT: movq %rbx, %rdi
+; CHECK-NEXT: negq %rdi
+; CHECK-NEXT: movl $0, %edi
+; CHECK-NEXT: sbbq %rsi, %rdi
+; CHECK-NEXT: cmovgeq %rcx, %rbx
+; CHECK-NEXT: movq %rax, %rsi
+; CHECK-NEXT: negq %rsi
+; CHECK-NEXT: movl $0, %esi
+; CHECK-NEXT: sbbq %rdx, %rsi
+; CHECK-NEXT: cmovgeq %rcx, %rax
+; CHECK-NEXT: movq %rax, %xmm1
+; CHECK-NEXT: movq %rbx, %xmm0
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi <2 x half> %x to <2 x i128>
+ %0 = icmp slt <2 x i128> %conv, <i128 18446744073709551616, i128 18446744073709551616>
+ %spec.store.select = select <2 x i1> %0, <2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>
+ %1 = icmp sgt <2 x i128> %spec.store.select, zeroinitializer
+ %spec.store.select7 = select <2 x i1> %1, <2 x i128> %spec.store.select, <2 x i128> zeroinitializer
+ %conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+
+
+; i32 saturate (llvm.smin/smax/umin/umax intrinsic "_mm" variants)
+
+; stest, min/max intrinsic form: signed convert of v2f64 to v2i64, clamped to
+; the signed i32 range with llvm.smin/llvm.smax, then truncated to v2i32.
+define <2 x i32> @stest_f64i32_mm(<2 x double> %x) {
+; CHECK-LABEL: stest_f64i32_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cvttsd2si %xmm0, %rax
+; CHECK-NEXT: movq %rax, %xmm1
+; CHECK-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT: cvttsd2si %xmm0, %rax
+; CHECK-NEXT: movq %rax, %xmm0
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648]
+; CHECK-NEXT: movdqa %xmm1, %xmm2
+; CHECK-NEXT: pxor %xmm0, %xmm2
+; CHECK-NEXT: movdqa {{.*#+}} xmm3 = [4294967295,4294967295]
+; CHECK-NEXT: movdqa %xmm3, %xmm4
+; CHECK-NEXT: pcmpgtd %xmm2, %xmm4
+; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
+; CHECK-NEXT: pcmpeqd %xmm3, %xmm2
+; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-NEXT: pand %xmm5, %xmm2
+; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
+; CHECK-NEXT: por %xmm2, %xmm3
+; CHECK-NEXT: pand %xmm3, %xmm1
+; CHECK-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
+; CHECK-NEXT: por %xmm1, %xmm3
+; CHECK-NEXT: pxor %xmm3, %xmm0
+; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [18446744069414584320,18446744069414584320]
+; CHECK-NEXT: movdqa %xmm0, %xmm2
+; CHECK-NEXT: pcmpgtd %xmm1, %xmm2
+; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
+; CHECK-NEXT: pcmpeqd %xmm1, %xmm0
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; CHECK-NEXT: pand %xmm4, %xmm0
+; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
+; CHECK-NEXT: por %xmm0, %xmm1
+; CHECK-NEXT: pand %xmm1, %xmm3
+; CHECK-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-NEXT: por %xmm3, %xmm1
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi <2 x double> %x to <2 x i64>
+ %spec.store.select = call <2 x i64> @llvm.smin.v2i64(<2 x i64> %conv, <2 x i64> <i64 2147483647, i64 2147483647>)
+ %spec.store.select7 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> %spec.store.select, <2 x i64> <i64 -2147483648, i64 -2147483648>)
+ %conv6 = trunc <2 x i64> %spec.store.select7 to <2 x i32>
+ ret <2 x i32> %conv6
+}
+
+; utest, min/max intrinsic form: unsigned convert of v2f64 to v2i64, clamped to
+; the u32 max (4294967295) with llvm.umin, then truncated to v2i32.
+define <2 x i32> @utest_f64i32_mm(<2 x double> %x) {
+; CHECK-LABEL: utest_f64i32_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
+; CHECK-NEXT: movapd %xmm0, %xmm1
+; CHECK-NEXT: subsd %xmm2, %xmm1
+; CHECK-NEXT: cvttsd2si %xmm1, %rax
+; CHECK-NEXT: cvttsd2si %xmm0, %rcx
+; CHECK-NEXT: movq %rcx, %rdx
+; CHECK-NEXT: sarq $63, %rdx
+; CHECK-NEXT: andq %rax, %rdx
+; CHECK-NEXT: orq %rcx, %rdx
+; CHECK-NEXT: movq %rdx, %xmm1
+; CHECK-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT: cvttsd2si %xmm0, %rax
+; CHECK-NEXT: subsd %xmm2, %xmm0
+; CHECK-NEXT: cvttsd2si %xmm0, %rcx
+; CHECK-NEXT: movq %rax, %rdx
+; CHECK-NEXT: sarq $63, %rdx
+; CHECK-NEXT: andq %rcx, %rdx
+; CHECK-NEXT: orq %rax, %rdx
+; CHECK-NEXT: movq %rdx, %xmm0
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT: movdqa {{.*#+}} xmm0 = [9223372039002259456,9223372039002259456]
+; CHECK-NEXT: pxor %xmm1, %xmm0
+; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259455,9223372039002259455]
+; CHECK-NEXT: movdqa %xmm2, %xmm3
+; CHECK-NEXT: pcmpgtd %xmm0, %xmm3
+; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
+; CHECK-NEXT: pcmpeqd %xmm2, %xmm0
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; CHECK-NEXT: pand %xmm4, %xmm0
+; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
+; CHECK-NEXT: por %xmm0, %xmm2
+; CHECK-NEXT: pand %xmm2, %xmm1
+; CHECK-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; CHECK-NEXT: por %xmm1, %xmm2
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
+; CHECK-NEXT: retq
+entry:
+ %conv = fptoui <2 x double> %x to <2 x i64>
+ %spec.store.select = call <2 x i64> @llvm.umin.v2i64(<2 x i64> %conv, <2 x i64> <i64 4294967295, i64 4294967295>)
+ %conv6 = trunc <2 x i64> %spec.store.select to <2 x i32>
+ ret <2 x i32> %conv6
+}
+
+; ustest, min/max intrinsic form: signed convert of v2f64 to v2i64, clamped to
+; [0, u32 max] with llvm.smin/llvm.smax, then truncated to v2i32.
+define <2 x i32> @ustest_f64i32_mm(<2 x double> %x) {
+; CHECK-LABEL: ustest_f64i32_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cvttsd2si %xmm0, %rax
+; CHECK-NEXT: movq %rax, %xmm1
+; CHECK-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT: cvttsd2si %xmm0, %rax
+; CHECK-NEXT: movq %rax, %xmm0
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648]
+; CHECK-NEXT: movdqa %xmm1, %xmm2
+; CHECK-NEXT: pxor %xmm0, %xmm2
+; CHECK-NEXT: movdqa {{.*#+}} xmm3 = [2147483647,2147483647]
+; CHECK-NEXT: movdqa %xmm3, %xmm4
+; CHECK-NEXT: pcmpgtd %xmm2, %xmm4
+; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
+; CHECK-NEXT: pcmpeqd %xmm3, %xmm2
+; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-NEXT: pand %xmm5, %xmm2
+; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
+; CHECK-NEXT: por %xmm2, %xmm3
+; CHECK-NEXT: pand %xmm3, %xmm1
+; CHECK-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
+; CHECK-NEXT: por %xmm1, %xmm3
+; CHECK-NEXT: movdqa %xmm3, %xmm1
+; CHECK-NEXT: pxor %xmm0, %xmm1
+; CHECK-NEXT: movdqa %xmm1, %xmm2
+; CHECK-NEXT: pcmpgtd %xmm0, %xmm2
+; CHECK-NEXT: pcmpeqd %xmm0, %xmm1
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
+; CHECK-NEXT: pand %xmm2, %xmm0
+; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
+; CHECK-NEXT: por %xmm0, %xmm1
+; CHECK-NEXT: pand %xmm3, %xmm1
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi <2 x double> %x to <2 x i64>
+ %spec.store.select = call <2 x i64> @llvm.smin.v2i64(<2 x i64> %conv, <2 x i64> <i64 4294967295, i64 4294967295>)
+ %spec.store.select7 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> %spec.store.select, <2 x i64> zeroinitializer)
+ %conv6 = trunc <2 x i64> %spec.store.select7 to <2 x i32>
+ ret <2 x i32> %conv6
+}
+
+; stest, min/max intrinsic form: signed convert of v4f32 to v4i64, clamped to
+; the signed i32 range with llvm.smin/llvm.smax, then truncated to v4i32.
+define <4 x i32> @stest_f32i32_mm(<4 x float> %x) {
+; CHECK-LABEL: stest_f32i32_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movaps %xmm0, %xmm1
+; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3],xmm0[3,3]
+; CHECK-NEXT: cvttss2si %xmm1, %rax
+; CHECK-NEXT: movq %rax, %xmm1
+; CHECK-NEXT: movaps %xmm0, %xmm2
+; CHECK-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
+; CHECK-NEXT: cvttss2si %xmm2, %rax
+; CHECK-NEXT: movq %rax, %xmm2
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
+; CHECK-NEXT: cvttss2si %xmm0, %rax
+; CHECK-NEXT: movq %rax, %xmm3
+; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; CHECK-NEXT: cvttss2si %xmm0, %rax
+; CHECK-NEXT: movq %rax, %xmm0
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0]
+; CHECK-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648]
+; CHECK-NEXT: movdqa %xmm3, %xmm1
+; CHECK-NEXT: pxor %xmm0, %xmm1
+; CHECK-NEXT: movdqa {{.*#+}} xmm4 = [4294967295,4294967295]
+; CHECK-NEXT: movdqa %xmm4, %xmm5
+; CHECK-NEXT: pcmpgtd %xmm1, %xmm5
+; CHECK-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
+; CHECK-NEXT: pcmpeqd %xmm4, %xmm1
+; CHECK-NEXT: pshufd {{.*#+}} xmm7 = xmm1[1,1,3,3]
+; CHECK-NEXT: pand %xmm6, %xmm7
+; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm5[1,1,3,3]
+; CHECK-NEXT: por %xmm7, %xmm1
+; CHECK-NEXT: movdqa {{.*#+}} xmm5 = [2147483647,2147483647]
+; CHECK-NEXT: pand %xmm1, %xmm3
+; CHECK-NEXT: pandn %xmm5, %xmm1
+; CHECK-NEXT: por %xmm3, %xmm1
+; CHECK-NEXT: movdqa %xmm2, %xmm3
+; CHECK-NEXT: pxor %xmm0, %xmm3
+; CHECK-NEXT: movdqa %xmm4, %xmm6
+; CHECK-NEXT: pcmpgtd %xmm3, %xmm6
+; CHECK-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
+; CHECK-NEXT: pcmpeqd %xmm4, %xmm3
+; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
+; CHECK-NEXT: pand %xmm7, %xmm3
+; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm6[1,1,3,3]
+; CHECK-NEXT: por %xmm3, %xmm4
+; CHECK-NEXT: pand %xmm4, %xmm2
+; CHECK-NEXT: pandn %xmm5, %xmm4
+; CHECK-NEXT: por %xmm2, %xmm4
+; CHECK-NEXT: movdqa %xmm4, %xmm2
+; CHECK-NEXT: pxor %xmm0, %xmm2
+; CHECK-NEXT: movdqa {{.*#+}} xmm3 = [18446744069414584320,18446744069414584320]
+; CHECK-NEXT: movdqa %xmm2, %xmm5
+; CHECK-NEXT: pcmpgtd %xmm3, %xmm5
+; CHECK-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
+; CHECK-NEXT: pcmpeqd %xmm3, %xmm2
+; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-NEXT: pand %xmm6, %xmm2
+; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
+; CHECK-NEXT: por %xmm2, %xmm5
+; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [18446744071562067968,18446744071562067968]
+; CHECK-NEXT: pand %xmm5, %xmm4
+; CHECK-NEXT: pandn %xmm2, %xmm5
+; CHECK-NEXT: por %xmm4, %xmm5
+; CHECK-NEXT: pxor %xmm1, %xmm0
+; CHECK-NEXT: movdqa %xmm0, %xmm4
+; CHECK-NEXT: pcmpgtd %xmm3, %xmm4
+; CHECK-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
+; CHECK-NEXT: pcmpeqd %xmm3, %xmm0
+; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; CHECK-NEXT: pand %xmm6, %xmm3
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
+; CHECK-NEXT: por %xmm3, %xmm0
+; CHECK-NEXT: pand %xmm0, %xmm1
+; CHECK-NEXT: pandn %xmm2, %xmm0
+; CHECK-NEXT: por %xmm1, %xmm0
+; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm5[0,2]
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi <4 x float> %x to <4 x i64>
+ %spec.store.select = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>)
+ %spec.store.select7 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> %spec.store.select, <4 x i64> <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648>)
+ %conv6 = trunc <4 x i64> %spec.store.select7 to <4 x i32>
+ ret <4 x i32> %conv6
+}
+
+; utest, min/max intrinsic form: unsigned convert of v4f32 to v4i64, clamped to
+; the u32 max (4294967295) with llvm.umin, then truncated to v4i32.
+define <4 x i32> @utest_f32i32_mm(<4 x float> %x) {
+; CHECK-LABEL: utest_f32i32_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; CHECK-NEXT: movaps %xmm0, %xmm1
+; CHECK-NEXT: subss %xmm2, %xmm1
+; CHECK-NEXT: cvttss2si %xmm1, %rax
+; CHECK-NEXT: cvttss2si %xmm0, %rcx
+; CHECK-NEXT: movq %rcx, %rdx
+; CHECK-NEXT: sarq $63, %rdx
+; CHECK-NEXT: andq %rax, %rdx
+; CHECK-NEXT: orq %rcx, %rdx
+; CHECK-NEXT: movq %rdx, %xmm1
+; CHECK-NEXT: movaps %xmm0, %xmm3
+; CHECK-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[1,1]
+; CHECK-NEXT: cvttss2si %xmm3, %rax
+; CHECK-NEXT: subss %xmm2, %xmm3
+; CHECK-NEXT: cvttss2si %xmm3, %rcx
+; CHECK-NEXT: movq %rax, %rdx
+; CHECK-NEXT: sarq $63, %rdx
+; CHECK-NEXT: andq %rcx, %rdx
+; CHECK-NEXT: orq %rax, %rdx
+; CHECK-NEXT: movq %rdx, %xmm3
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
+; CHECK-NEXT: movaps %xmm0, %xmm3
+; CHECK-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,3],xmm0[3,3]
+; CHECK-NEXT: cvttss2si %xmm3, %rax
+; CHECK-NEXT: subss %xmm2, %xmm3
+; CHECK-NEXT: cvttss2si %xmm3, %rcx
+; CHECK-NEXT: movq %rax, %rdx
+; CHECK-NEXT: sarq $63, %rdx
+; CHECK-NEXT: andq %rcx, %rdx
+; CHECK-NEXT: orq %rax, %rdx
+; CHECK-NEXT: movq %rdx, %xmm3
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT: cvttss2si %xmm0, %rax
+; CHECK-NEXT: subss %xmm2, %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %rcx
+; CHECK-NEXT: movq %rax, %rdx
+; CHECK-NEXT: sarq $63, %rdx
+; CHECK-NEXT: andq %rcx, %rdx
+; CHECK-NEXT: orq %rax, %rdx
+; CHECK-NEXT: movq %rdx, %xmm0
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
+; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
+; CHECK-NEXT: movdqa %xmm0, %xmm3
+; CHECK-NEXT: pxor %xmm2, %xmm3
+; CHECK-NEXT: movdqa {{.*#+}} xmm4 = [9223372039002259455,9223372039002259455]
+; CHECK-NEXT: movdqa %xmm4, %xmm5
+; CHECK-NEXT: pcmpgtd %xmm3, %xmm5
+; CHECK-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
+; CHECK-NEXT: pcmpeqd %xmm4, %xmm3
+; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
+; CHECK-NEXT: pand %xmm6, %xmm3
+; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
+; CHECK-NEXT: por %xmm3, %xmm5
+; CHECK-NEXT: movdqa {{.*#+}} xmm3 = [4294967295,4294967295]
+; CHECK-NEXT: pand %xmm5, %xmm0
+; CHECK-NEXT: pandn %xmm3, %xmm5
+; CHECK-NEXT: por %xmm0, %xmm5
+; CHECK-NEXT: pxor %xmm1, %xmm2
+; CHECK-NEXT: movdqa %xmm4, %xmm0
+; CHECK-NEXT: pcmpgtd %xmm2, %xmm0
+; CHECK-NEXT: pshufd {{.*#+}} xmm6 = xmm0[0,0,2,2]
+; CHECK-NEXT: pcmpeqd %xmm4, %xmm2
+; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-NEXT: pand %xmm6, %xmm2
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; CHECK-NEXT: por %xmm2, %xmm0
+; CHECK-NEXT: pand %xmm0, %xmm1
+; CHECK-NEXT: pandn %xmm3, %xmm0
+; CHECK-NEXT: por %xmm1, %xmm0
+; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm5[0,2]
+; CHECK-NEXT: retq
+entry:
+ %conv = fptoui <4 x float> %x to <4 x i64>
+ %spec.store.select = call <4 x i64> @llvm.umin.v4i64(<4 x i64> %conv, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
+ %conv6 = trunc <4 x i64> %spec.store.select to <4 x i32>
+ ret <4 x i32> %conv6
+}
+
+; ustest, min/max intrinsic form: signed convert of v4f32 to v4i64, clamped to
+; [0, u32 max] with llvm.smin/llvm.smax, then truncated to v4i32.
+define <4 x i32> @ustest_f32i32_mm(<4 x float> %x) {
+; CHECK-LABEL: ustest_f32i32_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movaps %xmm0, %xmm1
+; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3],xmm0[3,3]
+; CHECK-NEXT: cvttss2si %xmm1, %rax
+; CHECK-NEXT: movq %rax, %xmm1
+; CHECK-NEXT: movaps %xmm0, %xmm2
+; CHECK-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
+; CHECK-NEXT: cvttss2si %xmm2, %rax
+; CHECK-NEXT: movq %rax, %xmm2
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
+; CHECK-NEXT: cvttss2si %xmm0, %rax
+; CHECK-NEXT: movq %rax, %xmm3
+; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; CHECK-NEXT: cvttss2si %xmm0, %rax
+; CHECK-NEXT: movq %rax, %xmm0
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0]
+; CHECK-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648]
+; CHECK-NEXT: movdqa %xmm3, %xmm1
+; CHECK-NEXT: pxor %xmm0, %xmm1
+; CHECK-NEXT: movdqa {{.*#+}} xmm4 = [2147483647,2147483647]
+; CHECK-NEXT: movdqa %xmm4, %xmm5
+; CHECK-NEXT: pcmpgtd %xmm1, %xmm5
+; CHECK-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
+; CHECK-NEXT: pcmpeqd %xmm4, %xmm1
+; CHECK-NEXT: pshufd {{.*#+}} xmm7 = xmm1[1,1,3,3]
+; CHECK-NEXT: pand %xmm6, %xmm7
+; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm5[1,1,3,3]
+; CHECK-NEXT: por %xmm7, %xmm1
+; CHECK-NEXT: movdqa {{.*#+}} xmm5 = [4294967295,4294967295]
+; CHECK-NEXT: pand %xmm1, %xmm3
+; CHECK-NEXT: pandn %xmm5, %xmm1
+; CHECK-NEXT: por %xmm3, %xmm1
+; CHECK-NEXT: movdqa %xmm2, %xmm3
+; CHECK-NEXT: pxor %xmm0, %xmm3
+; CHECK-NEXT: movdqa %xmm4, %xmm6
+; CHECK-NEXT: pcmpgtd %xmm3, %xmm6
+; CHECK-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
+; CHECK-NEXT: pcmpeqd %xmm4, %xmm3
+; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
+; CHECK-NEXT: pand %xmm7, %xmm3
+; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm6[1,1,3,3]
+; CHECK-NEXT: por %xmm3, %xmm4
+; CHECK-NEXT: pand %xmm4, %xmm2
+; CHECK-NEXT: pandn %xmm5, %xmm4
+; CHECK-NEXT: por %xmm2, %xmm4
+; CHECK-NEXT: movdqa %xmm4, %xmm2
+; CHECK-NEXT: pxor %xmm0, %xmm2
+; CHECK-NEXT: movdqa %xmm2, %xmm3
+; CHECK-NEXT: pcmpgtd %xmm0, %xmm3
+; CHECK-NEXT: pcmpeqd %xmm0, %xmm2
+; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-NEXT: pand %xmm3, %xmm2
+; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
+; CHECK-NEXT: por %xmm2, %xmm3
+; CHECK-NEXT: pand %xmm4, %xmm3
+; CHECK-NEXT: movdqa %xmm1, %xmm2
+; CHECK-NEXT: pxor %xmm0, %xmm2
+; CHECK-NEXT: movdqa %xmm2, %xmm4
+; CHECK-NEXT: pcmpgtd %xmm0, %xmm4
+; CHECK-NEXT: pcmpeqd %xmm0, %xmm2
+; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-NEXT: pand %xmm4, %xmm2
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
+; CHECK-NEXT: por %xmm2, %xmm0
+; CHECK-NEXT: pand %xmm1, %xmm0
+; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[0,2]
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi <4 x float> %x to <4 x i64>
+ %spec.store.select = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
+ %spec.store.select7 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> %spec.store.select, <4 x i64> zeroinitializer)
+ %conv6 = trunc <4 x i64> %spec.store.select7 to <4 x i32>
+ ret <4 x i32> %conv6
+}
+
+; stest, min/max intrinsic form: signed convert of v4f16 to v4i64, clamped to
+; the signed i32 range with llvm.smin/llvm.smax, then truncated to v4i32.
+define <4 x i32> @stest_f16i32_mm(<4 x half> %x) {
+; CHECK-LABEL: stest_f16i32_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: subq $32, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: .cfi_offset %rbx, -32
+; CHECK-NEXT: .cfi_offset %r14, -24
+; CHECK-NEXT: .cfi_offset %rbp, -16
+; CHECK-NEXT: movl %edx, %ebp
+; CHECK-NEXT: movl %esi, %ebx
+; CHECK-NEXT: movl %edi, %r14d
+; CHECK-NEXT: movzwl %cx, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee at PLT
+; CHECK-NEXT: cvttss2si %xmm0, %rax
+; CHECK-NEXT: movq %rax, %xmm0
+; CHECK-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movzwl %bp, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee at PLT
+; CHECK-NEXT: cvttss2si %xmm0, %rax
+; CHECK-NEXT: movq %rax, %xmm0
+; CHECK-NEXT: punpcklqdq (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movzwl %bx, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee at PLT
+; CHECK-NEXT: cvttss2si %xmm0, %rax
+; CHECK-NEXT: movq %rax, %xmm0
+; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movzwl %r14w, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee at PLT
+; CHECK-NEXT: cvttss2si %xmm0, %rax
+; CHECK-NEXT: movq %rax, %xmm2
+; CHECK-NEXT: punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm2 = xmm2[0],mem[0]
+; CHECK-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648]
+; CHECK-NEXT: movdqa %xmm2, %xmm1
+; CHECK-NEXT: pxor %xmm0, %xmm1
+; CHECK-NEXT: movdqa {{.*#+}} xmm3 = [4294967295,4294967295]
+; CHECK-NEXT: movdqa %xmm3, %xmm4
+; CHECK-NEXT: pcmpgtd %xmm1, %xmm4
+; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
+; CHECK-NEXT: pcmpeqd %xmm3, %xmm1
+; CHECK-NEXT: pshufd {{.*#+}} xmm6 = xmm1[1,1,3,3]
+; CHECK-NEXT: pand %xmm5, %xmm6
+; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm4[1,1,3,3]
+; CHECK-NEXT: por %xmm6, %xmm1
+; CHECK-NEXT: movdqa {{.*#+}} xmm4 = [2147483647,2147483647]
+; CHECK-NEXT: pand %xmm1, %xmm2
+; CHECK-NEXT: pandn %xmm4, %xmm1
+; CHECK-NEXT: por %xmm2, %xmm1
+; CHECK-NEXT: movdqa (%rsp), %xmm7 # 16-byte Reload
+; CHECK-NEXT: movdqa %xmm7, %xmm2
+; CHECK-NEXT: pxor %xmm0, %xmm2
+; CHECK-NEXT: movdqa %xmm3, %xmm5
+; CHECK-NEXT: pcmpgtd %xmm2, %xmm5
+; CHECK-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
+; CHECK-NEXT: pcmpeqd %xmm3, %xmm2
+; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-NEXT: pand %xmm6, %xmm2
+; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm5[1,1,3,3]
+; CHECK-NEXT: por %xmm2, %xmm3
+; CHECK-NEXT: movdqa %xmm7, %xmm2
+; CHECK-NEXT: pand %xmm3, %xmm2
+; CHECK-NEXT: pandn %xmm4, %xmm3
+; CHECK-NEXT: por %xmm2, %xmm3
+; CHECK-NEXT: movdqa %xmm3, %xmm2
+; CHECK-NEXT: pxor %xmm0, %xmm2
+; CHECK-NEXT: movdqa {{.*#+}} xmm4 = [18446744069414584320,18446744069414584320]
+; CHECK-NEXT: movdqa %xmm2, %xmm5
+; CHECK-NEXT: pcmpgtd %xmm4, %xmm5
+; CHECK-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
+; CHECK-NEXT: pcmpeqd %xmm4, %xmm2
+; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-NEXT: pand %xmm6, %xmm2
+; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
+; CHECK-NEXT: por %xmm2, %xmm5
+; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [18446744071562067968,18446744071562067968]
+; CHECK-NEXT: pand %xmm5, %xmm3
+; CHECK-NEXT: pandn %xmm2, %xmm5
+; CHECK-NEXT: por %xmm3, %xmm5
+; CHECK-NEXT: pxor %xmm1, %xmm0
+; CHECK-NEXT: movdqa %xmm0, %xmm3
+; CHECK-NEXT: pcmpgtd %xmm4, %xmm3
+; CHECK-NEXT: pshufd {{.*#+}} xmm6 = xmm3[0,0,2,2]
+; CHECK-NEXT: pcmpeqd %xmm4, %xmm0
+; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
+; CHECK-NEXT: pand %xmm6, %xmm4
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
+; CHECK-NEXT: por %xmm4, %xmm0
+; CHECK-NEXT: pand %xmm0, %xmm1
+; CHECK-NEXT: pandn %xmm2, %xmm0
+; CHECK-NEXT: por %xmm1, %xmm0
+; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm5[0,2]
+; CHECK-NEXT: addq $32, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi <4 x half> %x to <4 x i64>
+ %spec.store.select = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>)
+ %spec.store.select7 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> %spec.store.select, <4 x i64> <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648>)
+ %conv6 = trunc <4 x i64> %spec.store.select7 to <4 x i32>
+ ret <4 x i32> %conv6
+}
+
+; utesth, min/max intrinsic form: unsigned convert of v4f16 to v4i64, clamped
+; to the u32 max (4294967295) with llvm.umin, then truncated to v4i32.
+define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
+; CHECK-LABEL: utesth_f16i32_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: subq $32, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: .cfi_offset %rbx, -32
+; CHECK-NEXT: .cfi_offset %r14, -24
+; CHECK-NEXT: .cfi_offset %rbp, -16
+; CHECK-NEXT: movl %ecx, %ebp
+; CHECK-NEXT: movl %edx, %r14d
+; CHECK-NEXT: movl %edi, %ebx
+; CHECK-NEXT: movzwl %si, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee at PLT
+; CHECK-NEXT: cvttss2si %xmm0, %rax
+; CHECK-NEXT: subss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %rcx
+; CHECK-NEXT: movq %rax, %rdx
+; CHECK-NEXT: sarq $63, %rdx
+; CHECK-NEXT: andq %rcx, %rdx
+; CHECK-NEXT: orq %rax, %rdx
+; CHECK-NEXT: movq %rdx, %xmm0
+; CHECK-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movzwl %bx, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee at PLT
+; CHECK-NEXT: cvttss2si %xmm0, %rax
+; CHECK-NEXT: subss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %rcx
+; CHECK-NEXT: movq %rax, %rdx
+; CHECK-NEXT: sarq $63, %rdx
+; CHECK-NEXT: andq %rcx, %rdx
+; CHECK-NEXT: orq %rax, %rdx
+; CHECK-NEXT: movq %rdx, %xmm0
+; CHECK-NEXT: punpcklqdq (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movzwl %bp, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee at PLT
+; CHECK-NEXT: cvttss2si %xmm0, %rax
+; CHECK-NEXT: subss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %rcx
+; CHECK-NEXT: movq %rax, %rdx
+; CHECK-NEXT: sarq $63, %rdx
+; CHECK-NEXT: andq %rcx, %rdx
+; CHECK-NEXT: orq %rax, %rdx
+; CHECK-NEXT: movq %rdx, %xmm0
+; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movzwl %r14w, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee at PLT
+; CHECK-NEXT: cvttss2si %xmm0, %rax
+; CHECK-NEXT: subss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %rcx
+; CHECK-NEXT: movq %rax, %rdx
+; CHECK-NEXT: sarq $63, %rdx
+; CHECK-NEXT: andq %rcx, %rdx
+; CHECK-NEXT: orq %rax, %rdx
+; CHECK-NEXT: movq %rdx, %xmm0
+; CHECK-NEXT: punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456]
+; CHECK-NEXT: movdqa %xmm0, %xmm2
+; CHECK-NEXT: pxor %xmm1, %xmm2
+; CHECK-NEXT: movdqa {{.*#+}} xmm3 = [9223372039002259455,9223372039002259455]
+; CHECK-NEXT: movdqa %xmm3, %xmm4
+; CHECK-NEXT: pcmpgtd %xmm2, %xmm4
+; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
+; CHECK-NEXT: pcmpeqd %xmm3, %xmm2
+; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-NEXT: pand %xmm5, %xmm2
+; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
+; CHECK-NEXT: por %xmm2, %xmm4
+; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,4294967295]
+; CHECK-NEXT: pand %xmm4, %xmm0
+; CHECK-NEXT: pandn %xmm2, %xmm4
+; CHECK-NEXT: por %xmm0, %xmm4
+; CHECK-NEXT: movdqa (%rsp), %xmm6 # 16-byte Reload
+; CHECK-NEXT: pxor %xmm6, %xmm1
+; CHECK-NEXT: movdqa %xmm3, %xmm0
+; CHECK-NEXT: pcmpgtd %xmm1, %xmm0
+; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm0[0,0,2,2]
+; CHECK-NEXT: pcmpeqd %xmm3, %xmm1
+; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; CHECK-NEXT: pand %xmm5, %xmm1
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; CHECK-NEXT: por %xmm1, %xmm0
+; CHECK-NEXT: pand %xmm0, %xmm6
+; CHECK-NEXT: pandn %xmm2, %xmm0
+; CHECK-NEXT: por %xmm6, %xmm0
+; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm4[0,2]
+; CHECK-NEXT: addq $32, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %conv = fptoui <4 x half> %x to <4 x i64>
+ %spec.store.select = call <4 x i64> @llvm.umin.v4i64(<4 x i64> %conv, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
+ %conv6 = trunc <4 x i64> %spec.store.select to <4 x i32>
+ ret <4 x i32> %conv6
+}
+
+define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) {
+; CHECK-LABEL: ustest_f16i32_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: subq $32, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: .cfi_offset %rbx, -32
+; CHECK-NEXT: .cfi_offset %r14, -24
+; CHECK-NEXT: .cfi_offset %rbp, -16
+; CHECK-NEXT: movl %edx, %ebp
+; CHECK-NEXT: movl %esi, %ebx
+; CHECK-NEXT: movl %edi, %r14d
+; CHECK-NEXT: movzwl %cx, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee at PLT
+; CHECK-NEXT: cvttss2si %xmm0, %rax
+; CHECK-NEXT: movq %rax, %xmm0
+; CHECK-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movzwl %bp, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee at PLT
+; CHECK-NEXT: cvttss2si %xmm0, %rax
+; CHECK-NEXT: movq %rax, %xmm0
+; CHECK-NEXT: punpcklqdq (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movzwl %bx, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee at PLT
+; CHECK-NEXT: cvttss2si %xmm0, %rax
+; CHECK-NEXT: movq %rax, %xmm0
+; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movzwl %r14w, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %rax
+; CHECK-NEXT: movq %rax, %xmm2
+; CHECK-NEXT: punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm2 = xmm2[0],mem[0]
+; CHECK-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648]
+; CHECK-NEXT: movdqa %xmm2, %xmm1
+; CHECK-NEXT: pxor %xmm0, %xmm1
+; CHECK-NEXT: movdqa {{.*#+}} xmm3 = [2147483647,2147483647]
+; CHECK-NEXT: movdqa %xmm3, %xmm4
+; CHECK-NEXT: pcmpgtd %xmm1, %xmm4
+; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
+; CHECK-NEXT: pcmpeqd %xmm3, %xmm1
+; CHECK-NEXT: pshufd {{.*#+}} xmm6 = xmm1[1,1,3,3]
+; CHECK-NEXT: pand %xmm5, %xmm6
+; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm4[1,1,3,3]
+; CHECK-NEXT: por %xmm6, %xmm1
+; CHECK-NEXT: movdqa {{.*#+}} xmm4 = [4294967295,4294967295]
+; CHECK-NEXT: pand %xmm1, %xmm2
+; CHECK-NEXT: pandn %xmm4, %xmm1
+; CHECK-NEXT: por %xmm2, %xmm1
+; CHECK-NEXT: movdqa (%rsp), %xmm7 # 16-byte Reload
+; CHECK-NEXT: movdqa %xmm7, %xmm2
+; CHECK-NEXT: pxor %xmm0, %xmm2
+; CHECK-NEXT: movdqa %xmm3, %xmm5
+; CHECK-NEXT: pcmpgtd %xmm2, %xmm5
+; CHECK-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
+; CHECK-NEXT: pcmpeqd %xmm3, %xmm2
+; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-NEXT: pand %xmm6, %xmm2
+; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm5[1,1,3,3]
+; CHECK-NEXT: por %xmm2, %xmm3
+; CHECK-NEXT: movdqa %xmm7, %xmm2
+; CHECK-NEXT: pand %xmm3, %xmm2
+; CHECK-NEXT: pandn %xmm4, %xmm3
+; CHECK-NEXT: por %xmm2, %xmm3
+; CHECK-NEXT: movdqa %xmm3, %xmm2
+; CHECK-NEXT: pxor %xmm0, %xmm2
+; CHECK-NEXT: movdqa %xmm2, %xmm4
+; CHECK-NEXT: pcmpgtd %xmm0, %xmm4
+; CHECK-NEXT: pcmpeqd %xmm0, %xmm2
+; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-NEXT: pand %xmm4, %xmm2
+; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
+; CHECK-NEXT: por %xmm2, %xmm4
+; CHECK-NEXT: pand %xmm3, %xmm4
+; CHECK-NEXT: movdqa %xmm1, %xmm2
+; CHECK-NEXT: pxor %xmm0, %xmm2
+; CHECK-NEXT: movdqa %xmm2, %xmm3
+; CHECK-NEXT: pcmpgtd %xmm0, %xmm3
+; CHECK-NEXT: pcmpeqd %xmm0, %xmm2
+; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-NEXT: pand %xmm3, %xmm2
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
+; CHECK-NEXT: por %xmm2, %xmm0
+; CHECK-NEXT: pand %xmm1, %xmm0
+; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm4[0,2]
+; CHECK-NEXT: addq $32, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi <4 x half> %x to <4 x i64>
+ %spec.store.select = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
+ %spec.store.select7 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> %spec.store.select, <4 x i64> zeroinitializer)
+ %conv6 = trunc <4 x i64> %spec.store.select7 to <4 x i32>
+ ret <4 x i32> %conv6
+}
+
+; i16 saturate
+
+define <2 x i16> @stest_f64i16_mm(<2 x double> %x) {
+; CHECK-LABEL: stest_f64i16_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cvttpd2dq %xmm0, %xmm0
+; CHECK-NEXT: movdqa {{.*#+}} xmm1 = <32767,32767,u,u>
+; CHECK-NEXT: movdqa %xmm1, %xmm2
+; CHECK-NEXT: pcmpgtd %xmm0, %xmm2
+; CHECK-NEXT: pand %xmm2, %xmm0
+; CHECK-NEXT: pandn %xmm1, %xmm2
+; CHECK-NEXT: por %xmm0, %xmm2
+; CHECK-NEXT: movdqa {{.*#+}} xmm0 = <4294934528,4294934528,u,u>
+; CHECK-NEXT: movdqa %xmm2, %xmm1
+; CHECK-NEXT: pcmpgtd %xmm0, %xmm1
+; CHECK-NEXT: pand %xmm1, %xmm2
+; CHECK-NEXT: pandn %xmm0, %xmm1
+; CHECK-NEXT: por %xmm2, %xmm1
+; CHECK-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi <2 x double> %x to <2 x i32>
+ %spec.store.select = call <2 x i32> @llvm.smin.v2i32(<2 x i32> %conv, <2 x i32> <i32 32767, i32 32767>)
+ %spec.store.select7 = call <2 x i32> @llvm.smax.v2i32(<2 x i32> %spec.store.select, <2 x i32> <i32 -32768, i32 -32768>)
+ %conv6 = trunc <2 x i32> %spec.store.select7 to <2 x i16>
+ ret <2 x i16> %conv6
+}
+
+define <2 x i16> @utest_f64i16_mm(<2 x double> %x) {
+; CHECK-LABEL: utest_f64i16_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cvttpd2dq %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm2
+; CHECK-NEXT: psrad $31, %xmm2
+; CHECK-NEXT: addpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: cvttpd2dq %xmm0, %xmm0
+; CHECK-NEXT: andpd %xmm2, %xmm0
+; CHECK-NEXT: orpd %xmm1, %xmm0
+; CHECK-NEXT: movapd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
+; CHECK-NEXT: xorpd %xmm0, %xmm1
+; CHECK-NEXT: movdqa {{.*#+}} xmm2 = <2147549183,2147549183,u,u>
+; CHECK-NEXT: pcmpgtd %xmm1, %xmm2
+; CHECK-NEXT: andpd %xmm2, %xmm0
+; CHECK-NEXT: andnpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; CHECK-NEXT: orpd %xmm0, %xmm2
+; CHECK-NEXT: pshuflw {{.*#+}} xmm0 = xmm2[0,2,2,3,4,5,6,7]
+; CHECK-NEXT: retq
+entry:
+ %conv = fptoui <2 x double> %x to <2 x i32>
+ %spec.store.select = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %conv, <2 x i32> <i32 65535, i32 65535>)
+ %conv6 = trunc <2 x i32> %spec.store.select to <2 x i16>
+ ret <2 x i16> %conv6
+}
+
+define <2 x i16> @ustest_f64i16_mm(<2 x double> %x) {
+; CHECK-LABEL: ustest_f64i16_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cvttpd2dq %xmm0, %xmm0
+; CHECK-NEXT: movdqa {{.*#+}} xmm1 = <65535,65535,u,u>
+; CHECK-NEXT: movdqa %xmm1, %xmm2
+; CHECK-NEXT: pcmpgtd %xmm0, %xmm2
+; CHECK-NEXT: pand %xmm2, %xmm0
+; CHECK-NEXT: pandn %xmm1, %xmm2
+; CHECK-NEXT: por %xmm0, %xmm2
+; CHECK-NEXT: pxor %xmm0, %xmm0
+; CHECK-NEXT: movdqa %xmm2, %xmm1
+; CHECK-NEXT: pcmpgtd %xmm0, %xmm1
+; CHECK-NEXT: pand %xmm2, %xmm1
+; CHECK-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi <2 x double> %x to <2 x i32>
+ %spec.store.select = call <2 x i32> @llvm.smin.v2i32(<2 x i32> %conv, <2 x i32> <i32 65535, i32 65535>)
+ %spec.store.select7 = call <2 x i32> @llvm.smax.v2i32(<2 x i32> %spec.store.select, <2 x i32> zeroinitializer)
+ %conv6 = trunc <2 x i32> %spec.store.select7 to <2 x i16>
+ ret <2 x i16> %conv6
+}
+
+define <4 x i16> @stest_f32i16_mm(<4 x float> %x) {
+; CHECK-LABEL: stest_f32i16_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cvttps2dq %xmm0, %xmm0
+; CHECK-NEXT: packssdw %xmm0, %xmm0
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi <4 x float> %x to <4 x i32>
+ %spec.store.select = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %conv, <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767>)
+ %spec.store.select7 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %spec.store.select, <4 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768>)
+ %conv6 = trunc <4 x i32> %spec.store.select7 to <4 x i16>
+ ret <4 x i16> %conv6
+}
+
+define <4 x i16> @utest_f32i16_mm(<4 x float> %x) {
+; CHECK-LABEL: utest_f32i16_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cvttps2dq %xmm0, %xmm1
+; CHECK-NEXT: movdqa %xmm1, %xmm2
+; CHECK-NEXT: psrad $31, %xmm2
+; CHECK-NEXT: subps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: cvttps2dq %xmm0, %xmm0
+; CHECK-NEXT: pand %xmm2, %xmm0
+; CHECK-NEXT: por %xmm1, %xmm0
+; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
+; CHECK-NEXT: pxor %xmm0, %xmm1
+; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [2147549183,2147549183,2147549183,2147549183]
+; CHECK-NEXT: pcmpgtd %xmm1, %xmm2
+; CHECK-NEXT: pand %xmm2, %xmm0
+; CHECK-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; CHECK-NEXT: por %xmm0, %xmm2
+; CHECK-NEXT: pshuflw {{.*#+}} xmm0 = xmm2[0,2,2,3,4,5,6,7]
+; CHECK-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; CHECK-NEXT: retq
+entry:
+ %conv = fptoui <4 x float> %x to <4 x i32>
+ %spec.store.select = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %conv, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>)
+ %conv6 = trunc <4 x i32> %spec.store.select to <4 x i16>
+ ret <4 x i16> %conv6
+}
+
+define <4 x i16> @ustest_f32i16_mm(<4 x float> %x) {
+; CHECK-LABEL: ustest_f32i16_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cvttps2dq %xmm0, %xmm0
+; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
+; CHECK-NEXT: movdqa %xmm1, %xmm2
+; CHECK-NEXT: pcmpgtd %xmm0, %xmm2
+; CHECK-NEXT: pand %xmm2, %xmm0
+; CHECK-NEXT: pandn %xmm1, %xmm2
+; CHECK-NEXT: por %xmm0, %xmm2
+; CHECK-NEXT: pxor %xmm0, %xmm0
+; CHECK-NEXT: movdqa %xmm2, %xmm1
+; CHECK-NEXT: pcmpgtd %xmm0, %xmm1
+; CHECK-NEXT: pand %xmm2, %xmm1
+; CHECK-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]
+; CHECK-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi <4 x float> %x to <4 x i32>
+ %spec.store.select = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %conv, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>)
+ %spec.store.select7 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %spec.store.select, <4 x i32> zeroinitializer)
+ %conv6 = trunc <4 x i32> %spec.store.select7 to <4 x i16>
+ ret <4 x i16> %conv6
+}
+
+define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
+; CHECK-LABEL: stest_f16i16_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: pushq %r15
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: pushq %r13
+; CHECK-NEXT: .cfi_def_cfa_offset 40
+; CHECK-NEXT: pushq %r12
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 56
+; CHECK-NEXT: subq $72, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 128
+; CHECK-NEXT: .cfi_offset %rbx, -56
+; CHECK-NEXT: .cfi_offset %r12, -48
+; CHECK-NEXT: .cfi_offset %r13, -40
+; CHECK-NEXT: .cfi_offset %r14, -32
+; CHECK-NEXT: .cfi_offset %r15, -24
+; CHECK-NEXT: .cfi_offset %rbp, -16
+; CHECK-NEXT: movl %r9d, %ebp
+; CHECK-NEXT: movl %r8d, %ebx
+; CHECK-NEXT: movl %ecx, %r13d
+; CHECK-NEXT: movl %edx, %r12d
+; CHECK-NEXT: movl %esi, %r15d
+; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %r14d
+; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %eax
+; CHECK-NEXT: movd %eax, %xmm0
+; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movl %r14d, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %eax
+; CHECK-NEXT: movd %eax, %xmm0
+; CHECK-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movzwl %bp, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %eax
+; CHECK-NEXT: movd %eax, %xmm0
+; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movzwl %bx, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %eax
+; CHECK-NEXT: movd %eax, %xmm0
+; CHECK-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; CHECK-NEXT: punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movzwl %r13w, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %eax
+; CHECK-NEXT: movd %eax, %xmm0
+; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movzwl %r12w, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %eax
+; CHECK-NEXT: movd %eax, %xmm0
+; CHECK-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movzwl %r15w, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %eax
+; CHECK-NEXT: movd %eax, %xmm0
+; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 2-byte Folded Reload
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %eax
+; CHECK-NEXT: movd %eax, %xmm0
+; CHECK-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; CHECK-NEXT: punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT: packssdw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: addq $72, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 56
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: popq %r12
+; CHECK-NEXT: .cfi_def_cfa_offset 40
+; CHECK-NEXT: popq %r13
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: popq %r15
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi <8 x half> %x to <8 x i32>
+ %spec.store.select = call <8 x i32> @llvm.smin.v8i32(<8 x i32> %conv, <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>)
+ %spec.store.select7 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> %spec.store.select, <8 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>)
+ %conv6 = trunc <8 x i32> %spec.store.select7 to <8 x i16>
+ ret <8 x i16> %conv6
+}
+
+define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
+; CHECK-LABEL: utesth_f16i16_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: pushq %r15
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: pushq %r13
+; CHECK-NEXT: .cfi_def_cfa_offset 40
+; CHECK-NEXT: pushq %r12
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 56
+; CHECK-NEXT: subq $72, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 128
+; CHECK-NEXT: .cfi_offset %rbx, -56
+; CHECK-NEXT: .cfi_offset %r12, -48
+; CHECK-NEXT: .cfi_offset %r13, -40
+; CHECK-NEXT: .cfi_offset %r14, -32
+; CHECK-NEXT: .cfi_offset %r15, -24
+; CHECK-NEXT: .cfi_offset %rbp, -16
+; CHECK-NEXT: movl %r9d, %ebp
+; CHECK-NEXT: movl %r8d, %ebx
+; CHECK-NEXT: movl %ecx, %r13d
+; CHECK-NEXT: movl %edx, %r12d
+; CHECK-NEXT: movl %esi, %r15d
+; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %r14d
+; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %rax
+; CHECK-NEXT: movd %eax, %xmm0
+; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movl %r14d, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %rax
+; CHECK-NEXT: movd %eax, %xmm0
+; CHECK-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movzwl %bp, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %rax
+; CHECK-NEXT: movd %eax, %xmm0
+; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movzwl %bx, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %rax
+; CHECK-NEXT: movd %eax, %xmm0
+; CHECK-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; CHECK-NEXT: punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movzwl %r13w, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %rax
+; CHECK-NEXT: movd %eax, %xmm0
+; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movzwl %r12w, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %rax
+; CHECK-NEXT: movd %eax, %xmm0
+; CHECK-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movzwl %r15w, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %rax
+; CHECK-NEXT: movd %eax, %xmm0
+; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 2-byte Folded Reload
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %rax
+; CHECK-NEXT: movd %eax, %xmm1
+; CHECK-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
+; CHECK-NEXT: punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
+; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; CHECK-NEXT: movdqa %xmm1, %xmm3
+; CHECK-NEXT: pxor %xmm2, %xmm3
+; CHECK-NEXT: movdqa {{.*#+}} xmm4 = [2147549183,2147549183,2147549183,2147549183]
+; CHECK-NEXT: movdqa %xmm4, %xmm0
+; CHECK-NEXT: pcmpgtd %xmm3, %xmm0
+; CHECK-NEXT: pand %xmm0, %xmm1
+; CHECK-NEXT: pcmpeqd %xmm3, %xmm3
+; CHECK-NEXT: pxor %xmm3, %xmm0
+; CHECK-NEXT: por %xmm1, %xmm0
+; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT: pxor %xmm1, %xmm2
+; CHECK-NEXT: pcmpgtd %xmm2, %xmm4
+; CHECK-NEXT: pand %xmm4, %xmm1
+; CHECK-NEXT: pxor %xmm3, %xmm4
+; CHECK-NEXT: por %xmm1, %xmm4
+; CHECK-NEXT: pslld $16, %xmm4
+; CHECK-NEXT: psrad $16, %xmm4
+; CHECK-NEXT: pslld $16, %xmm0
+; CHECK-NEXT: psrad $16, %xmm0
+; CHECK-NEXT: packssdw %xmm4, %xmm0
+; CHECK-NEXT: addq $72, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 56
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: popq %r12
+; CHECK-NEXT: .cfi_def_cfa_offset 40
+; CHECK-NEXT: popq %r13
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: popq %r15
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %conv = fptoui <8 x half> %x to <8 x i32>
+ %spec.store.select = call <8 x i32> @llvm.umin.v8i32(<8 x i32> %conv, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>)
+ %conv6 = trunc <8 x i32> %spec.store.select to <8 x i16>
+ ret <8 x i16> %conv6
+}
+
+define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
+; CHECK-LABEL: ustest_f16i16_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: pushq %r15
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: pushq %r13
+; CHECK-NEXT: .cfi_def_cfa_offset 40
+; CHECK-NEXT: pushq %r12
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 56
+; CHECK-NEXT: subq $72, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 128
+; CHECK-NEXT: .cfi_offset %rbx, -56
+; CHECK-NEXT: .cfi_offset %r12, -48
+; CHECK-NEXT: .cfi_offset %r13, -40
+; CHECK-NEXT: .cfi_offset %r14, -32
+; CHECK-NEXT: .cfi_offset %r15, -24
+; CHECK-NEXT: .cfi_offset %rbp, -16
+; CHECK-NEXT: movl %r9d, %r15d
+; CHECK-NEXT: movl %r8d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT: movl %edx, %ebp
+; CHECK-NEXT: movl %esi, %r14d
+; CHECK-NEXT: movl %edi, %ebx
+; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %r12d
+; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %r13d
+; CHECK-NEXT: movzwl %cx, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %eax
+; CHECK-NEXT: movd %eax, %xmm0
+; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movzwl %bp, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %eax
+; CHECK-NEXT: movd %eax, %xmm0
+; CHECK-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movzwl %r14w, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %eax
+; CHECK-NEXT: movd %eax, %xmm0
+; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movzwl %bx, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %eax
+; CHECK-NEXT: movd %eax, %xmm0
+; CHECK-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; CHECK-NEXT: punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movl %r13d, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %eax
+; CHECK-NEXT: movd %eax, %xmm0
+; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movl %r12d, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %eax
+; CHECK-NEXT: movd %eax, %xmm0
+; CHECK-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movzwl %r15w, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %eax
+; CHECK-NEXT: movd %eax, %xmm0
+; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movzwl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 2-byte Folded Reload
+; CHECK-NEXT: callq __gnu_h2f_ieee@PLT
+; CHECK-NEXT: cvttss2si %xmm0, %eax
+; CHECK-NEXT: movd %eax, %xmm0
+; CHECK-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; CHECK-NEXT: punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
+; CHECK-NEXT: movdqa %xmm1, %xmm2
+; CHECK-NEXT: pcmpgtd %xmm0, %xmm2
+; CHECK-NEXT: pand %xmm2, %xmm0
+; CHECK-NEXT: pandn %xmm1, %xmm2
+; CHECK-NEXT: por %xmm0, %xmm2
+; CHECK-NEXT: movdqa %xmm1, %xmm3
+; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT: pcmpgtd %xmm0, %xmm3
+; CHECK-NEXT: pand %xmm3, %xmm0
+; CHECK-NEXT: pandn %xmm1, %xmm3
+; CHECK-NEXT: por %xmm0, %xmm3
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: movdqa %xmm3, %xmm0
+; CHECK-NEXT: pcmpgtd %xmm1, %xmm0
+; CHECK-NEXT: pand %xmm3, %xmm0
+; CHECK-NEXT: movdqa %xmm2, %xmm3
+; CHECK-NEXT: pcmpgtd %xmm1, %xmm3
+; CHECK-NEXT: pand %xmm2, %xmm3
+; CHECK-NEXT: pslld $16, %xmm3
+; CHECK-NEXT: psrad $16, %xmm3
+; CHECK-NEXT: pslld $16, %xmm0
+; CHECK-NEXT: psrad $16, %xmm0
+; CHECK-NEXT: packssdw %xmm3, %xmm0
+; CHECK-NEXT: addq $72, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 56
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: popq %r12
+; CHECK-NEXT: .cfi_def_cfa_offset 40
+; CHECK-NEXT: popq %r13
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: popq %r15
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi <8 x half> %x to <8 x i32>
+ %spec.store.select = call <8 x i32> @llvm.smin.v8i32(<8 x i32> %conv, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>)
+ %spec.store.select7 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> %spec.store.select, <8 x i32> zeroinitializer)
+ %conv6 = trunc <8 x i32> %spec.store.select7 to <8 x i16>
+ ret <8 x i16> %conv6
+}
+
+; i64 saturate
+
+define <2 x i64> @stest_f64i64_mm(<2 x double> %x) {
+; CHECK-LABEL: stest_f64i64_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: .cfi_offset %rbx, -24
+; CHECK-NEXT: .cfi_offset %r14, -16
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT: callq __fixdfti@PLT
+; CHECK-NEXT: movq %rax, %rbx
+; CHECK-NEXT: movq %rdx, %r14
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: callq __fixdfti@PLT
+; CHECK-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF
+; CHECK-NEXT: cmpq %rcx, %rax
+; CHECK-NEXT: movq %rcx, %rsi
+; CHECK-NEXT: cmovbq %rax, %rsi
+; CHECK-NEXT: xorl %edi, %edi
+; CHECK-NEXT: testq %rdx, %rdx
+; CHECK-NEXT: cmovnsq %rcx, %rax
+; CHECK-NEXT: cmoveq %rsi, %rax
+; CHECK-NEXT: cmovnsq %rdi, %rdx
+; CHECK-NEXT: cmpq %rcx, %rbx
+; CHECK-NEXT: movq %rcx, %rsi
+; CHECK-NEXT: cmovbq %rbx, %rsi
+; CHECK-NEXT: testq %r14, %r14
+; CHECK-NEXT: cmovsq %rbx, %rcx
+; CHECK-NEXT: cmoveq %rsi, %rcx
+; CHECK-NEXT: cmovsq %r14, %rdi
+; CHECK-NEXT: testq %rdi, %rdi
+; CHECK-NEXT: movabsq $-9223372036854775808, %rbx # imm = 0x8000000000000000
+; CHECK-NEXT: movq %rbx, %rsi
+; CHECK-NEXT: cmovnsq %rcx, %rsi
+; CHECK-NEXT: cmpq %rbx, %rcx
+; CHECK-NEXT: cmovbeq %rbx, %rcx
+; CHECK-NEXT: cmpq $-1, %rdi
+; CHECK-NEXT: cmovneq %rsi, %rcx
+; CHECK-NEXT: testq %rdx, %rdx
+; CHECK-NEXT: movq %rbx, %rsi
+; CHECK-NEXT: cmovnsq %rax, %rsi
+; CHECK-NEXT: cmpq %rbx, %rax
+; CHECK-NEXT: cmovbeq %rbx, %rax
+; CHECK-NEXT: cmpq $-1, %rdx
+; CHECK-NEXT: cmovneq %rsi, %rax
+; CHECK-NEXT: movq %rax, %xmm0
+; CHECK-NEXT: movq %rcx, %xmm1
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi <2 x double> %x to <2 x i128>
+ %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 9223372036854775807, i128 9223372036854775807>)
+ %spec.store.select7 = call <2 x i128> @llvm.smax.v2i128(<2 x i128> %spec.store.select, <2 x i128> <i128 -9223372036854775808, i128 -9223372036854775808>)
+ %conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+define <2 x i64> @utest_f64i64_mm(<2 x double> %x) {
+; CHECK-LABEL: utest_f64i64_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: .cfi_offset %rbx, -24
+; CHECK-NEXT: .cfi_offset %r14, -16
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: callq __fixunsdfti@PLT
+; CHECK-NEXT: movq %rax, %rbx
+; CHECK-NEXT: movq %rdx, %r14
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT: callq __fixunsdfti@PLT
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: testq %rdx, %rdx
+; CHECK-NEXT: cmovneq %rcx, %rax
+; CHECK-NEXT: cmpq $1, %rdx
+; CHECK-NEXT: cmoveq %rcx, %rax
+; CHECK-NEXT: testq %r14, %r14
+; CHECK-NEXT: cmovneq %rcx, %rbx
+; CHECK-NEXT: cmpq $1, %r14
+; CHECK-NEXT: cmoveq %rcx, %rbx
+; CHECK-NEXT: movq %rbx, %xmm0
+; CHECK-NEXT: movq %rax, %xmm1
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %conv = fptoui <2 x double> %x to <2 x i128>
+ %spec.store.select = call <2 x i128> @llvm.umin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)
+ %conv6 = trunc <2 x i128> %spec.store.select to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) {
+; CHECK-LABEL: ustest_f64i64_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: .cfi_offset %rbx, -24
+; CHECK-NEXT: .cfi_offset %r14, -16
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT: callq __fixdfti@PLT
+; CHECK-NEXT: movq %rax, %rbx
+; CHECK-NEXT: movq %rdx, %r14
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: callq __fixdfti@PLT
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: testq %rdx, %rdx
+; CHECK-NEXT: movl $1, %esi
+; CHECK-NEXT: movl $1, %edi
+; CHECK-NEXT: cmovleq %rdx, %rdi
+; CHECK-NEXT: cmovgq %rcx, %rax
+; CHECK-NEXT: cmpq $1, %rdx
+; CHECK-NEXT: cmoveq %rcx, %rax
+; CHECK-NEXT: testq %r14, %r14
+; CHECK-NEXT: cmovleq %r14, %rsi
+; CHECK-NEXT: cmovgq %rcx, %rbx
+; CHECK-NEXT: cmpq $1, %r14
+; CHECK-NEXT: cmoveq %rcx, %rbx
+; CHECK-NEXT: testq %rsi, %rsi
+; CHECK-NEXT: cmovsq %rcx, %rbx
+; CHECK-NEXT: testq %rdi, %rdi
+; CHECK-NEXT: cmovsq %rcx, %rax
+; CHECK-NEXT: movq %rax, %xmm0
+; CHECK-NEXT: movq %rbx, %xmm1
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi <2 x double> %x to <2 x i128>
+ %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)
+ %spec.store.select7 = call <2 x i128> @llvm.smax.v2i128(<2 x i128> %spec.store.select, <2 x i128> zeroinitializer)
+ %conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+define <2 x i64> @stest_f32i64_mm(<2 x float> %x) {
+; CHECK-LABEL: stest_f32i64_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: .cfi_offset %rbx, -24
+; CHECK-NEXT: .cfi_offset %r14, -16
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; CHECK-NEXT: callq __fixsfti@PLT
+; CHECK-NEXT: movq %rax, %rbx
+; CHECK-NEXT: movq %rdx, %r14
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: callq __fixsfti@PLT
+; CHECK-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF
+; CHECK-NEXT: cmpq %rcx, %rax
+; CHECK-NEXT: movq %rcx, %rsi
+; CHECK-NEXT: cmovbq %rax, %rsi
+; CHECK-NEXT: xorl %edi, %edi
+; CHECK-NEXT: testq %rdx, %rdx
+; CHECK-NEXT: cmovnsq %rcx, %rax
+; CHECK-NEXT: cmoveq %rsi, %rax
+; CHECK-NEXT: cmovnsq %rdi, %rdx
+; CHECK-NEXT: cmpq %rcx, %rbx
+; CHECK-NEXT: movq %rcx, %rsi
+; CHECK-NEXT: cmovbq %rbx, %rsi
+; CHECK-NEXT: testq %r14, %r14
+; CHECK-NEXT: cmovsq %rbx, %rcx
+; CHECK-NEXT: cmoveq %rsi, %rcx
+; CHECK-NEXT: cmovsq %r14, %rdi
+; CHECK-NEXT: testq %rdi, %rdi
+; CHECK-NEXT: movabsq $-9223372036854775808, %rbx # imm = 0x8000000000000000
+; CHECK-NEXT: movq %rbx, %rsi
+; CHECK-NEXT: cmovnsq %rcx, %rsi
+; CHECK-NEXT: cmpq %rbx, %rcx
+; CHECK-NEXT: cmovbeq %rbx, %rcx
+; CHECK-NEXT: cmpq $-1, %rdi
+; CHECK-NEXT: cmovneq %rsi, %rcx
+; CHECK-NEXT: testq %rdx, %rdx
+; CHECK-NEXT: movq %rbx, %rsi
+; CHECK-NEXT: cmovnsq %rax, %rsi
+; CHECK-NEXT: cmpq %rbx, %rax
+; CHECK-NEXT: cmovbeq %rbx, %rax
+; CHECK-NEXT: cmpq $-1, %rdx
+; CHECK-NEXT: cmovneq %rsi, %rax
+; CHECK-NEXT: movq %rax, %xmm0
+; CHECK-NEXT: movq %rcx, %xmm1
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi <2 x float> %x to <2 x i128>
+ %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 9223372036854775807, i128 9223372036854775807>)
+ %spec.store.select7 = call <2 x i128> @llvm.smax.v2i128(<2 x i128> %spec.store.select, <2 x i128> <i128 -9223372036854775808, i128 -9223372036854775808>)
+ %conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+; utest_f32i64_mm: unsigned-saturating conversion of <2 x float> to <2 x i64>
+; in min/max-intrinsic form: fptoui to <2 x i128>, umin against an upper
+; bound, then trunc to i64.
+; NOTE(review): the umin bound 18446744073709551616 is 2^64, one above
+; UINT64_MAX (2^64 - 1).  This looks unintentional for a u64 clamp — confirm
+; against the scalar tests; fixing it requires regenerating the CHECK lines
+; with update_llc_test_checks.py, since they were generated from this IR.
+define <2 x i64> @utest_f32i64_mm(<2 x float> %x) {
+; CHECK-LABEL: utest_f32i64_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: .cfi_offset %rbx, -24
+; CHECK-NEXT: .cfi_offset %r14, -16
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: callq __fixunssfti at PLT
+; CHECK-NEXT: movq %rax, %rbx
+; CHECK-NEXT: movq %rdx, %r14
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; CHECK-NEXT: callq __fixunssfti at PLT
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: testq %rdx, %rdx
+; CHECK-NEXT: cmovneq %rcx, %rax
+; CHECK-NEXT: cmpq $1, %rdx
+; CHECK-NEXT: cmoveq %rcx, %rax
+; CHECK-NEXT: testq %r14, %r14
+; CHECK-NEXT: cmovneq %rcx, %rbx
+; CHECK-NEXT: cmpq $1, %r14
+; CHECK-NEXT: cmoveq %rcx, %rbx
+; CHECK-NEXT: movq %rbx, %xmm0
+; CHECK-NEXT: movq %rax, %xmm1
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %conv = fptoui <2 x float> %x to <2 x i128>
+ ; Upper clamp only; no lower clamp is needed for an unsigned source.
+ %spec.store.select = call <2 x i128> @llvm.umin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)
+ %conv6 = trunc <2 x i128> %spec.store.select to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+; ustest_f32i64_mm: signed input clamped to an unsigned i64 result: fptosi to
+; <2 x i128>, smin against an upper bound, smax against zero, trunc to i64.
+; NOTE(review): the smin bound 18446744073709551616 is 2^64, one above
+; UINT64_MAX (2^64 - 1) — presumably 2^64 - 1 was intended; confirm, and
+; regenerate the autogenerated CHECK lines if changed.
+define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) {
+; CHECK-LABEL: ustest_f32i64_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: .cfi_offset %rbx, -24
+; CHECK-NEXT: .cfi_offset %r14, -16
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; CHECK-NEXT: callq __fixsfti at PLT
+; CHECK-NEXT: movq %rax, %rbx
+; CHECK-NEXT: movq %rdx, %r14
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: callq __fixsfti at PLT
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: testq %rdx, %rdx
+; CHECK-NEXT: movl $1, %esi
+; CHECK-NEXT: movl $1, %edi
+; CHECK-NEXT: cmovleq %rdx, %rdi
+; CHECK-NEXT: cmovgq %rcx, %rax
+; CHECK-NEXT: cmpq $1, %rdx
+; CHECK-NEXT: cmoveq %rcx, %rax
+; CHECK-NEXT: testq %r14, %r14
+; CHECK-NEXT: cmovleq %r14, %rsi
+; CHECK-NEXT: cmovgq %rcx, %rbx
+; CHECK-NEXT: cmpq $1, %r14
+; CHECK-NEXT: cmoveq %rcx, %rbx
+; CHECK-NEXT: testq %rsi, %rsi
+; CHECK-NEXT: cmovsq %rcx, %rbx
+; CHECK-NEXT: testq %rdi, %rdi
+; CHECK-NEXT: cmovsq %rcx, %rax
+; CHECK-NEXT: movq %rax, %xmm0
+; CHECK-NEXT: movq %rbx, %xmm1
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi <2 x float> %x to <2 x i128>
+ ; Clamp above first (smin), then clamp below at zero (smax).
+ %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)
+ %spec.store.select7 = call <2 x i128> @llvm.smax.v2i128(<2 x i128> %spec.store.select, <2 x i128> zeroinitializer)
+ %conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+; stest_f16i64_mm: half-precision variant of stest_f32i64_mm.  Each <2 x half>
+; lane is extended to float via __gnu_h2f_ieee, converted with __fixsfti,
+; then clamped to [INT64_MIN, INT64_MAX] with smin/smax on i128 and truncated.
+; CHECK lines are autogenerated ("__gnu_h2f_ieee at PLT" is the mailing list's
+; rendering of "@PLT").
+define <2 x i64> @stest_f16i64_mm(<2 x half> %x) {
+; CHECK-LABEL: stest_f16i64_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: .cfi_offset %rbx, -32
+; CHECK-NEXT: .cfi_offset %r14, -24
+; CHECK-NEXT: .cfi_offset %rbp, -16
+; CHECK-NEXT: movl %esi, %ebp
+; CHECK-NEXT: movzwl %di, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee at PLT
+; CHECK-NEXT: callq __fixsfti at PLT
+; CHECK-NEXT: movq %rax, %rbx
+; CHECK-NEXT: movq %rdx, %r14
+; CHECK-NEXT: movzwl %bp, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee at PLT
+; CHECK-NEXT: callq __fixsfti at PLT
+; CHECK-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF
+; CHECK-NEXT: cmpq %rcx, %rax
+; CHECK-NEXT: movq %rcx, %rsi
+; CHECK-NEXT: cmovbq %rax, %rsi
+; CHECK-NEXT: xorl %edi, %edi
+; CHECK-NEXT: testq %rdx, %rdx
+; CHECK-NEXT: cmovnsq %rcx, %rax
+; CHECK-NEXT: cmoveq %rsi, %rax
+; CHECK-NEXT: cmovnsq %rdi, %rdx
+; CHECK-NEXT: cmpq %rcx, %rbx
+; CHECK-NEXT: movq %rcx, %rsi
+; CHECK-NEXT: cmovbq %rbx, %rsi
+; CHECK-NEXT: testq %r14, %r14
+; CHECK-NEXT: cmovsq %rbx, %rcx
+; CHECK-NEXT: cmoveq %rsi, %rcx
+; CHECK-NEXT: cmovsq %r14, %rdi
+; CHECK-NEXT: testq %rdi, %rdi
+; CHECK-NEXT: movabsq $-9223372036854775808, %rbp # imm = 0x8000000000000000
+; CHECK-NEXT: movq %rbp, %rsi
+; CHECK-NEXT: cmovnsq %rcx, %rsi
+; CHECK-NEXT: cmpq %rbp, %rcx
+; CHECK-NEXT: cmovbeq %rbp, %rcx
+; CHECK-NEXT: cmpq $-1, %rdi
+; CHECK-NEXT: cmovneq %rsi, %rcx
+; CHECK-NEXT: testq %rdx, %rdx
+; CHECK-NEXT: movq %rbp, %rsi
+; CHECK-NEXT: cmovnsq %rax, %rsi
+; CHECK-NEXT: cmpq %rbp, %rax
+; CHECK-NEXT: cmovbeq %rbp, %rax
+; CHECK-NEXT: cmpq $-1, %rdx
+; CHECK-NEXT: cmovneq %rsi, %rax
+; CHECK-NEXT: movq %rax, %xmm1
+; CHECK-NEXT: movq %rcx, %xmm0
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi <2 x half> %x to <2 x i128>
+ ; Clamp to the signed i64 range, then narrow.
+ %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 9223372036854775807, i128 9223372036854775807>)
+ %spec.store.select7 = call <2 x i128> @llvm.smax.v2i128(<2 x i128> %spec.store.select, <2 x i128> <i128 -9223372036854775808, i128 -9223372036854775808>)
+ %conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+; utesth_f16i64_mm: half-precision variant of utest_f32i64_mm — fptoui to
+; <2 x i128>, umin upper clamp, trunc to i64.
+; NOTE(review): as in the other u64-clamp tests, the umin bound
+; 18446744073709551616 is 2^64 (UINT64_MAX + 1), likely intended to be
+; 2^64 - 1; confirm and regenerate the CHECK lines if changed.
+define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
+; CHECK-LABEL: utesth_f16i64_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: .cfi_offset %rbx, -32
+; CHECK-NEXT: .cfi_offset %r14, -24
+; CHECK-NEXT: .cfi_offset %rbp, -16
+; CHECK-NEXT: movl %edi, %r14d
+; CHECK-NEXT: movzwl %si, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee at PLT
+; CHECK-NEXT: callq __fixunssfti at PLT
+; CHECK-NEXT: movq %rax, %rbx
+; CHECK-NEXT: movq %rdx, %rbp
+; CHECK-NEXT: movzwl %r14w, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee at PLT
+; CHECK-NEXT: callq __fixunssfti at PLT
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: testq %rdx, %rdx
+; CHECK-NEXT: cmovneq %rcx, %rax
+; CHECK-NEXT: cmpq $1, %rdx
+; CHECK-NEXT: cmoveq %rcx, %rax
+; CHECK-NEXT: testq %rbp, %rbp
+; CHECK-NEXT: cmovneq %rcx, %rbx
+; CHECK-NEXT: cmpq $1, %rbp
+; CHECK-NEXT: cmoveq %rcx, %rbx
+; CHECK-NEXT: movq %rbx, %xmm1
+; CHECK-NEXT: movq %rax, %xmm0
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %conv = fptoui <2 x half> %x to <2 x i128>
+ %spec.store.select = call <2 x i128> @llvm.umin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)
+ %conv6 = trunc <2 x i128> %spec.store.select to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+; ustest_f16i64_mm: half-precision variant of ustest_f32i64_mm — signed
+; conversion (fptosi to <2 x i128>) clamped to [0, upper] with smin/smax,
+; then truncated to i64.  CHECK lines are autogenerated.
+; NOTE(review): the smin bound 18446744073709551616 is 2^64, one above
+; UINT64_MAX — see the matching note on the f32 variant.
+define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) {
+; CHECK-LABEL: ustest_f16i64_mm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: .cfi_offset %rbx, -32
+; CHECK-NEXT: .cfi_offset %r14, -24
+; CHECK-NEXT: .cfi_offset %rbp, -16
+; CHECK-NEXT: movl %esi, %r14d
+; CHECK-NEXT: movzwl %di, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee at PLT
+; CHECK-NEXT: callq __fixsfti at PLT
+; CHECK-NEXT: movq %rax, %rbx
+; CHECK-NEXT: movq %rdx, %rbp
+; CHECK-NEXT: movzwl %r14w, %edi
+; CHECK-NEXT: callq __gnu_h2f_ieee at PLT
+; CHECK-NEXT: callq __fixsfti at PLT
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: testq %rdx, %rdx
+; CHECK-NEXT: movl $1, %esi
+; CHECK-NEXT: movl $1, %edi
+; CHECK-NEXT: cmovleq %rdx, %rdi
+; CHECK-NEXT: cmovgq %rcx, %rax
+; CHECK-NEXT: cmpq $1, %rdx
+; CHECK-NEXT: cmoveq %rcx, %rax
+; CHECK-NEXT: testq %rbp, %rbp
+; CHECK-NEXT: cmovleq %rbp, %rsi
+; CHECK-NEXT: cmovgq %rcx, %rbx
+; CHECK-NEXT: cmpq $1, %rbp
+; CHECK-NEXT: cmoveq %rcx, %rbx
+; CHECK-NEXT: testq %rsi, %rsi
+; CHECK-NEXT: cmovsq %rcx, %rbx
+; CHECK-NEXT: testq %rdi, %rdi
+; CHECK-NEXT: cmovsq %rcx, %rax
+; CHECK-NEXT: movq %rax, %xmm1
+; CHECK-NEXT: movq %rbx, %xmm0
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %conv = fptosi <2 x half> %x to <2 x i128>
+ ; Upper clamp (smin), then lower clamp at zero (smax).
+ %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> <i128 18446744073709551616, i128 18446744073709551616>)
+ %spec.store.select7 = call <2 x i128> @llvm.smax.v2i128(<2 x i128> %spec.store.select, <2 x i128> zeroinitializer)
+ %conv6 = trunc <2 x i128> %spec.store.select7 to <2 x i64>
+ ret <2 x i64> %conv6
+}
+
+; Declarations of the vector llvm.smin / llvm.smax / llvm.umin intrinsics
+; used by the *_mm test variants in this file.
+declare <2 x i32> @llvm.smin.v2i32(<2 x i32>, <2 x i32>)
+declare <2 x i32> @llvm.smax.v2i32(<2 x i32>, <2 x i32>)
+declare <2 x i32> @llvm.umin.v2i32(<2 x i32>, <2 x i32>)
+declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.umin.v4i32(<4 x i32>, <4 x i32>)
+declare <8 x i32> @llvm.smin.v8i32(<8 x i32>, <8 x i32>)
+declare <8 x i32> @llvm.smax.v8i32(<8 x i32>, <8 x i32>)
+declare <8 x i32> @llvm.umin.v8i32(<8 x i32>, <8 x i32>)
+declare <2 x i64> @llvm.smin.v2i64(<2 x i64>, <2 x i64>)
+declare <2 x i64> @llvm.smax.v2i64(<2 x i64>, <2 x i64>)
+declare <2 x i64> @llvm.umin.v2i64(<2 x i64>, <2 x i64>)
+declare <4 x i64> @llvm.smin.v4i64(<4 x i64>, <4 x i64>)
+declare <4 x i64> @llvm.smax.v4i64(<4 x i64>, <4 x i64>)
+declare <4 x i64> @llvm.umin.v4i64(<4 x i64>, <4 x i64>)
+declare <2 x i128> @llvm.smin.v2i128(<2 x i128>, <2 x i128>)
+declare <2 x i128> @llvm.smax.v2i128(<2 x i128>, <2 x i128>)
+declare <2 x i128> @llvm.umin.v2i128(<2 x i128>, <2 x i128>)
More information about the llvm-commits mailing list