[llvm] [WebAssembly] Change `half` to use soft promotion rather than `PromoteFloat` (PR #152833)

Fri Aug 8 22:50:20 PDT 2025

https://github.com/tgross35 created https://github.com/llvm/llvm-project/pull/152833

The default `half` legalization, which Wasm currently uses, does not respect IEEE conventions: for example, casting to bits may invoke a lossy libcall, meaning soft float operations cannot be correctly implemented. Change to the soft promotion legalization which passes `f16` as an `i16` and treats each `half` operation as an individual f16->f32->libcall->f32->f16 sequence.

Of note in the test updates are that `from_bits` and `to_bits` are now libcall-free, and that chained operations now round back to `f16` after each step.

Fixes the wasm portion of https://github.com/llvm/llvm-project/issues/97981
Fixes the wasm portion of https://github.com/llvm/llvm-project/issues/97975
Fixes: https://github.com/llvm/llvm-project/issues/96438
Fixes: https://github.com/llvm/llvm-project/issues/96438

>From 7cbd5af1c111cccda47c3661093a313073cae31b Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross at umich.edu>
Date: Fri, 8 Aug 2025 14:11:41 -0500
Subject: [PATCH 1/2] [WebAssembly] Update the test for `half` (NFC)

Replace the existing `f16` test with the version that is used for other
architectures (typically as `half.ll`). This still covers the
conversions from the existing test, but also adds checks for most simple
ops.

Additionally, rename `half-precision.ll` to `f16-intrinsics.ll` to keep
the name similar to this test.
---
 .../{half-precision.ll => f16-intrinsics.ll}  |   2 +
 llvm/test/CodeGen/WebAssembly/f16.ll          | 712 ++++++++++++++++--
 2 files changed, 650 insertions(+), 64 deletions(-)
 rename llvm/test/CodeGen/WebAssembly/{half-precision.ll => f16-intrinsics.ll} (99%)

diff --git a/llvm/test/CodeGen/WebAssembly/half-precision.ll b/llvm/test/CodeGen/WebAssembly/f16-intrinsics.ll
similarity index 99%
rename from llvm/test/CodeGen/WebAssembly/half-precision.ll
rename to llvm/test/CodeGen/WebAssembly/f16-intrinsics.ll
index 4e8ff5955c63b..8033ec5d310fa 100644
--- a/llvm/test/CodeGen/WebAssembly/half-precision.ll
+++ b/llvm/test/CodeGen/WebAssembly/f16-intrinsics.ll
@@ -1,5 +1,7 @@
 ; RUN: llc < %s --mtriple=wasm32-unknown-unknown -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+fp16,+simd128 | FileCheck %s
 
+; Tests for `llvm.wasm.*.*f16` intrinsics
+
 declare float @llvm.wasm.loadf32.f16(ptr)
 declare void @llvm.wasm.storef16.f32(float, ptr)
 
diff --git a/llvm/test/CodeGen/WebAssembly/f16.ll b/llvm/test/CodeGen/WebAssembly/f16.ll
index b67c0c16d4651..0486975f6cba7 100644
--- a/llvm/test/CodeGen/WebAssembly/f16.ll
+++ b/llvm/test/CodeGen/WebAssembly/f16.ll
@@ -1,69 +1,653 @@
-; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers | FileCheck %s
-; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers -fast-isel | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 
-; Test that f16 is expanded.
+; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-keep-registers            | FileCheck %s --check-prefixes=ALL,DEFISEL
+; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-keep-registers -fast-isel | FileCheck %s --check-prefixes=ALL,FASTISEL
+
+; Tests for various operations on half precision float. Much of the test is
+; copied from test/CodeGen/X86/half.ll.
 
 target triple = "wasm32-unknown-unknown"
 
-; CHECK-LABEL: demote.f32:
-; CHECK-NEXT: .functype demote.f32 (f32) -> (f32){{$}}
-; CHECK-NEXT: local.get	$push[[L0:[0-9]+]]=, 0{{$}}
-; CHECK-NEXT: call	$push[[L1:[0-9]+]]=, __truncsfhf2, $pop[[L0]]{{$}}
-; CHECK-NEXT: call	$push[[L2:[0-9]+]]=, __extendhfsf2, $pop[[L1]]{{$}}
-; CHECK-NEXT: return  	$pop[[L2]]{{$}}
-define half @demote.f32(float %f) {
-    %t = fptrunc float %f to half
-    ret half %t
-}
-
-; CHECK-LABEL: promote.f32:
-; CHECK-NEXT: .functype promote.f32 (f32) -> (f32){{$}}
-; CHECK-NEXT: local.get	$push0=, 0{{$}}
-; CHECK-NEXT: return  	$pop0{{$}}
-define float @promote.f32(half %f) {
-    %t = fpext half %f to float
-    ret float %t
-}
-
-; CHECK-LABEL: demote.f64:
-; CHECK-NEXT: .functype demote.f64 (f64) -> (f32){{$}}
-; CHECK-NEXT: local.get	$push[[L0:[0-9]+]]=, 0{{$}}
-; CHECK-NEXT: call	$push[[L1:[0-9]+]]=, __truncdfhf2, $pop[[L0]]{{$}}
-; CHECK-NEXT: call	$push[[L2:[0-9]+]]=, __extendhfsf2, $pop[[L1]]{{$}}
-; CHECK-NEXT: return  	$pop[[L2]]{{$}}
-define half @demote.f64(double %f) {
-    %t = fptrunc double %f to half
-    ret half %t
-}
-
-; CHECK-LABEL: promote.f64:
-; CHECK-NEXT: .functype promote.f64 (f32) -> (f64){{$}}
-; CHECK-NEXT: local.get	$push[[L0:[0-9]+]]=, 0{{$}}
-; CHECK-NEXT: f64.promote_f32 $push[[L1:[0-9]+]]=, $pop[[L0]]{{$}}
-; CHECK-NEXT: return  	$pop[[L1]]{{$}}
-define double @promote.f64(half %f) {
-    %t = fpext half %f to double
-    ret double %t
-}
-
-; CHECK-LABEL: demote.f128:
-; CHECK-NEXT: .functype demote.f128 (i64, i64) -> (f32){{$}}
-; CHECK-NEXT: local.get	$push[[L0:[0-9]+]]=, 0{{$}}
-; CHECK-NEXT: local.get	$push[[L1:[0-9]+]]=, 1{{$}}
-; CHECK-NEXT: call	$push[[L2:[0-9]+]]=, __trunctfhf2, $pop[[L0]], $pop[[L1]]{{$}}
-; CHECK-NEXT: call	$push[[L3:[0-9]+]]=, __extendhfsf2, $pop[[L2]]{{$}}
-; CHECK-NEXT: return  	$pop[[L3]]{{$}}
-define half @demote.f128(fp128 %f) {
-    %t = fptrunc fp128 %f to half
-    ret half %t
-}
-
-; CHECK-LABEL: promote.f128:
-; CHECK-NEXT: .functype promote.f128 (i32, f32) -> (){{$}}
-; CHECK: call __extendsftf2
-; CHECK: i64.store
-; CHECK: i64.store
-define fp128 @promote.f128(half %f) {
-    %t = fpext half %f to fp128
-    ret fp128 %t
+define void @store(half %x, ptr %p) nounwind {
+; ALL-LABEL: store:
+; ALL:         .functype store (f32, i32) -> ()
+; ALL-NEXT:  # %bb.0:
+; ALL-NEXT:    local.get $push2=, 1
+; ALL-NEXT:    local.get $push1=, 0
+; ALL-NEXT:    call $push0=, __truncsfhf2, $pop1
+; ALL-NEXT:    i32.store16 0($pop2), $pop0
+; ALL-NEXT:    return
+  store half %x, ptr %p
+  ret void
+}
+
+define half @return(ptr %p) nounwind {
+; ALL-LABEL: return:
+; ALL:         .functype return (i32) -> (f32)
+; ALL-NEXT:  # %bb.0:
+; ALL-NEXT:    local.get $push2=, 0
+; ALL-NEXT:    i32.load16_u $push0=, 0($pop2)
+; ALL-NEXT:    call $push1=, __extendhfsf2, $pop0
+; ALL-NEXT:    return $pop1
+  %r = load half, ptr %p
+  ret half %r
+}
+
+define dso_local double @loadd(ptr nocapture readonly %a) local_unnamed_addr nounwind {
+; DEFISEL-LABEL: loadd:
+; DEFISEL:         .functype loadd (i32) -> (f64)
+; DEFISEL-NEXT:  # %bb.0:
+; DEFISEL-NEXT:    local.get $push3=, 0
+; DEFISEL-NEXT:    i32.load16_u $push0=, 2($pop3)
+; DEFISEL-NEXT:    call $push1=, __extendhfsf2, $pop0
+; DEFISEL-NEXT:    f64.promote_f32 $push2=, $pop1
+; DEFISEL-NEXT:    return $pop2
+;
+; FASTISEL-LABEL: loadd:
+; FASTISEL:         .functype loadd (i32) -> (f64)
+; FASTISEL-NEXT:  # %bb.0:
+; FASTISEL-NEXT:    local.get $push3=, 0
+; FASTISEL-NEXT:    i32.load16_u $push2=, 2($pop3)
+; FASTISEL-NEXT:    call $push1=, __extendhfsf2, $pop2
+; FASTISEL-NEXT:    f64.promote_f32 $push0=, $pop1
+; FASTISEL-NEXT:    return $pop0
+  %arrayidx = getelementptr inbounds i16, ptr %a, i64 1
+  %x = load i16, ptr %arrayidx, align 2
+  %ret = tail call double @llvm.convert.from.fp16.f64(i16 %x)
+  ret double %ret
+}
+
+define dso_local float @loadf(ptr nocapture readonly %a) local_unnamed_addr nounwind {
+; DEFISEL-LABEL: loadf:
+; DEFISEL:         .functype loadf (i32) -> (f32)
+; DEFISEL-NEXT:  # %bb.0:
+; DEFISEL-NEXT:    local.get $push2=, 0
+; DEFISEL-NEXT:    i32.load16_u $push0=, 2($pop2)
+; DEFISEL-NEXT:    call $push1=, __extendhfsf2, $pop0
+; DEFISEL-NEXT:    return $pop1
+;
+; FASTISEL-LABEL: loadf:
+; FASTISEL:         .functype loadf (i32) -> (f32)
+; FASTISEL-NEXT:  # %bb.0:
+; FASTISEL-NEXT:    local.get $push2=, 0
+; FASTISEL-NEXT:    i32.load16_u $push1=, 2($pop2)
+; FASTISEL-NEXT:    call $push0=, __extendhfsf2, $pop1
+; FASTISEL-NEXT:    return $pop0
+  %arrayidx = getelementptr inbounds i16, ptr %a, i64 1
+  %x = load i16, ptr %arrayidx, align 2
+  %ret = tail call float @llvm.convert.from.fp16.f32(i16 %x)
+  ret float %ret
+}
+
+define dso_local void @stored(ptr nocapture %a, double %b) local_unnamed_addr nounwind {
+; DEFISEL-LABEL: stored:
+; DEFISEL:         .functype stored (i32, f64) -> ()
+; DEFISEL-NEXT:  # %bb.0:
+; DEFISEL-NEXT:    local.get $push2=, 0
+; DEFISEL-NEXT:    local.get $push1=, 1
+; DEFISEL-NEXT:    call $push0=, __truncdfhf2, $pop1
+; DEFISEL-NEXT:    i32.store16 0($pop2), $pop0
+; DEFISEL-NEXT:    return
+;
+; FASTISEL-LABEL: stored:
+; FASTISEL:         .functype stored (i32, f64) -> ()
+; FASTISEL-NEXT:  # %bb.0:
+; FASTISEL-NEXT:    local.get $push4=, 0
+; FASTISEL-NEXT:    local.get $push3=, 1
+; FASTISEL-NEXT:    call $push2=, __truncdfhf2, $pop3
+; FASTISEL-NEXT:    i32.const $push1=, 65535
+; FASTISEL-NEXT:    i32.and $push0=, $pop2, $pop1
+; FASTISEL-NEXT:    i32.store16 0($pop4), $pop0
+; FASTISEL-NEXT:    return
+  %x = tail call i16 @llvm.convert.to.fp16.f64(double %b)
+  store i16 %x, ptr %a, align 2
+  ret void
+}
+
+define dso_local void @storef(ptr nocapture %a, float %b) local_unnamed_addr nounwind {
+; DEFISEL-LABEL: storef:
+; DEFISEL:         .functype storef (i32, f32) -> ()
+; DEFISEL-NEXT:  # %bb.0:
+; DEFISEL-NEXT:    local.get $push2=, 0
+; DEFISEL-NEXT:    local.get $push1=, 1
+; DEFISEL-NEXT:    call $push0=, __truncsfhf2, $pop1
+; DEFISEL-NEXT:    i32.store16 0($pop2), $pop0
+; DEFISEL-NEXT:    return
+;
+; FASTISEL-LABEL: storef:
+; FASTISEL:         .functype storef (i32, f32) -> ()
+; FASTISEL-NEXT:  # %bb.0:
+; FASTISEL-NEXT:    local.get $push4=, 0
+; FASTISEL-NEXT:    local.get $push3=, 1
+; FASTISEL-NEXT:    call $push2=, __truncsfhf2, $pop3
+; FASTISEL-NEXT:    i32.const $push1=, 65535
+; FASTISEL-NEXT:    i32.and $push0=, $pop2, $pop1
+; FASTISEL-NEXT:    i32.store16 0($pop4), $pop0
+; FASTISEL-NEXT:    return
+  %x = tail call i16 @llvm.convert.to.fp16.f32(float %b)
+  store i16 %x, ptr %a, align 2
+  ret void
+}
+
+define void @test_load_store(ptr %in, ptr %out) nounwind {
+; ALL-LABEL: test_load_store:
+; ALL:         .functype test_load_store (i32, i32) -> ()
+; ALL-NEXT:  # %bb.0:
+; ALL-NEXT:    local.get $push2=, 1
+; ALL-NEXT:    local.get $push1=, 0
+; ALL-NEXT:    i32.load16_u $push0=, 0($pop1)
+; ALL-NEXT:    i32.store16 0($pop2), $pop0
+; ALL-NEXT:    return
+  %val = load half, ptr %in
+  store half %val, ptr %out
+  ret void
+}
+
+define i16 @test_bitcast_from_half(ptr %addr) nounwind {
+; ALL-LABEL: test_bitcast_from_half:
+; ALL:         .functype test_bitcast_from_half (i32) -> (i32)
+; ALL-NEXT:  # %bb.0:
+; ALL-NEXT:    local.get $push1=, 0
+; ALL-NEXT:    i32.load16_u $push0=, 0($pop1)
+; ALL-NEXT:    return $pop0
+  %val = load half, ptr %addr
+  %val_int = bitcast half %val to i16
+  ret i16 %val_int
+}
+
+define void @test_bitcast_to_half(ptr %addr, i16 %in) nounwind {
+; ALL-LABEL: test_bitcast_to_half:
+; ALL:         .functype test_bitcast_to_half (i32, i32) -> ()
+; ALL-NEXT:  # %bb.0:
+; ALL-NEXT:    local.get $push1=, 0
+; ALL-NEXT:    local.get $push0=, 1
+; ALL-NEXT:    i32.store16 0($pop1), $pop0
+; ALL-NEXT:    return
+  %val_fp = bitcast i16 %in to half
+  store half %val_fp, ptr %addr
+  ret void
+}
+
+define half @from_bits(i16 %x) nounwind {
+; ALL-LABEL: from_bits:
+; ALL:         .functype from_bits (i32) -> (f32)
+; ALL-NEXT:  # %bb.0:
+; ALL-NEXT:    local.get $push1=, 0
+; ALL-NEXT:    call $push0=, __extendhfsf2, $pop1
+; ALL-NEXT:    return $pop0
+  %res = bitcast i16 %x to half
+  ret half %res
+}
+
+define i16 @to_bits(half %x) nounwind {
+; DEFISEL-LABEL: to_bits:
+; DEFISEL:         .functype to_bits (f32) -> (i32)
+; DEFISEL-NEXT:  # %bb.0:
+; DEFISEL-NEXT:    local.get $push3=, 0
+; DEFISEL-NEXT:    call $push1=, __truncsfhf2, $pop3
+; DEFISEL-NEXT:    i32.const $push0=, 65535
+; DEFISEL-NEXT:    i32.and $push2=, $pop1, $pop0
+; DEFISEL-NEXT:    return $pop2
+;
+; FASTISEL-LABEL: to_bits:
+; FASTISEL:         .functype to_bits (f32) -> (i32)
+; FASTISEL-NEXT:  # %bb.0:
+; FASTISEL-NEXT:    local.get $push3=, 0
+; FASTISEL-NEXT:    call $push2=, __truncsfhf2, $pop3
+; FASTISEL-NEXT:    i32.const $push1=, 65535
+; FASTISEL-NEXT:    i32.and $push0=, $pop2, $pop1
+; FASTISEL-NEXT:    return $pop0
+    %res = bitcast half %x to i16
+    ret i16 %res
+}
+
+define float @test_extend32(ptr %addr) nounwind {
+; DEFISEL-LABEL: test_extend32:
+; DEFISEL:         .functype test_extend32 (i32) -> (f32)
+; DEFISEL-NEXT:  # %bb.0:
+; DEFISEL-NEXT:    local.get $push2=, 0
+; DEFISEL-NEXT:    i32.load16_u $push0=, 0($pop2)
+; DEFISEL-NEXT:    call $push1=, __extendhfsf2, $pop0
+; DEFISEL-NEXT:    return $pop1
+;
+; FASTISEL-LABEL: test_extend32:
+; FASTISEL:         .functype test_extend32 (i32) -> (f32)
+; FASTISEL-NEXT:  # %bb.0:
+; FASTISEL-NEXT:    local.get $push2=, 0
+; FASTISEL-NEXT:    i32.load16_u $push1=, 0($pop2)
+; FASTISEL-NEXT:    call $push0=, __extendhfsf2, $pop1
+; FASTISEL-NEXT:    return $pop0
+  %val16 = load half, ptr %addr
+  %val32 = fpext half %val16 to float
+  ret float %val32
+}
+
+define double @test_extend64(ptr %addr) nounwind {
+; DEFISEL-LABEL: test_extend64:
+; DEFISEL:         .functype test_extend64 (i32) -> (f64)
+; DEFISEL-NEXT:  # %bb.0:
+; DEFISEL-NEXT:    local.get $push3=, 0
+; DEFISEL-NEXT:    i32.load16_u $push0=, 0($pop3)
+; DEFISEL-NEXT:    call $push1=, __extendhfsf2, $pop0
+; DEFISEL-NEXT:    f64.promote_f32 $push2=, $pop1
+; DEFISEL-NEXT:    return $pop2
+;
+; FASTISEL-LABEL: test_extend64:
+; FASTISEL:         .functype test_extend64 (i32) -> (f64)
+; FASTISEL-NEXT:  # %bb.0:
+; FASTISEL-NEXT:    local.get $push3=, 0
+; FASTISEL-NEXT:    i32.load16_u $push1=, 0($pop3)
+; FASTISEL-NEXT:    call $push2=, __extendhfsf2, $pop1
+; FASTISEL-NEXT:    f64.promote_f32 $push0=, $pop2
+; FASTISEL-NEXT:    return $pop0
+  %val16 = load half, ptr %addr
+  %val32 = fpext half %val16 to double
+  ret double %val32
+}
+
+define fp128 @test_extend128(ptr %addr) nounwind {
+; ALL-LABEL: test_extend128:
+; ALL:         .functype test_extend128 (i32, i32) -> ()
+; ALL-NEXT:    .local i32
+; ALL-NEXT:  # %bb.0:
+; ALL-NEXT:    global.get $push4=, __stack_pointer
+; ALL-NEXT:    i32.const $push5=, 16
+; ALL-NEXT:    i32.sub $push9=, $pop4, $pop5
+; ALL-NEXT:    local.tee $push8=, 2, $pop9
+; ALL-NEXT:    global.set __stack_pointer, $pop8
+; ALL-NEXT:    local.get $push11=, 2
+; ALL-NEXT:    local.get $push10=, 1
+; ALL-NEXT:    i32.load16_u $push0=, 0($pop10)
+; ALL-NEXT:    call $push1=, __extendhfsf2, $pop0
+; ALL-NEXT:    call __extendsftf2, $pop11, $pop1
+; ALL-NEXT:    local.get $push13=, 0
+; ALL-NEXT:    local.get $push12=, 2
+; ALL-NEXT:    i64.load $push2=, 8($pop12)
+; ALL-NEXT:    i64.store 8($pop13), $pop2
+; ALL-NEXT:    local.get $push15=, 0
+; ALL-NEXT:    local.get $push14=, 2
+; ALL-NEXT:    i64.load $push3=, 0($pop14)
+; ALL-NEXT:    i64.store 0($pop15), $pop3
+; ALL-NEXT:    local.get $push16=, 2
+; ALL-NEXT:    i32.const $push6=, 16
+; ALL-NEXT:    i32.add $push7=, $pop16, $pop6
+; ALL-NEXT:    global.set __stack_pointer, $pop7
+; ALL-NEXT:    return
+  %val16 = load half, ptr %addr
+  %val32 = fpext half %val16 to fp128
+  ret fp128 %val32
+}
+
+define void @test_trunc32(float %in, ptr %addr) nounwind {
+; ALL-LABEL: test_trunc32:
+; ALL:         .functype test_trunc32 (f32, i32) -> ()
+; ALL-NEXT:  # %bb.0:
+; ALL-NEXT:    local.get $push2=, 1
+; ALL-NEXT:    local.get $push1=, 0
+; ALL-NEXT:    call $push0=, __truncsfhf2, $pop1
+; ALL-NEXT:    i32.store16 0($pop2), $pop0
+; ALL-NEXT:    return
+  %val16 = fptrunc float %in to half
+  store half %val16, ptr %addr
+  ret void
+}
+
+define void @test_trunc64(double %in, ptr %addr) nounwind {
+; ALL-LABEL: test_trunc64:
+; ALL:         .functype test_trunc64 (f64, i32) -> ()
+; ALL-NEXT:  # %bb.0:
+; ALL-NEXT:    local.get $push2=, 1
+; ALL-NEXT:    local.get $push1=, 0
+; ALL-NEXT:    call $push0=, __truncdfhf2, $pop1
+; ALL-NEXT:    i32.store16 0($pop2), $pop0
+; ALL-NEXT:    return
+  %val16 = fptrunc double %in to half
+  store half %val16, ptr %addr
+  ret void
+}
+
+define void @test_trunc128(fp128 %in, ptr %addr) nounwind {
+; ALL-LABEL: test_trunc128:
+; ALL:         .functype test_trunc128 (i64, i64, i32) -> ()
+; ALL-NEXT:  # %bb.0:
+; ALL-NEXT:    local.get $push3=, 2
+; ALL-NEXT:    local.get $push2=, 0
+; ALL-NEXT:    local.get $push1=, 1
+; ALL-NEXT:    call $push0=, __trunctfhf2, $pop2, $pop1
+; ALL-NEXT:    i32.store16 0($pop3), $pop0
+; ALL-NEXT:    return
+  %val16 = fptrunc fp128 %in to half
+  store half %val16, ptr %addr
+  ret void
+}
+
+define i64 @test_fptosi_i64(ptr %p) nounwind {
+; DEFISEL-LABEL: test_fptosi_i64:
+; DEFISEL:         .functype test_fptosi_i64 (i32) -> (i64)
+; DEFISEL-NEXT:  # %bb.0:
+; DEFISEL-NEXT:    local.get $push3=, 0
+; DEFISEL-NEXT:    i32.load16_u $push0=, 0($pop3)
+; DEFISEL-NEXT:    call $push1=, __extendhfsf2, $pop0
+; DEFISEL-NEXT:    i64.trunc_sat_f32_s $push2=, $pop1
+; DEFISEL-NEXT:    return $pop2
+;
+; FASTISEL-LABEL: test_fptosi_i64:
+; FASTISEL:         .functype test_fptosi_i64 (i32) -> (i64)
+; FASTISEL-NEXT:  # %bb.0:
+; FASTISEL-NEXT:    local.get $push3=, 0
+; FASTISEL-NEXT:    i32.load16_u $push1=, 0($pop3)
+; FASTISEL-NEXT:    call $push2=, __extendhfsf2, $pop1
+; FASTISEL-NEXT:    i64.trunc_sat_f32_s $push0=, $pop2
+; FASTISEL-NEXT:    return $pop0
+  %a = load half, ptr %p, align 2
+  %r = fptosi half %a to i64
+  ret i64 %r
+}
+
+define void @test_sitofp_i64(i64 %a, ptr %p) nounwind {
+; ALL-LABEL: test_sitofp_i64:
+; ALL:         .functype test_sitofp_i64 (i64, i32) -> ()
+; ALL-NEXT:  # %bb.0:
+; ALL-NEXT:    local.get $push3=, 1
+; ALL-NEXT:    local.get $push2=, 0
+; ALL-NEXT:    f32.convert_i64_s $push0=, $pop2
+; ALL-NEXT:    call $push1=, __truncsfhf2, $pop0
+; ALL-NEXT:    i32.store16 0($pop3), $pop1
+; ALL-NEXT:    return
+  %r = sitofp i64 %a to half
+  store half %r, ptr %p
+  ret void
+}
+
+define i64 @test_fptoui_i64(ptr %p) nounwind {
+; DEFISEL-LABEL: test_fptoui_i64:
+; DEFISEL:         .functype test_fptoui_i64 (i32) -> (i64)
+; DEFISEL-NEXT:  # %bb.0:
+; DEFISEL-NEXT:    local.get $push3=, 0
+; DEFISEL-NEXT:    i32.load16_u $push0=, 0($pop3)
+; DEFISEL-NEXT:    call $push1=, __extendhfsf2, $pop0
+; DEFISEL-NEXT:    i64.trunc_sat_f32_u $push2=, $pop1
+; DEFISEL-NEXT:    return $pop2
+;
+; FASTISEL-LABEL: test_fptoui_i64:
+; FASTISEL:         .functype test_fptoui_i64 (i32) -> (i64)
+; FASTISEL-NEXT:  # %bb.0:
+; FASTISEL-NEXT:    local.get $push3=, 0
+; FASTISEL-NEXT:    i32.load16_u $push1=, 0($pop3)
+; FASTISEL-NEXT:    call $push2=, __extendhfsf2, $pop1
+; FASTISEL-NEXT:    i64.trunc_sat_f32_u $push0=, $pop2
+; FASTISEL-NEXT:    return $pop0
+  %a = load half, ptr %p, align 2
+  %r = fptoui half %a to i64
+  ret i64 %r
+}
+
+define void @test_uitofp_i64(i64 %a, ptr %p) nounwind {
+; ALL-LABEL: test_uitofp_i64:
+; ALL:         .functype test_uitofp_i64 (i64, i32) -> ()
+; ALL-NEXT:  # %bb.0:
+; ALL-NEXT:    local.get $push3=, 1
+; ALL-NEXT:    local.get $push2=, 0
+; ALL-NEXT:    f32.convert_i64_u $push0=, $pop2
+; ALL-NEXT:    call $push1=, __truncsfhf2, $pop0
+; ALL-NEXT:    i32.store16 0($pop3), $pop1
+; ALL-NEXT:    return
+  %r = uitofp i64 %a to half
+  store half %r, ptr %p
+  ret void
+}
+
+define <4 x float> @test_extend32_vec4(ptr %p) nounwind {
+; ALL-LABEL: test_extend32_vec4:
+; ALL:         .functype test_extend32_vec4 (i32, i32) -> ()
+; ALL-NEXT:  # %bb.0:
+; ALL-NEXT:    local.get $push9=, 0
+; ALL-NEXT:    local.get $push8=, 1
+; ALL-NEXT:    i32.load16_u $push0=, 6($pop8)
+; ALL-NEXT:    call $push1=, __extendhfsf2, $pop0
+; ALL-NEXT:    f32.store 12($pop9), $pop1
+; ALL-NEXT:    local.get $push11=, 0
+; ALL-NEXT:    local.get $push10=, 1
+; ALL-NEXT:    i32.load16_u $push2=, 4($pop10)
+; ALL-NEXT:    call $push3=, __extendhfsf2, $pop2
+; ALL-NEXT:    f32.store 8($pop11), $pop3
+; ALL-NEXT:    local.get $push13=, 0
+; ALL-NEXT:    local.get $push12=, 1
+; ALL-NEXT:    i32.load16_u $push4=, 2($pop12)
+; ALL-NEXT:    call $push5=, __extendhfsf2, $pop4
+; ALL-NEXT:    f32.store 4($pop13), $pop5
+; ALL-NEXT:    local.get $push15=, 0
+; ALL-NEXT:    local.get $push14=, 1
+; ALL-NEXT:    i32.load16_u $push6=, 0($pop14)
+; ALL-NEXT:    call $push7=, __extendhfsf2, $pop6
+; ALL-NEXT:    f32.store 0($pop15), $pop7
+; ALL-NEXT:    return
+  %a = load <4 x half>, ptr %p, align 8
+  %b = fpext <4 x half> %a to <4 x float>
+  ret <4 x float> %b
+}
+
+define <4 x double> @test_extend64_vec4(ptr %p) nounwind {
+; ALL-LABEL: test_extend64_vec4:
+; ALL:         .functype test_extend64_vec4 (i32, i32) -> ()
+; ALL-NEXT:  # %bb.0:
+; ALL-NEXT:    local.get $push13=, 0
+; ALL-NEXT:    local.get $push12=, 1
+; ALL-NEXT:    i64.load16_u $push0=, 6($pop12)
+; ALL-NEXT:    call $push1=, __extendhfsf2, $pop0
+; ALL-NEXT:    f64.promote_f32 $push2=, $pop1
+; ALL-NEXT:    f64.store 24($pop13), $pop2
+; ALL-NEXT:    local.get $push15=, 0
+; ALL-NEXT:    local.get $push14=, 1
+; ALL-NEXT:    i64.load16_u $push3=, 4($pop14)
+; ALL-NEXT:    call $push4=, __extendhfsf2, $pop3
+; ALL-NEXT:    f64.promote_f32 $push5=, $pop4
+; ALL-NEXT:    f64.store 16($pop15), $pop5
+; ALL-NEXT:    local.get $push17=, 0
+; ALL-NEXT:    local.get $push16=, 1
+; ALL-NEXT:    i64.load16_u $push6=, 2($pop16)
+; ALL-NEXT:    call $push7=, __extendhfsf2, $pop6
+; ALL-NEXT:    f64.promote_f32 $push8=, $pop7
+; ALL-NEXT:    f64.store 8($pop17), $pop8
+; ALL-NEXT:    local.get $push19=, 0
+; ALL-NEXT:    local.get $push18=, 1
+; ALL-NEXT:    i64.load16_u $push9=, 0($pop18)
+; ALL-NEXT:    call $push10=, __extendhfsf2, $pop9
+; ALL-NEXT:    f64.promote_f32 $push11=, $pop10
+; ALL-NEXT:    f64.store 0($pop19), $pop11
+; ALL-NEXT:    return
+  %a = load <4 x half>, ptr %p, align 8
+  %b = fpext <4 x half> %a to <4 x double>
+  ret <4 x double> %b
+}
+
+define void @test_trunc32_vec4(<4 x float> %a, ptr %p) nounwind {
+; DEFISEL-LABEL: test_trunc32_vec4:
+; DEFISEL:         .functype test_trunc32_vec4 (f32, f32, f32, f32, i32) -> ()
+; DEFISEL-NEXT:  # %bb.0:
+; DEFISEL-NEXT:    local.get $push5=, 4
+; DEFISEL-NEXT:    local.get $push4=, 3
+; DEFISEL-NEXT:    call $push0=, __truncsfhf2, $pop4
+; DEFISEL-NEXT:    i32.store16 6($pop5), $pop0
+; DEFISEL-NEXT:    local.get $push7=, 4
+; DEFISEL-NEXT:    local.get $push6=, 2
+; DEFISEL-NEXT:    call $push1=, __truncsfhf2, $pop6
+; DEFISEL-NEXT:    i32.store16 4($pop7), $pop1
+; DEFISEL-NEXT:    local.get $push9=, 4
+; DEFISEL-NEXT:    local.get $push8=, 1
+; DEFISEL-NEXT:    call $push2=, __truncsfhf2, $pop8
+; DEFISEL-NEXT:    i32.store16 2($pop9), $pop2
+; DEFISEL-NEXT:    local.get $push11=, 4
+; DEFISEL-NEXT:    local.get $push10=, 0
+; DEFISEL-NEXT:    call $push3=, __truncsfhf2, $pop10
+; DEFISEL-NEXT:    i32.store16 0($pop11), $pop3
+; DEFISEL-NEXT:    return
+;
+; FASTISEL-LABEL: test_trunc32_vec4:
+; FASTISEL:         .functype test_trunc32_vec4 (f32, f32, f32, f32, i32) -> ()
+; FASTISEL-NEXT:  # %bb.0:
+; FASTISEL-NEXT:    local.get $push5=, 4
+; FASTISEL-NEXT:    local.get $push4=, 0
+; FASTISEL-NEXT:    call $push0=, __truncsfhf2, $pop4
+; FASTISEL-NEXT:    i32.store16 0($pop5), $pop0
+; FASTISEL-NEXT:    local.get $push7=, 4
+; FASTISEL-NEXT:    local.get $push6=, 1
+; FASTISEL-NEXT:    call $push1=, __truncsfhf2, $pop6
+; FASTISEL-NEXT:    i32.store16 2($pop7), $pop1
+; FASTISEL-NEXT:    local.get $push9=, 4
+; FASTISEL-NEXT:    local.get $push8=, 2
+; FASTISEL-NEXT:    call $push2=, __truncsfhf2, $pop8
+; FASTISEL-NEXT:    i32.store16 4($pop9), $pop2
+; FASTISEL-NEXT:    local.get $push11=, 4
+; FASTISEL-NEXT:    local.get $push10=, 3
+; FASTISEL-NEXT:    call $push3=, __truncsfhf2, $pop10
+; FASTISEL-NEXT:    i32.store16 6($pop11), $pop3
+; FASTISEL-NEXT:    return
+  %v = fptrunc <4 x float> %a to <4 x half>
+  store <4 x half> %v, ptr %p
+  ret void
+}
+
+define void @test_trunc64_vec4(<4 x double> %a, ptr %p) nounwind {
+; DEFISEL-LABEL: test_trunc64_vec4:
+; DEFISEL:         .functype test_trunc64_vec4 (f64, f64, f64, f64, i32) -> ()
+; DEFISEL-NEXT:  # %bb.0:
+; DEFISEL-NEXT:    local.get $push5=, 4
+; DEFISEL-NEXT:    local.get $push4=, 3
+; DEFISEL-NEXT:    call $push0=, __truncdfhf2, $pop4
+; DEFISEL-NEXT:    i32.store16 6($pop5), $pop0
+; DEFISEL-NEXT:    local.get $push7=, 4
+; DEFISEL-NEXT:    local.get $push6=, 2
+; DEFISEL-NEXT:    call $push1=, __truncdfhf2, $pop6
+; DEFISEL-NEXT:    i32.store16 4($pop7), $pop1
+; DEFISEL-NEXT:    local.get $push9=, 4
+; DEFISEL-NEXT:    local.get $push8=, 1
+; DEFISEL-NEXT:    call $push2=, __truncdfhf2, $pop8
+; DEFISEL-NEXT:    i32.store16 2($pop9), $pop2
+; DEFISEL-NEXT:    local.get $push11=, 4
+; DEFISEL-NEXT:    local.get $push10=, 0
+; DEFISEL-NEXT:    call $push3=, __truncdfhf2, $pop10
+; DEFISEL-NEXT:    i32.store16 0($pop11), $pop3
+; DEFISEL-NEXT:    return
+;
+; FASTISEL-LABEL: test_trunc64_vec4:
+; FASTISEL:         .functype test_trunc64_vec4 (f64, f64, f64, f64, i32) -> ()
+; FASTISEL-NEXT:  # %bb.0:
+; FASTISEL-NEXT:    local.get $push5=, 4
+; FASTISEL-NEXT:    local.get $push4=, 0
+; FASTISEL-NEXT:    call $push0=, __truncdfhf2, $pop4
+; FASTISEL-NEXT:    i32.store16 0($pop5), $pop0
+; FASTISEL-NEXT:    local.get $push7=, 4
+; FASTISEL-NEXT:    local.get $push6=, 1
+; FASTISEL-NEXT:    call $push1=, __truncdfhf2, $pop6
+; FASTISEL-NEXT:    i32.store16 2($pop7), $pop1
+; FASTISEL-NEXT:    local.get $push9=, 4
+; FASTISEL-NEXT:    local.get $push8=, 2
+; FASTISEL-NEXT:    call $push2=, __truncdfhf2, $pop8
+; FASTISEL-NEXT:    i32.store16 4($pop9), $pop2
+; FASTISEL-NEXT:    local.get $push11=, 4
+; FASTISEL-NEXT:    local.get $push10=, 3
+; FASTISEL-NEXT:    call $push3=, __truncdfhf2, $pop10
+; FASTISEL-NEXT:    i32.store16 6($pop11), $pop3
+; FASTISEL-NEXT:    return
+  %v = fptrunc <4 x double> %a to <4 x half>
+  store <4 x half> %v, ptr %p
+  ret void
+}
+
+define float @test_sitofp_fadd_i32(i32 %a, ptr %b) nounwind {
+; DEFISEL-LABEL: test_sitofp_fadd_i32:
+; DEFISEL:         .functype test_sitofp_fadd_i32 (i32, i32) -> (f32)
+; DEFISEL-NEXT:  # %bb.0:
+; DEFISEL-NEXT:    local.get $push6=, 1
+; DEFISEL-NEXT:    i32.load16_u $push1=, 0($pop6)
+; DEFISEL-NEXT:    call $push2=, __extendhfsf2, $pop1
+; DEFISEL-NEXT:    local.get $push7=, 0
+; DEFISEL-NEXT:    f32.convert_i32_s $push0=, $pop7
+; DEFISEL-NEXT:    call $push3=, __truncsfhf2, $pop0
+; DEFISEL-NEXT:    call $push4=, __extendhfsf2, $pop3
+; DEFISEL-NEXT:    f32.add $push5=, $pop2, $pop4
+; DEFISEL-NEXT:    return $pop5
+;
+; FASTISEL-LABEL: test_sitofp_fadd_i32:
+; FASTISEL:         .functype test_sitofp_fadd_i32 (i32, i32) -> (f32)
+; FASTISEL-NEXT:  # %bb.0:
+; FASTISEL-NEXT:    local.get $push6=, 1
+; FASTISEL-NEXT:    i32.load16_u $push2=, 0($pop6)
+; FASTISEL-NEXT:    call $push3=, __extendhfsf2, $pop2
+; FASTISEL-NEXT:    local.get $push7=, 0
+; FASTISEL-NEXT:    f32.convert_i32_s $push1=, $pop7
+; FASTISEL-NEXT:    call $push4=, __truncsfhf2, $pop1
+; FASTISEL-NEXT:    call $push5=, __extendhfsf2, $pop4
+; FASTISEL-NEXT:    f32.add $push0=, $pop3, $pop5
+; FASTISEL-NEXT:    return $pop0
+  %tmp0 = load half, ptr %b
+  %tmp1 = sitofp i32 %a to half
+  %tmp2 = fadd half %tmp0, %tmp1
+  %tmp3 = fpext half %tmp2 to float
+  ret float %tmp3
+}
+
+define half @chained_fp_ops(half %x) {
+; ALL-LABEL: chained_fp_ops:
+; ALL:         .functype chained_fp_ops (f32) -> (f32)
+; ALL-NEXT:  # %bb.0: # %start
+; ALL-NEXT:    local.get $push6=, 0
+; ALL-NEXT:    call $push0=, __truncsfhf2, $pop6
+; ALL-NEXT:    call $push5=, __extendhfsf2, $pop0
+; ALL-NEXT:    local.tee $push4=, 0, $pop5
+; ALL-NEXT:    local.get $push7=, 0
+; ALL-NEXT:    f32.add $push1=, $pop4, $pop7
+; ALL-NEXT:    f32.const $push2=, 0x1p-1
+; ALL-NEXT:    f32.mul $push3=, $pop1, $pop2
+; ALL-NEXT:    return $pop3
+start:
+  %y = fmul half %x, 0xH4000
+  %z = fdiv half %y, 0xH4000
+  ret half %z
+}
+
+define half @test_select_cc(half) nounwind {
+; ALL-LABEL: test_select_cc:
+; ALL:         .functype test_select_cc (f32) -> (f32)
+; ALL-NEXT:  # %bb.0:
+; ALL-NEXT:    f32.const $push4=, 0x1p0
+; ALL-NEXT:    f32.const $push0=, 0x0p0
+; ALL-NEXT:    local.get $push7=, 0
+; ALL-NEXT:    call $push1=, __truncsfhf2, $pop7
+; ALL-NEXT:    call $push2=, __extendhfsf2, $pop1
+; ALL-NEXT:    f32.const $push6=, 0x0p0
+; ALL-NEXT:    f32.ne $push3=, $pop2, $pop6
+; ALL-NEXT:    f32.select $push5=, $pop4, $pop0, $pop3
+; ALL-NEXT:    return $pop5
+  %2 = fcmp une half %0, 0xH0000
+  %3 = uitofp i1 %2 to half
+  ret half %3
+}
+
+define half @fabs(half %x) nounwind {
+; ALL-LABEL: fabs:
+; ALL:         .functype fabs (f32) -> (f32)
+; ALL-NEXT:  # %bb.0:
+; ALL-NEXT:    local.get $push3=, 0
+; ALL-NEXT:    call $push0=, __truncsfhf2, $pop3
+; ALL-NEXT:    call $push1=, __extendhfsf2, $pop0
+; ALL-NEXT:    f32.abs $push2=, $pop1
+; ALL-NEXT:    return $pop2
+  %a = call half @llvm.fabs.f16(half %x)
+  ret half %a
+}
+
+define half @fcopysign(half %x, half %y) nounwind {
+; ALL-LABEL: fcopysign:
+; ALL:         .functype fcopysign (f32, f32) -> (f32)
+; ALL-NEXT:  # %bb.0:
+; ALL-NEXT:    local.get $push3=, 0
+; ALL-NEXT:    call $push0=, __truncsfhf2, $pop3
+; ALL-NEXT:    call $push1=, __extendhfsf2, $pop0
+; ALL-NEXT:    local.get $push4=, 1
+; ALL-NEXT:    f32.copysign $push2=, $pop1, $pop4
+; ALL-NEXT:    return $pop2
+  %a = call half @llvm.copysign.f16(half %x, half %y)
+  ret half %a
 }

>From 57babd0999cf26824efb40124ed79324ef17e495 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross at umich.edu>
Date: Fri, 8 Aug 2025 23:43:02 -0500
Subject: [PATCH 2/2] [WebAssembly] Change `half` to use soft promotion rather
 than `PromoteFloat`

The default `half` legalization, which Wasm currently uses, does not
respect IEEE conventions: for example, casting to bits may invoke a
lossy libcall, meaning soft float operations cannot be correctly
implemented. Change to the soft promotion legalization which passes
`f16` as an `i16` and treats each `half` operation as an individual
f16->f32->libcall->f32->f16 sequence.

Of note in the test updates are that `from_bits` and `to_bits` are now
libcall-free, and that chained operations now round back to `f16` after
each step.

Fixes the wasm portion of https://github.com/llvm/llvm-project/issues/97981
Fixes the wasm portion of https://github.com/llvm/llvm-project/issues/97975
Fixes: https://github.com/llvm/llvm-project/issues/96438
Fixes: https://github.com/llvm/llvm-project/issues/96438
---
 .../WebAssembly/WebAssemblyISelLowering.h     |   2 +
 llvm/test/CodeGen/WebAssembly/f16.ll          | 202 +++++------
 llvm/test/CodeGen/WebAssembly/fpclamptosat.ll | 127 +++----
 .../CodeGen/WebAssembly/fpclamptosat_vec.ll   | 330 +++++++-----------
 llvm/test/CodeGen/WebAssembly/llvm.sincos.ll  |  50 ++-
 5 files changed, 287 insertions(+), 424 deletions(-)

diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
index 72401a7a259c0..e0ce3d1dcb620 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
@@ -40,6 +40,8 @@ class WebAssemblyTargetLowering final : public TargetLowering {
   MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override;
   MVT getPointerMemTy(const DataLayout &DL, uint32_t AS = 0) const override;
 
+  bool softPromoteHalfType() const override { return true; }
+
 private:
   /// Keep a pointer to the WebAssemblySubtarget around so that we can make the
   /// right decision when generating code for different targets.
diff --git a/llvm/test/CodeGen/WebAssembly/f16.ll b/llvm/test/CodeGen/WebAssembly/f16.ll
index 0486975f6cba7..3c31d55abfadc 100644
--- a/llvm/test/CodeGen/WebAssembly/f16.ll
+++ b/llvm/test/CodeGen/WebAssembly/f16.ll
@@ -10,12 +10,11 @@ target triple = "wasm32-unknown-unknown"
 
 define void @store(half %x, ptr %p) nounwind {
 ; ALL-LABEL: store:
-; ALL:         .functype store (f32, i32) -> ()
+; ALL:         .functype store (i32, i32) -> ()
 ; ALL-NEXT:  # %bb.0:
-; ALL-NEXT:    local.get $push2=, 1
-; ALL-NEXT:    local.get $push1=, 0
-; ALL-NEXT:    call $push0=, __truncsfhf2, $pop1
-; ALL-NEXT:    i32.store16 0($pop2), $pop0
+; ALL-NEXT:    local.get $push1=, 1
+; ALL-NEXT:    local.get $push0=, 0
+; ALL-NEXT:    i32.store16 0($pop1), $pop0
 ; ALL-NEXT:    return
   store half %x, ptr %p
   ret void
@@ -23,12 +22,11 @@ define void @store(half %x, ptr %p) nounwind {
 
 define half @return(ptr %p) nounwind {
 ; ALL-LABEL: return:
-; ALL:         .functype return (i32) -> (f32)
+; ALL:         .functype return (i32) -> (i32)
 ; ALL-NEXT:  # %bb.0:
-; ALL-NEXT:    local.get $push2=, 0
-; ALL-NEXT:    i32.load16_u $push0=, 0($pop2)
-; ALL-NEXT:    call $push1=, __extendhfsf2, $pop0
-; ALL-NEXT:    return $pop1
+; ALL-NEXT:    local.get $push1=, 0
+; ALL-NEXT:    i32.load16_u $push0=, 0($pop1)
+; ALL-NEXT:    return $pop0
   %r = load half, ptr %p
   ret half %r
 }
@@ -80,50 +78,28 @@ define dso_local float @loadf(ptr nocapture readonly %a) local_unnamed_addr noun
 }
 
 define dso_local void @stored(ptr nocapture %a, double %b) local_unnamed_addr nounwind {
-; DEFISEL-LABEL: stored:
-; DEFISEL:         .functype stored (i32, f64) -> ()
-; DEFISEL-NEXT:  # %bb.0:
-; DEFISEL-NEXT:    local.get $push2=, 0
-; DEFISEL-NEXT:    local.get $push1=, 1
-; DEFISEL-NEXT:    call $push0=, __truncdfhf2, $pop1
-; DEFISEL-NEXT:    i32.store16 0($pop2), $pop0
-; DEFISEL-NEXT:    return
-;
-; FASTISEL-LABEL: stored:
-; FASTISEL:         .functype stored (i32, f64) -> ()
-; FASTISEL-NEXT:  # %bb.0:
-; FASTISEL-NEXT:    local.get $push4=, 0
-; FASTISEL-NEXT:    local.get $push3=, 1
-; FASTISEL-NEXT:    call $push2=, __truncdfhf2, $pop3
-; FASTISEL-NEXT:    i32.const $push1=, 65535
-; FASTISEL-NEXT:    i32.and $push0=, $pop2, $pop1
-; FASTISEL-NEXT:    i32.store16 0($pop4), $pop0
-; FASTISEL-NEXT:    return
+; ALL-LABEL: stored:
+; ALL:         .functype stored (i32, f64) -> ()
+; ALL-NEXT:  # %bb.0:
+; ALL-NEXT:    local.get $push2=, 0
+; ALL-NEXT:    local.get $push1=, 1
+; ALL-NEXT:    call $push0=, __truncdfhf2, $pop1
+; ALL-NEXT:    i32.store16 0($pop2), $pop0
+; ALL-NEXT:    return
   %x = tail call i16 @llvm.convert.to.fp16.f64(double %b)
   store i16 %x, ptr %a, align 2
   ret void
 }
 
 define dso_local void @storef(ptr nocapture %a, float %b) local_unnamed_addr nounwind {
-; DEFISEL-LABEL: storef:
-; DEFISEL:         .functype storef (i32, f32) -> ()
-; DEFISEL-NEXT:  # %bb.0:
-; DEFISEL-NEXT:    local.get $push2=, 0
-; DEFISEL-NEXT:    local.get $push1=, 1
-; DEFISEL-NEXT:    call $push0=, __truncsfhf2, $pop1
-; DEFISEL-NEXT:    i32.store16 0($pop2), $pop0
-; DEFISEL-NEXT:    return
-;
-; FASTISEL-LABEL: storef:
-; FASTISEL:         .functype storef (i32, f32) -> ()
-; FASTISEL-NEXT:  # %bb.0:
-; FASTISEL-NEXT:    local.get $push4=, 0
-; FASTISEL-NEXT:    local.get $push3=, 1
-; FASTISEL-NEXT:    call $push2=, __truncsfhf2, $pop3
-; FASTISEL-NEXT:    i32.const $push1=, 65535
-; FASTISEL-NEXT:    i32.and $push0=, $pop2, $pop1
-; FASTISEL-NEXT:    i32.store16 0($pop4), $pop0
-; FASTISEL-NEXT:    return
+; ALL-LABEL: storef:
+; ALL:         .functype storef (i32, f32) -> ()
+; ALL-NEXT:  # %bb.0:
+; ALL-NEXT:    local.get $push2=, 0
+; ALL-NEXT:    local.get $push1=, 1
+; ALL-NEXT:    call $push0=, __truncsfhf2, $pop1
+; ALL-NEXT:    i32.store16 0($pop2), $pop0
+; ALL-NEXT:    return
   %x = tail call i16 @llvm.convert.to.fp16.f32(float %b)
   store i16 %x, ptr %a, align 2
   ret void
@@ -170,33 +146,20 @@ define void @test_bitcast_to_half(ptr %addr, i16 %in) nounwind {
 
 define half @from_bits(i16 %x) nounwind {
 ; ALL-LABEL: from_bits:
-; ALL:         .functype from_bits (i32) -> (f32)
+; ALL:         .functype from_bits (i32) -> (i32)
 ; ALL-NEXT:  # %bb.0:
-; ALL-NEXT:    local.get $push1=, 0
-; ALL-NEXT:    call $push0=, __extendhfsf2, $pop1
+; ALL-NEXT:    local.get $push0=, 0
 ; ALL-NEXT:    return $pop0
   %res = bitcast i16 %x to half
   ret half %res
 }
 
 define i16 @to_bits(half %x) nounwind {
-; DEFISEL-LABEL: to_bits:
-; DEFISEL:         .functype to_bits (f32) -> (i32)
-; DEFISEL-NEXT:  # %bb.0:
-; DEFISEL-NEXT:    local.get $push3=, 0
-; DEFISEL-NEXT:    call $push1=, __truncsfhf2, $pop3
-; DEFISEL-NEXT:    i32.const $push0=, 65535
-; DEFISEL-NEXT:    i32.and $push2=, $pop1, $pop0
-; DEFISEL-NEXT:    return $pop2
-;
-; FASTISEL-LABEL: to_bits:
-; FASTISEL:         .functype to_bits (f32) -> (i32)
-; FASTISEL-NEXT:  # %bb.0:
-; FASTISEL-NEXT:    local.get $push3=, 0
-; FASTISEL-NEXT:    call $push2=, __truncsfhf2, $pop3
-; FASTISEL-NEXT:    i32.const $push1=, 65535
-; FASTISEL-NEXT:    i32.and $push0=, $pop2, $pop1
-; FASTISEL-NEXT:    return $pop0
+; ALL-LABEL: to_bits:
+; ALL:         .functype to_bits (i32) -> (i32)
+; ALL-NEXT:  # %bb.0:
+; ALL-NEXT:    local.get $push0=, 0
+; ALL-NEXT:    return $pop0
     %res = bitcast half %x to i16
     ret i16 %res
 }
@@ -559,27 +522,35 @@ define float @test_sitofp_fadd_i32(i32 %a, ptr %b) nounwind {
 ; DEFISEL-LABEL: test_sitofp_fadd_i32:
 ; DEFISEL:         .functype test_sitofp_fadd_i32 (i32, i32) -> (f32)
 ; DEFISEL-NEXT:  # %bb.0:
-; DEFISEL-NEXT:    local.get $push6=, 1
-; DEFISEL-NEXT:    i32.load16_u $push1=, 0($pop6)
+; DEFISEL-NEXT:    local.get $push8=, 1
+; DEFISEL-NEXT:    i32.load16_u $push7=, 0($pop8)
+; DEFISEL-NEXT:    local.set 1, $pop7
+; DEFISEL-NEXT:    local.get $push9=, 0
+; DEFISEL-NEXT:    f32.convert_i32_s $push0=, $pop9
+; DEFISEL-NEXT:    call $push1=, __truncsfhf2, $pop0
 ; DEFISEL-NEXT:    call $push2=, __extendhfsf2, $pop1
-; DEFISEL-NEXT:    local.get $push7=, 0
-; DEFISEL-NEXT:    f32.convert_i32_s $push0=, $pop7
-; DEFISEL-NEXT:    call $push3=, __truncsfhf2, $pop0
-; DEFISEL-NEXT:    call $push4=, __extendhfsf2, $pop3
-; DEFISEL-NEXT:    f32.add $push5=, $pop2, $pop4
-; DEFISEL-NEXT:    return $pop5
+; DEFISEL-NEXT:    local.get $push10=, 1
+; DEFISEL-NEXT:    call $push3=, __extendhfsf2, $pop10
+; DEFISEL-NEXT:    f32.add $push4=, $pop2, $pop3
+; DEFISEL-NEXT:    call $push5=, __truncsfhf2, $pop4
+; DEFISEL-NEXT:    call $push6=, __extendhfsf2, $pop5
+; DEFISEL-NEXT:    return $pop6
 ;
 ; FASTISEL-LABEL: test_sitofp_fadd_i32:
 ; FASTISEL:         .functype test_sitofp_fadd_i32 (i32, i32) -> (f32)
 ; FASTISEL-NEXT:  # %bb.0:
-; FASTISEL-NEXT:    local.get $push6=, 1
-; FASTISEL-NEXT:    i32.load16_u $push2=, 0($pop6)
+; FASTISEL-NEXT:    local.get $push8=, 1
+; FASTISEL-NEXT:    i32.load16_u $push7=, 0($pop8)
+; FASTISEL-NEXT:    local.set 1, $pop7
+; FASTISEL-NEXT:    local.get $push9=, 0
+; FASTISEL-NEXT:    f32.convert_i32_s $push1=, $pop9
+; FASTISEL-NEXT:    call $push2=, __truncsfhf2, $pop1
 ; FASTISEL-NEXT:    call $push3=, __extendhfsf2, $pop2
-; FASTISEL-NEXT:    local.get $push7=, 0
-; FASTISEL-NEXT:    f32.convert_i32_s $push1=, $pop7
-; FASTISEL-NEXT:    call $push4=, __truncsfhf2, $pop1
-; FASTISEL-NEXT:    call $push5=, __extendhfsf2, $pop4
-; FASTISEL-NEXT:    f32.add $push0=, $pop3, $pop5
+; FASTISEL-NEXT:    local.get $push10=, 1
+; FASTISEL-NEXT:    call $push4=, __extendhfsf2, $pop10
+; FASTISEL-NEXT:    f32.add $push5=, $pop3, $pop4
+; FASTISEL-NEXT:    call $push6=, __truncsfhf2, $pop5
+; FASTISEL-NEXT:    call $push0=, __extendhfsf2, $pop6
 ; FASTISEL-NEXT:    return $pop0
   %tmp0 = load half, ptr %b
   %tmp1 = sitofp i32 %a to half
@@ -590,17 +561,20 @@ define float @test_sitofp_fadd_i32(i32 %a, ptr %b) nounwind {
 
 define half @chained_fp_ops(half %x) {
 ; ALL-LABEL: chained_fp_ops:
-; ALL:         .functype chained_fp_ops (f32) -> (f32)
+; ALL:         .functype chained_fp_ops (i32) -> (i32)
+; ALL-NEXT:    .local f32
 ; ALL-NEXT:  # %bb.0: # %start
-; ALL-NEXT:    local.get $push6=, 0
-; ALL-NEXT:    call $push0=, __truncsfhf2, $pop6
-; ALL-NEXT:    call $push5=, __extendhfsf2, $pop0
-; ALL-NEXT:    local.tee $push4=, 0, $pop5
-; ALL-NEXT:    local.get $push7=, 0
-; ALL-NEXT:    f32.add $push1=, $pop4, $pop7
-; ALL-NEXT:    f32.const $push2=, 0x1p-1
-; ALL-NEXT:    f32.mul $push3=, $pop1, $pop2
-; ALL-NEXT:    return $pop3
+; ALL-NEXT:    local.get $push8=, 0
+; ALL-NEXT:    call $push7=, __extendhfsf2, $pop8
+; ALL-NEXT:    local.tee $push6=, 1, $pop7
+; ALL-NEXT:    local.get $push9=, 1
+; ALL-NEXT:    f32.add $push0=, $pop6, $pop9
+; ALL-NEXT:    call $push2=, __truncsfhf2, $pop0
+; ALL-NEXT:    call $push3=, __extendhfsf2, $pop2
+; ALL-NEXT:    f32.const $push1=, 0x1p-1
+; ALL-NEXT:    f32.mul $push4=, $pop3, $pop1
+; ALL-NEXT:    call $push5=, __truncsfhf2, $pop4
+; ALL-NEXT:    return $pop5
 start:
   %y = fmul half %x, 0xH4000
   %z = fdiv half %y, 0xH4000
@@ -609,16 +583,15 @@ start:
 
 define half @test_select_cc(half) nounwind {
 ; ALL-LABEL: test_select_cc:
-; ALL:         .functype test_select_cc (f32) -> (f32)
+; ALL:         .functype test_select_cc (i32) -> (i32)
 ; ALL-NEXT:  # %bb.0:
-; ALL-NEXT:    f32.const $push4=, 0x1p0
+; ALL-NEXT:    i32.const $push4=, 15360
+; ALL-NEXT:    i32.const $push3=, 0
+; ALL-NEXT:    local.get $push6=, 0
+; ALL-NEXT:    call $push1=, __extendhfsf2, $pop6
 ; ALL-NEXT:    f32.const $push0=, 0x0p0
-; ALL-NEXT:    local.get $push7=, 0
-; ALL-NEXT:    call $push1=, __truncsfhf2, $pop7
-; ALL-NEXT:    call $push2=, __extendhfsf2, $pop1
-; ALL-NEXT:    f32.const $push6=, 0x0p0
-; ALL-NEXT:    f32.ne $push3=, $pop2, $pop6
-; ALL-NEXT:    f32.select $push5=, $pop4, $pop0, $pop3
+; ALL-NEXT:    f32.ne $push2=, $pop1, $pop0
+; ALL-NEXT:    i32.select $push5=, $pop4, $pop3, $pop2
 ; ALL-NEXT:    return $pop5
   %2 = fcmp une half %0, 0xH0000
   %3 = uitofp i1 %2 to half
@@ -627,27 +600,28 @@ define half @test_select_cc(half) nounwind {
 
 define half @fabs(half %x) nounwind {
 ; ALL-LABEL: fabs:
-; ALL:         .functype fabs (f32) -> (f32)
+; ALL:         .functype fabs (i32) -> (i32)
 ; ALL-NEXT:  # %bb.0:
-; ALL-NEXT:    local.get $push3=, 0
-; ALL-NEXT:    call $push0=, __truncsfhf2, $pop3
-; ALL-NEXT:    call $push1=, __extendhfsf2, $pop0
-; ALL-NEXT:    f32.abs $push2=, $pop1
-; ALL-NEXT:    return $pop2
+; ALL-NEXT:    local.get $push2=, 0
+; ALL-NEXT:    i32.const $push0=, 32767
+; ALL-NEXT:    i32.and $push1=, $pop2, $pop0
+; ALL-NEXT:    return $pop1
   %a = call half @llvm.fabs.f16(half %x)
   ret half %a
 }
 
 define half @fcopysign(half %x, half %y) nounwind {
 ; ALL-LABEL: fcopysign:
-; ALL:         .functype fcopysign (f32, f32) -> (f32)
+; ALL:         .functype fcopysign (i32, i32) -> (i32)
 ; ALL-NEXT:  # %bb.0:
-; ALL-NEXT:    local.get $push3=, 0
-; ALL-NEXT:    call $push0=, __truncsfhf2, $pop3
-; ALL-NEXT:    call $push1=, __extendhfsf2, $pop0
-; ALL-NEXT:    local.get $push4=, 1
-; ALL-NEXT:    f32.copysign $push2=, $pop1, $pop4
-; ALL-NEXT:    return $pop2
+; ALL-NEXT:    local.get $push5=, 0
+; ALL-NEXT:    i32.const $push2=, 32767
+; ALL-NEXT:    i32.and $push3=, $pop5, $pop2
+; ALL-NEXT:    local.get $push6=, 1
+; ALL-NEXT:    i32.const $push0=, -32768
+; ALL-NEXT:    i32.and $push1=, $pop6, $pop0
+; ALL-NEXT:    i32.or $push4=, $pop3, $pop1
+; ALL-NEXT:    return $pop4
   %a = call half @llvm.copysign.f16(half %x, half %y)
   ret half %a
 }
diff --git a/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll b/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll
index 137994ceac132..0fbaf815b9eba 100644
--- a/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll
+++ b/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll
@@ -103,10 +103,9 @@ entry:
 
 define i32 @stest_f16i32(half %x) {
 ; CHECK-LABEL: stest_f16i32:
-; CHECK:         .functype stest_f16i32 (f32) -> (i32)
+; CHECK:         .functype stest_f16i32 (i32) -> (i32)
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    i32.trunc_sat_f32_s
 ; CHECK-NEXT:    # fallthrough-return
@@ -122,10 +121,9 @@ entry:
 
 define i32 @stest_f16i32_cse(half %x) {
 ; CHECK-LABEL: stest_f16i32_cse:
-; CHECK:         .functype stest_f16i32_cse (f32) -> (i32)
+; CHECK:         .functype stest_f16i32_cse (i32) -> (i32)
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    i64.trunc_sat_f32_s
 ; CHECK-NEXT:    i32.wrap_i64
@@ -138,10 +136,9 @@ entry:
 
 define i32 @utesth_f16i32(half %x) {
 ; CHECK-LABEL: utesth_f16i32:
-; CHECK:         .functype utesth_f16i32 (f32) -> (i32)
+; CHECK:         .functype utesth_f16i32 (i32) -> (i32)
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    i32.trunc_sat_f32_u
 ; CHECK-NEXT:    # fallthrough-return
@@ -155,10 +152,9 @@ entry:
 
 define i32 @utesth_f16i32_cse(half %x) {
 ; CHECK-LABEL: utesth_f16i32_cse:
-; CHECK:         .functype utesth_f16i32_cse (f32) -> (i32)
+; CHECK:         .functype utesth_f16i32_cse (i32) -> (i32)
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    i64.trunc_sat_f32_u
 ; CHECK-NEXT:    i32.wrap_i64
@@ -171,10 +167,9 @@ entry:
 
 define i32 @ustest_f16i32(half %x) {
 ; CHECK-LABEL: ustest_f16i32:
-; CHECK:         .functype ustest_f16i32 (f32) -> (i32)
+; CHECK:         .functype ustest_f16i32 (i32) -> (i32)
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    i32.trunc_sat_f32_u
 ; CHECK-NEXT:    # fallthrough-return
@@ -190,10 +185,9 @@ entry:
 
 define i32 @ustest_f16i32_cse(half %x) {
 ; CHECK-LABEL: ustest_f16i32_cse:
-; CHECK:         .functype ustest_f16i32_cse (f32) -> (i32)
+; CHECK:         .functype ustest_f16i32_cse (i32) -> (i32)
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    i32.trunc_sat_f32_u
 ; CHECK-NEXT:    # fallthrough-return
@@ -373,22 +367,20 @@ entry:
 
 define i16 @stest_f16i16(half %x) {
 ; CHECK-LABEL: stest_f16i16:
-; CHECK:         .functype stest_f16i16 (f32) -> (i32)
-; CHECK-NEXT:    .local i32
+; CHECK:         .functype stest_f16i16 (i32) -> (i32)
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    i32.trunc_sat_f32_s
-; CHECK-NEXT:    local.tee 1
+; CHECK-NEXT:    local.tee 0
 ; CHECK-NEXT:    i32.const 32767
-; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    local.get 0
 ; CHECK-NEXT:    i32.const 32767
 ; CHECK-NEXT:    i32.lt_s
 ; CHECK-NEXT:    i32.select
-; CHECK-NEXT:    local.tee 1
+; CHECK-NEXT:    local.tee 0
 ; CHECK-NEXT:    i32.const -32768
-; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    local.get 0
 ; CHECK-NEXT:    i32.const -32768
 ; CHECK-NEXT:    i32.gt_s
 ; CHECK-NEXT:    i32.select
@@ -405,16 +397,14 @@ entry:
 
 define i16 @utesth_f16i16(half %x) {
 ; CHECK-LABEL: utesth_f16i16:
-; CHECK:         .functype utesth_f16i16 (f32) -> (i32)
-; CHECK-NEXT:    .local i32
+; CHECK:         .functype utesth_f16i16 (i32) -> (i32)
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    i32.trunc_sat_f32_u
-; CHECK-NEXT:    local.tee 1
+; CHECK-NEXT:    local.tee 0
 ; CHECK-NEXT:    i32.const 65535
-; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    local.get 0
 ; CHECK-NEXT:    i32.const 65535
 ; CHECK-NEXT:    i32.lt_u
 ; CHECK-NEXT:    i32.select
@@ -429,10 +419,9 @@ entry:
 
 define i16 @utesth_f16i16_cse(half %x) {
 ; CHECK-LABEL: utesth_f16i16_cse:
-; CHECK:         .functype utesth_f16i16_cse (f32) -> (i32)
+; CHECK:         .functype utesth_f16i16_cse (i32) -> (i32)
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    i32.trunc_sat_f32_u
 ; CHECK-NEXT:    # fallthrough-return
@@ -444,22 +433,20 @@ entry:
 
 define i16 @ustest_f16i16(half %x) {
 ; CHECK-LABEL: ustest_f16i16:
-; CHECK:         .functype ustest_f16i16 (f32) -> (i32)
-; CHECK-NEXT:    .local i32
+; CHECK:         .functype ustest_f16i16 (i32) -> (i32)
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    i32.trunc_sat_f32_s
-; CHECK-NEXT:    local.tee 1
+; CHECK-NEXT:    local.tee 0
 ; CHECK-NEXT:    i32.const 65535
-; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    local.get 0
 ; CHECK-NEXT:    i32.const 65535
 ; CHECK-NEXT:    i32.lt_s
 ; CHECK-NEXT:    i32.select
-; CHECK-NEXT:    local.tee 1
+; CHECK-NEXT:    local.tee 0
 ; CHECK-NEXT:    i32.const 0
-; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    local.get 0
 ; CHECK-NEXT:    i32.const 0
 ; CHECK-NEXT:    i32.gt_s
 ; CHECK-NEXT:    i32.select
@@ -476,10 +463,9 @@ entry:
 
 define i16 @ustest_f16i16_cse(half %x) {
 ; CHECK-LABEL: ustest_f16i16_cse:
-; CHECK:         .functype ustest_f16i16_cse (f32) -> (i32)
+; CHECK:         .functype ustest_f16i16_cse (i32) -> (i32)
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    i32.trunc_sat_f32_u
 ; CHECK-NEXT:    # fallthrough-return
@@ -863,10 +849,9 @@ entry:
 
 define i64 @stest_f16i64(half %x) {
 ; CHECK-LABEL: stest_f16i64:
-; CHECK:         .functype stest_f16i64 (f32) -> (i64)
+; CHECK:         .functype stest_f16i64 (i32) -> (i64)
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    i64.trunc_sat_f32_s
 ; CHECK-NEXT:    # fallthrough-return
@@ -882,7 +867,7 @@ entry:
 
 define i64 @utesth_f16i64(half %x) {
 ; CHECK-LABEL: utesth_f16i64:
-; CHECK:         .functype utesth_f16i64 (f32) -> (i64)
+; CHECK:         .functype utesth_f16i64 (i32) -> (i64)
 ; CHECK-NEXT:    .local i32, i64, i64
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    global.get __stack_pointer
@@ -892,7 +877,6 @@ define i64 @utesth_f16i64(half %x) {
 ; CHECK-NEXT:    global.set __stack_pointer
 ; CHECK-NEXT:    local.get 1
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    call __fixunssfti
 ; CHECK-NEXT:    local.get 1
@@ -921,7 +905,7 @@ entry:
 
 define i64 @utesth_f16i64_cse(half %x) {
 ; CHECK-LABEL: utesth_f16i64_cse:
-; CHECK:         .functype utesth_f16i64_cse (f32) -> (i64)
+; CHECK:         .functype utesth_f16i64_cse (i32) -> (i64)
 ; CHECK-NEXT:    .local i32, i64
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    global.get __stack_pointer
@@ -931,7 +915,6 @@ define i64 @utesth_f16i64_cse(half %x) {
 ; CHECK-NEXT:    global.set __stack_pointer
 ; CHECK-NEXT:    local.get 1
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    call __fixunssfti
 ; CHECK-NEXT:    local.get 1
@@ -951,7 +934,7 @@ entry:
 
 define i64 @ustest_f16i64(half %x) {
 ; CHECK-LABEL: ustest_f16i64:
-; CHECK:         .functype ustest_f16i64 (f32) -> (i64)
+; CHECK:         .functype ustest_f16i64 (i32) -> (i64)
 ; CHECK-NEXT:    .local i32, i64, i64
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    global.get __stack_pointer
@@ -961,7 +944,6 @@ define i64 @ustest_f16i64(half %x) {
 ; CHECK-NEXT:    global.set __stack_pointer
 ; CHECK-NEXT:    local.get 1
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    call __fixsfti
 ; CHECK-NEXT:    local.get 1
@@ -1103,10 +1085,9 @@ entry:
 
 define i32 @stest_f16i32_mm(half %x) {
 ; CHECK-LABEL: stest_f16i32_mm:
-; CHECK:         .functype stest_f16i32_mm (f32) -> (i32)
+; CHECK:         .functype stest_f16i32_mm (i32) -> (i32)
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    i32.trunc_sat_f32_s
 ; CHECK-NEXT:    # fallthrough-return
@@ -1120,10 +1101,9 @@ entry:
 
 define i32 @utesth_f16i32_mm(half %x) {
 ; CHECK-LABEL: utesth_f16i32_mm:
-; CHECK:         .functype utesth_f16i32_mm (f32) -> (i32)
+; CHECK:         .functype utesth_f16i32_mm (i32) -> (i32)
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    i32.trunc_sat_f32_u
 ; CHECK-NEXT:    # fallthrough-return
@@ -1136,10 +1116,9 @@ entry:
 
 define i32 @ustest_f16i32_mm(half %x) {
 ; CHECK-LABEL: ustest_f16i32_mm:
-; CHECK:         .functype ustest_f16i32_mm (f32) -> (i32)
+; CHECK:         .functype ustest_f16i32_mm (i32) -> (i32)
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    i32.trunc_sat_f32_u
 ; CHECK-NEXT:    # fallthrough-return
@@ -1153,10 +1132,9 @@ entry:
 
 define i32 @ustest_f16i32_mm_cse(half %x) {
 ; CHECK-LABEL: ustest_f16i32_mm_cse:
-; CHECK:         .functype ustest_f16i32_mm_cse (f32) -> (i32)
+; CHECK:         .functype ustest_f16i32_mm_cse (i32) -> (i32)
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    i32.trunc_sat_f32_u
 ; CHECK-NEXT:    # fallthrough-return
@@ -1325,22 +1303,20 @@ entry:
 
 define i16 @stest_f16i16_mm(half %x) {
 ; CHECK-LABEL: stest_f16i16_mm:
-; CHECK:         .functype stest_f16i16_mm (f32) -> (i32)
-; CHECK-NEXT:    .local i32
+; CHECK:         .functype stest_f16i16_mm (i32) -> (i32)
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    i32.trunc_sat_f32_s
-; CHECK-NEXT:    local.tee 1
+; CHECK-NEXT:    local.tee 0
 ; CHECK-NEXT:    i32.const 32767
-; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    local.get 0
 ; CHECK-NEXT:    i32.const 32767
 ; CHECK-NEXT:    i32.lt_s
 ; CHECK-NEXT:    i32.select
-; CHECK-NEXT:    local.tee 1
+; CHECK-NEXT:    local.tee 0
 ; CHECK-NEXT:    i32.const -32768
-; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    local.get 0
 ; CHECK-NEXT:    i32.const -32768
 ; CHECK-NEXT:    i32.gt_s
 ; CHECK-NEXT:    i32.select
@@ -1355,16 +1331,14 @@ entry:
 
 define i16 @utesth_f16i16_mm(half %x) {
 ; CHECK-LABEL: utesth_f16i16_mm:
-; CHECK:         .functype utesth_f16i16_mm (f32) -> (i32)
-; CHECK-NEXT:    .local i32
+; CHECK:         .functype utesth_f16i16_mm (i32) -> (i32)
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    i32.trunc_sat_f32_u
-; CHECK-NEXT:    local.tee 1
+; CHECK-NEXT:    local.tee 0
 ; CHECK-NEXT:    i32.const 65535
-; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    local.get 0
 ; CHECK-NEXT:    i32.const 65535
 ; CHECK-NEXT:    i32.lt_u
 ; CHECK-NEXT:    i32.select
@@ -1378,22 +1352,20 @@ entry:
 
 define i16 @ustest_f16i16_mm(half %x) {
 ; CHECK-LABEL: ustest_f16i16_mm:
-; CHECK:         .functype ustest_f16i16_mm (f32) -> (i32)
-; CHECK-NEXT:    .local i32
+; CHECK:         .functype ustest_f16i16_mm (i32) -> (i32)
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    i32.trunc_sat_f32_s
-; CHECK-NEXT:    local.tee 1
+; CHECK-NEXT:    local.tee 0
 ; CHECK-NEXT:    i32.const 65535
-; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    local.get 0
 ; CHECK-NEXT:    i32.const 65535
 ; CHECK-NEXT:    i32.lt_s
 ; CHECK-NEXT:    i32.select
-; CHECK-NEXT:    local.tee 1
+; CHECK-NEXT:    local.tee 0
 ; CHECK-NEXT:    i32.const 0
-; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    local.get 0
 ; CHECK-NEXT:    i32.const 0
 ; CHECK-NEXT:    i32.gt_s
 ; CHECK-NEXT:    i32.select
@@ -1408,10 +1380,9 @@ entry:
 
 define i16 @ustest_f16i16_mm_cse(half %x) {
 ; CHECK-LABEL: ustest_f16i16_mm_cse:
-; CHECK:         .functype ustest_f16i16_mm_cse (f32) -> (i32)
+; CHECK:         .functype ustest_f16i16_mm_cse (i32) -> (i32)
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    i32.trunc_sat_f32_u
 ; CHECK-NEXT:    # fallthrough-return
@@ -1622,10 +1593,9 @@ entry:
 
 define i64 @stest_f16i64_mm(half %x) {
 ; CHECK-LABEL: stest_f16i64_mm:
-; CHECK:         .functype stest_f16i64_mm (f32) -> (i64)
+; CHECK:         .functype stest_f16i64_mm (i32) -> (i64)
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    i64.trunc_sat_f32_s
 ; CHECK-NEXT:    # fallthrough-return
@@ -1639,7 +1609,7 @@ entry:
 
 define i64 @utesth_f16i64_mm(half %x) {
 ; CHECK-LABEL: utesth_f16i64_mm:
-; CHECK:         .functype utesth_f16i64_mm (f32) -> (i64)
+; CHECK:         .functype utesth_f16i64_mm (i32) -> (i64)
 ; CHECK-NEXT:    .local i32, i64, i64
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    global.get __stack_pointer
@@ -1649,7 +1619,6 @@ define i64 @utesth_f16i64_mm(half %x) {
 ; CHECK-NEXT:    global.set __stack_pointer
 ; CHECK-NEXT:    local.get 1
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    call __fixunssfti
 ; CHECK-NEXT:    local.get 1
@@ -1677,7 +1646,7 @@ entry:
 
 define i64 @ustest_f16i64_mm(half %x) {
 ; CHECK-LABEL: ustest_f16i64_mm:
-; CHECK:         .functype ustest_f16i64_mm (f32) -> (i64)
+; CHECK:         .functype ustest_f16i64_mm (i32) -> (i64)
 ; CHECK-NEXT:    .local i32, i64, i64
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    global.get __stack_pointer
@@ -1687,7 +1656,6 @@ define i64 @ustest_f16i64_mm(half %x) {
 ; CHECK-NEXT:    global.set __stack_pointer
 ; CHECK-NEXT:    local.get 1
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    call __fixsfti
 ; CHECK-NEXT:    local.get 1
@@ -1726,7 +1694,7 @@ entry:
 
 define i64 @utesth_f16i64_mm_cse(half %x) {
 ; CHECK-LABEL: utesth_f16i64_mm_cse:
-; CHECK:         .functype utesth_f16i64_mm_cse (f32) -> (i64)
+; CHECK:         .functype utesth_f16i64_mm_cse (i32) -> (i64)
 ; CHECK-NEXT:    .local i32, i64
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    global.get __stack_pointer
@@ -1736,7 +1704,6 @@ define i64 @utesth_f16i64_mm_cse(half %x) {
 ; CHECK-NEXT:    global.set __stack_pointer
 ; CHECK-NEXT:    local.get 1
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    call __fixunssfti
 ; CHECK-NEXT:    local.get 1
diff --git a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
index 7190e162eb010..fabc5c174fd0b 100644
--- a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
@@ -174,27 +174,24 @@ entry:
 
 define <4 x i32> @stest_f16i32(<4 x half> %x) {
 ; CHECK-LABEL: stest_f16i32:
-; CHECK:         .functype stest_f16i32 (f32, f32, f32, f32) -> (v128)
+; CHECK:         .functype stest_f16i32 (i32, i32, i32, i32) -> (v128)
+; CHECK-NEXT:    .local f32
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    local.get 1
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
-; CHECK-NEXT:    local.set 1
+; CHECK-NEXT:    local.set 4
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    i32.trunc_sat_f32_s
 ; CHECK-NEXT:    i32x4.splat
-; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    local.get 4
 ; CHECK-NEXT:    i32.trunc_sat_f32_s
 ; CHECK-NEXT:    i32x4.replace_lane 1
 ; CHECK-NEXT:    local.get 2
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    i32.trunc_sat_f32_s
 ; CHECK-NEXT:    i32x4.replace_lane 2
 ; CHECK-NEXT:    local.get 3
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    i32.trunc_sat_f32_s
 ; CHECK-NEXT:    i32x4.replace_lane 3
@@ -211,27 +208,24 @@ entry:
 
 define <4 x i32> @utesth_f16i32(<4 x half> %x) {
 ; CHECK-LABEL: utesth_f16i32:
-; CHECK:         .functype utesth_f16i32 (f32, f32, f32, f32) -> (v128)
+; CHECK:         .functype utesth_f16i32 (i32, i32, i32, i32) -> (v128)
+; CHECK-NEXT:    .local f32
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    local.get 1
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
-; CHECK-NEXT:    local.set 1
+; CHECK-NEXT:    local.set 4
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    i32.trunc_sat_f32_u
 ; CHECK-NEXT:    i32x4.splat
-; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    local.get 4
 ; CHECK-NEXT:    i32.trunc_sat_f32_u
 ; CHECK-NEXT:    i32x4.replace_lane 1
 ; CHECK-NEXT:    local.get 2
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    i32.trunc_sat_f32_u
 ; CHECK-NEXT:    i32x4.replace_lane 2
 ; CHECK-NEXT:    local.get 3
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    i32.trunc_sat_f32_u
 ; CHECK-NEXT:    i32x4.replace_lane 3
@@ -246,27 +240,24 @@ entry:
 
 define <4 x i32> @ustest_f16i32(<4 x half> %x) {
 ; CHECK-LABEL: ustest_f16i32:
-; CHECK:         .functype ustest_f16i32 (f32, f32, f32, f32) -> (v128)
+; CHECK:         .functype ustest_f16i32 (i32, i32, i32, i32) -> (v128)
+; CHECK-NEXT:    .local f32
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    local.get 1
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
-; CHECK-NEXT:    local.set 1
+; CHECK-NEXT:    local.set 4
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    i32.trunc_sat_f32_u
 ; CHECK-NEXT:    i32x4.splat
-; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    local.get 4
 ; CHECK-NEXT:    i32.trunc_sat_f32_u
 ; CHECK-NEXT:    i32x4.replace_lane 1
 ; CHECK-NEXT:    local.get 2
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    i32.trunc_sat_f32_u
 ; CHECK-NEXT:    i32x4.replace_lane 2
 ; CHECK-NEXT:    local.get 3
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    i32.trunc_sat_f32_u
 ; CHECK-NEXT:    i32x4.replace_lane 3
@@ -433,73 +424,65 @@ entry:
 
 define <8 x i16> @stest_f16i16(<8 x half> %x) {
 ; CHECK-LABEL: stest_f16i16:
-; CHECK:         .functype stest_f16i16 (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128)
-; CHECK-NEXT:    .local v128, v128, v128
+; CHECK:         .functype stest_f16i16 (i32, i32, i32, i32, i32, i32, i32, i32) -> (v128)
+; CHECK-NEXT:    .local f32, f32, f32, f32, f32, v128, v128, v128
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    local.get 5
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
-; CHECK-NEXT:    local.set 5
+; CHECK-NEXT:    local.set 8
 ; CHECK-NEXT:    local.get 4
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
-; CHECK-NEXT:    local.set 4
+; CHECK-NEXT:    local.set 9
 ; CHECK-NEXT:    local.get 6
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
-; CHECK-NEXT:    local.set 6
+; CHECK-NEXT:    local.set 10
 ; CHECK-NEXT:    local.get 7
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
-; CHECK-NEXT:    local.set 7
+; CHECK-NEXT:    local.set 11
 ; CHECK-NEXT:    local.get 1
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
-; CHECK-NEXT:    local.set 1
+; CHECK-NEXT:    local.set 12
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    i32.trunc_sat_f32_s
 ; CHECK-NEXT:    i32x4.splat
-; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    local.get 12
 ; CHECK-NEXT:    i32.trunc_sat_f32_s
 ; CHECK-NEXT:    i32x4.replace_lane 1
 ; CHECK-NEXT:    local.get 2
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    i32.trunc_sat_f32_s
 ; CHECK-NEXT:    i32x4.replace_lane 2
 ; CHECK-NEXT:    local.get 3
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    i32.trunc_sat_f32_s
 ; CHECK-NEXT:    i32x4.replace_lane 3
 ; CHECK-NEXT:    v128.const 32767, 32767, 32767, 32767
-; CHECK-NEXT:    local.tee 8
+; CHECK-NEXT:    local.tee 13
 ; CHECK-NEXT:    i32x4.min_s
 ; CHECK-NEXT:    v128.const -32768, -32768, -32768, -32768
-; CHECK-NEXT:    local.tee 9
+; CHECK-NEXT:    local.tee 14
 ; CHECK-NEXT:    i32x4.max_s
 ; CHECK-NEXT:    v128.const 65535, 65535, 65535, 65535
-; CHECK-NEXT:    local.tee 10
+; CHECK-NEXT:    local.tee 15
 ; CHECK-NEXT:    v128.and
-; CHECK-NEXT:    local.get 4
+; CHECK-NEXT:    local.get 9
 ; CHECK-NEXT:    i32.trunc_sat_f32_s
 ; CHECK-NEXT:    i32x4.splat
-; CHECK-NEXT:    local.get 5
+; CHECK-NEXT:    local.get 8
 ; CHECK-NEXT:    i32.trunc_sat_f32_s
 ; CHECK-NEXT:    i32x4.replace_lane 1
-; CHECK-NEXT:    local.get 6
+; CHECK-NEXT:    local.get 10
 ; CHECK-NEXT:    i32.trunc_sat_f32_s
 ; CHECK-NEXT:    i32x4.replace_lane 2
-; CHECK-NEXT:    local.get 7
+; CHECK-NEXT:    local.get 11
 ; CHECK-NEXT:    i32.trunc_sat_f32_s
 ; CHECK-NEXT:    i32x4.replace_lane 3
-; CHECK-NEXT:    local.get 8
+; CHECK-NEXT:    local.get 13
 ; CHECK-NEXT:    i32x4.min_s
-; CHECK-NEXT:    local.get 9
+; CHECK-NEXT:    local.get 14
 ; CHECK-NEXT:    i32x4.max_s
-; CHECK-NEXT:    local.get 10
+; CHECK-NEXT:    local.get 15
 ; CHECK-NEXT:    v128.and
 ; CHECK-NEXT:    i16x8.narrow_i32x4_u
 ; CHECK-NEXT:    # fallthrough-return
@@ -515,63 +498,55 @@ entry:
 
 define <8 x i16> @utesth_f16i16(<8 x half> %x) {
 ; CHECK-LABEL: utesth_f16i16:
-; CHECK:         .functype utesth_f16i16 (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128)
-; CHECK-NEXT:    .local v128
+; CHECK:         .functype utesth_f16i16 (i32, i32, i32, i32, i32, i32, i32, i32) -> (v128)
+; CHECK-NEXT:    .local f32, f32, f32, f32, f32, v128
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    local.get 5
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
-; CHECK-NEXT:    local.set 5
+; CHECK-NEXT:    local.set 8
 ; CHECK-NEXT:    local.get 4
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
-; CHECK-NEXT:    local.set 4
+; CHECK-NEXT:    local.set 9
 ; CHECK-NEXT:    local.get 6
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
-; CHECK-NEXT:    local.set 6
+; CHECK-NEXT:    local.set 10
 ; CHECK-NEXT:    local.get 7
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
-; CHECK-NEXT:    local.set 7
+; CHECK-NEXT:    local.set 11
 ; CHECK-NEXT:    local.get 1
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
-; CHECK-NEXT:    local.set 1
+; CHECK-NEXT:    local.set 12
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    i32.trunc_sat_f32_u
 ; CHECK-NEXT:    i32x4.splat
-; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    local.get 12
 ; CHECK-NEXT:    i32.trunc_sat_f32_u
 ; CHECK-NEXT:    i32x4.replace_lane 1
 ; CHECK-NEXT:    local.get 2
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    i32.trunc_sat_f32_u
 ; CHECK-NEXT:    i32x4.replace_lane 2
 ; CHECK-NEXT:    local.get 3
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    i32.trunc_sat_f32_u
 ; CHECK-NEXT:    i32x4.replace_lane 3
 ; CHECK-NEXT:    v128.const 65535, 65535, 65535, 65535
-; CHECK-NEXT:    local.tee 8
+; CHECK-NEXT:    local.tee 13
 ; CHECK-NEXT:    i32x4.min_u
-; CHECK-NEXT:    local.get 4
+; CHECK-NEXT:    local.get 9
 ; CHECK-NEXT:    i32.trunc_sat_f32_u
 ; CHECK-NEXT:    i32x4.splat
-; CHECK-NEXT:    local.get 5
+; CHECK-NEXT:    local.get 8
 ; CHECK-NEXT:    i32.trunc_sat_f32_u
 ; CHECK-NEXT:    i32x4.replace_lane 1
-; CHECK-NEXT:    local.get 6
+; CHECK-NEXT:    local.get 10
 ; CHECK-NEXT:    i32.trunc_sat_f32_u
 ; CHECK-NEXT:    i32x4.replace_lane 2
-; CHECK-NEXT:    local.get 7
+; CHECK-NEXT:    local.get 11
 ; CHECK-NEXT:    i32.trunc_sat_f32_u
 ; CHECK-NEXT:    i32x4.replace_lane 3
-; CHECK-NEXT:    local.get 8
+; CHECK-NEXT:    local.get 13
 ; CHECK-NEXT:    i32x4.min_u
 ; CHECK-NEXT:    i16x8.narrow_i32x4_u
 ; CHECK-NEXT:    # fallthrough-return
@@ -585,68 +560,60 @@ entry:
 
 define <8 x i16> @ustest_f16i16(<8 x half> %x) {
 ; CHECK-LABEL: ustest_f16i16:
-; CHECK:         .functype ustest_f16i16 (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128)
-; CHECK-NEXT:    .local v128, v128
+; CHECK:         .functype ustest_f16i16 (i32, i32, i32, i32, i32, i32, i32, i32) -> (v128)
+; CHECK-NEXT:    .local f32, f32, f32, f32, f32, v128, v128
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    local.get 5
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
-; CHECK-NEXT:    local.set 5
+; CHECK-NEXT:    local.set 8
 ; CHECK-NEXT:    local.get 4
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
-; CHECK-NEXT:    local.set 4
+; CHECK-NEXT:    local.set 9
 ; CHECK-NEXT:    local.get 6
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
-; CHECK-NEXT:    local.set 6
+; CHECK-NEXT:    local.set 10
 ; CHECK-NEXT:    local.get 7
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
-; CHECK-NEXT:    local.set 7
+; CHECK-NEXT:    local.set 11
 ; CHECK-NEXT:    local.get 1
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
-; CHECK-NEXT:    local.set 1
+; CHECK-NEXT:    local.set 12
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    i32.trunc_sat_f32_s
 ; CHECK-NEXT:    i32x4.splat
-; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    local.get 12
 ; CHECK-NEXT:    i32.trunc_sat_f32_s
 ; CHECK-NEXT:    i32x4.replace_lane 1
 ; CHECK-NEXT:    local.get 2
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    i32.trunc_sat_f32_s
 ; CHECK-NEXT:    i32x4.replace_lane 2
 ; CHECK-NEXT:    local.get 3
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    i32.trunc_sat_f32_s
 ; CHECK-NEXT:    i32x4.replace_lane 3
 ; CHECK-NEXT:    v128.const 65535, 65535, 65535, 65535
-; CHECK-NEXT:    local.tee 8
+; CHECK-NEXT:    local.tee 13
 ; CHECK-NEXT:    i32x4.min_s
 ; CHECK-NEXT:    v128.const 0, 0, 0, 0
-; CHECK-NEXT:    local.tee 9
+; CHECK-NEXT:    local.tee 14
 ; CHECK-NEXT:    i32x4.max_s
-; CHECK-NEXT:    local.get 4
+; CHECK-NEXT:    local.get 9
 ; CHECK-NEXT:    i32.trunc_sat_f32_s
 ; CHECK-NEXT:    i32x4.splat
-; CHECK-NEXT:    local.get 5
+; CHECK-NEXT:    local.get 8
 ; CHECK-NEXT:    i32.trunc_sat_f32_s
 ; CHECK-NEXT:    i32x4.replace_lane 1
-; CHECK-NEXT:    local.get 6
+; CHECK-NEXT:    local.get 10
 ; CHECK-NEXT:    i32.trunc_sat_f32_s
 ; CHECK-NEXT:    i32x4.replace_lane 2
-; CHECK-NEXT:    local.get 7
+; CHECK-NEXT:    local.get 11
 ; CHECK-NEXT:    i32.trunc_sat_f32_s
 ; CHECK-NEXT:    i32x4.replace_lane 3
-; CHECK-NEXT:    local.get 8
+; CHECK-NEXT:    local.get 13
 ; CHECK-NEXT:    i32x4.min_s
-; CHECK-NEXT:    local.get 9
+; CHECK-NEXT:    local.get 14
 ; CHECK-NEXT:    i32x4.max_s
 ; CHECK-NEXT:    i16x8.narrow_i32x4_u
 ; CHECK-NEXT:    # fallthrough-return
@@ -1186,7 +1153,7 @@ entry:
 
 define <2 x i64> @stest_f16i64(<2 x half> %x) {
 ; CHECK-LABEL: stest_f16i64:
-; CHECK:         .functype stest_f16i64 (f32, f32) -> (v128)
+; CHECK:         .functype stest_f16i64 (i32, i32) -> (v128)
 ; CHECK-NEXT:    .local i32, i64, i64, i64, i64
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    global.get __stack_pointer
@@ -1198,12 +1165,10 @@ define <2 x i64> @stest_f16i64(<2 x half> %x) {
 ; CHECK-NEXT:    i32.const 16
 ; CHECK-NEXT:    i32.add
 ; CHECK-NEXT:    local.get 1
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    call __fixsfti
 ; CHECK-NEXT:    local.get 2
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    call __fixsfti
 ; CHECK-NEXT:    local.get 2
@@ -1297,7 +1262,7 @@ entry:
 
 define <2 x i64> @utesth_f16i64(<2 x half> %x) {
 ; CHECK-LABEL: utesth_f16i64:
-; CHECK:         .functype utesth_f16i64 (f32, f32) -> (v128)
+; CHECK:         .functype utesth_f16i64 (i32, i32) -> (v128)
 ; CHECK-NEXT:    .local i32, i64, i64, i64, i64
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    global.get __stack_pointer
@@ -1309,12 +1274,10 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) {
 ; CHECK-NEXT:    i32.const 16
 ; CHECK-NEXT:    i32.add
 ; CHECK-NEXT:    local.get 1
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    call __fixunssfti
 ; CHECK-NEXT:    local.get 2
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    call __fixunssfti
 ; CHECK-NEXT:    local.get 2
@@ -1356,7 +1319,7 @@ entry:
 
 define <2 x i64> @ustest_f16i64(<2 x half> %x) {
 ; CHECK-LABEL: ustest_f16i64:
-; CHECK:         .functype ustest_f16i64 (f32, f32) -> (v128)
+; CHECK:         .functype ustest_f16i64 (i32, i32) -> (v128)
 ; CHECK-NEXT:    .local i32, i64, i64, i64, i64
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    global.get __stack_pointer
@@ -1368,12 +1331,10 @@ define <2 x i64> @ustest_f16i64(<2 x half> %x) {
 ; CHECK-NEXT:    i32.const 16
 ; CHECK-NEXT:    i32.add
 ; CHECK-NEXT:    local.get 1
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    call __fixsfti
 ; CHECK-NEXT:    local.get 2
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    call __fixsfti
 ; CHECK-NEXT:    local.get 2
@@ -1616,27 +1577,24 @@ entry:
 
 define <4 x i32> @stest_f16i32_mm(<4 x half> %x) {
 ; CHECK-LABEL: stest_f16i32_mm:
-; CHECK:         .functype stest_f16i32_mm (f32, f32, f32, f32) -> (v128)
+; CHECK:         .functype stest_f16i32_mm (i32, i32, i32, i32) -> (v128)
+; CHECK-NEXT:    .local f32
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    local.get 1
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
-; CHECK-NEXT:    local.set 1
+; CHECK-NEXT:    local.set 4
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    i32.trunc_sat_f32_s
 ; CHECK-NEXT:    i32x4.splat
-; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    local.get 4
 ; CHECK-NEXT:    i32.trunc_sat_f32_s
 ; CHECK-NEXT:    i32x4.replace_lane 1
 ; CHECK-NEXT:    local.get 2
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    i32.trunc_sat_f32_s
 ; CHECK-NEXT:    i32x4.replace_lane 2
 ; CHECK-NEXT:    local.get 3
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    i32.trunc_sat_f32_s
 ; CHECK-NEXT:    i32x4.replace_lane 3
@@ -1651,27 +1609,24 @@ entry:
 
 define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
 ; CHECK-LABEL: utesth_f16i32_mm:
-; CHECK:         .functype utesth_f16i32_mm (f32, f32, f32, f32) -> (v128)
+; CHECK:         .functype utesth_f16i32_mm (i32, i32, i32, i32) -> (v128)
+; CHECK-NEXT:    .local f32
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    local.get 1
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
-; CHECK-NEXT:    local.set 1
+; CHECK-NEXT:    local.set 4
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    i32.trunc_sat_f32_u
 ; CHECK-NEXT:    i32x4.splat
-; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    local.get 4
 ; CHECK-NEXT:    i32.trunc_sat_f32_u
 ; CHECK-NEXT:    i32x4.replace_lane 1
 ; CHECK-NEXT:    local.get 2
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    i32.trunc_sat_f32_u
 ; CHECK-NEXT:    i32x4.replace_lane 2
 ; CHECK-NEXT:    local.get 3
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    i32.trunc_sat_f32_u
 ; CHECK-NEXT:    i32x4.replace_lane 3
@@ -1685,27 +1640,24 @@ entry:
 
 define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) {
 ; CHECK-LABEL: ustest_f16i32_mm:
-; CHECK:         .functype ustest_f16i32_mm (f32, f32, f32, f32) -> (v128)
+; CHECK:         .functype ustest_f16i32_mm (i32, i32, i32, i32) -> (v128)
+; CHECK-NEXT:    .local f32
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    local.get 1
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
-; CHECK-NEXT:    local.set 1
+; CHECK-NEXT:    local.set 4
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    i32.trunc_sat_f32_u
 ; CHECK-NEXT:    i32x4.splat
-; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    local.get 4
 ; CHECK-NEXT:    i32.trunc_sat_f32_u
 ; CHECK-NEXT:    i32x4.replace_lane 1
 ; CHECK-NEXT:    local.get 2
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    i32.trunc_sat_f32_u
 ; CHECK-NEXT:    i32x4.replace_lane 2
 ; CHECK-NEXT:    local.get 3
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    i32.trunc_sat_f32_u
 ; CHECK-NEXT:    i32x4.replace_lane 3
@@ -1860,73 +1812,65 @@ entry:
 
 define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
 ; CHECK-LABEL: stest_f16i16_mm:
-; CHECK:         .functype stest_f16i16_mm (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128)
-; CHECK-NEXT:    .local v128, v128, v128
+; CHECK:         .functype stest_f16i16_mm (i32, i32, i32, i32, i32, i32, i32, i32) -> (v128)
+; CHECK-NEXT:    .local f32, f32, f32, f32, f32, v128, v128, v128
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    local.get 5
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
-; CHECK-NEXT:    local.set 5
+; CHECK-NEXT:    local.set 8
 ; CHECK-NEXT:    local.get 4
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
-; CHECK-NEXT:    local.set 4
+; CHECK-NEXT:    local.set 9
 ; CHECK-NEXT:    local.get 6
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
-; CHECK-NEXT:    local.set 6
+; CHECK-NEXT:    local.set 10
 ; CHECK-NEXT:    local.get 7
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
-; CHECK-NEXT:    local.set 7
+; CHECK-NEXT:    local.set 11
 ; CHECK-NEXT:    local.get 1
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
-; CHECK-NEXT:    local.set 1
+; CHECK-NEXT:    local.set 12
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    i32.trunc_sat_f32_s
 ; CHECK-NEXT:    i32x4.splat
-; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    local.get 12
 ; CHECK-NEXT:    i32.trunc_sat_f32_s
 ; CHECK-NEXT:    i32x4.replace_lane 1
 ; CHECK-NEXT:    local.get 2
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    i32.trunc_sat_f32_s
 ; CHECK-NEXT:    i32x4.replace_lane 2
 ; CHECK-NEXT:    local.get 3
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    i32.trunc_sat_f32_s
 ; CHECK-NEXT:    i32x4.replace_lane 3
 ; CHECK-NEXT:    v128.const 32767, 32767, 32767, 32767
-; CHECK-NEXT:    local.tee 8
+; CHECK-NEXT:    local.tee 13
 ; CHECK-NEXT:    i32x4.min_s
 ; CHECK-NEXT:    v128.const -32768, -32768, -32768, -32768
-; CHECK-NEXT:    local.tee 9
+; CHECK-NEXT:    local.tee 14
 ; CHECK-NEXT:    i32x4.max_s
 ; CHECK-NEXT:    v128.const 65535, 65535, 65535, 65535
-; CHECK-NEXT:    local.tee 10
+; CHECK-NEXT:    local.tee 15
 ; CHECK-NEXT:    v128.and
-; CHECK-NEXT:    local.get 4
+; CHECK-NEXT:    local.get 9
 ; CHECK-NEXT:    i32.trunc_sat_f32_s
 ; CHECK-NEXT:    i32x4.splat
-; CHECK-NEXT:    local.get 5
+; CHECK-NEXT:    local.get 8
 ; CHECK-NEXT:    i32.trunc_sat_f32_s
 ; CHECK-NEXT:    i32x4.replace_lane 1
-; CHECK-NEXT:    local.get 6
+; CHECK-NEXT:    local.get 10
 ; CHECK-NEXT:    i32.trunc_sat_f32_s
 ; CHECK-NEXT:    i32x4.replace_lane 2
-; CHECK-NEXT:    local.get 7
+; CHECK-NEXT:    local.get 11
 ; CHECK-NEXT:    i32.trunc_sat_f32_s
 ; CHECK-NEXT:    i32x4.replace_lane 3
-; CHECK-NEXT:    local.get 8
+; CHECK-NEXT:    local.get 13
 ; CHECK-NEXT:    i32x4.min_s
-; CHECK-NEXT:    local.get 9
+; CHECK-NEXT:    local.get 14
 ; CHECK-NEXT:    i32x4.max_s
-; CHECK-NEXT:    local.get 10
+; CHECK-NEXT:    local.get 15
 ; CHECK-NEXT:    v128.and
 ; CHECK-NEXT:    i16x8.narrow_i32x4_u
 ; CHECK-NEXT:    # fallthrough-return
@@ -1940,63 +1884,55 @@ entry:
 
 define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
 ; CHECK-LABEL: utesth_f16i16_mm:
-; CHECK:         .functype utesth_f16i16_mm (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128)
-; CHECK-NEXT:    .local v128
+; CHECK:         .functype utesth_f16i16_mm (i32, i32, i32, i32, i32, i32, i32, i32) -> (v128)
+; CHECK-NEXT:    .local f32, f32, f32, f32, f32, v128
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    local.get 5
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
-; CHECK-NEXT:    local.set 5
+; CHECK-NEXT:    local.set 8
 ; CHECK-NEXT:    local.get 4
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
-; CHECK-NEXT:    local.set 4
+; CHECK-NEXT:    local.set 9
 ; CHECK-NEXT:    local.get 6
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
-; CHECK-NEXT:    local.set 6
+; CHECK-NEXT:    local.set 10
 ; CHECK-NEXT:    local.get 7
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
-; CHECK-NEXT:    local.set 7
+; CHECK-NEXT:    local.set 11
 ; CHECK-NEXT:    local.get 1
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
-; CHECK-NEXT:    local.set 1
+; CHECK-NEXT:    local.set 12
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    i32.trunc_sat_f32_u
 ; CHECK-NEXT:    i32x4.splat
-; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    local.get 12
 ; CHECK-NEXT:    i32.trunc_sat_f32_u
 ; CHECK-NEXT:    i32x4.replace_lane 1
 ; CHECK-NEXT:    local.get 2
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    i32.trunc_sat_f32_u
 ; CHECK-NEXT:    i32x4.replace_lane 2
 ; CHECK-NEXT:    local.get 3
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    i32.trunc_sat_f32_u
 ; CHECK-NEXT:    i32x4.replace_lane 3
 ; CHECK-NEXT:    v128.const 65535, 65535, 65535, 65535
-; CHECK-NEXT:    local.tee 8
+; CHECK-NEXT:    local.tee 13
 ; CHECK-NEXT:    i32x4.min_u
-; CHECK-NEXT:    local.get 4
+; CHECK-NEXT:    local.get 9
 ; CHECK-NEXT:    i32.trunc_sat_f32_u
 ; CHECK-NEXT:    i32x4.splat
-; CHECK-NEXT:    local.get 5
+; CHECK-NEXT:    local.get 8
 ; CHECK-NEXT:    i32.trunc_sat_f32_u
 ; CHECK-NEXT:    i32x4.replace_lane 1
-; CHECK-NEXT:    local.get 6
+; CHECK-NEXT:    local.get 10
 ; CHECK-NEXT:    i32.trunc_sat_f32_u
 ; CHECK-NEXT:    i32x4.replace_lane 2
-; CHECK-NEXT:    local.get 7
+; CHECK-NEXT:    local.get 11
 ; CHECK-NEXT:    i32.trunc_sat_f32_u
 ; CHECK-NEXT:    i32x4.replace_lane 3
-; CHECK-NEXT:    local.get 8
+; CHECK-NEXT:    local.get 13
 ; CHECK-NEXT:    i32x4.min_u
 ; CHECK-NEXT:    i16x8.narrow_i32x4_u
 ; CHECK-NEXT:    # fallthrough-return
@@ -2009,68 +1945,60 @@ entry:
 
 define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
 ; CHECK-LABEL: ustest_f16i16_mm:
-; CHECK:         .functype ustest_f16i16_mm (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128)
-; CHECK-NEXT:    .local v128, v128
+; CHECK:         .functype ustest_f16i16_mm (i32, i32, i32, i32, i32, i32, i32, i32) -> (v128)
+; CHECK-NEXT:    .local f32, f32, f32, f32, f32, v128, v128
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    local.get 5
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
-; CHECK-NEXT:    local.set 5
+; CHECK-NEXT:    local.set 8
 ; CHECK-NEXT:    local.get 4
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
-; CHECK-NEXT:    local.set 4
+; CHECK-NEXT:    local.set 9
 ; CHECK-NEXT:    local.get 6
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
-; CHECK-NEXT:    local.set 6
+; CHECK-NEXT:    local.set 10
 ; CHECK-NEXT:    local.get 7
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
-; CHECK-NEXT:    local.set 7
+; CHECK-NEXT:    local.set 11
 ; CHECK-NEXT:    local.get 1
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
-; CHECK-NEXT:    local.set 1
+; CHECK-NEXT:    local.set 12
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    i32.trunc_sat_f32_s
 ; CHECK-NEXT:    i32x4.splat
-; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    local.get 12
 ; CHECK-NEXT:    i32.trunc_sat_f32_s
 ; CHECK-NEXT:    i32x4.replace_lane 1
 ; CHECK-NEXT:    local.get 2
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    i32.trunc_sat_f32_s
 ; CHECK-NEXT:    i32x4.replace_lane 2
 ; CHECK-NEXT:    local.get 3
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    i32.trunc_sat_f32_s
 ; CHECK-NEXT:    i32x4.replace_lane 3
 ; CHECK-NEXT:    v128.const 65535, 65535, 65535, 65535
-; CHECK-NEXT:    local.tee 8
+; CHECK-NEXT:    local.tee 13
 ; CHECK-NEXT:    i32x4.min_s
 ; CHECK-NEXT:    v128.const 0, 0, 0, 0
-; CHECK-NEXT:    local.tee 9
+; CHECK-NEXT:    local.tee 14
 ; CHECK-NEXT:    i32x4.max_s
-; CHECK-NEXT:    local.get 4
+; CHECK-NEXT:    local.get 9
 ; CHECK-NEXT:    i32.trunc_sat_f32_s
 ; CHECK-NEXT:    i32x4.splat
-; CHECK-NEXT:    local.get 5
+; CHECK-NEXT:    local.get 8
 ; CHECK-NEXT:    i32.trunc_sat_f32_s
 ; CHECK-NEXT:    i32x4.replace_lane 1
-; CHECK-NEXT:    local.get 6
+; CHECK-NEXT:    local.get 10
 ; CHECK-NEXT:    i32.trunc_sat_f32_s
 ; CHECK-NEXT:    i32x4.replace_lane 2
-; CHECK-NEXT:    local.get 7
+; CHECK-NEXT:    local.get 11
 ; CHECK-NEXT:    i32.trunc_sat_f32_s
 ; CHECK-NEXT:    i32x4.replace_lane 3
-; CHECK-NEXT:    local.get 8
+; CHECK-NEXT:    local.get 13
 ; CHECK-NEXT:    i32x4.min_s
-; CHECK-NEXT:    local.get 9
+; CHECK-NEXT:    local.get 14
 ; CHECK-NEXT:    i32x4.max_s
 ; CHECK-NEXT:    i16x8.narrow_i32x4_u
 ; CHECK-NEXT:    # fallthrough-return
@@ -2566,7 +2494,7 @@ entry:
 
 define <2 x i64> @stest_f16i64_mm(<2 x half> %x) {
 ; CHECK-LABEL: stest_f16i64_mm:
-; CHECK:         .functype stest_f16i64_mm (f32, f32) -> (v128)
+; CHECK:         .functype stest_f16i64_mm (i32, i32) -> (v128)
 ; CHECK-NEXT:    .local i32, i64, i64, i64, i64
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    global.get __stack_pointer
@@ -2578,12 +2506,10 @@ define <2 x i64> @stest_f16i64_mm(<2 x half> %x) {
 ; CHECK-NEXT:    i32.const 16
 ; CHECK-NEXT:    i32.add
 ; CHECK-NEXT:    local.get 1
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    call __fixsfti
 ; CHECK-NEXT:    local.get 2
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    call __fixsfti
 ; CHECK-NEXT:    local.get 2
@@ -2675,7 +2601,7 @@ entry:
 
 define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
 ; CHECK-LABEL: utesth_f16i64_mm:
-; CHECK:         .functype utesth_f16i64_mm (f32, f32) -> (v128)
+; CHECK:         .functype utesth_f16i64_mm (i32, i32) -> (v128)
 ; CHECK-NEXT:    .local i32, i64, i64, i64, i64
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    global.get __stack_pointer
@@ -2687,12 +2613,10 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
 ; CHECK-NEXT:    i32.const 16
 ; CHECK-NEXT:    i32.add
 ; CHECK-NEXT:    local.get 1
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    call __fixunssfti
 ; CHECK-NEXT:    local.get 2
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    call __fixunssfti
 ; CHECK-NEXT:    local.get 2
@@ -2733,7 +2657,7 @@ entry:
 
 define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) {
 ; CHECK-LABEL: ustest_f16i64_mm:
-; CHECK:         .functype ustest_f16i64_mm (f32, f32) -> (v128)
+; CHECK:         .functype ustest_f16i64_mm (i32, i32) -> (v128)
 ; CHECK-NEXT:    .local i32, i64, i64, i64, i64
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    global.get __stack_pointer
@@ -2745,12 +2669,10 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) {
 ; CHECK-NEXT:    i32.const 16
 ; CHECK-NEXT:    i32.add
 ; CHECK-NEXT:    local.get 1
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    call __fixsfti
 ; CHECK-NEXT:    local.get 2
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    call __fixsfti
 ; CHECK-NEXT:    local.get 2
diff --git a/llvm/test/CodeGen/WebAssembly/llvm.sincos.ll b/llvm/test/CodeGen/WebAssembly/llvm.sincos.ll
index 3c10b09525573..0608a60b739f8 100644
--- a/llvm/test/CodeGen/WebAssembly/llvm.sincos.ll
+++ b/llvm/test/CodeGen/WebAssembly/llvm.sincos.ll
@@ -4,36 +4,36 @@
 
 define { half, half } @test_sincos_f16(half %a) #0 {
 ; WASM32-LABEL: test_sincos_f16:
-; WASM32:         .functype test_sincos_f16 (i32, f32) -> ()
+; WASM32:         .functype test_sincos_f16 (i32, i32) -> ()
+; WASM32-NEXT:    .local f32
 ; WASM32-NEXT:  # %bb.0:
 ; WASM32-NEXT:    local.get 0
 ; WASM32-NEXT:    local.get 1
-; WASM32-NEXT:    call __truncsfhf2
 ; WASM32-NEXT:    call __extendhfsf2
-; WASM32-NEXT:    local.tee 1
+; WASM32-NEXT:    local.tee 2
 ; WASM32-NEXT:    call cosf
 ; WASM32-NEXT:    call __truncsfhf2
 ; WASM32-NEXT:    i32.store16 2
 ; WASM32-NEXT:    local.get 0
-; WASM32-NEXT:    local.get 1
+; WASM32-NEXT:    local.get 2
 ; WASM32-NEXT:    call sinf
 ; WASM32-NEXT:    call __truncsfhf2
 ; WASM32-NEXT:    i32.store16 0
 ; WASM32-NEXT:    # fallthrough-return
 ;
 ; WASM64-LABEL: test_sincos_f16:
-; WASM64:         .functype test_sincos_f16 (i64, f32) -> ()
+; WASM64:         .functype test_sincos_f16 (i64, i32) -> ()
+; WASM64-NEXT:    .local f32
 ; WASM64-NEXT:  # %bb.0:
 ; WASM64-NEXT:    local.get 0
 ; WASM64-NEXT:    local.get 1
-; WASM64-NEXT:    call __truncsfhf2
 ; WASM64-NEXT:    call __extendhfsf2
-; WASM64-NEXT:    local.tee 1
+; WASM64-NEXT:    local.tee 2
 ; WASM64-NEXT:    call cosf
 ; WASM64-NEXT:    call __truncsfhf2
 ; WASM64-NEXT:    i32.store16 2
 ; WASM64-NEXT:    local.get 0
-; WASM64-NEXT:    local.get 1
+; WASM64-NEXT:    local.get 2
 ; WASM64-NEXT:    call sinf
 ; WASM64-NEXT:    call __truncsfhf2
 ; WASM64-NEXT:    i32.store16 0
@@ -44,12 +44,12 @@ define { half, half } @test_sincos_f16(half %a) #0 {
 
 define half @test_sincos_f16_only_use_sin(half %a) #0 {
 ; CHECK-LABEL: test_sincos_f16_only_use_sin:
-; CHECK:         .functype test_sincos_f16_only_use_sin (f32) -> (f32)
+; CHECK:         .functype test_sincos_f16_only_use_sin (i32) -> (i32)
 ; CHECK-NEXT:  # %bb.0:
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    call sinf
+; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    # fallthrough-return
   %result = call { half, half } @llvm.sincos.f16(half %a)
   %result.0 = extractvalue { half, half } %result, 0
@@ -58,12 +58,12 @@ define half @test_sincos_f16_only_use_sin(half %a) #0 {
 
 define half @test_sincos_f16_only_use_cos(half %a) #0 {
 ; CHECK-LABEL: test_sincos_f16_only_use_cos:
-; CHECK:         .functype test_sincos_f16_only_use_cos (f32) -> (f32)
+; CHECK:         .functype test_sincos_f16_only_use_cos (i32) -> (i32)
 ; CHECK-NEXT:  # %bb.0:
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    call cosf
+; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    # fallthrough-return
   %result = call { half, half } @llvm.sincos.f16(half %a)
   %result.1 = extractvalue { half, half } %result, 1
@@ -72,62 +72,60 @@ define half @test_sincos_f16_only_use_cos(half %a) #0 {
 
 define { <2 x half>, <2 x half> } @test_sincos_v2f16(<2 x half> %a) #0 {
 ; WASM32-LABEL: test_sincos_v2f16:
-; WASM32:         .functype test_sincos_v2f16 (i32, f32, f32) -> ()
+; WASM32:         .functype test_sincos_v2f16 (i32, i32, i32) -> ()
+; WASM32-NEXT:    .local f32, f32
 ; WASM32-NEXT:  # %bb.0:
 ; WASM32-NEXT:    local.get 0
 ; WASM32-NEXT:    local.get 2
-; WASM32-NEXT:    call __truncsfhf2
 ; WASM32-NEXT:    call __extendhfsf2
-; WASM32-NEXT:    local.tee 2
+; WASM32-NEXT:    local.tee 3
 ; WASM32-NEXT:    call cosf
 ; WASM32-NEXT:    call __truncsfhf2
 ; WASM32-NEXT:    i32.store16 6
 ; WASM32-NEXT:    local.get 0
 ; WASM32-NEXT:    local.get 1
-; WASM32-NEXT:    call __truncsfhf2
 ; WASM32-NEXT:    call __extendhfsf2
-; WASM32-NEXT:    local.tee 1
+; WASM32-NEXT:    local.tee 4
 ; WASM32-NEXT:    call cosf
 ; WASM32-NEXT:    call __truncsfhf2
 ; WASM32-NEXT:    i32.store16 4
 ; WASM32-NEXT:    local.get 0
-; WASM32-NEXT:    local.get 2
+; WASM32-NEXT:    local.get 3
 ; WASM32-NEXT:    call sinf
 ; WASM32-NEXT:    call __truncsfhf2
 ; WASM32-NEXT:    i32.store16 2
 ; WASM32-NEXT:    local.get 0
-; WASM32-NEXT:    local.get 1
+; WASM32-NEXT:    local.get 4
 ; WASM32-NEXT:    call sinf
 ; WASM32-NEXT:    call __truncsfhf2
 ; WASM32-NEXT:    i32.store16 0
 ; WASM32-NEXT:    # fallthrough-return
 ;
 ; WASM64-LABEL: test_sincos_v2f16:
-; WASM64:         .functype test_sincos_v2f16 (i64, f32, f32) -> ()
+; WASM64:         .functype test_sincos_v2f16 (i64, i32, i32) -> ()
+; WASM64-NEXT:    .local f32, f32
 ; WASM64-NEXT:  # %bb.0:
 ; WASM64-NEXT:    local.get 0
 ; WASM64-NEXT:    local.get 2
-; WASM64-NEXT:    call __truncsfhf2
 ; WASM64-NEXT:    call __extendhfsf2
-; WASM64-NEXT:    local.tee 2
+; WASM64-NEXT:    local.tee 3
 ; WASM64-NEXT:    call cosf
 ; WASM64-NEXT:    call __truncsfhf2
 ; WASM64-NEXT:    i32.store16 6
 ; WASM64-NEXT:    local.get 0
 ; WASM64-NEXT:    local.get 1
-; WASM64-NEXT:    call __truncsfhf2
 ; WASM64-NEXT:    call __extendhfsf2
-; WASM64-NEXT:    local.tee 1
+; WASM64-NEXT:    local.tee 4
 ; WASM64-NEXT:    call cosf
 ; WASM64-NEXT:    call __truncsfhf2
 ; WASM64-NEXT:    i32.store16 4
 ; WASM64-NEXT:    local.get 0
-; WASM64-NEXT:    local.get 2
+; WASM64-NEXT:    local.get 3
 ; WASM64-NEXT:    call sinf
 ; WASM64-NEXT:    call __truncsfhf2
 ; WASM64-NEXT:    i32.store16 2
 ; WASM64-NEXT:    local.get 0
-; WASM64-NEXT:    local.get 1
+; WASM64-NEXT:    local.get 4
 ; WASM64-NEXT:    call sinf
 ; WASM64-NEXT:    call __truncsfhf2
 ; WASM64-NEXT:    i32.store16 0