[llvm] [RISCV]: Implemented softening of `FCANONICALIZE` (PR #169234)

Kevin Per via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 3 12:36:29 PST 2025


https://github.com/kper updated https://github.com/llvm/llvm-project/pull/169234

>From a7af5b21388d4941f9008582e081ae8b1312eb63 Mon Sep 17 00:00:00 2001
From: Kevin Per <kevin.per at protonmail.com>
Date: Sun, 23 Nov 2025 21:02:45 +0100
Subject: [PATCH 1/5] [RISCV]: Implemented softening of `FCANONICALIZE`

---
 .../SelectionDAG/LegalizeFloatTypes.cpp       |  8 +++
 llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h |  1 +
 llvm/test/CodeGen/RISCV/fcanonicalize.ll      | 64 +++++++++++++++++++
 3 files changed, 73 insertions(+)
 create mode 100644 llvm/test/CodeGen/RISCV/fcanonicalize.ll

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 383a025a4d916..d405295229203 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -70,6 +70,8 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
     case ISD::EXTRACT_VECTOR_ELT:
       R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N, ResNo); break;
     case ISD::FABS:        R = SoftenFloatRes_FABS(N); break;
+    case ISD::FCANONICALIZE:
+      R = SoftenFloatRes_FCANONICALIZE(N); break;
     case ISD::STRICT_FMINNUM:
     case ISD::FMINNUM:     R = SoftenFloatRes_FMINNUM(N); break;
     case ISD::STRICT_FMAXNUM:
@@ -311,6 +313,12 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) {
   return DAG.getNode(ISD::AND, SDLoc(N), NVT, Op, Mask);
 }
 
+SDValue DAGTypeLegalizer::SoftenFloatRes_FCANONICALIZE(SDNode *N) {
+  return SoftenFloatRes_Unary(
+      N, GetFPLibCall(N->getValueType(0), RTLIB::FMIN_F32, RTLIB::FMIN_F64,
+                      RTLIB::FMIN_F80, RTLIB::FMIN_F128, RTLIB::FMIN_PPCF128));
+}
+
 SDValue DAGTypeLegalizer::SoftenFloatRes_FMINNUM(SDNode *N) {
   if (SDValue SelCC = TLI.createSelectForFMINNUM_FMAXNUM(N, DAG))
     return SoftenFloatRes_SELECT_CC(SelCC.getNode());
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index ede522eff6df3..c90cb7bc88f57 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -585,6 +585,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue SoftenFloatRes_FASIN(SDNode *N);
   SDValue SoftenFloatRes_FATAN(SDNode *N);
   SDValue SoftenFloatRes_FATAN2(SDNode *N);
+  SDValue SoftenFloatRes_FCANONICALIZE(SDNode *N);
   SDValue SoftenFloatRes_FMINNUM(SDNode *N);
   SDValue SoftenFloatRes_FMAXNUM(SDNode *N);
   SDValue SoftenFloatRes_FMINIMUMNUM(SDNode *N);
diff --git a/llvm/test/CodeGen/RISCV/fcanonicalize.ll b/llvm/test/CodeGen/RISCV/fcanonicalize.ll
new file mode 100644
index 0000000000000..334657bdf92de
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/fcanonicalize.ll
@@ -0,0 +1,64 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv64 < %s | FileCheck -check-prefix=RV64I %s
+; RUN: llc -mtriple=riscv64 -mattr=+d < %s | FileCheck -check-prefix=RV64D %s
+
+define double @max(double, double) unnamed_addr #0 {
+; RV64I-LABEL: max:
+; RV64I:       # %bb.0: # %start
+; RV64I-NEXT:    addi sp, sp, -32
+; RV64I-NEXT:    .cfi_def_cfa_offset 32
+; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    .cfi_offset ra, -8
+; RV64I-NEXT:    .cfi_offset s0, -16
+; RV64I-NEXT:    .cfi_offset s1, -24
+; RV64I-NEXT:    .cfi_offset s2, -32
+; RV64I-NEXT:    mv s0, a1
+; RV64I-NEXT:    mv s1, a0
+; RV64I-NEXT:    call __ltdf2
+; RV64I-NEXT:    srli s2, a0, 63
+; RV64I-NEXT:    mv a0, s1
+; RV64I-NEXT:    mv a1, s1
+; RV64I-NEXT:    call __unorddf2
+; RV64I-NEXT:    snez a0, a0
+; RV64I-NEXT:    or a0, a0, s2
+; RV64I-NEXT:    bnez a0, .LBB0_2
+; RV64I-NEXT:  # %bb.1: # %start
+; RV64I-NEXT:    mv s0, s1
+; RV64I-NEXT:  .LBB0_2: # %start
+; RV64I-NEXT:    mv a0, s0
+; RV64I-NEXT:    call fmin
+; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    .cfi_restore ra
+; RV64I-NEXT:    .cfi_restore s0
+; RV64I-NEXT:    .cfi_restore s1
+; RV64I-NEXT:    .cfi_restore s2
+; RV64I-NEXT:    addi sp, sp, 32
+; RV64I-NEXT:    .cfi_def_cfa_offset 0
+; RV64I-NEXT:    ret
+;
+; RV64D-LABEL: max:
+; RV64D:       # %bb.0: # %start
+; RV64D-NEXT:    flt.d a0, fa0, fa1
+; RV64D-NEXT:    feq.d a1, fa0, fa0
+; RV64D-NEXT:    xori a1, a1, 1
+; RV64D-NEXT:    or a0, a1, a0
+; RV64D-NEXT:    bnez a0, .LBB0_2
+; RV64D-NEXT:  # %bb.1: # %start
+; RV64D-NEXT:    fmv.d fa1, fa0
+; RV64D-NEXT:  .LBB0_2: # %start
+; RV64D-NEXT:    fmin.d fa0, fa1, fa1
+; RV64D-NEXT:    ret
+start:
+  %2 = fcmp olt double %0, %1
+  %3 = fcmp uno double %0, 0.000000e+00
+  %or.cond.i.i = or i1 %3, %2
+  %4 = select i1 %or.cond.i.i, double %1, double %0
+  %5 = tail call double @llvm.canonicalize.f64(double %4) #2
+  ret double %5
+}

>From 8aa740662963a377bf0c83845d6c6523832b44a7 Mon Sep 17 00:00:00 2001
From: Kevin Per <kevin.per at protonmail.com>
Date: Mon, 24 Nov 2025 11:53:17 +0100
Subject: [PATCH 2/5] [RISCV]: Map to FMINIMUMNUM

---
 .../SelectionDAG/LegalizeFloatTypes.cpp       |   11 +-
 llvm/test/CodeGen/RISCV/fcanonicalize.ll      |   64 -
 llvm/test/CodeGen/RISCV/fp-fcanonicalize.ll   | 3657 ++++++++++++++++-
 3 files changed, 3606 insertions(+), 126 deletions(-)
 delete mode 100644 llvm/test/CodeGen/RISCV/fcanonicalize.ll

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index d405295229203..a162bfdc52189 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -314,9 +314,14 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) {
 }
 
 SDValue DAGTypeLegalizer::SoftenFloatRes_FCANONICALIZE(SDNode *N) {
-  return SoftenFloatRes_Unary(
-      N, GetFPLibCall(N->getValueType(0), RTLIB::FMIN_F32, RTLIB::FMIN_F64,
-                      RTLIB::FMIN_F80, RTLIB::FMIN_F128, RTLIB::FMIN_PPCF128));
+  SDLoc dl(N);
+  auto Node = DAG.getNode(ISD::FMINIMUMNUM, dl, N->getValueType(0),
+                          N->getOperand(0), N->getOperand(0));
+  return SoftenFloatRes_Binary(
+      Node.getNode(),
+      GetFPLibCall(N->getValueType(0), RTLIB::FMINIMUM_NUM_F32,
+                   RTLIB::FMINIMUM_NUM_F64, RTLIB::FMINIMUM_NUM_F80,
+                   RTLIB::FMINIMUM_NUM_F128, RTLIB::FMINIMUM_NUM_PPCF128));
 }
 
 SDValue DAGTypeLegalizer::SoftenFloatRes_FMINNUM(SDNode *N) {
diff --git a/llvm/test/CodeGen/RISCV/fcanonicalize.ll b/llvm/test/CodeGen/RISCV/fcanonicalize.ll
deleted file mode 100644
index 334657bdf92de..0000000000000
--- a/llvm/test/CodeGen/RISCV/fcanonicalize.ll
+++ /dev/null
@@ -1,64 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=riscv64 < %s | FileCheck -check-prefix=RV64I %s
-; RUN: llc -mtriple=riscv64 -mattr=+d < %s | FileCheck -check-prefix=RV64D %s
-
-define double @max(double, double) unnamed_addr #0 {
-; RV64I-LABEL: max:
-; RV64I:       # %bb.0: # %start
-; RV64I-NEXT:    addi sp, sp, -32
-; RV64I-NEXT:    .cfi_def_cfa_offset 32
-; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    .cfi_offset ra, -8
-; RV64I-NEXT:    .cfi_offset s0, -16
-; RV64I-NEXT:    .cfi_offset s1, -24
-; RV64I-NEXT:    .cfi_offset s2, -32
-; RV64I-NEXT:    mv s0, a1
-; RV64I-NEXT:    mv s1, a0
-; RV64I-NEXT:    call __ltdf2
-; RV64I-NEXT:    srli s2, a0, 63
-; RV64I-NEXT:    mv a0, s1
-; RV64I-NEXT:    mv a1, s1
-; RV64I-NEXT:    call __unorddf2
-; RV64I-NEXT:    snez a0, a0
-; RV64I-NEXT:    or a0, a0, s2
-; RV64I-NEXT:    bnez a0, .LBB0_2
-; RV64I-NEXT:  # %bb.1: # %start
-; RV64I-NEXT:    mv s0, s1
-; RV64I-NEXT:  .LBB0_2: # %start
-; RV64I-NEXT:    mv a0, s0
-; RV64I-NEXT:    call fmin
-; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    .cfi_restore ra
-; RV64I-NEXT:    .cfi_restore s0
-; RV64I-NEXT:    .cfi_restore s1
-; RV64I-NEXT:    .cfi_restore s2
-; RV64I-NEXT:    addi sp, sp, 32
-; RV64I-NEXT:    .cfi_def_cfa_offset 0
-; RV64I-NEXT:    ret
-;
-; RV64D-LABEL: max:
-; RV64D:       # %bb.0: # %start
-; RV64D-NEXT:    flt.d a0, fa0, fa1
-; RV64D-NEXT:    feq.d a1, fa0, fa0
-; RV64D-NEXT:    xori a1, a1, 1
-; RV64D-NEXT:    or a0, a1, a0
-; RV64D-NEXT:    bnez a0, .LBB0_2
-; RV64D-NEXT:  # %bb.1: # %start
-; RV64D-NEXT:    fmv.d fa1, fa0
-; RV64D-NEXT:  .LBB0_2: # %start
-; RV64D-NEXT:    fmin.d fa0, fa1, fa1
-; RV64D-NEXT:    ret
-start:
-  %2 = fcmp olt double %0, %1
-  %3 = fcmp uno double %0, 0.000000e+00
-  %or.cond.i.i = or i1 %3, %2
-  %4 = select i1 %or.cond.i.i, double %1, double %0
-  %5 = tail call double @llvm.canonicalize.f64(double %4) #2
-  ret double %5
-}
diff --git a/llvm/test/CodeGen/RISCV/fp-fcanonicalize.ll b/llvm/test/CodeGen/RISCV/fp-fcanonicalize.ll
index e9b771a0698de..5b0b77156ebb0 100644
--- a/llvm/test/CodeGen/RISCV/fp-fcanonicalize.ll
+++ b/llvm/test/CodeGen/RISCV/fp-fcanonicalize.ll
@@ -1,7 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; FIXME: @llvm.canonicalize doesn't support soft-float abi yet.
+; RUN: llc --mtriple=riscv64 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT-RV64
 ; RUN: llc --mtriple=riscv64 --mattr=+d,+zfh < %s | FileCheck %s --check-prefixes=CHECK,CHECK-FP16-RV64
 ; RUN: llc --mtriple=riscv64 --mattr=+d,-zfh < %s | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP16-RV64
+; RUN: llc --mtriple=riscv32 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT-RV32
 ; RUN: llc --mtriple=riscv32 --mattr=+d,+zfh < %s | FileCheck %s --check-prefixes=CHECK,CHECK-FP16-RV32
 ; RUN: llc --mtriple=riscv32 --mattr=+d,-zfh < %s | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP16-RV32
 
@@ -10,6 +11,42 @@ declare float @llvm.fcanonicalize.f32(float)
 declare double @llvm.fcanonicalize.f64(double)
 
 define half @fcanonicalize_f16(half %x) {
+; RV64-SOFT-LABEL: fcanonicalize_f16:
+; RV64-SOFT:       # %bb.0:
+; RV64-SOFT-NEXT:    addi sp, sp, -16
+; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 16
+; RV64-SOFT-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    .cfi_offset ra, -8
+; RV64-SOFT-NEXT:    slli a0, a0, 48
+; RV64-SOFT-NEXT:    srli a0, a0, 48
+; RV64-SOFT-NEXT:    call __extendhfsf2
+; RV64-SOFT-NEXT:    mv a1, a0
+; RV64-SOFT-NEXT:    call fminimum_numf
+; RV64-SOFT-NEXT:    call __truncsfhf2
+; RV64-SOFT-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    .cfi_restore ra
+; RV64-SOFT-NEXT:    addi sp, sp, 16
+; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 0
+; RV64-SOFT-NEXT:    ret
+;
+; CHECK-SOFT-RV64-LABEL: fcanonicalize_f16:
+; CHECK-SOFT-RV64:       # %bb.0:
+; CHECK-SOFT-RV64-NEXT:    addi sp, sp, -16
+; CHECK-SOFT-RV64-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-SOFT-RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset ra, -8
+; CHECK-SOFT-RV64-NEXT:    slli a0, a0, 48
+; CHECK-SOFT-RV64-NEXT:    srli a0, a0, 48
+; CHECK-SOFT-RV64-NEXT:    call __extendhfsf2
+; CHECK-SOFT-RV64-NEXT:    mv a1, a0
+; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    call __truncsfhf2
+; CHECK-SOFT-RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore ra
+; CHECK-SOFT-RV64-NEXT:    addi sp, sp, 16
+; CHECK-SOFT-RV64-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-SOFT-RV64-NEXT:    ret
+;
 ; CHECK-FP16-RV64-LABEL: fcanonicalize_f16:
 ; CHECK-FP16-RV64:       # %bb.0:
 ; CHECK-FP16-RV64-NEXT:    fmin.h fa0, fa0, fa0
@@ -34,6 +71,24 @@ define half @fcanonicalize_f16(half %x) {
 ; CHECK-NOFP16-RV64-NEXT:    .cfi_def_cfa_offset 0
 ; CHECK-NOFP16-RV64-NEXT:    ret
 ;
+; CHECK-SOFT-RV32-LABEL: fcanonicalize_f16:
+; CHECK-SOFT-RV32:       # %bb.0:
+; CHECK-SOFT-RV32-NEXT:    addi sp, sp, -16
+; CHECK-SOFT-RV32-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-SOFT-RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset ra, -4
+; CHECK-SOFT-RV32-NEXT:    slli a0, a0, 16
+; CHECK-SOFT-RV32-NEXT:    srli a0, a0, 16
+; CHECK-SOFT-RV32-NEXT:    call __extendhfsf2
+; CHECK-SOFT-RV32-NEXT:    mv a1, a0
+; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    call __truncsfhf2
+; CHECK-SOFT-RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore ra
+; CHECK-SOFT-RV32-NEXT:    addi sp, sp, 16
+; CHECK-SOFT-RV32-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-SOFT-RV32-NEXT:    ret
+;
 ; CHECK-FP16-RV32-LABEL: fcanonicalize_f16:
 ; CHECK-FP16-RV32:       # %bb.0:
 ; CHECK-FP16-RV32-NEXT:    fmin.h fa0, fa0, fa0
@@ -57,11 +112,64 @@ define half @fcanonicalize_f16(half %x) {
 ; CHECK-NOFP16-RV32-NEXT:    addi sp, sp, 16
 ; CHECK-NOFP16-RV32-NEXT:    .cfi_def_cfa_offset 0
 ; CHECK-NOFP16-RV32-NEXT:    ret
+; RV32-SOFT-LABEL: fcanonicalize_f16:
+; RV32-SOFT:       # %bb.0:
+; RV32-SOFT-NEXT:    addi sp, sp, -16
+; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 16
+; RV32-SOFT-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    .cfi_offset ra, -4
+; RV32-SOFT-NEXT:    slli a0, a0, 16
+; RV32-SOFT-NEXT:    srli a0, a0, 16
+; RV32-SOFT-NEXT:    call __extendhfsf2
+; RV32-SOFT-NEXT:    mv a1, a0
+; RV32-SOFT-NEXT:    call fminimum_numf
+; RV32-SOFT-NEXT:    call __truncsfhf2
+; RV32-SOFT-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    .cfi_restore ra
+; RV32-SOFT-NEXT:    addi sp, sp, 16
+; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 0
+; RV32-SOFT-NEXT:    ret
   %z = call half @llvm.canonicalize.f16(half %x)
   ret half %z
 }
 
 define half @fcanonicalize_f16_nnan(half %x) {
+; RV64-SOFT-LABEL: fcanonicalize_f16_nnan:
+; RV64-SOFT:       # %bb.0:
+; RV64-SOFT-NEXT:    addi sp, sp, -16
+; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 16
+; RV64-SOFT-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    .cfi_offset ra, -8
+; RV64-SOFT-NEXT:    slli a0, a0, 48
+; RV64-SOFT-NEXT:    srli a0, a0, 48
+; RV64-SOFT-NEXT:    call __extendhfsf2
+; RV64-SOFT-NEXT:    mv a1, a0
+; RV64-SOFT-NEXT:    call fminimum_numf
+; RV64-SOFT-NEXT:    call __truncsfhf2
+; RV64-SOFT-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    .cfi_restore ra
+; RV64-SOFT-NEXT:    addi sp, sp, 16
+; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 0
+; RV64-SOFT-NEXT:    ret
+;
+; CHECK-SOFT-RV64-LABEL: fcanonicalize_f16_nnan:
+; CHECK-SOFT-RV64:       # %bb.0:
+; CHECK-SOFT-RV64-NEXT:    addi sp, sp, -16
+; CHECK-SOFT-RV64-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-SOFT-RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset ra, -8
+; CHECK-SOFT-RV64-NEXT:    slli a0, a0, 48
+; CHECK-SOFT-RV64-NEXT:    srli a0, a0, 48
+; CHECK-SOFT-RV64-NEXT:    call __extendhfsf2
+; CHECK-SOFT-RV64-NEXT:    mv a1, a0
+; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    call __truncsfhf2
+; CHECK-SOFT-RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore ra
+; CHECK-SOFT-RV64-NEXT:    addi sp, sp, 16
+; CHECK-SOFT-RV64-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-SOFT-RV64-NEXT:    ret
+;
 ; CHECK-FP16-RV64-LABEL: fcanonicalize_f16_nnan:
 ; CHECK-FP16-RV64:       # %bb.0:
 ; CHECK-FP16-RV64-NEXT:    fmin.h fa0, fa0, fa0
@@ -86,6 +194,24 @@ define half @fcanonicalize_f16_nnan(half %x) {
 ; CHECK-NOFP16-RV64-NEXT:    .cfi_def_cfa_offset 0
 ; CHECK-NOFP16-RV64-NEXT:    ret
 ;
+; CHECK-SOFT-RV32-LABEL: fcanonicalize_f16_nnan:
+; CHECK-SOFT-RV32:       # %bb.0:
+; CHECK-SOFT-RV32-NEXT:    addi sp, sp, -16
+; CHECK-SOFT-RV32-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-SOFT-RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset ra, -4
+; CHECK-SOFT-RV32-NEXT:    slli a0, a0, 16
+; CHECK-SOFT-RV32-NEXT:    srli a0, a0, 16
+; CHECK-SOFT-RV32-NEXT:    call __extendhfsf2
+; CHECK-SOFT-RV32-NEXT:    mv a1, a0
+; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    call __truncsfhf2
+; CHECK-SOFT-RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore ra
+; CHECK-SOFT-RV32-NEXT:    addi sp, sp, 16
+; CHECK-SOFT-RV32-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-SOFT-RV32-NEXT:    ret
+;
 ; CHECK-FP16-RV32-LABEL: fcanonicalize_f16_nnan:
 ; CHECK-FP16-RV32:       # %bb.0:
 ; CHECK-FP16-RV32-NEXT:    fmin.h fa0, fa0, fa0
@@ -109,11 +235,108 @@ define half @fcanonicalize_f16_nnan(half %x) {
 ; CHECK-NOFP16-RV32-NEXT:    addi sp, sp, 16
 ; CHECK-NOFP16-RV32-NEXT:    .cfi_def_cfa_offset 0
 ; CHECK-NOFP16-RV32-NEXT:    ret
+; RV32-SOFT-LABEL: fcanonicalize_f16_nnan:
+; RV32-SOFT:       # %bb.0:
+; RV32-SOFT-NEXT:    addi sp, sp, -16
+; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 16
+; RV32-SOFT-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    .cfi_offset ra, -4
+; RV32-SOFT-NEXT:    slli a0, a0, 16
+; RV32-SOFT-NEXT:    srli a0, a0, 16
+; RV32-SOFT-NEXT:    call __extendhfsf2
+; RV32-SOFT-NEXT:    mv a1, a0
+; RV32-SOFT-NEXT:    call fminimum_numf
+; RV32-SOFT-NEXT:    call __truncsfhf2
+; RV32-SOFT-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    .cfi_restore ra
+; RV32-SOFT-NEXT:    addi sp, sp, 16
+; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 0
+; RV32-SOFT-NEXT:    ret
   %z = call nnan half @llvm.canonicalize.f16(half %x)
   ret half %z
 }
 
 define <2 x half> @fcanonicalize_v2f16(<2 x half> %x) {
+; RV64-SOFT-LABEL: fcanonicalize_v2f16:
+; RV64-SOFT:       # %bb.0:
+; RV64-SOFT-NEXT:    addi sp, sp, -32
+; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 32
+; RV64-SOFT-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    .cfi_offset ra, -8
+; RV64-SOFT-NEXT:    .cfi_offset s0, -16
+; RV64-SOFT-NEXT:    .cfi_offset s1, -24
+; RV64-SOFT-NEXT:    .cfi_offset s2, -32
+; RV64-SOFT-NEXT:    mv s0, a1
+; RV64-SOFT-NEXT:    lui a1, 16
+; RV64-SOFT-NEXT:    addi s2, a1, -1
+; RV64-SOFT-NEXT:    and a0, a0, s2
+; RV64-SOFT-NEXT:    call __extendhfsf2
+; RV64-SOFT-NEXT:    mv a1, a0
+; RV64-SOFT-NEXT:    call fminimum_numf
+; RV64-SOFT-NEXT:    call __truncsfhf2
+; RV64-SOFT-NEXT:    mv s1, a0
+; RV64-SOFT-NEXT:    and a0, s0, s2
+; RV64-SOFT-NEXT:    call __extendhfsf2
+; RV64-SOFT-NEXT:    mv a1, a0
+; RV64-SOFT-NEXT:    call fminimum_numf
+; RV64-SOFT-NEXT:    call __truncsfhf2
+; RV64-SOFT-NEXT:    mv a1, a0
+; RV64-SOFT-NEXT:    mv a0, s1
+; RV64-SOFT-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    .cfi_restore ra
+; RV64-SOFT-NEXT:    .cfi_restore s0
+; RV64-SOFT-NEXT:    .cfi_restore s1
+; RV64-SOFT-NEXT:    .cfi_restore s2
+; RV64-SOFT-NEXT:    addi sp, sp, 32
+; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 0
+; RV64-SOFT-NEXT:    ret
+;
+; CHECK-SOFT-RV64-LABEL: fcanonicalize_v2f16:
+; CHECK-SOFT-RV64:       # %bb.0:
+; CHECK-SOFT-RV64-NEXT:    addi sp, sp, -32
+; CHECK-SOFT-RV64-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-SOFT-RV64-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset ra, -8
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset s0, -16
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset s1, -24
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset s2, -32
+; CHECK-SOFT-RV64-NEXT:    mv s0, a1
+; CHECK-SOFT-RV64-NEXT:    lui a1, 16
+; CHECK-SOFT-RV64-NEXT:    addi s2, a1, -1
+; CHECK-SOFT-RV64-NEXT:    and a0, a0, s2
+; CHECK-SOFT-RV64-NEXT:    call __extendhfsf2
+; CHECK-SOFT-RV64-NEXT:    mv a1, a0
+; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    call __truncsfhf2
+; CHECK-SOFT-RV64-NEXT:    mv s1, a0
+; CHECK-SOFT-RV64-NEXT:    and a0, s0, s2
+; CHECK-SOFT-RV64-NEXT:    call __extendhfsf2
+; CHECK-SOFT-RV64-NEXT:    mv a1, a0
+; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    call __truncsfhf2
+; CHECK-SOFT-RV64-NEXT:    mv a1, a0
+; CHECK-SOFT-RV64-NEXT:    mv a0, s1
+; CHECK-SOFT-RV64-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore ra
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore s0
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore s1
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore s2
+; CHECK-SOFT-RV64-NEXT:    addi sp, sp, 32
+; CHECK-SOFT-RV64-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-SOFT-RV64-NEXT:    ret
+;
 ; CHECK-FP16-RV64-LABEL: fcanonicalize_v2f16:
 ; CHECK-FP16-RV64:       # %bb.0:
 ; CHECK-FP16-RV64-NEXT:    fmin.h fa0, fa0, fa0
@@ -152,6 +375,46 @@ define <2 x half> @fcanonicalize_v2f16(<2 x half> %x) {
 ; CHECK-NOFP16-RV64-NEXT:    .cfi_def_cfa_offset 0
 ; CHECK-NOFP16-RV64-NEXT:    ret
 ;
+; CHECK-SOFT-RV32-LABEL: fcanonicalize_v2f16:
+; CHECK-SOFT-RV32:       # %bb.0:
+; CHECK-SOFT-RV32-NEXT:    addi sp, sp, -16
+; CHECK-SOFT-RV32-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-SOFT-RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset ra, -4
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s0, -8
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s1, -12
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s2, -16
+; CHECK-SOFT-RV32-NEXT:    mv s0, a1
+; CHECK-SOFT-RV32-NEXT:    lui a1, 16
+; CHECK-SOFT-RV32-NEXT:    addi s2, a1, -1
+; CHECK-SOFT-RV32-NEXT:    and a0, a0, s2
+; CHECK-SOFT-RV32-NEXT:    call __extendhfsf2
+; CHECK-SOFT-RV32-NEXT:    mv a1, a0
+; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    call __truncsfhf2
+; CHECK-SOFT-RV32-NEXT:    mv s1, a0
+; CHECK-SOFT-RV32-NEXT:    and a0, s0, s2
+; CHECK-SOFT-RV32-NEXT:    call __extendhfsf2
+; CHECK-SOFT-RV32-NEXT:    mv a1, a0
+; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    call __truncsfhf2
+; CHECK-SOFT-RV32-NEXT:    mv a1, a0
+; CHECK-SOFT-RV32-NEXT:    mv a0, s1
+; CHECK-SOFT-RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore ra
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s0
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s1
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s2
+; CHECK-SOFT-RV32-NEXT:    addi sp, sp, 16
+; CHECK-SOFT-RV32-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-SOFT-RV32-NEXT:    ret
+;
 ; CHECK-FP16-RV32-LABEL: fcanonicalize_v2f16:
 ; CHECK-FP16-RV32:       # %bb.0:
 ; CHECK-FP16-RV32-NEXT:    fmin.h fa0, fa0, fa0
@@ -189,11 +452,130 @@ define <2 x half> @fcanonicalize_v2f16(<2 x half> %x) {
 ; CHECK-NOFP16-RV32-NEXT:    addi sp, sp, 16
 ; CHECK-NOFP16-RV32-NEXT:    .cfi_def_cfa_offset 0
 ; CHECK-NOFP16-RV32-NEXT:    ret
+; RV32-SOFT-LABEL: fcanonicalize_v2f16:
+; RV32-SOFT:       # %bb.0:
+; RV32-SOFT-NEXT:    addi sp, sp, -16
+; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 16
+; RV32-SOFT-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    .cfi_offset ra, -4
+; RV32-SOFT-NEXT:    .cfi_offset s0, -8
+; RV32-SOFT-NEXT:    .cfi_offset s1, -12
+; RV32-SOFT-NEXT:    .cfi_offset s2, -16
+; RV32-SOFT-NEXT:    mv s0, a1
+; RV32-SOFT-NEXT:    lui a1, 16
+; RV32-SOFT-NEXT:    addi s2, a1, -1
+; RV32-SOFT-NEXT:    and a0, a0, s2
+; RV32-SOFT-NEXT:    call __extendhfsf2
+; RV32-SOFT-NEXT:    mv a1, a0
+; RV32-SOFT-NEXT:    call fminimum_numf
+; RV32-SOFT-NEXT:    call __truncsfhf2
+; RV32-SOFT-NEXT:    mv s1, a0
+; RV32-SOFT-NEXT:    and a0, s0, s2
+; RV32-SOFT-NEXT:    call __extendhfsf2
+; RV32-SOFT-NEXT:    mv a1, a0
+; RV32-SOFT-NEXT:    call fminimum_numf
+; RV32-SOFT-NEXT:    call __truncsfhf2
+; RV32-SOFT-NEXT:    mv a1, a0
+; RV32-SOFT-NEXT:    mv a0, s1
+; RV32-SOFT-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    .cfi_restore ra
+; RV32-SOFT-NEXT:    .cfi_restore s0
+; RV32-SOFT-NEXT:    .cfi_restore s1
+; RV32-SOFT-NEXT:    .cfi_restore s2
+; RV32-SOFT-NEXT:    addi sp, sp, 16
+; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 0
+; RV32-SOFT-NEXT:    ret
   %z = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %x)
   ret <2 x half> %z
 }
 
 define <2 x half> @fcanonicalize_v2f16_nnan(<2 x half> %x) {
+; RV64-SOFT-LABEL: fcanonicalize_v2f16_nnan:
+; RV64-SOFT:       # %bb.0:
+; RV64-SOFT-NEXT:    addi sp, sp, -32
+; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 32
+; RV64-SOFT-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    .cfi_offset ra, -8
+; RV64-SOFT-NEXT:    .cfi_offset s0, -16
+; RV64-SOFT-NEXT:    .cfi_offset s1, -24
+; RV64-SOFT-NEXT:    .cfi_offset s2, -32
+; RV64-SOFT-NEXT:    mv s0, a1
+; RV64-SOFT-NEXT:    lui a1, 16
+; RV64-SOFT-NEXT:    addi s2, a1, -1
+; RV64-SOFT-NEXT:    and a0, a0, s2
+; RV64-SOFT-NEXT:    call __extendhfsf2
+; RV64-SOFT-NEXT:    mv a1, a0
+; RV64-SOFT-NEXT:    call fminimum_numf
+; RV64-SOFT-NEXT:    call __truncsfhf2
+; RV64-SOFT-NEXT:    mv s1, a0
+; RV64-SOFT-NEXT:    and a0, s0, s2
+; RV64-SOFT-NEXT:    call __extendhfsf2
+; RV64-SOFT-NEXT:    mv a1, a0
+; RV64-SOFT-NEXT:    call fminimum_numf
+; RV64-SOFT-NEXT:    call __truncsfhf2
+; RV64-SOFT-NEXT:    mv a1, a0
+; RV64-SOFT-NEXT:    mv a0, s1
+; RV64-SOFT-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    .cfi_restore ra
+; RV64-SOFT-NEXT:    .cfi_restore s0
+; RV64-SOFT-NEXT:    .cfi_restore s1
+; RV64-SOFT-NEXT:    .cfi_restore s2
+; RV64-SOFT-NEXT:    addi sp, sp, 32
+; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 0
+; RV64-SOFT-NEXT:    ret
+;
+; CHECK-SOFT-RV64-LABEL: fcanonicalize_v2f16_nnan:
+; CHECK-SOFT-RV64:       # %bb.0:
+; CHECK-SOFT-RV64-NEXT:    addi sp, sp, -32
+; CHECK-SOFT-RV64-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-SOFT-RV64-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset ra, -8
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset s0, -16
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset s1, -24
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset s2, -32
+; CHECK-SOFT-RV64-NEXT:    mv s0, a1
+; CHECK-SOFT-RV64-NEXT:    lui a1, 16
+; CHECK-SOFT-RV64-NEXT:    addi s2, a1, -1
+; CHECK-SOFT-RV64-NEXT:    and a0, a0, s2
+; CHECK-SOFT-RV64-NEXT:    call __extendhfsf2
+; CHECK-SOFT-RV64-NEXT:    mv a1, a0
+; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    call __truncsfhf2
+; CHECK-SOFT-RV64-NEXT:    mv s1, a0
+; CHECK-SOFT-RV64-NEXT:    and a0, s0, s2
+; CHECK-SOFT-RV64-NEXT:    call __extendhfsf2
+; CHECK-SOFT-RV64-NEXT:    mv a1, a0
+; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    call __truncsfhf2
+; CHECK-SOFT-RV64-NEXT:    mv a1, a0
+; CHECK-SOFT-RV64-NEXT:    mv a0, s1
+; CHECK-SOFT-RV64-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore ra
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore s0
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore s1
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore s2
+; CHECK-SOFT-RV64-NEXT:    addi sp, sp, 32
+; CHECK-SOFT-RV64-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-SOFT-RV64-NEXT:    ret
+;
 ; CHECK-FP16-RV64-LABEL: fcanonicalize_v2f16_nnan:
 ; CHECK-FP16-RV64:       # %bb.0:
 ; CHECK-FP16-RV64-NEXT:    fmin.h fa0, fa0, fa0
@@ -232,6 +614,46 @@ define <2 x half> @fcanonicalize_v2f16_nnan(<2 x half> %x) {
 ; CHECK-NOFP16-RV64-NEXT:    .cfi_def_cfa_offset 0
 ; CHECK-NOFP16-RV64-NEXT:    ret
 ;
+; CHECK-SOFT-RV32-LABEL: fcanonicalize_v2f16_nnan:
+; CHECK-SOFT-RV32:       # %bb.0:
+; CHECK-SOFT-RV32-NEXT:    addi sp, sp, -16
+; CHECK-SOFT-RV32-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-SOFT-RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset ra, -4
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s0, -8
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s1, -12
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s2, -16
+; CHECK-SOFT-RV32-NEXT:    mv s0, a1
+; CHECK-SOFT-RV32-NEXT:    lui a1, 16
+; CHECK-SOFT-RV32-NEXT:    addi s2, a1, -1
+; CHECK-SOFT-RV32-NEXT:    and a0, a0, s2
+; CHECK-SOFT-RV32-NEXT:    call __extendhfsf2
+; CHECK-SOFT-RV32-NEXT:    mv a1, a0
+; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    call __truncsfhf2
+; CHECK-SOFT-RV32-NEXT:    mv s1, a0
+; CHECK-SOFT-RV32-NEXT:    and a0, s0, s2
+; CHECK-SOFT-RV32-NEXT:    call __extendhfsf2
+; CHECK-SOFT-RV32-NEXT:    mv a1, a0
+; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    call __truncsfhf2
+; CHECK-SOFT-RV32-NEXT:    mv a1, a0
+; CHECK-SOFT-RV32-NEXT:    mv a0, s1
+; CHECK-SOFT-RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore ra
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s0
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s1
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s2
+; CHECK-SOFT-RV32-NEXT:    addi sp, sp, 16
+; CHECK-SOFT-RV32-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-SOFT-RV32-NEXT:    ret
+;
 ; CHECK-FP16-RV32-LABEL: fcanonicalize_v2f16_nnan:
 ; CHECK-FP16-RV32:       # %bb.0:
 ; CHECK-FP16-RV32-NEXT:    fmin.h fa0, fa0, fa0
@@ -269,11 +691,178 @@ define <2 x half> @fcanonicalize_v2f16_nnan(<2 x half> %x) {
 ; CHECK-NOFP16-RV32-NEXT:    addi sp, sp, 16
 ; CHECK-NOFP16-RV32-NEXT:    .cfi_def_cfa_offset 0
 ; CHECK-NOFP16-RV32-NEXT:    ret
+; RV32-SOFT-LABEL: fcanonicalize_v2f16_nnan:
+; RV32-SOFT:       # %bb.0:
+; RV32-SOFT-NEXT:    addi sp, sp, -16
+; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 16
+; RV32-SOFT-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    .cfi_offset ra, -4
+; RV32-SOFT-NEXT:    .cfi_offset s0, -8
+; RV32-SOFT-NEXT:    .cfi_offset s1, -12
+; RV32-SOFT-NEXT:    .cfi_offset s2, -16
+; RV32-SOFT-NEXT:    mv s0, a1
+; RV32-SOFT-NEXT:    lui a1, 16
+; RV32-SOFT-NEXT:    addi s2, a1, -1
+; RV32-SOFT-NEXT:    and a0, a0, s2
+; RV32-SOFT-NEXT:    call __extendhfsf2
+; RV32-SOFT-NEXT:    mv a1, a0
+; RV32-SOFT-NEXT:    call fminimum_numf
+; RV32-SOFT-NEXT:    call __truncsfhf2
+; RV32-SOFT-NEXT:    mv s1, a0
+; RV32-SOFT-NEXT:    and a0, s0, s2
+; RV32-SOFT-NEXT:    call __extendhfsf2
+; RV32-SOFT-NEXT:    mv a1, a0
+; RV32-SOFT-NEXT:    call fminimum_numf
+; RV32-SOFT-NEXT:    call __truncsfhf2
+; RV32-SOFT-NEXT:    mv a1, a0
+; RV32-SOFT-NEXT:    mv a0, s1
+; RV32-SOFT-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    .cfi_restore ra
+; RV32-SOFT-NEXT:    .cfi_restore s0
+; RV32-SOFT-NEXT:    .cfi_restore s1
+; RV32-SOFT-NEXT:    .cfi_restore s2
+; RV32-SOFT-NEXT:    addi sp, sp, 16
+; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 0
+; RV32-SOFT-NEXT:    ret
   %z = call nnan <2 x half> @llvm.canonicalize.v2f16(<2 x half> %x)
   ret <2 x half> %z
 }
 
 define <4 x half> @fcanonicalize_v4f16(<4 x half> %x) {
+; RV64-SOFT-LABEL: fcanonicalize_v4f16:
+; RV64-SOFT:       # %bb.0:
+; RV64-SOFT-NEXT:    addi sp, sp, -48
+; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 48
+; RV64-SOFT-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    sd s4, 0(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    .cfi_offset ra, -8
+; RV64-SOFT-NEXT:    .cfi_offset s0, -16
+; RV64-SOFT-NEXT:    .cfi_offset s1, -24
+; RV64-SOFT-NEXT:    .cfi_offset s2, -32
+; RV64-SOFT-NEXT:    .cfi_offset s3, -40
+; RV64-SOFT-NEXT:    .cfi_offset s4, -48
+; RV64-SOFT-NEXT:    lhu a2, 0(a1)
+; RV64-SOFT-NEXT:    lhu s1, 8(a1)
+; RV64-SOFT-NEXT:    lhu s2, 16(a1)
+; RV64-SOFT-NEXT:    lhu s3, 24(a1)
+; RV64-SOFT-NEXT:    mv s0, a0
+; RV64-SOFT-NEXT:    mv a0, a2
+; RV64-SOFT-NEXT:    call __extendhfsf2
+; RV64-SOFT-NEXT:    mv a1, a0
+; RV64-SOFT-NEXT:    call fminimum_numf
+; RV64-SOFT-NEXT:    call __truncsfhf2
+; RV64-SOFT-NEXT:    mv s4, a0
+; RV64-SOFT-NEXT:    mv a0, s1
+; RV64-SOFT-NEXT:    call __extendhfsf2
+; RV64-SOFT-NEXT:    mv a1, a0
+; RV64-SOFT-NEXT:    call fminimum_numf
+; RV64-SOFT-NEXT:    call __truncsfhf2
+; RV64-SOFT-NEXT:    mv s1, a0
+; RV64-SOFT-NEXT:    mv a0, s2
+; RV64-SOFT-NEXT:    call __extendhfsf2
+; RV64-SOFT-NEXT:    mv a1, a0
+; RV64-SOFT-NEXT:    call fminimum_numf
+; RV64-SOFT-NEXT:    call __truncsfhf2
+; RV64-SOFT-NEXT:    mv s2, a0
+; RV64-SOFT-NEXT:    mv a0, s3
+; RV64-SOFT-NEXT:    call __extendhfsf2
+; RV64-SOFT-NEXT:    mv a1, a0
+; RV64-SOFT-NEXT:    call fminimum_numf
+; RV64-SOFT-NEXT:    call __truncsfhf2
+; RV64-SOFT-NEXT:    sh s4, 0(s0)
+; RV64-SOFT-NEXT:    sh s1, 2(s0)
+; RV64-SOFT-NEXT:    sh s2, 4(s0)
+; RV64-SOFT-NEXT:    sh a0, 6(s0)
+; RV64-SOFT-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    ld s4, 0(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    .cfi_restore ra
+; RV64-SOFT-NEXT:    .cfi_restore s0
+; RV64-SOFT-NEXT:    .cfi_restore s1
+; RV64-SOFT-NEXT:    .cfi_restore s2
+; RV64-SOFT-NEXT:    .cfi_restore s3
+; RV64-SOFT-NEXT:    .cfi_restore s4
+; RV64-SOFT-NEXT:    addi sp, sp, 48
+; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 0
+; RV64-SOFT-NEXT:    ret
+;
+; CHECK-SOFT-RV64-LABEL: fcanonicalize_v4f16:
+; CHECK-SOFT-RV64:       # %bb.0:
+; CHECK-SOFT-RV64-NEXT:    addi sp, sp, -48
+; CHECK-SOFT-RV64-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-SOFT-RV64-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    sd s4, 0(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset ra, -8
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset s0, -16
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset s1, -24
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset s2, -32
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset s3, -40
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset s4, -48
+; CHECK-SOFT-RV64-NEXT:    lhu a2, 0(a1)
+; CHECK-SOFT-RV64-NEXT:    lhu s1, 8(a1)
+; CHECK-SOFT-RV64-NEXT:    lhu s2, 16(a1)
+; CHECK-SOFT-RV64-NEXT:    lhu s3, 24(a1)
+; CHECK-SOFT-RV64-NEXT:    mv s0, a0
+; CHECK-SOFT-RV64-NEXT:    mv a0, a2
+; CHECK-SOFT-RV64-NEXT:    call __extendhfsf2
+; CHECK-SOFT-RV64-NEXT:    mv a1, a0
+; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    call __truncsfhf2
+; CHECK-SOFT-RV64-NEXT:    mv s4, a0
+; CHECK-SOFT-RV64-NEXT:    mv a0, s1
+; CHECK-SOFT-RV64-NEXT:    call __extendhfsf2
+; CHECK-SOFT-RV64-NEXT:    mv a1, a0
+; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    call __truncsfhf2
+; CHECK-SOFT-RV64-NEXT:    mv s1, a0
+; CHECK-SOFT-RV64-NEXT:    mv a0, s2
+; CHECK-SOFT-RV64-NEXT:    call __extendhfsf2
+; CHECK-SOFT-RV64-NEXT:    mv a1, a0
+; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    call __truncsfhf2
+; CHECK-SOFT-RV64-NEXT:    mv s2, a0
+; CHECK-SOFT-RV64-NEXT:    mv a0, s3
+; CHECK-SOFT-RV64-NEXT:    call __extendhfsf2
+; CHECK-SOFT-RV64-NEXT:    mv a1, a0
+; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    call __truncsfhf2
+; CHECK-SOFT-RV64-NEXT:    sh s4, 0(s0)
+; CHECK-SOFT-RV64-NEXT:    sh s1, 2(s0)
+; CHECK-SOFT-RV64-NEXT:    sh s2, 4(s0)
+; CHECK-SOFT-RV64-NEXT:    sh a0, 6(s0)
+; CHECK-SOFT-RV64-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    ld s4, 0(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore ra
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore s0
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore s1
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore s2
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore s3
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore s4
+; CHECK-SOFT-RV64-NEXT:    addi sp, sp, 48
+; CHECK-SOFT-RV64-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-SOFT-RV64-NEXT:    ret
+;
 ; CHECK-FP16-RV64-LABEL: fcanonicalize_v4f16:
 ; CHECK-FP16-RV64:       # %bb.0:
 ; CHECK-FP16-RV64-NEXT:    fmin.h fa5, fa0, fa0
@@ -358,6 +947,70 @@ define <4 x half> @fcanonicalize_v4f16(<4 x half> %x) {
 ; CHECK-NOFP16-RV64-NEXT:    .cfi_def_cfa_offset 0
 ; CHECK-NOFP16-RV64-NEXT:    ret
 ;
+; CHECK-SOFT-RV32-LABEL: fcanonicalize_v4f16:
+; CHECK-SOFT-RV32:       # %bb.0:
+; CHECK-SOFT-RV32-NEXT:    addi sp, sp, -32
+; CHECK-SOFT-RV32-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-SOFT-RV32-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset ra, -4
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s0, -8
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s1, -12
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s2, -16
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s3, -20
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s4, -24
+; CHECK-SOFT-RV32-NEXT:    lhu a2, 0(a1)
+; CHECK-SOFT-RV32-NEXT:    lhu s1, 4(a1)
+; CHECK-SOFT-RV32-NEXT:    lhu s2, 8(a1)
+; CHECK-SOFT-RV32-NEXT:    lhu s3, 12(a1)
+; CHECK-SOFT-RV32-NEXT:    mv s0, a0
+; CHECK-SOFT-RV32-NEXT:    mv a0, a2
+; CHECK-SOFT-RV32-NEXT:    call __extendhfsf2
+; CHECK-SOFT-RV32-NEXT:    mv a1, a0
+; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    call __truncsfhf2
+; CHECK-SOFT-RV32-NEXT:    mv s4, a0
+; CHECK-SOFT-RV32-NEXT:    mv a0, s1
+; CHECK-SOFT-RV32-NEXT:    call __extendhfsf2
+; CHECK-SOFT-RV32-NEXT:    mv a1, a0
+; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    call __truncsfhf2
+; CHECK-SOFT-RV32-NEXT:    mv s1, a0
+; CHECK-SOFT-RV32-NEXT:    mv a0, s2
+; CHECK-SOFT-RV32-NEXT:    call __extendhfsf2
+; CHECK-SOFT-RV32-NEXT:    mv a1, a0
+; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    call __truncsfhf2
+; CHECK-SOFT-RV32-NEXT:    mv s2, a0
+; CHECK-SOFT-RV32-NEXT:    mv a0, s3
+; CHECK-SOFT-RV32-NEXT:    call __extendhfsf2
+; CHECK-SOFT-RV32-NEXT:    mv a1, a0
+; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    call __truncsfhf2
+; CHECK-SOFT-RV32-NEXT:    sh s4, 0(s0)
+; CHECK-SOFT-RV32-NEXT:    sh s1, 2(s0)
+; CHECK-SOFT-RV32-NEXT:    sh s2, 4(s0)
+; CHECK-SOFT-RV32-NEXT:    sh a0, 6(s0)
+; CHECK-SOFT-RV32-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore ra
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s0
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s1
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s2
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s3
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s4
+; CHECK-SOFT-RV32-NEXT:    addi sp, sp, 32
+; CHECK-SOFT-RV32-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-SOFT-RV32-NEXT:    ret
+;
 ; CHECK-FP16-RV32-LABEL: fcanonicalize_v4f16:
 ; CHECK-FP16-RV32:       # %bb.0:
 ; CHECK-FP16-RV32-NEXT:    fmin.h fa5, fa0, fa0
@@ -448,11 +1101,202 @@ define <4 x half> @fcanonicalize_v4f16(<4 x half> %x) {
 ; CHECK-NOFP16-RV32-NEXT:    addi sp, sp, 64
 ; CHECK-NOFP16-RV32-NEXT:    .cfi_def_cfa_offset 0
 ; CHECK-NOFP16-RV32-NEXT:    ret
+; RV32-SOFT-LABEL: fcanonicalize_v4f16:
+; RV32-SOFT:       # %bb.0:
+; RV32-SOFT-NEXT:    addi sp, sp, -32
+; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 32
+; RV32-SOFT-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    .cfi_offset ra, -4
+; RV32-SOFT-NEXT:    .cfi_offset s0, -8
+; RV32-SOFT-NEXT:    .cfi_offset s1, -12
+; RV32-SOFT-NEXT:    .cfi_offset s2, -16
+; RV32-SOFT-NEXT:    .cfi_offset s3, -20
+; RV32-SOFT-NEXT:    .cfi_offset s4, -24
+; RV32-SOFT-NEXT:    lhu a2, 0(a1)
+; RV32-SOFT-NEXT:    lhu s1, 4(a1)
+; RV32-SOFT-NEXT:    lhu s2, 8(a1)
+; RV32-SOFT-NEXT:    lhu s3, 12(a1)
+; RV32-SOFT-NEXT:    mv s0, a0
+; RV32-SOFT-NEXT:    mv a0, a2
+; RV32-SOFT-NEXT:    call __extendhfsf2
+; RV32-SOFT-NEXT:    mv a1, a0
+; RV32-SOFT-NEXT:    call fminimum_numf
+; RV32-SOFT-NEXT:    call __truncsfhf2
+; RV32-SOFT-NEXT:    mv s4, a0
+; RV32-SOFT-NEXT:    mv a0, s1
+; RV32-SOFT-NEXT:    call __extendhfsf2
+; RV32-SOFT-NEXT:    mv a1, a0
+; RV32-SOFT-NEXT:    call fminimum_numf
+; RV32-SOFT-NEXT:    call __truncsfhf2
+; RV32-SOFT-NEXT:    mv s1, a0
+; RV32-SOFT-NEXT:    mv a0, s2
+; RV32-SOFT-NEXT:    call __extendhfsf2
+; RV32-SOFT-NEXT:    mv a1, a0
+; RV32-SOFT-NEXT:    call fminimum_numf
+; RV32-SOFT-NEXT:    call __truncsfhf2
+; RV32-SOFT-NEXT:    mv s2, a0
+; RV32-SOFT-NEXT:    mv a0, s3
+; RV32-SOFT-NEXT:    call __extendhfsf2
+; RV32-SOFT-NEXT:    mv a1, a0
+; RV32-SOFT-NEXT:    call fminimum_numf
+; RV32-SOFT-NEXT:    call __truncsfhf2
+; RV32-SOFT-NEXT:    sh s4, 0(s0)
+; RV32-SOFT-NEXT:    sh s1, 2(s0)
+; RV32-SOFT-NEXT:    sh s2, 4(s0)
+; RV32-SOFT-NEXT:    sh a0, 6(s0)
+; RV32-SOFT-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    .cfi_restore ra
+; RV32-SOFT-NEXT:    .cfi_restore s0
+; RV32-SOFT-NEXT:    .cfi_restore s1
+; RV32-SOFT-NEXT:    .cfi_restore s2
+; RV32-SOFT-NEXT:    .cfi_restore s3
+; RV32-SOFT-NEXT:    .cfi_restore s4
+; RV32-SOFT-NEXT:    addi sp, sp, 32
+; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 0
+; RV32-SOFT-NEXT:    ret
   %z = call <4 x half> @llvm.canonicalize.v4f16(<4 x half> %x)
   ret <4 x half> %z
 }
 
 define <4 x half> @fcanonicalize_v4f16_nnan(<4 x half> %x) {
+; RV64-SOFT-LABEL: fcanonicalize_v4f16_nnan:
+; RV64-SOFT:       # %bb.0:
+; RV64-SOFT-NEXT:    addi sp, sp, -48
+; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 48
+; RV64-SOFT-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    sd s4, 0(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    .cfi_offset ra, -8
+; RV64-SOFT-NEXT:    .cfi_offset s0, -16
+; RV64-SOFT-NEXT:    .cfi_offset s1, -24
+; RV64-SOFT-NEXT:    .cfi_offset s2, -32
+; RV64-SOFT-NEXT:    .cfi_offset s3, -40
+; RV64-SOFT-NEXT:    .cfi_offset s4, -48
+; RV64-SOFT-NEXT:    lhu a2, 0(a1)
+; RV64-SOFT-NEXT:    lhu s1, 8(a1)
+; RV64-SOFT-NEXT:    lhu s2, 16(a1)
+; RV64-SOFT-NEXT:    lhu s3, 24(a1)
+; RV64-SOFT-NEXT:    mv s0, a0
+; RV64-SOFT-NEXT:    mv a0, a2
+; RV64-SOFT-NEXT:    call __extendhfsf2
+; RV64-SOFT-NEXT:    mv a1, a0
+; RV64-SOFT-NEXT:    call fminimum_numf
+; RV64-SOFT-NEXT:    call __truncsfhf2
+; RV64-SOFT-NEXT:    mv s4, a0
+; RV64-SOFT-NEXT:    mv a0, s1
+; RV64-SOFT-NEXT:    call __extendhfsf2
+; RV64-SOFT-NEXT:    mv a1, a0
+; RV64-SOFT-NEXT:    call fminimum_numf
+; RV64-SOFT-NEXT:    call __truncsfhf2
+; RV64-SOFT-NEXT:    mv s1, a0
+; RV64-SOFT-NEXT:    mv a0, s2
+; RV64-SOFT-NEXT:    call __extendhfsf2
+; RV64-SOFT-NEXT:    mv a1, a0
+; RV64-SOFT-NEXT:    call fminimum_numf
+; RV64-SOFT-NEXT:    call __truncsfhf2
+; RV64-SOFT-NEXT:    mv s2, a0
+; RV64-SOFT-NEXT:    mv a0, s3
+; RV64-SOFT-NEXT:    call __extendhfsf2
+; RV64-SOFT-NEXT:    mv a1, a0
+; RV64-SOFT-NEXT:    call fminimum_numf
+; RV64-SOFT-NEXT:    call __truncsfhf2
+; RV64-SOFT-NEXT:    sh s4, 0(s0)
+; RV64-SOFT-NEXT:    sh s1, 2(s0)
+; RV64-SOFT-NEXT:    sh s2, 4(s0)
+; RV64-SOFT-NEXT:    sh a0, 6(s0)
+; RV64-SOFT-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    ld s4, 0(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    .cfi_restore ra
+; RV64-SOFT-NEXT:    .cfi_restore s0
+; RV64-SOFT-NEXT:    .cfi_restore s1
+; RV64-SOFT-NEXT:    .cfi_restore s2
+; RV64-SOFT-NEXT:    .cfi_restore s3
+; RV64-SOFT-NEXT:    .cfi_restore s4
+; RV64-SOFT-NEXT:    addi sp, sp, 48
+; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 0
+; RV64-SOFT-NEXT:    ret
+;
+; CHECK-SOFT-RV64-LABEL: fcanonicalize_v4f16_nnan:
+; CHECK-SOFT-RV64:       # %bb.0:
+; CHECK-SOFT-RV64-NEXT:    addi sp, sp, -48
+; CHECK-SOFT-RV64-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-SOFT-RV64-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    sd s4, 0(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset ra, -8
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset s0, -16
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset s1, -24
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset s2, -32
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset s3, -40
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset s4, -48
+; CHECK-SOFT-RV64-NEXT:    lhu a2, 0(a1)
+; CHECK-SOFT-RV64-NEXT:    lhu s1, 8(a1)
+; CHECK-SOFT-RV64-NEXT:    lhu s2, 16(a1)
+; CHECK-SOFT-RV64-NEXT:    lhu s3, 24(a1)
+; CHECK-SOFT-RV64-NEXT:    mv s0, a0
+; CHECK-SOFT-RV64-NEXT:    mv a0, a2
+; CHECK-SOFT-RV64-NEXT:    call __extendhfsf2
+; CHECK-SOFT-RV64-NEXT:    mv a1, a0
+; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    call __truncsfhf2
+; CHECK-SOFT-RV64-NEXT:    mv s4, a0
+; CHECK-SOFT-RV64-NEXT:    mv a0, s1
+; CHECK-SOFT-RV64-NEXT:    call __extendhfsf2
+; CHECK-SOFT-RV64-NEXT:    mv a1, a0
+; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    call __truncsfhf2
+; CHECK-SOFT-RV64-NEXT:    mv s1, a0
+; CHECK-SOFT-RV64-NEXT:    mv a0, s2
+; CHECK-SOFT-RV64-NEXT:    call __extendhfsf2
+; CHECK-SOFT-RV64-NEXT:    mv a1, a0
+; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    call __truncsfhf2
+; CHECK-SOFT-RV64-NEXT:    mv s2, a0
+; CHECK-SOFT-RV64-NEXT:    mv a0, s3
+; CHECK-SOFT-RV64-NEXT:    call __extendhfsf2
+; CHECK-SOFT-RV64-NEXT:    mv a1, a0
+; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    call __truncsfhf2
+; CHECK-SOFT-RV64-NEXT:    sh s4, 0(s0)
+; CHECK-SOFT-RV64-NEXT:    sh s1, 2(s0)
+; CHECK-SOFT-RV64-NEXT:    sh s2, 4(s0)
+; CHECK-SOFT-RV64-NEXT:    sh a0, 6(s0)
+; CHECK-SOFT-RV64-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    ld s4, 0(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore ra
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore s0
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore s1
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore s2
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore s3
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore s4
+; CHECK-SOFT-RV64-NEXT:    addi sp, sp, 48
+; CHECK-SOFT-RV64-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-SOFT-RV64-NEXT:    ret
+;
 ; CHECK-FP16-RV64-LABEL: fcanonicalize_v4f16_nnan:
 ; CHECK-FP16-RV64:       # %bb.0:
 ; CHECK-FP16-RV64-NEXT:    fmin.h fa5, fa0, fa0
@@ -537,6 +1381,70 @@ define <4 x half> @fcanonicalize_v4f16_nnan(<4 x half> %x) {
 ; CHECK-NOFP16-RV64-NEXT:    .cfi_def_cfa_offset 0
 ; CHECK-NOFP16-RV64-NEXT:    ret
 ;
+; CHECK-SOFT-RV32-LABEL: fcanonicalize_v4f16_nnan:
+; CHECK-SOFT-RV32:       # %bb.0:
+; CHECK-SOFT-RV32-NEXT:    addi sp, sp, -32
+; CHECK-SOFT-RV32-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-SOFT-RV32-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset ra, -4
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s0, -8
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s1, -12
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s2, -16
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s3, -20
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s4, -24
+; CHECK-SOFT-RV32-NEXT:    lhu a2, 0(a1)
+; CHECK-SOFT-RV32-NEXT:    lhu s1, 4(a1)
+; CHECK-SOFT-RV32-NEXT:    lhu s2, 8(a1)
+; CHECK-SOFT-RV32-NEXT:    lhu s3, 12(a1)
+; CHECK-SOFT-RV32-NEXT:    mv s0, a0
+; CHECK-SOFT-RV32-NEXT:    mv a0, a2
+; CHECK-SOFT-RV32-NEXT:    call __extendhfsf2
+; CHECK-SOFT-RV32-NEXT:    mv a1, a0
+; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    call __truncsfhf2
+; CHECK-SOFT-RV32-NEXT:    mv s4, a0
+; CHECK-SOFT-RV32-NEXT:    mv a0, s1
+; CHECK-SOFT-RV32-NEXT:    call __extendhfsf2
+; CHECK-SOFT-RV32-NEXT:    mv a1, a0
+; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    call __truncsfhf2
+; CHECK-SOFT-RV32-NEXT:    mv s1, a0
+; CHECK-SOFT-RV32-NEXT:    mv a0, s2
+; CHECK-SOFT-RV32-NEXT:    call __extendhfsf2
+; CHECK-SOFT-RV32-NEXT:    mv a1, a0
+; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    call __truncsfhf2
+; CHECK-SOFT-RV32-NEXT:    mv s2, a0
+; CHECK-SOFT-RV32-NEXT:    mv a0, s3
+; CHECK-SOFT-RV32-NEXT:    call __extendhfsf2
+; CHECK-SOFT-RV32-NEXT:    mv a1, a0
+; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    call __truncsfhf2
+; CHECK-SOFT-RV32-NEXT:    sh s4, 0(s0)
+; CHECK-SOFT-RV32-NEXT:    sh s1, 2(s0)
+; CHECK-SOFT-RV32-NEXT:    sh s2, 4(s0)
+; CHECK-SOFT-RV32-NEXT:    sh a0, 6(s0)
+; CHECK-SOFT-RV32-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore ra
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s0
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s1
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s2
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s3
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s4
+; CHECK-SOFT-RV32-NEXT:    addi sp, sp, 32
+; CHECK-SOFT-RV32-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-SOFT-RV32-NEXT:    ret
+;
 ; CHECK-FP16-RV32-LABEL: fcanonicalize_v4f16_nnan:
 ; CHECK-FP16-RV32:       # %bb.0:
 ; CHECK-FP16-RV32-NEXT:    fmin.h fa5, fa0, fa0
@@ -627,11 +1535,298 @@ define <4 x half> @fcanonicalize_v4f16_nnan(<4 x half> %x) {
 ; CHECK-NOFP16-RV32-NEXT:    addi sp, sp, 64
 ; CHECK-NOFP16-RV32-NEXT:    .cfi_def_cfa_offset 0
 ; CHECK-NOFP16-RV32-NEXT:    ret
+; RV32-SOFT-LABEL: fcanonicalize_v4f16_nnan:
+; RV32-SOFT:       # %bb.0:
+; RV32-SOFT-NEXT:    addi sp, sp, -32
+; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 32
+; RV32-SOFT-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    .cfi_offset ra, -4
+; RV32-SOFT-NEXT:    .cfi_offset s0, -8
+; RV32-SOFT-NEXT:    .cfi_offset s1, -12
+; RV32-SOFT-NEXT:    .cfi_offset s2, -16
+; RV32-SOFT-NEXT:    .cfi_offset s3, -20
+; RV32-SOFT-NEXT:    .cfi_offset s4, -24
+; RV32-SOFT-NEXT:    lhu a2, 0(a1)
+; RV32-SOFT-NEXT:    lhu s1, 4(a1)
+; RV32-SOFT-NEXT:    lhu s2, 8(a1)
+; RV32-SOFT-NEXT:    lhu s3, 12(a1)
+; RV32-SOFT-NEXT:    mv s0, a0
+; RV32-SOFT-NEXT:    mv a0, a2
+; RV32-SOFT-NEXT:    call __extendhfsf2
+; RV32-SOFT-NEXT:    mv a1, a0
+; RV32-SOFT-NEXT:    call fminimum_numf
+; RV32-SOFT-NEXT:    call __truncsfhf2
+; RV32-SOFT-NEXT:    mv s4, a0
+; RV32-SOFT-NEXT:    mv a0, s1
+; RV32-SOFT-NEXT:    call __extendhfsf2
+; RV32-SOFT-NEXT:    mv a1, a0
+; RV32-SOFT-NEXT:    call fminimum_numf
+; RV32-SOFT-NEXT:    call __truncsfhf2
+; RV32-SOFT-NEXT:    mv s1, a0
+; RV32-SOFT-NEXT:    mv a0, s2
+; RV32-SOFT-NEXT:    call __extendhfsf2
+; RV32-SOFT-NEXT:    mv a1, a0
+; RV32-SOFT-NEXT:    call fminimum_numf
+; RV32-SOFT-NEXT:    call __truncsfhf2
+; RV32-SOFT-NEXT:    mv s2, a0
+; RV32-SOFT-NEXT:    mv a0, s3
+; RV32-SOFT-NEXT:    call __extendhfsf2
+; RV32-SOFT-NEXT:    mv a1, a0
+; RV32-SOFT-NEXT:    call fminimum_numf
+; RV32-SOFT-NEXT:    call __truncsfhf2
+; RV32-SOFT-NEXT:    sh s4, 0(s0)
+; RV32-SOFT-NEXT:    sh s1, 2(s0)
+; RV32-SOFT-NEXT:    sh s2, 4(s0)
+; RV32-SOFT-NEXT:    sh a0, 6(s0)
+; RV32-SOFT-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    .cfi_restore ra
+; RV32-SOFT-NEXT:    .cfi_restore s0
+; RV32-SOFT-NEXT:    .cfi_restore s1
+; RV32-SOFT-NEXT:    .cfi_restore s2
+; RV32-SOFT-NEXT:    .cfi_restore s3
+; RV32-SOFT-NEXT:    .cfi_restore s4
+; RV32-SOFT-NEXT:    addi sp, sp, 32
+; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 0
+; RV32-SOFT-NEXT:    ret
   %z = call nnan <4 x half> @llvm.canonicalize.v4f16(<4 x half> %x)
   ret <4 x half> %z
 }
 
 define <8 x half> @fcanonicalize_v8f16(<8 x half> %x) {
+; RV64-SOFT-LABEL: fcanonicalize_v8f16:
+; RV64-SOFT:       # %bb.0:
+; RV64-SOFT-NEXT:    addi sp, sp, -80
+; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 80
+; RV64-SOFT-NEXT:    sd ra, 72(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    sd s0, 64(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    sd s1, 56(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    sd s2, 48(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    sd s3, 40(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    sd s4, 32(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    sd s5, 24(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    sd s6, 16(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    sd s7, 8(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    sd s8, 0(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    .cfi_offset ra, -8
+; RV64-SOFT-NEXT:    .cfi_offset s0, -16
+; RV64-SOFT-NEXT:    .cfi_offset s1, -24
+; RV64-SOFT-NEXT:    .cfi_offset s2, -32
+; RV64-SOFT-NEXT:    .cfi_offset s3, -40
+; RV64-SOFT-NEXT:    .cfi_offset s4, -48
+; RV64-SOFT-NEXT:    .cfi_offset s5, -56
+; RV64-SOFT-NEXT:    .cfi_offset s6, -64
+; RV64-SOFT-NEXT:    .cfi_offset s7, -72
+; RV64-SOFT-NEXT:    .cfi_offset s8, -80
+; RV64-SOFT-NEXT:    lhu s7, 32(a1)
+; RV64-SOFT-NEXT:    lhu s5, 40(a1)
+; RV64-SOFT-NEXT:    lhu s3, 48(a1)
+; RV64-SOFT-NEXT:    lhu s1, 56(a1)
+; RV64-SOFT-NEXT:    lhu a2, 0(a1)
+; RV64-SOFT-NEXT:    lhu s4, 8(a1)
+; RV64-SOFT-NEXT:    lhu s6, 16(a1)
+; RV64-SOFT-NEXT:    lhu s8, 24(a1)
+; RV64-SOFT-NEXT:    mv s0, a0
+; RV64-SOFT-NEXT:    mv a0, a2
+; RV64-SOFT-NEXT:    call __extendhfsf2
+; RV64-SOFT-NEXT:    mv a1, a0
+; RV64-SOFT-NEXT:    call fminimum_numf
+; RV64-SOFT-NEXT:    call __truncsfhf2
+; RV64-SOFT-NEXT:    mv s2, a0
+; RV64-SOFT-NEXT:    mv a0, s4
+; RV64-SOFT-NEXT:    call __extendhfsf2
+; RV64-SOFT-NEXT:    mv a1, a0
+; RV64-SOFT-NEXT:    call fminimum_numf
+; RV64-SOFT-NEXT:    call __truncsfhf2
+; RV64-SOFT-NEXT:    mv s4, a0
+; RV64-SOFT-NEXT:    mv a0, s6
+; RV64-SOFT-NEXT:    call __extendhfsf2
+; RV64-SOFT-NEXT:    mv a1, a0
+; RV64-SOFT-NEXT:    call fminimum_numf
+; RV64-SOFT-NEXT:    call __truncsfhf2
+; RV64-SOFT-NEXT:    mv s6, a0
+; RV64-SOFT-NEXT:    mv a0, s8
+; RV64-SOFT-NEXT:    call __extendhfsf2
+; RV64-SOFT-NEXT:    mv a1, a0
+; RV64-SOFT-NEXT:    call fminimum_numf
+; RV64-SOFT-NEXT:    call __truncsfhf2
+; RV64-SOFT-NEXT:    mv s8, a0
+; RV64-SOFT-NEXT:    mv a0, s7
+; RV64-SOFT-NEXT:    call __extendhfsf2
+; RV64-SOFT-NEXT:    mv a1, a0
+; RV64-SOFT-NEXT:    call fminimum_numf
+; RV64-SOFT-NEXT:    call __truncsfhf2
+; RV64-SOFT-NEXT:    mv s7, a0
+; RV64-SOFT-NEXT:    mv a0, s5
+; RV64-SOFT-NEXT:    call __extendhfsf2
+; RV64-SOFT-NEXT:    mv a1, a0
+; RV64-SOFT-NEXT:    call fminimum_numf
+; RV64-SOFT-NEXT:    call __truncsfhf2
+; RV64-SOFT-NEXT:    mv s5, a0
+; RV64-SOFT-NEXT:    mv a0, s3
+; RV64-SOFT-NEXT:    call __extendhfsf2
+; RV64-SOFT-NEXT:    mv a1, a0
+; RV64-SOFT-NEXT:    call fminimum_numf
+; RV64-SOFT-NEXT:    call __truncsfhf2
+; RV64-SOFT-NEXT:    mv s3, a0
+; RV64-SOFT-NEXT:    mv a0, s1
+; RV64-SOFT-NEXT:    call __extendhfsf2
+; RV64-SOFT-NEXT:    mv a1, a0
+; RV64-SOFT-NEXT:    call fminimum_numf
+; RV64-SOFT-NEXT:    call __truncsfhf2
+; RV64-SOFT-NEXT:    sh s7, 8(s0)
+; RV64-SOFT-NEXT:    sh s5, 10(s0)
+; RV64-SOFT-NEXT:    sh s3, 12(s0)
+; RV64-SOFT-NEXT:    sh a0, 14(s0)
+; RV64-SOFT-NEXT:    sh s2, 0(s0)
+; RV64-SOFT-NEXT:    sh s4, 2(s0)
+; RV64-SOFT-NEXT:    sh s6, 4(s0)
+; RV64-SOFT-NEXT:    sh s8, 6(s0)
+; RV64-SOFT-NEXT:    ld ra, 72(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    ld s0, 64(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    ld s1, 56(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    ld s2, 48(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    ld s3, 40(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    ld s4, 32(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    ld s5, 24(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    ld s6, 16(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    ld s7, 8(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    ld s8, 0(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    .cfi_restore ra
+; RV64-SOFT-NEXT:    .cfi_restore s0
+; RV64-SOFT-NEXT:    .cfi_restore s1
+; RV64-SOFT-NEXT:    .cfi_restore s2
+; RV64-SOFT-NEXT:    .cfi_restore s3
+; RV64-SOFT-NEXT:    .cfi_restore s4
+; RV64-SOFT-NEXT:    .cfi_restore s5
+; RV64-SOFT-NEXT:    .cfi_restore s6
+; RV64-SOFT-NEXT:    .cfi_restore s7
+; RV64-SOFT-NEXT:    .cfi_restore s8
+; RV64-SOFT-NEXT:    addi sp, sp, 80
+; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 0
+; RV64-SOFT-NEXT:    ret
+;
+; CHECK-SOFT-RV64-LABEL: fcanonicalize_v8f16:
+; CHECK-SOFT-RV64:       # %bb.0:
+; CHECK-SOFT-RV64-NEXT:    addi sp, sp, -80
+; CHECK-SOFT-RV64-NEXT:    .cfi_def_cfa_offset 80
+; CHECK-SOFT-RV64-NEXT:    sd ra, 72(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    sd s0, 64(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    sd s1, 56(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    sd s2, 48(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    sd s3, 40(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    sd s4, 32(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    sd s5, 24(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    sd s6, 16(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    sd s7, 8(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    sd s8, 0(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset ra, -8
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset s0, -16
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset s1, -24
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset s2, -32
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset s3, -40
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset s4, -48
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset s5, -56
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset s6, -64
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset s7, -72
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset s8, -80
+; CHECK-SOFT-RV64-NEXT:    lhu s7, 32(a1)
+; CHECK-SOFT-RV64-NEXT:    lhu s5, 40(a1)
+; CHECK-SOFT-RV64-NEXT:    lhu s3, 48(a1)
+; CHECK-SOFT-RV64-NEXT:    lhu s1, 56(a1)
+; CHECK-SOFT-RV64-NEXT:    lhu a2, 0(a1)
+; CHECK-SOFT-RV64-NEXT:    lhu s4, 8(a1)
+; CHECK-SOFT-RV64-NEXT:    lhu s6, 16(a1)
+; CHECK-SOFT-RV64-NEXT:    lhu s8, 24(a1)
+; CHECK-SOFT-RV64-NEXT:    mv s0, a0
+; CHECK-SOFT-RV64-NEXT:    mv a0, a2
+; CHECK-SOFT-RV64-NEXT:    call __extendhfsf2
+; CHECK-SOFT-RV64-NEXT:    mv a1, a0
+; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    call __truncsfhf2
+; CHECK-SOFT-RV64-NEXT:    mv s2, a0
+; CHECK-SOFT-RV64-NEXT:    mv a0, s4
+; CHECK-SOFT-RV64-NEXT:    call __extendhfsf2
+; CHECK-SOFT-RV64-NEXT:    mv a1, a0
+; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    call __truncsfhf2
+; CHECK-SOFT-RV64-NEXT:    mv s4, a0
+; CHECK-SOFT-RV64-NEXT:    mv a0, s6
+; CHECK-SOFT-RV64-NEXT:    call __extendhfsf2
+; CHECK-SOFT-RV64-NEXT:    mv a1, a0
+; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    call __truncsfhf2
+; CHECK-SOFT-RV64-NEXT:    mv s6, a0
+; CHECK-SOFT-RV64-NEXT:    mv a0, s8
+; CHECK-SOFT-RV64-NEXT:    call __extendhfsf2
+; CHECK-SOFT-RV64-NEXT:    mv a1, a0
+; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    call __truncsfhf2
+; CHECK-SOFT-RV64-NEXT:    mv s8, a0
+; CHECK-SOFT-RV64-NEXT:    mv a0, s7
+; CHECK-SOFT-RV64-NEXT:    call __extendhfsf2
+; CHECK-SOFT-RV64-NEXT:    mv a1, a0
+; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    call __truncsfhf2
+; CHECK-SOFT-RV64-NEXT:    mv s7, a0
+; CHECK-SOFT-RV64-NEXT:    mv a0, s5
+; CHECK-SOFT-RV64-NEXT:    call __extendhfsf2
+; CHECK-SOFT-RV64-NEXT:    mv a1, a0
+; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    call __truncsfhf2
+; CHECK-SOFT-RV64-NEXT:    mv s5, a0
+; CHECK-SOFT-RV64-NEXT:    mv a0, s3
+; CHECK-SOFT-RV64-NEXT:    call __extendhfsf2
+; CHECK-SOFT-RV64-NEXT:    mv a1, a0
+; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    call __truncsfhf2
+; CHECK-SOFT-RV64-NEXT:    mv s3, a0
+; CHECK-SOFT-RV64-NEXT:    mv a0, s1
+; CHECK-SOFT-RV64-NEXT:    call __extendhfsf2
+; CHECK-SOFT-RV64-NEXT:    mv a1, a0
+; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    call __truncsfhf2
+; CHECK-SOFT-RV64-NEXT:    sh s7, 8(s0)
+; CHECK-SOFT-RV64-NEXT:    sh s5, 10(s0)
+; CHECK-SOFT-RV64-NEXT:    sh s3, 12(s0)
+; CHECK-SOFT-RV64-NEXT:    sh a0, 14(s0)
+; CHECK-SOFT-RV64-NEXT:    sh s2, 0(s0)
+; CHECK-SOFT-RV64-NEXT:    sh s4, 2(s0)
+; CHECK-SOFT-RV64-NEXT:    sh s6, 4(s0)
+; CHECK-SOFT-RV64-NEXT:    sh s8, 6(s0)
+; CHECK-SOFT-RV64-NEXT:    ld ra, 72(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    ld s0, 64(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    ld s1, 56(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    ld s2, 48(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    ld s3, 40(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    ld s4, 32(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    ld s5, 24(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    ld s6, 16(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    ld s7, 8(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    ld s8, 0(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore ra
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore s0
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore s1
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore s2
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore s3
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore s4
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore s5
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore s6
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore s7
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore s8
+; CHECK-SOFT-RV64-NEXT:    addi sp, sp, 80
+; CHECK-SOFT-RV64-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-SOFT-RV64-NEXT:    ret
+;
 ; CHECK-FP16-RV64-LABEL: fcanonicalize_v8f16:
 ; CHECK-FP16-RV64:       # %bb.0:
 ; CHECK-FP16-RV64-NEXT:    fmin.h fa0, fa0, fa0
@@ -788,6 +1983,118 @@ define <8 x half> @fcanonicalize_v8f16(<8 x half> %x) {
 ; CHECK-NOFP16-RV64-NEXT:    .cfi_def_cfa_offset 0
 ; CHECK-NOFP16-RV64-NEXT:    ret
 ;
+; CHECK-SOFT-RV32-LABEL: fcanonicalize_v8f16:
+; CHECK-SOFT-RV32:       # %bb.0:
+; CHECK-SOFT-RV32-NEXT:    addi sp, sp, -48
+; CHECK-SOFT-RV32-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-SOFT-RV32-NEXT:    sw ra, 44(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s0, 40(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s1, 36(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s2, 32(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s3, 28(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s4, 24(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s5, 20(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s6, 16(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s7, 12(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s8, 8(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset ra, -4
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s0, -8
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s1, -12
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s2, -16
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s3, -20
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s4, -24
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s5, -28
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s6, -32
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s7, -36
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s8, -40
+; CHECK-SOFT-RV32-NEXT:    lhu s7, 16(a1)
+; CHECK-SOFT-RV32-NEXT:    lhu s5, 20(a1)
+; CHECK-SOFT-RV32-NEXT:    lhu s3, 24(a1)
+; CHECK-SOFT-RV32-NEXT:    lhu s1, 28(a1)
+; CHECK-SOFT-RV32-NEXT:    lhu a2, 0(a1)
+; CHECK-SOFT-RV32-NEXT:    lhu s4, 4(a1)
+; CHECK-SOFT-RV32-NEXT:    lhu s6, 8(a1)
+; CHECK-SOFT-RV32-NEXT:    lhu s8, 12(a1)
+; CHECK-SOFT-RV32-NEXT:    mv s0, a0
+; CHECK-SOFT-RV32-NEXT:    mv a0, a2
+; CHECK-SOFT-RV32-NEXT:    call __extendhfsf2
+; CHECK-SOFT-RV32-NEXT:    mv a1, a0
+; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    call __truncsfhf2
+; CHECK-SOFT-RV32-NEXT:    mv s2, a0
+; CHECK-SOFT-RV32-NEXT:    mv a0, s4
+; CHECK-SOFT-RV32-NEXT:    call __extendhfsf2
+; CHECK-SOFT-RV32-NEXT:    mv a1, a0
+; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    call __truncsfhf2
+; CHECK-SOFT-RV32-NEXT:    mv s4, a0
+; CHECK-SOFT-RV32-NEXT:    mv a0, s6
+; CHECK-SOFT-RV32-NEXT:    call __extendhfsf2
+; CHECK-SOFT-RV32-NEXT:    mv a1, a0
+; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    call __truncsfhf2
+; CHECK-SOFT-RV32-NEXT:    mv s6, a0
+; CHECK-SOFT-RV32-NEXT:    mv a0, s8
+; CHECK-SOFT-RV32-NEXT:    call __extendhfsf2
+; CHECK-SOFT-RV32-NEXT:    mv a1, a0
+; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    call __truncsfhf2
+; CHECK-SOFT-RV32-NEXT:    mv s8, a0
+; CHECK-SOFT-RV32-NEXT:    mv a0, s7
+; CHECK-SOFT-RV32-NEXT:    call __extendhfsf2
+; CHECK-SOFT-RV32-NEXT:    mv a1, a0
+; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    call __truncsfhf2
+; CHECK-SOFT-RV32-NEXT:    mv s7, a0
+; CHECK-SOFT-RV32-NEXT:    mv a0, s5
+; CHECK-SOFT-RV32-NEXT:    call __extendhfsf2
+; CHECK-SOFT-RV32-NEXT:    mv a1, a0
+; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    call __truncsfhf2
+; CHECK-SOFT-RV32-NEXT:    mv s5, a0
+; CHECK-SOFT-RV32-NEXT:    mv a0, s3
+; CHECK-SOFT-RV32-NEXT:    call __extendhfsf2
+; CHECK-SOFT-RV32-NEXT:    mv a1, a0
+; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    call __truncsfhf2
+; CHECK-SOFT-RV32-NEXT:    mv s3, a0
+; CHECK-SOFT-RV32-NEXT:    mv a0, s1
+; CHECK-SOFT-RV32-NEXT:    call __extendhfsf2
+; CHECK-SOFT-RV32-NEXT:    mv a1, a0
+; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    call __truncsfhf2
+; CHECK-SOFT-RV32-NEXT:    sh s7, 8(s0)
+; CHECK-SOFT-RV32-NEXT:    sh s5, 10(s0)
+; CHECK-SOFT-RV32-NEXT:    sh s3, 12(s0)
+; CHECK-SOFT-RV32-NEXT:    sh a0, 14(s0)
+; CHECK-SOFT-RV32-NEXT:    sh s2, 0(s0)
+; CHECK-SOFT-RV32-NEXT:    sh s4, 2(s0)
+; CHECK-SOFT-RV32-NEXT:    sh s6, 4(s0)
+; CHECK-SOFT-RV32-NEXT:    sh s8, 6(s0)
+; CHECK-SOFT-RV32-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s0, 40(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s1, 36(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s2, 32(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s3, 28(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s4, 24(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s5, 20(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s6, 16(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s7, 12(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s8, 8(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore ra
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s0
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s1
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s2
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s3
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s4
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s5
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s6
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s7
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s8
+; CHECK-SOFT-RV32-NEXT:    addi sp, sp, 48
+; CHECK-SOFT-RV32-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-SOFT-RV32-NEXT:    ret
+;
 ; CHECK-FP16-RV32-LABEL: fcanonicalize_v8f16:
 ; CHECK-FP16-RV32:       # %bb.0:
 ; CHECK-FP16-RV32-NEXT:    fmin.h fa0, fa0, fa0
@@ -954,11 +2261,346 @@ define <8 x half> @fcanonicalize_v8f16(<8 x half> %x) {
 ; CHECK-NOFP16-RV32-NEXT:    addi sp, sp, 112
 ; CHECK-NOFP16-RV32-NEXT:    .cfi_def_cfa_offset 0
 ; CHECK-NOFP16-RV32-NEXT:    ret
+; RV32-SOFT-LABEL: fcanonicalize_v8f16:
+; RV32-SOFT:       # %bb.0:
+; RV32-SOFT-NEXT:    addi sp, sp, -48
+; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 48
+; RV32-SOFT-NEXT:    sw ra, 44(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s0, 40(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s1, 36(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s2, 32(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s3, 28(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s4, 24(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s5, 20(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s6, 16(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s7, 12(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s8, 8(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    .cfi_offset ra, -4
+; RV32-SOFT-NEXT:    .cfi_offset s0, -8
+; RV32-SOFT-NEXT:    .cfi_offset s1, -12
+; RV32-SOFT-NEXT:    .cfi_offset s2, -16
+; RV32-SOFT-NEXT:    .cfi_offset s3, -20
+; RV32-SOFT-NEXT:    .cfi_offset s4, -24
+; RV32-SOFT-NEXT:    .cfi_offset s5, -28
+; RV32-SOFT-NEXT:    .cfi_offset s6, -32
+; RV32-SOFT-NEXT:    .cfi_offset s7, -36
+; RV32-SOFT-NEXT:    .cfi_offset s8, -40
+; RV32-SOFT-NEXT:    lhu s7, 16(a1)
+; RV32-SOFT-NEXT:    lhu s5, 20(a1)
+; RV32-SOFT-NEXT:    lhu s3, 24(a1)
+; RV32-SOFT-NEXT:    lhu s1, 28(a1)
+; RV32-SOFT-NEXT:    lhu a2, 0(a1)
+; RV32-SOFT-NEXT:    lhu s4, 4(a1)
+; RV32-SOFT-NEXT:    lhu s6, 8(a1)
+; RV32-SOFT-NEXT:    lhu s8, 12(a1)
+; RV32-SOFT-NEXT:    mv s0, a0
+; RV32-SOFT-NEXT:    mv a0, a2
+; RV32-SOFT-NEXT:    call __extendhfsf2
+; RV32-SOFT-NEXT:    mv a1, a0
+; RV32-SOFT-NEXT:    call fminimum_numf
+; RV32-SOFT-NEXT:    call __truncsfhf2
+; RV32-SOFT-NEXT:    mv s2, a0
+; RV32-SOFT-NEXT:    mv a0, s4
+; RV32-SOFT-NEXT:    call __extendhfsf2
+; RV32-SOFT-NEXT:    mv a1, a0
+; RV32-SOFT-NEXT:    call fminimum_numf
+; RV32-SOFT-NEXT:    call __truncsfhf2
+; RV32-SOFT-NEXT:    mv s4, a0
+; RV32-SOFT-NEXT:    mv a0, s6
+; RV32-SOFT-NEXT:    call __extendhfsf2
+; RV32-SOFT-NEXT:    mv a1, a0
+; RV32-SOFT-NEXT:    call fminimum_numf
+; RV32-SOFT-NEXT:    call __truncsfhf2
+; RV32-SOFT-NEXT:    mv s6, a0
+; RV32-SOFT-NEXT:    mv a0, s8
+; RV32-SOFT-NEXT:    call __extendhfsf2
+; RV32-SOFT-NEXT:    mv a1, a0
+; RV32-SOFT-NEXT:    call fminimum_numf
+; RV32-SOFT-NEXT:    call __truncsfhf2
+; RV32-SOFT-NEXT:    mv s8, a0
+; RV32-SOFT-NEXT:    mv a0, s7
+; RV32-SOFT-NEXT:    call __extendhfsf2
+; RV32-SOFT-NEXT:    mv a1, a0
+; RV32-SOFT-NEXT:    call fminimum_numf
+; RV32-SOFT-NEXT:    call __truncsfhf2
+; RV32-SOFT-NEXT:    mv s7, a0
+; RV32-SOFT-NEXT:    mv a0, s5
+; RV32-SOFT-NEXT:    call __extendhfsf2
+; RV32-SOFT-NEXT:    mv a1, a0
+; RV32-SOFT-NEXT:    call fminimum_numf
+; RV32-SOFT-NEXT:    call __truncsfhf2
+; RV32-SOFT-NEXT:    mv s5, a0
+; RV32-SOFT-NEXT:    mv a0, s3
+; RV32-SOFT-NEXT:    call __extendhfsf2
+; RV32-SOFT-NEXT:    mv a1, a0
+; RV32-SOFT-NEXT:    call fminimum_numf
+; RV32-SOFT-NEXT:    call __truncsfhf2
+; RV32-SOFT-NEXT:    mv s3, a0
+; RV32-SOFT-NEXT:    mv a0, s1
+; RV32-SOFT-NEXT:    call __extendhfsf2
+; RV32-SOFT-NEXT:    mv a1, a0
+; RV32-SOFT-NEXT:    call fminimum_numf
+; RV32-SOFT-NEXT:    call __truncsfhf2
+; RV32-SOFT-NEXT:    sh s7, 8(s0)
+; RV32-SOFT-NEXT:    sh s5, 10(s0)
+; RV32-SOFT-NEXT:    sh s3, 12(s0)
+; RV32-SOFT-NEXT:    sh a0, 14(s0)
+; RV32-SOFT-NEXT:    sh s2, 0(s0)
+; RV32-SOFT-NEXT:    sh s4, 2(s0)
+; RV32-SOFT-NEXT:    sh s6, 4(s0)
+; RV32-SOFT-NEXT:    sh s8, 6(s0)
+; RV32-SOFT-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s0, 40(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s1, 36(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s2, 32(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s3, 28(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s4, 24(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s5, 20(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s6, 16(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s7, 12(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s8, 8(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    .cfi_restore ra
+; RV32-SOFT-NEXT:    .cfi_restore s0
+; RV32-SOFT-NEXT:    .cfi_restore s1
+; RV32-SOFT-NEXT:    .cfi_restore s2
+; RV32-SOFT-NEXT:    .cfi_restore s3
+; RV32-SOFT-NEXT:    .cfi_restore s4
+; RV32-SOFT-NEXT:    .cfi_restore s5
+; RV32-SOFT-NEXT:    .cfi_restore s6
+; RV32-SOFT-NEXT:    .cfi_restore s7
+; RV32-SOFT-NEXT:    .cfi_restore s8
+; RV32-SOFT-NEXT:    addi sp, sp, 48
+; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 0
+; RV32-SOFT-NEXT:    ret
   %z = call <8 x half> @llvm.canonicalize.v8f16(<8 x half> %x)
   ret <8 x half> %z
 }
 
 define <8 x half> @fcanonicalize_v8f16_nnan(<8 x half> %x) {
+; RV64-SOFT-LABEL: fcanonicalize_v8f16_nnan:
+; RV64-SOFT:       # %bb.0:
+; RV64-SOFT-NEXT:    addi sp, sp, -80
+; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 80
+; RV64-SOFT-NEXT:    sd ra, 72(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    sd s0, 64(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    sd s1, 56(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    sd s2, 48(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    sd s3, 40(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    sd s4, 32(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    sd s5, 24(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    sd s6, 16(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    sd s7, 8(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    sd s8, 0(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    .cfi_offset ra, -8
+; RV64-SOFT-NEXT:    .cfi_offset s0, -16
+; RV64-SOFT-NEXT:    .cfi_offset s1, -24
+; RV64-SOFT-NEXT:    .cfi_offset s2, -32
+; RV64-SOFT-NEXT:    .cfi_offset s3, -40
+; RV64-SOFT-NEXT:    .cfi_offset s4, -48
+; RV64-SOFT-NEXT:    .cfi_offset s5, -56
+; RV64-SOFT-NEXT:    .cfi_offset s6, -64
+; RV64-SOFT-NEXT:    .cfi_offset s7, -72
+; RV64-SOFT-NEXT:    .cfi_offset s8, -80
+; RV64-SOFT-NEXT:    lhu s7, 32(a1)
+; RV64-SOFT-NEXT:    lhu s5, 40(a1)
+; RV64-SOFT-NEXT:    lhu s3, 48(a1)
+; RV64-SOFT-NEXT:    lhu s1, 56(a1)
+; RV64-SOFT-NEXT:    lhu a2, 0(a1)
+; RV64-SOFT-NEXT:    lhu s4, 8(a1)
+; RV64-SOFT-NEXT:    lhu s6, 16(a1)
+; RV64-SOFT-NEXT:    lhu s8, 24(a1)
+; RV64-SOFT-NEXT:    mv s0, a0
+; RV64-SOFT-NEXT:    mv a0, a2
+; RV64-SOFT-NEXT:    call __extendhfsf2
+; RV64-SOFT-NEXT:    mv a1, a0
+; RV64-SOFT-NEXT:    call fminimum_numf
+; RV64-SOFT-NEXT:    call __truncsfhf2
+; RV64-SOFT-NEXT:    mv s2, a0
+; RV64-SOFT-NEXT:    mv a0, s4
+; RV64-SOFT-NEXT:    call __extendhfsf2
+; RV64-SOFT-NEXT:    mv a1, a0
+; RV64-SOFT-NEXT:    call fminimum_numf
+; RV64-SOFT-NEXT:    call __truncsfhf2
+; RV64-SOFT-NEXT:    mv s4, a0
+; RV64-SOFT-NEXT:    mv a0, s6
+; RV64-SOFT-NEXT:    call __extendhfsf2
+; RV64-SOFT-NEXT:    mv a1, a0
+; RV64-SOFT-NEXT:    call fminimum_numf
+; RV64-SOFT-NEXT:    call __truncsfhf2
+; RV64-SOFT-NEXT:    mv s6, a0
+; RV64-SOFT-NEXT:    mv a0, s8
+; RV64-SOFT-NEXT:    call __extendhfsf2
+; RV64-SOFT-NEXT:    mv a1, a0
+; RV64-SOFT-NEXT:    call fminimum_numf
+; RV64-SOFT-NEXT:    call __truncsfhf2
+; RV64-SOFT-NEXT:    mv s8, a0
+; RV64-SOFT-NEXT:    mv a0, s7
+; RV64-SOFT-NEXT:    call __extendhfsf2
+; RV64-SOFT-NEXT:    mv a1, a0
+; RV64-SOFT-NEXT:    call fminimum_numf
+; RV64-SOFT-NEXT:    call __truncsfhf2
+; RV64-SOFT-NEXT:    mv s7, a0
+; RV64-SOFT-NEXT:    mv a0, s5
+; RV64-SOFT-NEXT:    call __extendhfsf2
+; RV64-SOFT-NEXT:    mv a1, a0
+; RV64-SOFT-NEXT:    call fminimum_numf
+; RV64-SOFT-NEXT:    call __truncsfhf2
+; RV64-SOFT-NEXT:    mv s5, a0
+; RV64-SOFT-NEXT:    mv a0, s3
+; RV64-SOFT-NEXT:    call __extendhfsf2
+; RV64-SOFT-NEXT:    mv a1, a0
+; RV64-SOFT-NEXT:    call fminimum_numf
+; RV64-SOFT-NEXT:    call __truncsfhf2
+; RV64-SOFT-NEXT:    mv s3, a0
+; RV64-SOFT-NEXT:    mv a0, s1
+; RV64-SOFT-NEXT:    call __extendhfsf2
+; RV64-SOFT-NEXT:    mv a1, a0
+; RV64-SOFT-NEXT:    call fminimum_numf
+; RV64-SOFT-NEXT:    call __truncsfhf2
+; RV64-SOFT-NEXT:    sh s7, 8(s0)
+; RV64-SOFT-NEXT:    sh s5, 10(s0)
+; RV64-SOFT-NEXT:    sh s3, 12(s0)
+; RV64-SOFT-NEXT:    sh a0, 14(s0)
+; RV64-SOFT-NEXT:    sh s2, 0(s0)
+; RV64-SOFT-NEXT:    sh s4, 2(s0)
+; RV64-SOFT-NEXT:    sh s6, 4(s0)
+; RV64-SOFT-NEXT:    sh s8, 6(s0)
+; RV64-SOFT-NEXT:    ld ra, 72(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    ld s0, 64(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    ld s1, 56(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    ld s2, 48(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    ld s3, 40(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    ld s4, 32(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    ld s5, 24(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    ld s6, 16(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    ld s7, 8(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    ld s8, 0(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    .cfi_restore ra
+; RV64-SOFT-NEXT:    .cfi_restore s0
+; RV64-SOFT-NEXT:    .cfi_restore s1
+; RV64-SOFT-NEXT:    .cfi_restore s2
+; RV64-SOFT-NEXT:    .cfi_restore s3
+; RV64-SOFT-NEXT:    .cfi_restore s4
+; RV64-SOFT-NEXT:    .cfi_restore s5
+; RV64-SOFT-NEXT:    .cfi_restore s6
+; RV64-SOFT-NEXT:    .cfi_restore s7
+; RV64-SOFT-NEXT:    .cfi_restore s8
+; RV64-SOFT-NEXT:    addi sp, sp, 80
+; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 0
+; RV64-SOFT-NEXT:    ret
+;
+; CHECK-SOFT-RV64-LABEL: fcanonicalize_v8f16_nnan:
+; CHECK-SOFT-RV64:       # %bb.0:
+; CHECK-SOFT-RV64-NEXT:    addi sp, sp, -80
+; CHECK-SOFT-RV64-NEXT:    .cfi_def_cfa_offset 80
+; CHECK-SOFT-RV64-NEXT:    sd ra, 72(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    sd s0, 64(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    sd s1, 56(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    sd s2, 48(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    sd s3, 40(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    sd s4, 32(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    sd s5, 24(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    sd s6, 16(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    sd s7, 8(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    sd s8, 0(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset ra, -8
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset s0, -16
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset s1, -24
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset s2, -32
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset s3, -40
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset s4, -48
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset s5, -56
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset s6, -64
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset s7, -72
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset s8, -80
+; CHECK-SOFT-RV64-NEXT:    lhu s7, 32(a1)
+; CHECK-SOFT-RV64-NEXT:    lhu s5, 40(a1)
+; CHECK-SOFT-RV64-NEXT:    lhu s3, 48(a1)
+; CHECK-SOFT-RV64-NEXT:    lhu s1, 56(a1)
+; CHECK-SOFT-RV64-NEXT:    lhu a2, 0(a1)
+; CHECK-SOFT-RV64-NEXT:    lhu s4, 8(a1)
+; CHECK-SOFT-RV64-NEXT:    lhu s6, 16(a1)
+; CHECK-SOFT-RV64-NEXT:    lhu s8, 24(a1)
+; CHECK-SOFT-RV64-NEXT:    mv s0, a0
+; CHECK-SOFT-RV64-NEXT:    mv a0, a2
+; CHECK-SOFT-RV64-NEXT:    call __extendhfsf2
+; CHECK-SOFT-RV64-NEXT:    mv a1, a0
+; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    call __truncsfhf2
+; CHECK-SOFT-RV64-NEXT:    mv s2, a0
+; CHECK-SOFT-RV64-NEXT:    mv a0, s4
+; CHECK-SOFT-RV64-NEXT:    call __extendhfsf2
+; CHECK-SOFT-RV64-NEXT:    mv a1, a0
+; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    call __truncsfhf2
+; CHECK-SOFT-RV64-NEXT:    mv s4, a0
+; CHECK-SOFT-RV64-NEXT:    mv a0, s6
+; CHECK-SOFT-RV64-NEXT:    call __extendhfsf2
+; CHECK-SOFT-RV64-NEXT:    mv a1, a0
+; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    call __truncsfhf2
+; CHECK-SOFT-RV64-NEXT:    mv s6, a0
+; CHECK-SOFT-RV64-NEXT:    mv a0, s8
+; CHECK-SOFT-RV64-NEXT:    call __extendhfsf2
+; CHECK-SOFT-RV64-NEXT:    mv a1, a0
+; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    call __truncsfhf2
+; CHECK-SOFT-RV64-NEXT:    mv s8, a0
+; CHECK-SOFT-RV64-NEXT:    mv a0, s7
+; CHECK-SOFT-RV64-NEXT:    call __extendhfsf2
+; CHECK-SOFT-RV64-NEXT:    mv a1, a0
+; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    call __truncsfhf2
+; CHECK-SOFT-RV64-NEXT:    mv s7, a0
+; CHECK-SOFT-RV64-NEXT:    mv a0, s5
+; CHECK-SOFT-RV64-NEXT:    call __extendhfsf2
+; CHECK-SOFT-RV64-NEXT:    mv a1, a0
+; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    call __truncsfhf2
+; CHECK-SOFT-RV64-NEXT:    mv s5, a0
+; CHECK-SOFT-RV64-NEXT:    mv a0, s3
+; CHECK-SOFT-RV64-NEXT:    call __extendhfsf2
+; CHECK-SOFT-RV64-NEXT:    mv a1, a0
+; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    call __truncsfhf2
+; CHECK-SOFT-RV64-NEXT:    mv s3, a0
+; CHECK-SOFT-RV64-NEXT:    mv a0, s1
+; CHECK-SOFT-RV64-NEXT:    call __extendhfsf2
+; CHECK-SOFT-RV64-NEXT:    mv a1, a0
+; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    call __truncsfhf2
+; CHECK-SOFT-RV64-NEXT:    sh s7, 8(s0)
+; CHECK-SOFT-RV64-NEXT:    sh s5, 10(s0)
+; CHECK-SOFT-RV64-NEXT:    sh s3, 12(s0)
+; CHECK-SOFT-RV64-NEXT:    sh a0, 14(s0)
+; CHECK-SOFT-RV64-NEXT:    sh s2, 0(s0)
+; CHECK-SOFT-RV64-NEXT:    sh s4, 2(s0)
+; CHECK-SOFT-RV64-NEXT:    sh s6, 4(s0)
+; CHECK-SOFT-RV64-NEXT:    sh s8, 6(s0)
+; CHECK-SOFT-RV64-NEXT:    ld ra, 72(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    ld s0, 64(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    ld s1, 56(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    ld s2, 48(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    ld s3, 40(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    ld s4, 32(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    ld s5, 24(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    ld s6, 16(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    ld s7, 8(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    ld s8, 0(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore ra
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore s0
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore s1
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore s2
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore s3
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore s4
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore s5
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore s6
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore s7
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore s8
+; CHECK-SOFT-RV64-NEXT:    addi sp, sp, 80
+; CHECK-SOFT-RV64-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-SOFT-RV64-NEXT:    ret
+;
 ; CHECK-FP16-RV64-LABEL: fcanonicalize_v8f16_nnan:
 ; CHECK-FP16-RV64:       # %bb.0:
 ; CHECK-FP16-RV64-NEXT:    fmin.h fa0, fa0, fa0
@@ -1115,6 +2757,118 @@ define <8 x half> @fcanonicalize_v8f16_nnan(<8 x half> %x) {
 ; CHECK-NOFP16-RV64-NEXT:    .cfi_def_cfa_offset 0
 ; CHECK-NOFP16-RV64-NEXT:    ret
 ;
+; CHECK-SOFT-RV32-LABEL: fcanonicalize_v8f16_nnan:
+; CHECK-SOFT-RV32:       # %bb.0:
+; CHECK-SOFT-RV32-NEXT:    addi sp, sp, -48
+; CHECK-SOFT-RV32-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-SOFT-RV32-NEXT:    sw ra, 44(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s0, 40(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s1, 36(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s2, 32(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s3, 28(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s4, 24(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s5, 20(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s6, 16(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s7, 12(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s8, 8(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset ra, -4
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s0, -8
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s1, -12
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s2, -16
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s3, -20
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s4, -24
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s5, -28
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s6, -32
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s7, -36
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s8, -40
+; CHECK-SOFT-RV32-NEXT:    lhu s7, 16(a1)
+; CHECK-SOFT-RV32-NEXT:    lhu s5, 20(a1)
+; CHECK-SOFT-RV32-NEXT:    lhu s3, 24(a1)
+; CHECK-SOFT-RV32-NEXT:    lhu s1, 28(a1)
+; CHECK-SOFT-RV32-NEXT:    lhu a2, 0(a1)
+; CHECK-SOFT-RV32-NEXT:    lhu s4, 4(a1)
+; CHECK-SOFT-RV32-NEXT:    lhu s6, 8(a1)
+; CHECK-SOFT-RV32-NEXT:    lhu s8, 12(a1)
+; CHECK-SOFT-RV32-NEXT:    mv s0, a0
+; CHECK-SOFT-RV32-NEXT:    mv a0, a2
+; CHECK-SOFT-RV32-NEXT:    call __extendhfsf2
+; CHECK-SOFT-RV32-NEXT:    mv a1, a0
+; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    call __truncsfhf2
+; CHECK-SOFT-RV32-NEXT:    mv s2, a0
+; CHECK-SOFT-RV32-NEXT:    mv a0, s4
+; CHECK-SOFT-RV32-NEXT:    call __extendhfsf2
+; CHECK-SOFT-RV32-NEXT:    mv a1, a0
+; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    call __truncsfhf2
+; CHECK-SOFT-RV32-NEXT:    mv s4, a0
+; CHECK-SOFT-RV32-NEXT:    mv a0, s6
+; CHECK-SOFT-RV32-NEXT:    call __extendhfsf2
+; CHECK-SOFT-RV32-NEXT:    mv a1, a0
+; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    call __truncsfhf2
+; CHECK-SOFT-RV32-NEXT:    mv s6, a0
+; CHECK-SOFT-RV32-NEXT:    mv a0, s8
+; CHECK-SOFT-RV32-NEXT:    call __extendhfsf2
+; CHECK-SOFT-RV32-NEXT:    mv a1, a0
+; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    call __truncsfhf2
+; CHECK-SOFT-RV32-NEXT:    mv s8, a0
+; CHECK-SOFT-RV32-NEXT:    mv a0, s7
+; CHECK-SOFT-RV32-NEXT:    call __extendhfsf2
+; CHECK-SOFT-RV32-NEXT:    mv a1, a0
+; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    call __truncsfhf2
+; CHECK-SOFT-RV32-NEXT:    mv s7, a0
+; CHECK-SOFT-RV32-NEXT:    mv a0, s5
+; CHECK-SOFT-RV32-NEXT:    call __extendhfsf2
+; CHECK-SOFT-RV32-NEXT:    mv a1, a0
+; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    call __truncsfhf2
+; CHECK-SOFT-RV32-NEXT:    mv s5, a0
+; CHECK-SOFT-RV32-NEXT:    mv a0, s3
+; CHECK-SOFT-RV32-NEXT:    call __extendhfsf2
+; CHECK-SOFT-RV32-NEXT:    mv a1, a0
+; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    call __truncsfhf2
+; CHECK-SOFT-RV32-NEXT:    mv s3, a0
+; CHECK-SOFT-RV32-NEXT:    mv a0, s1
+; CHECK-SOFT-RV32-NEXT:    call __extendhfsf2
+; CHECK-SOFT-RV32-NEXT:    mv a1, a0
+; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    call __truncsfhf2
+; CHECK-SOFT-RV32-NEXT:    sh s7, 8(s0)
+; CHECK-SOFT-RV32-NEXT:    sh s5, 10(s0)
+; CHECK-SOFT-RV32-NEXT:    sh s3, 12(s0)
+; CHECK-SOFT-RV32-NEXT:    sh a0, 14(s0)
+; CHECK-SOFT-RV32-NEXT:    sh s2, 0(s0)
+; CHECK-SOFT-RV32-NEXT:    sh s4, 2(s0)
+; CHECK-SOFT-RV32-NEXT:    sh s6, 4(s0)
+; CHECK-SOFT-RV32-NEXT:    sh s8, 6(s0)
+; CHECK-SOFT-RV32-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s0, 40(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s1, 36(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s2, 32(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s3, 28(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s4, 24(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s5, 20(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s6, 16(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s7, 12(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s8, 8(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore ra
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s0
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s1
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s2
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s3
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s4
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s5
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s6
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s7
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s8
+; CHECK-SOFT-RV32-NEXT:    addi sp, sp, 48
+; CHECK-SOFT-RV32-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-SOFT-RV32-NEXT:    ret
+;
 ; CHECK-FP16-RV32-LABEL: fcanonicalize_v8f16_nnan:
 ; CHECK-FP16-RV32:       # %bb.0:
 ; CHECK-FP16-RV32-NEXT:    fmin.h fa0, fa0, fa0
@@ -1281,114 +3035,1899 @@ define <8 x half> @fcanonicalize_v8f16_nnan(<8 x half> %x) {
 ; CHECK-NOFP16-RV32-NEXT:    addi sp, sp, 112
 ; CHECK-NOFP16-RV32-NEXT:    .cfi_def_cfa_offset 0
 ; CHECK-NOFP16-RV32-NEXT:    ret
+; RV32-SOFT-LABEL: fcanonicalize_v8f16_nnan:
+; RV32-SOFT:       # %bb.0:
+; RV32-SOFT-NEXT:    addi sp, sp, -48
+; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 48
+; RV32-SOFT-NEXT:    sw ra, 44(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s0, 40(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s1, 36(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s2, 32(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s3, 28(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s4, 24(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s5, 20(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s6, 16(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s7, 12(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s8, 8(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    .cfi_offset ra, -4
+; RV32-SOFT-NEXT:    .cfi_offset s0, -8
+; RV32-SOFT-NEXT:    .cfi_offset s1, -12
+; RV32-SOFT-NEXT:    .cfi_offset s2, -16
+; RV32-SOFT-NEXT:    .cfi_offset s3, -20
+; RV32-SOFT-NEXT:    .cfi_offset s4, -24
+; RV32-SOFT-NEXT:    .cfi_offset s5, -28
+; RV32-SOFT-NEXT:    .cfi_offset s6, -32
+; RV32-SOFT-NEXT:    .cfi_offset s7, -36
+; RV32-SOFT-NEXT:    .cfi_offset s8, -40
+; RV32-SOFT-NEXT:    lhu s7, 16(a1)
+; RV32-SOFT-NEXT:    lhu s5, 20(a1)
+; RV32-SOFT-NEXT:    lhu s3, 24(a1)
+; RV32-SOFT-NEXT:    lhu s1, 28(a1)
+; RV32-SOFT-NEXT:    lhu a2, 0(a1)
+; RV32-SOFT-NEXT:    lhu s4, 4(a1)
+; RV32-SOFT-NEXT:    lhu s6, 8(a1)
+; RV32-SOFT-NEXT:    lhu s8, 12(a1)
+; RV32-SOFT-NEXT:    mv s0, a0
+; RV32-SOFT-NEXT:    mv a0, a2
+; RV32-SOFT-NEXT:    call __extendhfsf2
+; RV32-SOFT-NEXT:    mv a1, a0
+; RV32-SOFT-NEXT:    call fminimum_numf
+; RV32-SOFT-NEXT:    call __truncsfhf2
+; RV32-SOFT-NEXT:    mv s2, a0
+; RV32-SOFT-NEXT:    mv a0, s4
+; RV32-SOFT-NEXT:    call __extendhfsf2
+; RV32-SOFT-NEXT:    mv a1, a0
+; RV32-SOFT-NEXT:    call fminimum_numf
+; RV32-SOFT-NEXT:    call __truncsfhf2
+; RV32-SOFT-NEXT:    mv s4, a0
+; RV32-SOFT-NEXT:    mv a0, s6
+; RV32-SOFT-NEXT:    call __extendhfsf2
+; RV32-SOFT-NEXT:    mv a1, a0
+; RV32-SOFT-NEXT:    call fminimum_numf
+; RV32-SOFT-NEXT:    call __truncsfhf2
+; RV32-SOFT-NEXT:    mv s6, a0
+; RV32-SOFT-NEXT:    mv a0, s8
+; RV32-SOFT-NEXT:    call __extendhfsf2
+; RV32-SOFT-NEXT:    mv a1, a0
+; RV32-SOFT-NEXT:    call fminimum_numf
+; RV32-SOFT-NEXT:    call __truncsfhf2
+; RV32-SOFT-NEXT:    mv s8, a0
+; RV32-SOFT-NEXT:    mv a0, s7
+; RV32-SOFT-NEXT:    call __extendhfsf2
+; RV32-SOFT-NEXT:    mv a1, a0
+; RV32-SOFT-NEXT:    call fminimum_numf
+; RV32-SOFT-NEXT:    call __truncsfhf2
+; RV32-SOFT-NEXT:    mv s7, a0
+; RV32-SOFT-NEXT:    mv a0, s5
+; RV32-SOFT-NEXT:    call __extendhfsf2
+; RV32-SOFT-NEXT:    mv a1, a0
+; RV32-SOFT-NEXT:    call fminimum_numf
+; RV32-SOFT-NEXT:    call __truncsfhf2
+; RV32-SOFT-NEXT:    mv s5, a0
+; RV32-SOFT-NEXT:    mv a0, s3
+; RV32-SOFT-NEXT:    call __extendhfsf2
+; RV32-SOFT-NEXT:    mv a1, a0
+; RV32-SOFT-NEXT:    call fminimum_numf
+; RV32-SOFT-NEXT:    call __truncsfhf2
+; RV32-SOFT-NEXT:    mv s3, a0
+; RV32-SOFT-NEXT:    mv a0, s1
+; RV32-SOFT-NEXT:    call __extendhfsf2
+; RV32-SOFT-NEXT:    mv a1, a0
+; RV32-SOFT-NEXT:    call fminimum_numf
+; RV32-SOFT-NEXT:    call __truncsfhf2
+; RV32-SOFT-NEXT:    sh s7, 8(s0)
+; RV32-SOFT-NEXT:    sh s5, 10(s0)
+; RV32-SOFT-NEXT:    sh s3, 12(s0)
+; RV32-SOFT-NEXT:    sh a0, 14(s0)
+; RV32-SOFT-NEXT:    sh s2, 0(s0)
+; RV32-SOFT-NEXT:    sh s4, 2(s0)
+; RV32-SOFT-NEXT:    sh s6, 4(s0)
+; RV32-SOFT-NEXT:    sh s8, 6(s0)
+; RV32-SOFT-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s0, 40(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s1, 36(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s2, 32(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s3, 28(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s4, 24(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s5, 20(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s6, 16(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s7, 12(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s8, 8(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    .cfi_restore ra
+; RV32-SOFT-NEXT:    .cfi_restore s0
+; RV32-SOFT-NEXT:    .cfi_restore s1
+; RV32-SOFT-NEXT:    .cfi_restore s2
+; RV32-SOFT-NEXT:    .cfi_restore s3
+; RV32-SOFT-NEXT:    .cfi_restore s4
+; RV32-SOFT-NEXT:    .cfi_restore s5
+; RV32-SOFT-NEXT:    .cfi_restore s6
+; RV32-SOFT-NEXT:    .cfi_restore s7
+; RV32-SOFT-NEXT:    .cfi_restore s8
+; RV32-SOFT-NEXT:    addi sp, sp, 48
+; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 0
+; RV32-SOFT-NEXT:    ret
   %z = call nnan <8 x half> @llvm.canonicalize.v8f16(<8 x half> %x)
   ret <8 x half> %z
 }
 
 define float @fcanonicalize_f32(float %x) {
-; CHECK-LABEL: fcanonicalize_f32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmin.s fa0, fa0, fa0
-; CHECK-NEXT:    ret
+; RV64-SOFT-LABEL: fcanonicalize_f32:
+; RV64-SOFT:       # %bb.0:
+; RV64-SOFT-NEXT:    addi sp, sp, -16
+; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 16
+; RV64-SOFT-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    .cfi_offset ra, -8
+; RV64-SOFT-NEXT:    mv a1, a0
+; RV64-SOFT-NEXT:    call fminimum_numf
+; RV64-SOFT-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    .cfi_restore ra
+; RV64-SOFT-NEXT:    addi sp, sp, 16
+; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 0
+; RV64-SOFT-NEXT:    ret
+;
+; CHECK-SOFT-RV64-LABEL: fcanonicalize_f32:
+; CHECK-SOFT-RV64:       # %bb.0:
+; CHECK-SOFT-RV64-NEXT:    addi sp, sp, -16
+; CHECK-SOFT-RV64-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-SOFT-RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset ra, -8
+; CHECK-SOFT-RV64-NEXT:    mv a1, a0
+; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore ra
+; CHECK-SOFT-RV64-NEXT:    addi sp, sp, 16
+; CHECK-SOFT-RV64-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-SOFT-RV64-NEXT:    ret
+;
+; CHECK-FP16-RV64-LABEL: fcanonicalize_f32:
+; CHECK-FP16-RV64:       # %bb.0:
+; CHECK-FP16-RV64-NEXT:    fmin.s fa0, fa0, fa0
+; CHECK-FP16-RV64-NEXT:    ret
+;
+; CHECK-NOFP16-RV64-LABEL: fcanonicalize_f32:
+; CHECK-NOFP16-RV64:       # %bb.0:
+; CHECK-NOFP16-RV64-NEXT:    fmin.s fa0, fa0, fa0
+; CHECK-NOFP16-RV64-NEXT:    ret
+;
+; CHECK-SOFT-RV32-LABEL: fcanonicalize_f32:
+; CHECK-SOFT-RV32:       # %bb.0:
+; CHECK-SOFT-RV32-NEXT:    addi sp, sp, -16
+; CHECK-SOFT-RV32-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-SOFT-RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset ra, -4
+; CHECK-SOFT-RV32-NEXT:    mv a1, a0
+; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore ra
+; CHECK-SOFT-RV32-NEXT:    addi sp, sp, 16
+; CHECK-SOFT-RV32-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-SOFT-RV32-NEXT:    ret
+;
+; CHECK-FP16-RV32-LABEL: fcanonicalize_f32:
+; CHECK-FP16-RV32:       # %bb.0:
+; CHECK-FP16-RV32-NEXT:    fmin.s fa0, fa0, fa0
+; CHECK-FP16-RV32-NEXT:    ret
+;
+; CHECK-NOFP16-RV32-LABEL: fcanonicalize_f32:
+; CHECK-NOFP16-RV32:       # %bb.0:
+; CHECK-NOFP16-RV32-NEXT:    fmin.s fa0, fa0, fa0
+; CHECK-NOFP16-RV32-NEXT:    ret
+; RV32-SOFT-LABEL: fcanonicalize_f32:
+; RV32-SOFT:       # %bb.0:
+; RV32-SOFT-NEXT:    addi sp, sp, -16
+; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 16
+; RV32-SOFT-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    .cfi_offset ra, -4
+; RV32-SOFT-NEXT:    mv a1, a0
+; RV32-SOFT-NEXT:    call fminimum_numf
+; RV32-SOFT-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    .cfi_restore ra
+; RV32-SOFT-NEXT:    addi sp, sp, 16
+; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 0
+; RV32-SOFT-NEXT:    ret
   %z = call float @llvm.canonicalize.f32(float %x)
   ret float %z
 }
 
 define float @fcanonicalize_f32_nnan(float %x) {
-; CHECK-LABEL: fcanonicalize_f32_nnan:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmin.s fa0, fa0, fa0
-; CHECK-NEXT:    ret
+; RV64-SOFT-LABEL: fcanonicalize_f32_nnan:
+; RV64-SOFT:       # %bb.0:
+; RV64-SOFT-NEXT:    addi sp, sp, -16
+; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 16
+; RV64-SOFT-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    .cfi_offset ra, -8
+; RV64-SOFT-NEXT:    mv a1, a0
+; RV64-SOFT-NEXT:    call fminimum_numf
+; RV64-SOFT-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    .cfi_restore ra
+; RV64-SOFT-NEXT:    addi sp, sp, 16
+; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 0
+; RV64-SOFT-NEXT:    ret
+;
+; CHECK-SOFT-RV64-LABEL: fcanonicalize_f32_nnan:
+; CHECK-SOFT-RV64:       # %bb.0:
+; CHECK-SOFT-RV64-NEXT:    addi sp, sp, -16
+; CHECK-SOFT-RV64-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-SOFT-RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset ra, -8
+; CHECK-SOFT-RV64-NEXT:    mv a1, a0
+; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore ra
+; CHECK-SOFT-RV64-NEXT:    addi sp, sp, 16
+; CHECK-SOFT-RV64-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-SOFT-RV64-NEXT:    ret
+;
+; CHECK-FP16-RV64-LABEL: fcanonicalize_f32_nnan:
+; CHECK-FP16-RV64:       # %bb.0:
+; CHECK-FP16-RV64-NEXT:    fmin.s fa0, fa0, fa0
+; CHECK-FP16-RV64-NEXT:    ret
+;
+; CHECK-NOFP16-RV64-LABEL: fcanonicalize_f32_nnan:
+; CHECK-NOFP16-RV64:       # %bb.0:
+; CHECK-NOFP16-RV64-NEXT:    fmin.s fa0, fa0, fa0
+; CHECK-NOFP16-RV64-NEXT:    ret
+;
+; CHECK-SOFT-RV32-LABEL: fcanonicalize_f32_nnan:
+; CHECK-SOFT-RV32:       # %bb.0:
+; CHECK-SOFT-RV32-NEXT:    addi sp, sp, -16
+; CHECK-SOFT-RV32-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-SOFT-RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset ra, -4
+; CHECK-SOFT-RV32-NEXT:    mv a1, a0
+; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore ra
+; CHECK-SOFT-RV32-NEXT:    addi sp, sp, 16
+; CHECK-SOFT-RV32-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-SOFT-RV32-NEXT:    ret
+;
+; CHECK-FP16-RV32-LABEL: fcanonicalize_f32_nnan:
+; CHECK-FP16-RV32:       # %bb.0:
+; CHECK-FP16-RV32-NEXT:    fmin.s fa0, fa0, fa0
+; CHECK-FP16-RV32-NEXT:    ret
+;
+; CHECK-NOFP16-RV32-LABEL: fcanonicalize_f32_nnan:
+; CHECK-NOFP16-RV32:       # %bb.0:
+; CHECK-NOFP16-RV32-NEXT:    fmin.s fa0, fa0, fa0
+; CHECK-NOFP16-RV32-NEXT:    ret
+; RV32-SOFT-LABEL: fcanonicalize_f32_nnan:
+; RV32-SOFT:       # %bb.0:
+; RV32-SOFT-NEXT:    addi sp, sp, -16
+; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 16
+; RV32-SOFT-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    .cfi_offset ra, -4
+; RV32-SOFT-NEXT:    mv a1, a0
+; RV32-SOFT-NEXT:    call fminimum_numf
+; RV32-SOFT-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    .cfi_restore ra
+; RV32-SOFT-NEXT:    addi sp, sp, 16
+; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 0
+; RV32-SOFT-NEXT:    ret
   %z = call nnan float @llvm.canonicalize.f32(float %x)
   ret float %z
 }
 
 define <2 x float> @fcanonicalize_v2f32(<2 x float> %x) {
-; CHECK-LABEL: fcanonicalize_v2f32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmin.s fa0, fa0, fa0
-; CHECK-NEXT:    fmin.s fa1, fa1, fa1
-; CHECK-NEXT:    ret
+; RV64-SOFT-LABEL: fcanonicalize_v2f32:
+; RV64-SOFT:       # %bb.0:
+; RV64-SOFT-NEXT:    addi sp, sp, -32
+; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 32
+; RV64-SOFT-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    .cfi_offset ra, -8
+; RV64-SOFT-NEXT:    .cfi_offset s0, -16
+; RV64-SOFT-NEXT:    .cfi_offset s1, -24
+; RV64-SOFT-NEXT:    mv s0, a1
+; RV64-SOFT-NEXT:    mv a1, a0
+; RV64-SOFT-NEXT:    call fminimum_numf
+; RV64-SOFT-NEXT:    mv s1, a0
+; RV64-SOFT-NEXT:    mv a0, s0
+; RV64-SOFT-NEXT:    mv a1, s0
+; RV64-SOFT-NEXT:    call fminimum_numf
+; RV64-SOFT-NEXT:    mv a1, a0
+; RV64-SOFT-NEXT:    mv a0, s1
+; RV64-SOFT-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    .cfi_restore ra
+; RV64-SOFT-NEXT:    .cfi_restore s0
+; RV64-SOFT-NEXT:    .cfi_restore s1
+; RV64-SOFT-NEXT:    addi sp, sp, 32
+; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 0
+; RV64-SOFT-NEXT:    ret
+;
+; CHECK-SOFT-RV64-LABEL: fcanonicalize_v2f32:
+; CHECK-SOFT-RV64:       # %bb.0:
+; CHECK-SOFT-RV64-NEXT:    addi sp, sp, -32
+; CHECK-SOFT-RV64-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-SOFT-RV64-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset ra, -8
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset s0, -16
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset s1, -24
+; CHECK-SOFT-RV64-NEXT:    mv s0, a1
+; CHECK-SOFT-RV64-NEXT:    mv a1, a0
+; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    mv s1, a0
+; CHECK-SOFT-RV64-NEXT:    mv a0, s0
+; CHECK-SOFT-RV64-NEXT:    mv a1, s0
+; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    mv a1, a0
+; CHECK-SOFT-RV64-NEXT:    mv a0, s1
+; CHECK-SOFT-RV64-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore ra
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore s0
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore s1
+; CHECK-SOFT-RV64-NEXT:    addi sp, sp, 32
+; CHECK-SOFT-RV64-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-SOFT-RV64-NEXT:    ret
+;
+; CHECK-FP16-RV64-LABEL: fcanonicalize_v2f32:
+; CHECK-FP16-RV64:       # %bb.0:
+; CHECK-FP16-RV64-NEXT:    fmin.s fa0, fa0, fa0
+; CHECK-FP16-RV64-NEXT:    fmin.s fa1, fa1, fa1
+; CHECK-FP16-RV64-NEXT:    ret
+;
+; CHECK-NOFP16-RV64-LABEL: fcanonicalize_v2f32:
+; CHECK-NOFP16-RV64:       # %bb.0:
+; CHECK-NOFP16-RV64-NEXT:    fmin.s fa0, fa0, fa0
+; CHECK-NOFP16-RV64-NEXT:    fmin.s fa1, fa1, fa1
+; CHECK-NOFP16-RV64-NEXT:    ret
+;
+; CHECK-SOFT-RV32-LABEL: fcanonicalize_v2f32:
+; CHECK-SOFT-RV32:       # %bb.0:
+; CHECK-SOFT-RV32-NEXT:    addi sp, sp, -16
+; CHECK-SOFT-RV32-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-SOFT-RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset ra, -4
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s0, -8
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s1, -12
+; CHECK-SOFT-RV32-NEXT:    mv s0, a1
+; CHECK-SOFT-RV32-NEXT:    mv a1, a0
+; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    mv s1, a0
+; CHECK-SOFT-RV32-NEXT:    mv a0, s0
+; CHECK-SOFT-RV32-NEXT:    mv a1, s0
+; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    mv a1, a0
+; CHECK-SOFT-RV32-NEXT:    mv a0, s1
+; CHECK-SOFT-RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore ra
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s0
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s1
+; CHECK-SOFT-RV32-NEXT:    addi sp, sp, 16
+; CHECK-SOFT-RV32-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-SOFT-RV32-NEXT:    ret
+;
+; CHECK-FP16-RV32-LABEL: fcanonicalize_v2f32:
+; CHECK-FP16-RV32:       # %bb.0:
+; CHECK-FP16-RV32-NEXT:    fmin.s fa0, fa0, fa0
+; CHECK-FP16-RV32-NEXT:    fmin.s fa1, fa1, fa1
+; CHECK-FP16-RV32-NEXT:    ret
+;
+; CHECK-NOFP16-RV32-LABEL: fcanonicalize_v2f32:
+; CHECK-NOFP16-RV32:       # %bb.0:
+; CHECK-NOFP16-RV32-NEXT:    fmin.s fa0, fa0, fa0
+; CHECK-NOFP16-RV32-NEXT:    fmin.s fa1, fa1, fa1
+; CHECK-NOFP16-RV32-NEXT:    ret
+; RV32-SOFT-LABEL: fcanonicalize_v2f32:
+; RV32-SOFT:       # %bb.0:
+; RV32-SOFT-NEXT:    addi sp, sp, -16
+; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 16
+; RV32-SOFT-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    .cfi_offset ra, -4
+; RV32-SOFT-NEXT:    .cfi_offset s0, -8
+; RV32-SOFT-NEXT:    .cfi_offset s1, -12
+; RV32-SOFT-NEXT:    mv s0, a1
+; RV32-SOFT-NEXT:    mv a1, a0
+; RV32-SOFT-NEXT:    call fminimum_numf
+; RV32-SOFT-NEXT:    mv s1, a0
+; RV32-SOFT-NEXT:    mv a0, s0
+; RV32-SOFT-NEXT:    mv a1, s0
+; RV32-SOFT-NEXT:    call fminimum_numf
+; RV32-SOFT-NEXT:    mv a1, a0
+; RV32-SOFT-NEXT:    mv a0, s1
+; RV32-SOFT-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    .cfi_restore ra
+; RV32-SOFT-NEXT:    .cfi_restore s0
+; RV32-SOFT-NEXT:    .cfi_restore s1
+; RV32-SOFT-NEXT:    addi sp, sp, 16
+; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 0
+; RV32-SOFT-NEXT:    ret
   %z = call <2 x float> @llvm.canonicalize.v2f32(<2 x float> %x)
   ret <2 x float> %z
 }
 
 define <2 x float> @fcanonicalize_v2f32_nnan(<2 x float> %x) {
-; CHECK-LABEL: fcanonicalize_v2f32_nnan:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmin.s fa0, fa0, fa0
-; CHECK-NEXT:    fmin.s fa1, fa1, fa1
-; CHECK-NEXT:    ret
+; RV64-SOFT-LABEL: fcanonicalize_v2f32_nnan:
+; RV64-SOFT:       # %bb.0:
+; RV64-SOFT-NEXT:    addi sp, sp, -32
+; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 32
+; RV64-SOFT-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    .cfi_offset ra, -8
+; RV64-SOFT-NEXT:    .cfi_offset s0, -16
+; RV64-SOFT-NEXT:    .cfi_offset s1, -24
+; RV64-SOFT-NEXT:    mv s0, a1
+; RV64-SOFT-NEXT:    mv a1, a0
+; RV64-SOFT-NEXT:    call fminimum_numf
+; RV64-SOFT-NEXT:    mv s1, a0
+; RV64-SOFT-NEXT:    mv a0, s0
+; RV64-SOFT-NEXT:    mv a1, s0
+; RV64-SOFT-NEXT:    call fminimum_numf
+; RV64-SOFT-NEXT:    mv a1, a0
+; RV64-SOFT-NEXT:    mv a0, s1
+; RV64-SOFT-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    .cfi_restore ra
+; RV64-SOFT-NEXT:    .cfi_restore s0
+; RV64-SOFT-NEXT:    .cfi_restore s1
+; RV64-SOFT-NEXT:    addi sp, sp, 32
+; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 0
+; RV64-SOFT-NEXT:    ret
+;
+; CHECK-SOFT-RV64-LABEL: fcanonicalize_v2f32_nnan:
+; CHECK-SOFT-RV64:       # %bb.0:
+; CHECK-SOFT-RV64-NEXT:    addi sp, sp, -32
+; CHECK-SOFT-RV64-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-SOFT-RV64-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset ra, -8
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset s0, -16
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset s1, -24
+; CHECK-SOFT-RV64-NEXT:    mv s0, a1
+; CHECK-SOFT-RV64-NEXT:    mv a1, a0
+; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    mv s1, a0
+; CHECK-SOFT-RV64-NEXT:    mv a0, s0
+; CHECK-SOFT-RV64-NEXT:    mv a1, s0
+; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    mv a1, a0
+; CHECK-SOFT-RV64-NEXT:    mv a0, s1
+; CHECK-SOFT-RV64-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore ra
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore s0
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore s1
+; CHECK-SOFT-RV64-NEXT:    addi sp, sp, 32
+; CHECK-SOFT-RV64-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-SOFT-RV64-NEXT:    ret
+;
+; CHECK-FP16-RV64-LABEL: fcanonicalize_v2f32_nnan:
+; CHECK-FP16-RV64:       # %bb.0:
+; CHECK-FP16-RV64-NEXT:    fmin.s fa0, fa0, fa0
+; CHECK-FP16-RV64-NEXT:    fmin.s fa1, fa1, fa1
+; CHECK-FP16-RV64-NEXT:    ret
+;
+; CHECK-NOFP16-RV64-LABEL: fcanonicalize_v2f32_nnan:
+; CHECK-NOFP16-RV64:       # %bb.0:
+; CHECK-NOFP16-RV64-NEXT:    fmin.s fa0, fa0, fa0
+; CHECK-NOFP16-RV64-NEXT:    fmin.s fa1, fa1, fa1
+; CHECK-NOFP16-RV64-NEXT:    ret
+;
+; CHECK-SOFT-RV32-LABEL: fcanonicalize_v2f32_nnan:
+; CHECK-SOFT-RV32:       # %bb.0:
+; CHECK-SOFT-RV32-NEXT:    addi sp, sp, -16
+; CHECK-SOFT-RV32-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-SOFT-RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset ra, -4
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s0, -8
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s1, -12
+; CHECK-SOFT-RV32-NEXT:    mv s0, a1
+; CHECK-SOFT-RV32-NEXT:    mv a1, a0
+; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    mv s1, a0
+; CHECK-SOFT-RV32-NEXT:    mv a0, s0
+; CHECK-SOFT-RV32-NEXT:    mv a1, s0
+; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    mv a1, a0
+; CHECK-SOFT-RV32-NEXT:    mv a0, s1
+; CHECK-SOFT-RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore ra
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s0
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s1
+; CHECK-SOFT-RV32-NEXT:    addi sp, sp, 16
+; CHECK-SOFT-RV32-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-SOFT-RV32-NEXT:    ret
+;
+; CHECK-FP16-RV32-LABEL: fcanonicalize_v2f32_nnan:
+; CHECK-FP16-RV32:       # %bb.0:
+; CHECK-FP16-RV32-NEXT:    fmin.s fa0, fa0, fa0
+; CHECK-FP16-RV32-NEXT:    fmin.s fa1, fa1, fa1
+; CHECK-FP16-RV32-NEXT:    ret
+;
+; CHECK-NOFP16-RV32-LABEL: fcanonicalize_v2f32_nnan:
+; CHECK-NOFP16-RV32:       # %bb.0:
+; CHECK-NOFP16-RV32-NEXT:    fmin.s fa0, fa0, fa0
+; CHECK-NOFP16-RV32-NEXT:    fmin.s fa1, fa1, fa1
+; CHECK-NOFP16-RV32-NEXT:    ret
+; RV32-SOFT-LABEL: fcanonicalize_v2f32_nnan:
+; RV32-SOFT:       # %bb.0:
+; RV32-SOFT-NEXT:    addi sp, sp, -16
+; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 16
+; RV32-SOFT-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    .cfi_offset ra, -4
+; RV32-SOFT-NEXT:    .cfi_offset s0, -8
+; RV32-SOFT-NEXT:    .cfi_offset s1, -12
+; RV32-SOFT-NEXT:    mv s0, a1
+; RV32-SOFT-NEXT:    mv a1, a0
+; RV32-SOFT-NEXT:    call fminimum_numf
+; RV32-SOFT-NEXT:    mv s1, a0
+; RV32-SOFT-NEXT:    mv a0, s0
+; RV32-SOFT-NEXT:    mv a1, s0
+; RV32-SOFT-NEXT:    call fminimum_numf
+; RV32-SOFT-NEXT:    mv a1, a0
+; RV32-SOFT-NEXT:    mv a0, s1
+; RV32-SOFT-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    .cfi_restore ra
+; RV32-SOFT-NEXT:    .cfi_restore s0
+; RV32-SOFT-NEXT:    .cfi_restore s1
+; RV32-SOFT-NEXT:    addi sp, sp, 16
+; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 0
+; RV32-SOFT-NEXT:    ret
   %z = call nnan <2 x float> @llvm.canonicalize.v2f32(<2 x float> %x)
   ret <2 x float> %z
 }
 
 define <4 x float> @fcanonicalize_v4f32(<4 x float> %x) {
-; CHECK-LABEL: fcanonicalize_v4f32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmin.s fa5, fa0, fa0
-; CHECK-NEXT:    fmin.s fa4, fa1, fa1
-; CHECK-NEXT:    fmin.s fa2, fa2, fa2
-; CHECK-NEXT:    fmin.s fa3, fa3, fa3
-; CHECK-NEXT:    fsw fa5, 0(a0)
-; CHECK-NEXT:    fsw fa4, 4(a0)
-; CHECK-NEXT:    fsw fa2, 8(a0)
-; CHECK-NEXT:    fsw fa3, 12(a0)
-; CHECK-NEXT:    ret
+; RV64-SOFT-LABEL: fcanonicalize_v4f32:
+; RV64-SOFT:       # %bb.0:
+; RV64-SOFT-NEXT:    addi sp, sp, -48
+; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 48
+; RV64-SOFT-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    sd s4, 0(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    .cfi_offset ra, -8
+; RV64-SOFT-NEXT:    .cfi_offset s0, -16
+; RV64-SOFT-NEXT:    .cfi_offset s1, -24
+; RV64-SOFT-NEXT:    .cfi_offset s2, -32
+; RV64-SOFT-NEXT:    .cfi_offset s3, -40
+; RV64-SOFT-NEXT:    .cfi_offset s4, -48
+; RV64-SOFT-NEXT:    lw a2, 0(a1)
+; RV64-SOFT-NEXT:    lw s0, 8(a1)
+; RV64-SOFT-NEXT:    lw s1, 16(a1)
+; RV64-SOFT-NEXT:    lw s2, 24(a1)
+; RV64-SOFT-NEXT:    mv s3, a0
+; RV64-SOFT-NEXT:    mv a0, a2
+; RV64-SOFT-NEXT:    mv a1, a2
+; RV64-SOFT-NEXT:    call fminimum_numf
+; RV64-SOFT-NEXT:    mv s4, a0
+; RV64-SOFT-NEXT:    mv a0, s0
+; RV64-SOFT-NEXT:    mv a1, s0
+; RV64-SOFT-NEXT:    call fminimum_numf
+; RV64-SOFT-NEXT:    mv s0, a0
+; RV64-SOFT-NEXT:    mv a0, s1
+; RV64-SOFT-NEXT:    mv a1, s1
+; RV64-SOFT-NEXT:    call fminimum_numf
+; RV64-SOFT-NEXT:    mv s1, a0
+; RV64-SOFT-NEXT:    mv a0, s2
+; RV64-SOFT-NEXT:    mv a1, s2
+; RV64-SOFT-NEXT:    call fminimum_numf
+; RV64-SOFT-NEXT:    sw s4, 0(s3)
+; RV64-SOFT-NEXT:    sw s0, 4(s3)
+; RV64-SOFT-NEXT:    sw s1, 8(s3)
+; RV64-SOFT-NEXT:    sw a0, 12(s3)
+; RV64-SOFT-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    ld s4, 0(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    .cfi_restore ra
+; RV64-SOFT-NEXT:    .cfi_restore s0
+; RV64-SOFT-NEXT:    .cfi_restore s1
+; RV64-SOFT-NEXT:    .cfi_restore s2
+; RV64-SOFT-NEXT:    .cfi_restore s3
+; RV64-SOFT-NEXT:    .cfi_restore s4
+; RV64-SOFT-NEXT:    addi sp, sp, 48
+; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 0
+; RV64-SOFT-NEXT:    ret
+;
+; CHECK-SOFT-RV64-LABEL: fcanonicalize_v4f32:
+; CHECK-SOFT-RV64:       # %bb.0:
+; CHECK-SOFT-RV64-NEXT:    addi sp, sp, -48
+; CHECK-SOFT-RV64-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-SOFT-RV64-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    sd s4, 0(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset ra, -8
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset s0, -16
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset s1, -24
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset s2, -32
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset s3, -40
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset s4, -48
+; CHECK-SOFT-RV64-NEXT:    lw a2, 0(a1)
+; CHECK-SOFT-RV64-NEXT:    lw s0, 8(a1)
+; CHECK-SOFT-RV64-NEXT:    lw s1, 16(a1)
+; CHECK-SOFT-RV64-NEXT:    lw s2, 24(a1)
+; CHECK-SOFT-RV64-NEXT:    mv s3, a0
+; CHECK-SOFT-RV64-NEXT:    mv a0, a2
+; CHECK-SOFT-RV64-NEXT:    mv a1, a2
+; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    mv s4, a0
+; CHECK-SOFT-RV64-NEXT:    mv a0, s0
+; CHECK-SOFT-RV64-NEXT:    mv a1, s0
+; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    mv s0, a0
+; CHECK-SOFT-RV64-NEXT:    mv a0, s1
+; CHECK-SOFT-RV64-NEXT:    mv a1, s1
+; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    mv s1, a0
+; CHECK-SOFT-RV64-NEXT:    mv a0, s2
+; CHECK-SOFT-RV64-NEXT:    mv a1, s2
+; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    sw s4, 0(s3)
+; CHECK-SOFT-RV64-NEXT:    sw s0, 4(s3)
+; CHECK-SOFT-RV64-NEXT:    sw s1, 8(s3)
+; CHECK-SOFT-RV64-NEXT:    sw a0, 12(s3)
+; CHECK-SOFT-RV64-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    ld s4, 0(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore ra
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore s0
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore s1
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore s2
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore s3
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore s4
+; CHECK-SOFT-RV64-NEXT:    addi sp, sp, 48
+; CHECK-SOFT-RV64-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-SOFT-RV64-NEXT:    ret
+;
+; CHECK-FP16-RV64-LABEL: fcanonicalize_v4f32:
+; CHECK-FP16-RV64:       # %bb.0:
+; CHECK-FP16-RV64-NEXT:    fmin.s fa5, fa0, fa0
+; CHECK-FP16-RV64-NEXT:    fmin.s fa4, fa1, fa1
+; CHECK-FP16-RV64-NEXT:    fmin.s fa2, fa2, fa2
+; CHECK-FP16-RV64-NEXT:    fmin.s fa3, fa3, fa3
+; CHECK-FP16-RV64-NEXT:    fsw fa5, 0(a0)
+; CHECK-FP16-RV64-NEXT:    fsw fa4, 4(a0)
+; CHECK-FP16-RV64-NEXT:    fsw fa2, 8(a0)
+; CHECK-FP16-RV64-NEXT:    fsw fa3, 12(a0)
+; CHECK-FP16-RV64-NEXT:    ret
+;
+; CHECK-NOFP16-RV64-LABEL: fcanonicalize_v4f32:
+; CHECK-NOFP16-RV64:       # %bb.0:
+; CHECK-NOFP16-RV64-NEXT:    fmin.s fa5, fa0, fa0
+; CHECK-NOFP16-RV64-NEXT:    fmin.s fa4, fa1, fa1
+; CHECK-NOFP16-RV64-NEXT:    fmin.s fa2, fa2, fa2
+; CHECK-NOFP16-RV64-NEXT:    fmin.s fa3, fa3, fa3
+; CHECK-NOFP16-RV64-NEXT:    fsw fa5, 0(a0)
+; CHECK-NOFP16-RV64-NEXT:    fsw fa4, 4(a0)
+; CHECK-NOFP16-RV64-NEXT:    fsw fa2, 8(a0)
+; CHECK-NOFP16-RV64-NEXT:    fsw fa3, 12(a0)
+; CHECK-NOFP16-RV64-NEXT:    ret
+;
+; CHECK-SOFT-RV32-LABEL: fcanonicalize_v4f32:
+; CHECK-SOFT-RV32:       # %bb.0:
+; CHECK-SOFT-RV32-NEXT:    addi sp, sp, -32
+; CHECK-SOFT-RV32-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-SOFT-RV32-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset ra, -4
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s0, -8
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s1, -12
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s2, -16
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s3, -20
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s4, -24
+; CHECK-SOFT-RV32-NEXT:    lw a2, 0(a1)
+; CHECK-SOFT-RV32-NEXT:    lw s0, 4(a1)
+; CHECK-SOFT-RV32-NEXT:    lw s1, 8(a1)
+; CHECK-SOFT-RV32-NEXT:    lw s2, 12(a1)
+; CHECK-SOFT-RV32-NEXT:    mv s3, a0
+; CHECK-SOFT-RV32-NEXT:    mv a0, a2
+; CHECK-SOFT-RV32-NEXT:    mv a1, a2
+; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    mv s4, a0
+; CHECK-SOFT-RV32-NEXT:    mv a0, s0
+; CHECK-SOFT-RV32-NEXT:    mv a1, s0
+; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    mv s0, a0
+; CHECK-SOFT-RV32-NEXT:    mv a0, s1
+; CHECK-SOFT-RV32-NEXT:    mv a1, s1
+; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    mv s1, a0
+; CHECK-SOFT-RV32-NEXT:    mv a0, s2
+; CHECK-SOFT-RV32-NEXT:    mv a1, s2
+; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    sw s4, 0(s3)
+; CHECK-SOFT-RV32-NEXT:    sw s0, 4(s3)
+; CHECK-SOFT-RV32-NEXT:    sw s1, 8(s3)
+; CHECK-SOFT-RV32-NEXT:    sw a0, 12(s3)
+; CHECK-SOFT-RV32-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore ra
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s0
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s1
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s2
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s3
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s4
+; CHECK-SOFT-RV32-NEXT:    addi sp, sp, 32
+; CHECK-SOFT-RV32-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-SOFT-RV32-NEXT:    ret
+;
+; CHECK-FP16-RV32-LABEL: fcanonicalize_v4f32:
+; CHECK-FP16-RV32:       # %bb.0:
+; CHECK-FP16-RV32-NEXT:    fmin.s fa5, fa0, fa0
+; CHECK-FP16-RV32-NEXT:    fmin.s fa4, fa1, fa1
+; CHECK-FP16-RV32-NEXT:    fmin.s fa2, fa2, fa2
+; CHECK-FP16-RV32-NEXT:    fmin.s fa3, fa3, fa3
+; CHECK-FP16-RV32-NEXT:    fsw fa5, 0(a0)
+; CHECK-FP16-RV32-NEXT:    fsw fa4, 4(a0)
+; CHECK-FP16-RV32-NEXT:    fsw fa2, 8(a0)
+; CHECK-FP16-RV32-NEXT:    fsw fa3, 12(a0)
+; CHECK-FP16-RV32-NEXT:    ret
+;
+; CHECK-NOFP16-RV32-LABEL: fcanonicalize_v4f32:
+; CHECK-NOFP16-RV32:       # %bb.0:
+; CHECK-NOFP16-RV32-NEXT:    fmin.s fa5, fa0, fa0
+; CHECK-NOFP16-RV32-NEXT:    fmin.s fa4, fa1, fa1
+; CHECK-NOFP16-RV32-NEXT:    fmin.s fa2, fa2, fa2
+; CHECK-NOFP16-RV32-NEXT:    fmin.s fa3, fa3, fa3
+; CHECK-NOFP16-RV32-NEXT:    fsw fa5, 0(a0)
+; CHECK-NOFP16-RV32-NEXT:    fsw fa4, 4(a0)
+; CHECK-NOFP16-RV32-NEXT:    fsw fa2, 8(a0)
+; CHECK-NOFP16-RV32-NEXT:    fsw fa3, 12(a0)
+; CHECK-NOFP16-RV32-NEXT:    ret
+; RV32-SOFT-LABEL: fcanonicalize_v4f32:
+; RV32-SOFT:       # %bb.0:
+; RV32-SOFT-NEXT:    addi sp, sp, -32
+; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 32
+; RV32-SOFT-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    .cfi_offset ra, -4
+; RV32-SOFT-NEXT:    .cfi_offset s0, -8
+; RV32-SOFT-NEXT:    .cfi_offset s1, -12
+; RV32-SOFT-NEXT:    .cfi_offset s2, -16
+; RV32-SOFT-NEXT:    .cfi_offset s3, -20
+; RV32-SOFT-NEXT:    .cfi_offset s4, -24
+; RV32-SOFT-NEXT:    lw a2, 0(a1)
+; RV32-SOFT-NEXT:    lw s0, 4(a1)
+; RV32-SOFT-NEXT:    lw s1, 8(a1)
+; RV32-SOFT-NEXT:    lw s2, 12(a1)
+; RV32-SOFT-NEXT:    mv s3, a0
+; RV32-SOFT-NEXT:    mv a0, a2
+; RV32-SOFT-NEXT:    mv a1, a2
+; RV32-SOFT-NEXT:    call fminimum_numf
+; RV32-SOFT-NEXT:    mv s4, a0
+; RV32-SOFT-NEXT:    mv a0, s0
+; RV32-SOFT-NEXT:    mv a1, s0
+; RV32-SOFT-NEXT:    call fminimum_numf
+; RV32-SOFT-NEXT:    mv s0, a0
+; RV32-SOFT-NEXT:    mv a0, s1
+; RV32-SOFT-NEXT:    mv a1, s1
+; RV32-SOFT-NEXT:    call fminimum_numf
+; RV32-SOFT-NEXT:    mv s1, a0
+; RV32-SOFT-NEXT:    mv a0, s2
+; RV32-SOFT-NEXT:    mv a1, s2
+; RV32-SOFT-NEXT:    call fminimum_numf
+; RV32-SOFT-NEXT:    sw s4, 0(s3)
+; RV32-SOFT-NEXT:    sw s0, 4(s3)
+; RV32-SOFT-NEXT:    sw s1, 8(s3)
+; RV32-SOFT-NEXT:    sw a0, 12(s3)
+; RV32-SOFT-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    .cfi_restore ra
+; RV32-SOFT-NEXT:    .cfi_restore s0
+; RV32-SOFT-NEXT:    .cfi_restore s1
+; RV32-SOFT-NEXT:    .cfi_restore s2
+; RV32-SOFT-NEXT:    .cfi_restore s3
+; RV32-SOFT-NEXT:    .cfi_restore s4
+; RV32-SOFT-NEXT:    addi sp, sp, 32
+; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 0
+; RV32-SOFT-NEXT:    ret
   %z = call <4 x float> @llvm.canonicalize.v4f32(<4 x float> %x)
   ret <4 x float> %z
 }
 
 define <4 x float> @fcanonicalize_v4f32_nnan(<4 x float> %x) {
-; CHECK-LABEL: fcanonicalize_v4f32_nnan:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmin.s fa5, fa0, fa0
-; CHECK-NEXT:    fmin.s fa4, fa1, fa1
-; CHECK-NEXT:    fmin.s fa2, fa2, fa2
-; CHECK-NEXT:    fmin.s fa3, fa3, fa3
-; CHECK-NEXT:    fsw fa5, 0(a0)
-; CHECK-NEXT:    fsw fa4, 4(a0)
-; CHECK-NEXT:    fsw fa2, 8(a0)
-; CHECK-NEXT:    fsw fa3, 12(a0)
-; CHECK-NEXT:    ret
+; RV64-SOFT-LABEL: fcanonicalize_v4f32_nnan:
+; RV64-SOFT:       # %bb.0:
+; RV64-SOFT-NEXT:    addi sp, sp, -48
+; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 48
+; RV64-SOFT-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    sd s4, 0(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    .cfi_offset ra, -8
+; RV64-SOFT-NEXT:    .cfi_offset s0, -16
+; RV64-SOFT-NEXT:    .cfi_offset s1, -24
+; RV64-SOFT-NEXT:    .cfi_offset s2, -32
+; RV64-SOFT-NEXT:    .cfi_offset s3, -40
+; RV64-SOFT-NEXT:    .cfi_offset s4, -48
+; RV64-SOFT-NEXT:    lw a2, 0(a1)
+; RV64-SOFT-NEXT:    lw s0, 8(a1)
+; RV64-SOFT-NEXT:    lw s1, 16(a1)
+; RV64-SOFT-NEXT:    lw s2, 24(a1)
+; RV64-SOFT-NEXT:    mv s3, a0
+; RV64-SOFT-NEXT:    mv a0, a2
+; RV64-SOFT-NEXT:    mv a1, a2
+; RV64-SOFT-NEXT:    call fminimum_numf
+; RV64-SOFT-NEXT:    mv s4, a0
+; RV64-SOFT-NEXT:    mv a0, s0
+; RV64-SOFT-NEXT:    mv a1, s0
+; RV64-SOFT-NEXT:    call fminimum_numf
+; RV64-SOFT-NEXT:    mv s0, a0
+; RV64-SOFT-NEXT:    mv a0, s1
+; RV64-SOFT-NEXT:    mv a1, s1
+; RV64-SOFT-NEXT:    call fminimum_numf
+; RV64-SOFT-NEXT:    mv s1, a0
+; RV64-SOFT-NEXT:    mv a0, s2
+; RV64-SOFT-NEXT:    mv a1, s2
+; RV64-SOFT-NEXT:    call fminimum_numf
+; RV64-SOFT-NEXT:    sw s4, 0(s3)
+; RV64-SOFT-NEXT:    sw s0, 4(s3)
+; RV64-SOFT-NEXT:    sw s1, 8(s3)
+; RV64-SOFT-NEXT:    sw a0, 12(s3)
+; RV64-SOFT-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    ld s4, 0(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    .cfi_restore ra
+; RV64-SOFT-NEXT:    .cfi_restore s0
+; RV64-SOFT-NEXT:    .cfi_restore s1
+; RV64-SOFT-NEXT:    .cfi_restore s2
+; RV64-SOFT-NEXT:    .cfi_restore s3
+; RV64-SOFT-NEXT:    .cfi_restore s4
+; RV64-SOFT-NEXT:    addi sp, sp, 48
+; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 0
+; RV64-SOFT-NEXT:    ret
+;
+; CHECK-SOFT-RV64-LABEL: fcanonicalize_v4f32_nnan:
+; CHECK-SOFT-RV64:       # %bb.0:
+; CHECK-SOFT-RV64-NEXT:    addi sp, sp, -48
+; CHECK-SOFT-RV64-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-SOFT-RV64-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    sd s4, 0(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset ra, -8
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset s0, -16
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset s1, -24
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset s2, -32
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset s3, -40
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset s4, -48
+; CHECK-SOFT-RV64-NEXT:    lw a2, 0(a1)
+; CHECK-SOFT-RV64-NEXT:    lw s0, 8(a1)
+; CHECK-SOFT-RV64-NEXT:    lw s1, 16(a1)
+; CHECK-SOFT-RV64-NEXT:    lw s2, 24(a1)
+; CHECK-SOFT-RV64-NEXT:    mv s3, a0
+; CHECK-SOFT-RV64-NEXT:    mv a0, a2
+; CHECK-SOFT-RV64-NEXT:    mv a1, a2
+; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    mv s4, a0
+; CHECK-SOFT-RV64-NEXT:    mv a0, s0
+; CHECK-SOFT-RV64-NEXT:    mv a1, s0
+; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    mv s0, a0
+; CHECK-SOFT-RV64-NEXT:    mv a0, s1
+; CHECK-SOFT-RV64-NEXT:    mv a1, s1
+; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    mv s1, a0
+; CHECK-SOFT-RV64-NEXT:    mv a0, s2
+; CHECK-SOFT-RV64-NEXT:    mv a1, s2
+; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    sw s4, 0(s3)
+; CHECK-SOFT-RV64-NEXT:    sw s0, 4(s3)
+; CHECK-SOFT-RV64-NEXT:    sw s1, 8(s3)
+; CHECK-SOFT-RV64-NEXT:    sw a0, 12(s3)
+; CHECK-SOFT-RV64-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    ld s4, 0(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore ra
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore s0
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore s1
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore s2
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore s3
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore s4
+; CHECK-SOFT-RV64-NEXT:    addi sp, sp, 48
+; CHECK-SOFT-RV64-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-SOFT-RV64-NEXT:    ret
+;
+; CHECK-FP16-RV64-LABEL: fcanonicalize_v4f32_nnan:
+; CHECK-FP16-RV64:       # %bb.0:
+; CHECK-FP16-RV64-NEXT:    fmin.s fa5, fa0, fa0
+; CHECK-FP16-RV64-NEXT:    fmin.s fa4, fa1, fa1
+; CHECK-FP16-RV64-NEXT:    fmin.s fa2, fa2, fa2
+; CHECK-FP16-RV64-NEXT:    fmin.s fa3, fa3, fa3
+; CHECK-FP16-RV64-NEXT:    fsw fa5, 0(a0)
+; CHECK-FP16-RV64-NEXT:    fsw fa4, 4(a0)
+; CHECK-FP16-RV64-NEXT:    fsw fa2, 8(a0)
+; CHECK-FP16-RV64-NEXT:    fsw fa3, 12(a0)
+; CHECK-FP16-RV64-NEXT:    ret
+;
+; CHECK-NOFP16-RV64-LABEL: fcanonicalize_v4f32_nnan:
+; CHECK-NOFP16-RV64:       # %bb.0:
+; CHECK-NOFP16-RV64-NEXT:    fmin.s fa5, fa0, fa0
+; CHECK-NOFP16-RV64-NEXT:    fmin.s fa4, fa1, fa1
+; CHECK-NOFP16-RV64-NEXT:    fmin.s fa2, fa2, fa2
+; CHECK-NOFP16-RV64-NEXT:    fmin.s fa3, fa3, fa3
+; CHECK-NOFP16-RV64-NEXT:    fsw fa5, 0(a0)
+; CHECK-NOFP16-RV64-NEXT:    fsw fa4, 4(a0)
+; CHECK-NOFP16-RV64-NEXT:    fsw fa2, 8(a0)
+; CHECK-NOFP16-RV64-NEXT:    fsw fa3, 12(a0)
+; CHECK-NOFP16-RV64-NEXT:    ret
+;
+; CHECK-SOFT-RV32-LABEL: fcanonicalize_v4f32_nnan:
+; CHECK-SOFT-RV32:       # %bb.0:
+; CHECK-SOFT-RV32-NEXT:    addi sp, sp, -32
+; CHECK-SOFT-RV32-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-SOFT-RV32-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset ra, -4
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s0, -8
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s1, -12
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s2, -16
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s3, -20
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s4, -24
+; CHECK-SOFT-RV32-NEXT:    lw a2, 0(a1)
+; CHECK-SOFT-RV32-NEXT:    lw s0, 4(a1)
+; CHECK-SOFT-RV32-NEXT:    lw s1, 8(a1)
+; CHECK-SOFT-RV32-NEXT:    lw s2, 12(a1)
+; CHECK-SOFT-RV32-NEXT:    mv s3, a0
+; CHECK-SOFT-RV32-NEXT:    mv a0, a2
+; CHECK-SOFT-RV32-NEXT:    mv a1, a2
+; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    mv s4, a0
+; CHECK-SOFT-RV32-NEXT:    mv a0, s0
+; CHECK-SOFT-RV32-NEXT:    mv a1, s0
+; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    mv s0, a0
+; CHECK-SOFT-RV32-NEXT:    mv a0, s1
+; CHECK-SOFT-RV32-NEXT:    mv a1, s1
+; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    mv s1, a0
+; CHECK-SOFT-RV32-NEXT:    mv a0, s2
+; CHECK-SOFT-RV32-NEXT:    mv a1, s2
+; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    sw s4, 0(s3)
+; CHECK-SOFT-RV32-NEXT:    sw s0, 4(s3)
+; CHECK-SOFT-RV32-NEXT:    sw s1, 8(s3)
+; CHECK-SOFT-RV32-NEXT:    sw a0, 12(s3)
+; CHECK-SOFT-RV32-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore ra
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s0
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s1
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s2
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s3
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s4
+; CHECK-SOFT-RV32-NEXT:    addi sp, sp, 32
+; CHECK-SOFT-RV32-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-SOFT-RV32-NEXT:    ret
+;
+; CHECK-FP16-RV32-LABEL: fcanonicalize_v4f32_nnan:
+; CHECK-FP16-RV32:       # %bb.0:
+; CHECK-FP16-RV32-NEXT:    fmin.s fa5, fa0, fa0
+; CHECK-FP16-RV32-NEXT:    fmin.s fa4, fa1, fa1
+; CHECK-FP16-RV32-NEXT:    fmin.s fa2, fa2, fa2
+; CHECK-FP16-RV32-NEXT:    fmin.s fa3, fa3, fa3
+; CHECK-FP16-RV32-NEXT:    fsw fa5, 0(a0)
+; CHECK-FP16-RV32-NEXT:    fsw fa4, 4(a0)
+; CHECK-FP16-RV32-NEXT:    fsw fa2, 8(a0)
+; CHECK-FP16-RV32-NEXT:    fsw fa3, 12(a0)
+; CHECK-FP16-RV32-NEXT:    ret
+;
+; CHECK-NOFP16-RV32-LABEL: fcanonicalize_v4f32_nnan:
+; CHECK-NOFP16-RV32:       # %bb.0:
+; CHECK-NOFP16-RV32-NEXT:    fmin.s fa5, fa0, fa0
+; CHECK-NOFP16-RV32-NEXT:    fmin.s fa4, fa1, fa1
+; CHECK-NOFP16-RV32-NEXT:    fmin.s fa2, fa2, fa2
+; CHECK-NOFP16-RV32-NEXT:    fmin.s fa3, fa3, fa3
+; CHECK-NOFP16-RV32-NEXT:    fsw fa5, 0(a0)
+; CHECK-NOFP16-RV32-NEXT:    fsw fa4, 4(a0)
+; CHECK-NOFP16-RV32-NEXT:    fsw fa2, 8(a0)
+; CHECK-NOFP16-RV32-NEXT:    fsw fa3, 12(a0)
+; CHECK-NOFP16-RV32-NEXT:    ret
+; RV32-SOFT-LABEL: fcanonicalize_v4f32_nnan:
+; RV32-SOFT:       # %bb.0:
+; RV32-SOFT-NEXT:    addi sp, sp, -32
+; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 32
+; RV32-SOFT-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    .cfi_offset ra, -4
+; RV32-SOFT-NEXT:    .cfi_offset s0, -8
+; RV32-SOFT-NEXT:    .cfi_offset s1, -12
+; RV32-SOFT-NEXT:    .cfi_offset s2, -16
+; RV32-SOFT-NEXT:    .cfi_offset s3, -20
+; RV32-SOFT-NEXT:    .cfi_offset s4, -24
+; RV32-SOFT-NEXT:    lw a2, 0(a1)
+; RV32-SOFT-NEXT:    lw s0, 4(a1)
+; RV32-SOFT-NEXT:    lw s1, 8(a1)
+; RV32-SOFT-NEXT:    lw s2, 12(a1)
+; RV32-SOFT-NEXT:    mv s3, a0
+; RV32-SOFT-NEXT:    mv a0, a2
+; RV32-SOFT-NEXT:    mv a1, a2
+; RV32-SOFT-NEXT:    call fminimum_numf
+; RV32-SOFT-NEXT:    mv s4, a0
+; RV32-SOFT-NEXT:    mv a0, s0
+; RV32-SOFT-NEXT:    mv a1, s0
+; RV32-SOFT-NEXT:    call fminimum_numf
+; RV32-SOFT-NEXT:    mv s0, a0
+; RV32-SOFT-NEXT:    mv a0, s1
+; RV32-SOFT-NEXT:    mv a1, s1
+; RV32-SOFT-NEXT:    call fminimum_numf
+; RV32-SOFT-NEXT:    mv s1, a0
+; RV32-SOFT-NEXT:    mv a0, s2
+; RV32-SOFT-NEXT:    mv a1, s2
+; RV32-SOFT-NEXT:    call fminimum_numf
+; RV32-SOFT-NEXT:    sw s4, 0(s3)
+; RV32-SOFT-NEXT:    sw s0, 4(s3)
+; RV32-SOFT-NEXT:    sw s1, 8(s3)
+; RV32-SOFT-NEXT:    sw a0, 12(s3)
+; RV32-SOFT-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    .cfi_restore ra
+; RV32-SOFT-NEXT:    .cfi_restore s0
+; RV32-SOFT-NEXT:    .cfi_restore s1
+; RV32-SOFT-NEXT:    .cfi_restore s2
+; RV32-SOFT-NEXT:    .cfi_restore s3
+; RV32-SOFT-NEXT:    .cfi_restore s4
+; RV32-SOFT-NEXT:    addi sp, sp, 32
+; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 0
+; RV32-SOFT-NEXT:    ret
   %z = call nnan <4 x float> @llvm.canonicalize.v4f32(<4 x float> %x)
   ret <4 x float> %z
 }
 
 define double @fcanonicalize_f64(double %x) {
-; CHECK-LABEL: fcanonicalize_f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmin.d fa0, fa0, fa0
-; CHECK-NEXT:    ret
+; RV64-SOFT-LABEL: fcanonicalize_f64:
+; RV64-SOFT:       # %bb.0:
+; RV64-SOFT-NEXT:    addi sp, sp, -16
+; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 16
+; RV64-SOFT-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    .cfi_offset ra, -8
+; RV64-SOFT-NEXT:    mv a1, a0
+; RV64-SOFT-NEXT:    call fminimum_num
+; RV64-SOFT-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    .cfi_restore ra
+; RV64-SOFT-NEXT:    addi sp, sp, 16
+; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 0
+; RV64-SOFT-NEXT:    ret
+;
+; CHECK-SOFT-RV64-LABEL: fcanonicalize_f64:
+; CHECK-SOFT-RV64:       # %bb.0:
+; CHECK-SOFT-RV64-NEXT:    addi sp, sp, -16
+; CHECK-SOFT-RV64-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-SOFT-RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset ra, -8
+; CHECK-SOFT-RV64-NEXT:    mv a1, a0
+; CHECK-SOFT-RV64-NEXT:    call fminimum_num
+; CHECK-SOFT-RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore ra
+; CHECK-SOFT-RV64-NEXT:    addi sp, sp, 16
+; CHECK-SOFT-RV64-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-SOFT-RV64-NEXT:    ret
+;
+; CHECK-FP16-RV64-LABEL: fcanonicalize_f64:
+; CHECK-FP16-RV64:       # %bb.0:
+; CHECK-FP16-RV64-NEXT:    fmin.d fa0, fa0, fa0
+; CHECK-FP16-RV64-NEXT:    ret
+;
+; CHECK-NOFP16-RV64-LABEL: fcanonicalize_f64:
+; CHECK-NOFP16-RV64:       # %bb.0:
+; CHECK-NOFP16-RV64-NEXT:    fmin.d fa0, fa0, fa0
+; CHECK-NOFP16-RV64-NEXT:    ret
+;
+; CHECK-SOFT-RV32-LABEL: fcanonicalize_f64:
+; CHECK-SOFT-RV32:       # %bb.0:
+; CHECK-SOFT-RV32-NEXT:    addi sp, sp, -16
+; CHECK-SOFT-RV32-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-SOFT-RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset ra, -4
+; CHECK-SOFT-RV32-NEXT:    mv a2, a0
+; CHECK-SOFT-RV32-NEXT:    mv a3, a1
+; CHECK-SOFT-RV32-NEXT:    call fminimum_num
+; CHECK-SOFT-RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore ra
+; CHECK-SOFT-RV32-NEXT:    addi sp, sp, 16
+; CHECK-SOFT-RV32-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-SOFT-RV32-NEXT:    ret
+;
+; CHECK-FP16-RV32-LABEL: fcanonicalize_f64:
+; CHECK-FP16-RV32:       # %bb.0:
+; CHECK-FP16-RV32-NEXT:    fmin.d fa0, fa0, fa0
+; CHECK-FP16-RV32-NEXT:    ret
+;
+; CHECK-NOFP16-RV32-LABEL: fcanonicalize_f64:
+; CHECK-NOFP16-RV32:       # %bb.0:
+; CHECK-NOFP16-RV32-NEXT:    fmin.d fa0, fa0, fa0
+; CHECK-NOFP16-RV32-NEXT:    ret
+; RV32-SOFT-LABEL: fcanonicalize_f64:
+; RV32-SOFT:       # %bb.0:
+; RV32-SOFT-NEXT:    addi sp, sp, -16
+; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 16
+; RV32-SOFT-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    .cfi_offset ra, -4
+; RV32-SOFT-NEXT:    mv a2, a0
+; RV32-SOFT-NEXT:    mv a3, a1
+; RV32-SOFT-NEXT:    call fminimum_num
+; RV32-SOFT-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    .cfi_restore ra
+; RV32-SOFT-NEXT:    addi sp, sp, 16
+; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 0
+; RV32-SOFT-NEXT:    ret
   %z = call double @llvm.canonicalize.f64(double %x)
   ret double %z
 }
 
 define double @fcanonicalize_f64_nnan(double %x) {
-; CHECK-LABEL: fcanonicalize_f64_nnan:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmin.d fa0, fa0, fa0
-; CHECK-NEXT:    ret
+; RV64-SOFT-LABEL: fcanonicalize_f64_nnan:
+; RV64-SOFT:       # %bb.0:
+; RV64-SOFT-NEXT:    addi sp, sp, -16
+; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 16
+; RV64-SOFT-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    .cfi_offset ra, -8
+; RV64-SOFT-NEXT:    mv a1, a0
+; RV64-SOFT-NEXT:    call fminimum_num
+; RV64-SOFT-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    .cfi_restore ra
+; RV64-SOFT-NEXT:    addi sp, sp, 16
+; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 0
+; RV64-SOFT-NEXT:    ret
+;
+; CHECK-SOFT-RV64-LABEL: fcanonicalize_f64_nnan:
+; CHECK-SOFT-RV64:       # %bb.0:
+; CHECK-SOFT-RV64-NEXT:    addi sp, sp, -16
+; CHECK-SOFT-RV64-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-SOFT-RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset ra, -8
+; CHECK-SOFT-RV64-NEXT:    mv a1, a0
+; CHECK-SOFT-RV64-NEXT:    call fminimum_num
+; CHECK-SOFT-RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore ra
+; CHECK-SOFT-RV64-NEXT:    addi sp, sp, 16
+; CHECK-SOFT-RV64-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-SOFT-RV64-NEXT:    ret
+;
+; CHECK-FP16-RV64-LABEL: fcanonicalize_f64_nnan:
+; CHECK-FP16-RV64:       # %bb.0:
+; CHECK-FP16-RV64-NEXT:    fmin.d fa0, fa0, fa0
+; CHECK-FP16-RV64-NEXT:    ret
+;
+; CHECK-NOFP16-RV64-LABEL: fcanonicalize_f64_nnan:
+; CHECK-NOFP16-RV64:       # %bb.0:
+; CHECK-NOFP16-RV64-NEXT:    fmin.d fa0, fa0, fa0
+; CHECK-NOFP16-RV64-NEXT:    ret
+;
+; CHECK-SOFT-RV32-LABEL: fcanonicalize_f64_nnan:
+; CHECK-SOFT-RV32:       # %bb.0:
+; CHECK-SOFT-RV32-NEXT:    addi sp, sp, -16
+; CHECK-SOFT-RV32-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-SOFT-RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset ra, -4
+; CHECK-SOFT-RV32-NEXT:    mv a2, a0
+; CHECK-SOFT-RV32-NEXT:    mv a3, a1
+; CHECK-SOFT-RV32-NEXT:    call fminimum_num
+; CHECK-SOFT-RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore ra
+; CHECK-SOFT-RV32-NEXT:    addi sp, sp, 16
+; CHECK-SOFT-RV32-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-SOFT-RV32-NEXT:    ret
+;
+; CHECK-FP16-RV32-LABEL: fcanonicalize_f64_nnan:
+; CHECK-FP16-RV32:       # %bb.0:
+; CHECK-FP16-RV32-NEXT:    fmin.d fa0, fa0, fa0
+; CHECK-FP16-RV32-NEXT:    ret
+;
+; CHECK-NOFP16-RV32-LABEL: fcanonicalize_f64_nnan:
+; CHECK-NOFP16-RV32:       # %bb.0:
+; CHECK-NOFP16-RV32-NEXT:    fmin.d fa0, fa0, fa0
+; CHECK-NOFP16-RV32-NEXT:    ret
+; RV32-SOFT-LABEL: fcanonicalize_f64_nnan:
+; RV32-SOFT:       # %bb.0:
+; RV32-SOFT-NEXT:    addi sp, sp, -16
+; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 16
+; RV32-SOFT-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    .cfi_offset ra, -4
+; RV32-SOFT-NEXT:    mv a2, a0
+; RV32-SOFT-NEXT:    mv a3, a1
+; RV32-SOFT-NEXT:    call fminimum_num
+; RV32-SOFT-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    .cfi_restore ra
+; RV32-SOFT-NEXT:    addi sp, sp, 16
+; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 0
+; RV32-SOFT-NEXT:    ret
   %z = call nnan double @llvm.canonicalize.f64(double %x)
   ret double %z
 }
 
 define <2 x double> @fcanonicalize_v2f64(<2 x double> %x) {
-; CHECK-LABEL: fcanonicalize_v2f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmin.d fa0, fa0, fa0
-; CHECK-NEXT:    fmin.d fa1, fa1, fa1
-; CHECK-NEXT:    ret
+; RV64-SOFT-LABEL: fcanonicalize_v2f64:
+; RV64-SOFT:       # %bb.0:
+; RV64-SOFT-NEXT:    addi sp, sp, -32
+; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 32
+; RV64-SOFT-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    .cfi_offset ra, -8
+; RV64-SOFT-NEXT:    .cfi_offset s0, -16
+; RV64-SOFT-NEXT:    .cfi_offset s1, -24
+; RV64-SOFT-NEXT:    mv s0, a1
+; RV64-SOFT-NEXT:    mv a1, a0
+; RV64-SOFT-NEXT:    call fminimum_num
+; RV64-SOFT-NEXT:    mv s1, a0
+; RV64-SOFT-NEXT:    mv a0, s0
+; RV64-SOFT-NEXT:    mv a1, s0
+; RV64-SOFT-NEXT:    call fminimum_num
+; RV64-SOFT-NEXT:    mv a1, a0
+; RV64-SOFT-NEXT:    mv a0, s1
+; RV64-SOFT-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    .cfi_restore ra
+; RV64-SOFT-NEXT:    .cfi_restore s0
+; RV64-SOFT-NEXT:    .cfi_restore s1
+; RV64-SOFT-NEXT:    addi sp, sp, 32
+; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 0
+; RV64-SOFT-NEXT:    ret
+;
+; CHECK-SOFT-RV64-LABEL: fcanonicalize_v2f64:
+; CHECK-SOFT-RV64:       # %bb.0:
+; CHECK-SOFT-RV64-NEXT:    addi sp, sp, -32
+; CHECK-SOFT-RV64-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-SOFT-RV64-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset ra, -8
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset s0, -16
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset s1, -24
+; CHECK-SOFT-RV64-NEXT:    mv s0, a1
+; CHECK-SOFT-RV64-NEXT:    mv a1, a0
+; CHECK-SOFT-RV64-NEXT:    call fminimum_num
+; CHECK-SOFT-RV64-NEXT:    mv s1, a0
+; CHECK-SOFT-RV64-NEXT:    mv a0, s0
+; CHECK-SOFT-RV64-NEXT:    mv a1, s0
+; CHECK-SOFT-RV64-NEXT:    call fminimum_num
+; CHECK-SOFT-RV64-NEXT:    mv a1, a0
+; CHECK-SOFT-RV64-NEXT:    mv a0, s1
+; CHECK-SOFT-RV64-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore ra
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore s0
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore s1
+; CHECK-SOFT-RV64-NEXT:    addi sp, sp, 32
+; CHECK-SOFT-RV64-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-SOFT-RV64-NEXT:    ret
+;
+; CHECK-FP16-RV64-LABEL: fcanonicalize_v2f64:
+; CHECK-FP16-RV64:       # %bb.0:
+; CHECK-FP16-RV64-NEXT:    fmin.d fa0, fa0, fa0
+; CHECK-FP16-RV64-NEXT:    fmin.d fa1, fa1, fa1
+; CHECK-FP16-RV64-NEXT:    ret
+;
+; CHECK-NOFP16-RV64-LABEL: fcanonicalize_v2f64:
+; CHECK-NOFP16-RV64:       # %bb.0:
+; CHECK-NOFP16-RV64-NEXT:    fmin.d fa0, fa0, fa0
+; CHECK-NOFP16-RV64-NEXT:    fmin.d fa1, fa1, fa1
+; CHECK-NOFP16-RV64-NEXT:    ret
+;
+; CHECK-SOFT-RV32-LABEL: fcanonicalize_v2f64:
+; CHECK-SOFT-RV32:       # %bb.0:
+; CHECK-SOFT-RV32-NEXT:    addi sp, sp, -32
+; CHECK-SOFT-RV32-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-SOFT-RV32-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset ra, -4
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s0, -8
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s1, -12
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s2, -16
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s3, -20
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s4, -24
+; CHECK-SOFT-RV32-NEXT:    lw a2, 0(a1)
+; CHECK-SOFT-RV32-NEXT:    lw a3, 4(a1)
+; CHECK-SOFT-RV32-NEXT:    lw s0, 8(a1)
+; CHECK-SOFT-RV32-NEXT:    lw s1, 12(a1)
+; CHECK-SOFT-RV32-NEXT:    mv s2, a0
+; CHECK-SOFT-RV32-NEXT:    mv a0, a2
+; CHECK-SOFT-RV32-NEXT:    mv a1, a3
+; CHECK-SOFT-RV32-NEXT:    call fminimum_num
+; CHECK-SOFT-RV32-NEXT:    mv s3, a0
+; CHECK-SOFT-RV32-NEXT:    mv s4, a1
+; CHECK-SOFT-RV32-NEXT:    mv a0, s0
+; CHECK-SOFT-RV32-NEXT:    mv a1, s1
+; CHECK-SOFT-RV32-NEXT:    mv a2, s0
+; CHECK-SOFT-RV32-NEXT:    mv a3, s1
+; CHECK-SOFT-RV32-NEXT:    call fminimum_num
+; CHECK-SOFT-RV32-NEXT:    sw s3, 0(s2)
+; CHECK-SOFT-RV32-NEXT:    sw s4, 4(s2)
+; CHECK-SOFT-RV32-NEXT:    sw a0, 8(s2)
+; CHECK-SOFT-RV32-NEXT:    sw a1, 12(s2)
+; CHECK-SOFT-RV32-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore ra
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s0
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s1
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s2
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s3
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s4
+; CHECK-SOFT-RV32-NEXT:    addi sp, sp, 32
+; CHECK-SOFT-RV32-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-SOFT-RV32-NEXT:    ret
+;
+; CHECK-FP16-RV32-LABEL: fcanonicalize_v2f64:
+; CHECK-FP16-RV32:       # %bb.0:
+; CHECK-FP16-RV32-NEXT:    fmin.d fa0, fa0, fa0
+; CHECK-FP16-RV32-NEXT:    fmin.d fa1, fa1, fa1
+; CHECK-FP16-RV32-NEXT:    ret
+;
+; CHECK-NOFP16-RV32-LABEL: fcanonicalize_v2f64:
+; CHECK-NOFP16-RV32:       # %bb.0:
+; CHECK-NOFP16-RV32-NEXT:    fmin.d fa0, fa0, fa0
+; CHECK-NOFP16-RV32-NEXT:    fmin.d fa1, fa1, fa1
+; CHECK-NOFP16-RV32-NEXT:    ret
+; RV32-SOFT-LABEL: fcanonicalize_v2f64:
+; RV32-SOFT:       # %bb.0:
+; RV32-SOFT-NEXT:    addi sp, sp, -32
+; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 32
+; RV32-SOFT-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    .cfi_offset ra, -4
+; RV32-SOFT-NEXT:    .cfi_offset s0, -8
+; RV32-SOFT-NEXT:    .cfi_offset s1, -12
+; RV32-SOFT-NEXT:    .cfi_offset s2, -16
+; RV32-SOFT-NEXT:    .cfi_offset s3, -20
+; RV32-SOFT-NEXT:    .cfi_offset s4, -24
+; RV32-SOFT-NEXT:    lw a2, 0(a1)
+; RV32-SOFT-NEXT:    lw a3, 4(a1)
+; RV32-SOFT-NEXT:    lw s0, 8(a1)
+; RV32-SOFT-NEXT:    lw s1, 12(a1)
+; RV32-SOFT-NEXT:    mv s2, a0
+; RV32-SOFT-NEXT:    mv a0, a2
+; RV32-SOFT-NEXT:    mv a1, a3
+; RV32-SOFT-NEXT:    call fminimum_num
+; RV32-SOFT-NEXT:    mv s3, a0
+; RV32-SOFT-NEXT:    mv s4, a1
+; RV32-SOFT-NEXT:    mv a0, s0
+; RV32-SOFT-NEXT:    mv a1, s1
+; RV32-SOFT-NEXT:    mv a2, s0
+; RV32-SOFT-NEXT:    mv a3, s1
+; RV32-SOFT-NEXT:    call fminimum_num
+; RV32-SOFT-NEXT:    sw s3, 0(s2)
+; RV32-SOFT-NEXT:    sw s4, 4(s2)
+; RV32-SOFT-NEXT:    sw a0, 8(s2)
+; RV32-SOFT-NEXT:    sw a1, 12(s2)
+; RV32-SOFT-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    .cfi_restore ra
+; RV32-SOFT-NEXT:    .cfi_restore s0
+; RV32-SOFT-NEXT:    .cfi_restore s1
+; RV32-SOFT-NEXT:    .cfi_restore s2
+; RV32-SOFT-NEXT:    .cfi_restore s3
+; RV32-SOFT-NEXT:    .cfi_restore s4
+; RV32-SOFT-NEXT:    addi sp, sp, 32
+; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 0
+; RV32-SOFT-NEXT:    ret
   %z = call <2 x double> @llvm.canonicalize.v2f64(<2 x double> %x)
   ret <2 x double> %z
 }
 
 define <2 x double> @fcanonicalize_v2f64_nnan(<2 x double> %x) {
-; CHECK-LABEL: fcanonicalize_v2f64_nnan:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    fmin.d fa0, fa0, fa0
-; CHECK-NEXT:    fmin.d fa1, fa1, fa1
-; CHECK-NEXT:    ret
+; RV64-SOFT-LABEL: fcanonicalize_v2f64_nnan:
+; RV64-SOFT:       # %bb.0:
+; RV64-SOFT-NEXT:    addi sp, sp, -32
+; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 32
+; RV64-SOFT-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    .cfi_offset ra, -8
+; RV64-SOFT-NEXT:    .cfi_offset s0, -16
+; RV64-SOFT-NEXT:    .cfi_offset s1, -24
+; RV64-SOFT-NEXT:    mv s0, a1
+; RV64-SOFT-NEXT:    mv a1, a0
+; RV64-SOFT-NEXT:    call fminimum_num
+; RV64-SOFT-NEXT:    mv s1, a0
+; RV64-SOFT-NEXT:    mv a0, s0
+; RV64-SOFT-NEXT:    mv a1, s0
+; RV64-SOFT-NEXT:    call fminimum_num
+; RV64-SOFT-NEXT:    mv a1, a0
+; RV64-SOFT-NEXT:    mv a0, s1
+; RV64-SOFT-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    .cfi_restore ra
+; RV64-SOFT-NEXT:    .cfi_restore s0
+; RV64-SOFT-NEXT:    .cfi_restore s1
+; RV64-SOFT-NEXT:    addi sp, sp, 32
+; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 0
+; RV64-SOFT-NEXT:    ret
+;
+; CHECK-SOFT-RV64-LABEL: fcanonicalize_v2f64_nnan:
+; CHECK-SOFT-RV64:       # %bb.0:
+; CHECK-SOFT-RV64-NEXT:    addi sp, sp, -32
+; CHECK-SOFT-RV64-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-SOFT-RV64-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset ra, -8
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset s0, -16
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset s1, -24
+; CHECK-SOFT-RV64-NEXT:    mv s0, a1
+; CHECK-SOFT-RV64-NEXT:    mv a1, a0
+; CHECK-SOFT-RV64-NEXT:    call fminimum_num
+; CHECK-SOFT-RV64-NEXT:    mv s1, a0
+; CHECK-SOFT-RV64-NEXT:    mv a0, s0
+; CHECK-SOFT-RV64-NEXT:    mv a1, s0
+; CHECK-SOFT-RV64-NEXT:    call fminimum_num
+; CHECK-SOFT-RV64-NEXT:    mv a1, a0
+; CHECK-SOFT-RV64-NEXT:    mv a0, s1
+; CHECK-SOFT-RV64-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore ra
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore s0
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore s1
+; CHECK-SOFT-RV64-NEXT:    addi sp, sp, 32
+; CHECK-SOFT-RV64-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-SOFT-RV64-NEXT:    ret
+;
+; CHECK-FP16-RV64-LABEL: fcanonicalize_v2f64_nnan:
+; CHECK-FP16-RV64:       # %bb.0:
+; CHECK-FP16-RV64-NEXT:    fmin.d fa0, fa0, fa0
+; CHECK-FP16-RV64-NEXT:    fmin.d fa1, fa1, fa1
+; CHECK-FP16-RV64-NEXT:    ret
+;
+; CHECK-NOFP16-RV64-LABEL: fcanonicalize_v2f64_nnan:
+; CHECK-NOFP16-RV64:       # %bb.0:
+; CHECK-NOFP16-RV64-NEXT:    fmin.d fa0, fa0, fa0
+; CHECK-NOFP16-RV64-NEXT:    fmin.d fa1, fa1, fa1
+; CHECK-NOFP16-RV64-NEXT:    ret
+;
+; CHECK-SOFT-RV32-LABEL: fcanonicalize_v2f64_nnan:
+; CHECK-SOFT-RV32:       # %bb.0:
+; CHECK-SOFT-RV32-NEXT:    addi sp, sp, -32
+; CHECK-SOFT-RV32-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-SOFT-RV32-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset ra, -4
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s0, -8
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s1, -12
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s2, -16
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s3, -20
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s4, -24
+; CHECK-SOFT-RV32-NEXT:    lw a2, 0(a1)
+; CHECK-SOFT-RV32-NEXT:    lw a3, 4(a1)
+; CHECK-SOFT-RV32-NEXT:    lw s0, 8(a1)
+; CHECK-SOFT-RV32-NEXT:    lw s1, 12(a1)
+; CHECK-SOFT-RV32-NEXT:    mv s2, a0
+; CHECK-SOFT-RV32-NEXT:    mv a0, a2
+; CHECK-SOFT-RV32-NEXT:    mv a1, a3
+; CHECK-SOFT-RV32-NEXT:    call fminimum_num
+; CHECK-SOFT-RV32-NEXT:    mv s3, a0
+; CHECK-SOFT-RV32-NEXT:    mv s4, a1
+; CHECK-SOFT-RV32-NEXT:    mv a0, s0
+; CHECK-SOFT-RV32-NEXT:    mv a1, s1
+; CHECK-SOFT-RV32-NEXT:    mv a2, s0
+; CHECK-SOFT-RV32-NEXT:    mv a3, s1
+; CHECK-SOFT-RV32-NEXT:    call fminimum_num
+; CHECK-SOFT-RV32-NEXT:    sw s3, 0(s2)
+; CHECK-SOFT-RV32-NEXT:    sw s4, 4(s2)
+; CHECK-SOFT-RV32-NEXT:    sw a0, 8(s2)
+; CHECK-SOFT-RV32-NEXT:    sw a1, 12(s2)
+; CHECK-SOFT-RV32-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore ra
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s0
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s1
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s2
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s3
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s4
+; CHECK-SOFT-RV32-NEXT:    addi sp, sp, 32
+; CHECK-SOFT-RV32-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-SOFT-RV32-NEXT:    ret
+;
+; CHECK-FP16-RV32-LABEL: fcanonicalize_v2f64_nnan:
+; CHECK-FP16-RV32:       # %bb.0:
+; CHECK-FP16-RV32-NEXT:    fmin.d fa0, fa0, fa0
+; CHECK-FP16-RV32-NEXT:    fmin.d fa1, fa1, fa1
+; CHECK-FP16-RV32-NEXT:    ret
+;
+; CHECK-NOFP16-RV32-LABEL: fcanonicalize_v2f64_nnan:
+; CHECK-NOFP16-RV32:       # %bb.0:
+; CHECK-NOFP16-RV32-NEXT:    fmin.d fa0, fa0, fa0
+; CHECK-NOFP16-RV32-NEXT:    fmin.d fa1, fa1, fa1
+; CHECK-NOFP16-RV32-NEXT:    ret
+; RV32-SOFT-LABEL: fcanonicalize_v2f64_nnan:
+; RV32-SOFT:       # %bb.0:
+; RV32-SOFT-NEXT:    addi sp, sp, -32
+; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 32
+; RV32-SOFT-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    .cfi_offset ra, -4
+; RV32-SOFT-NEXT:    .cfi_offset s0, -8
+; RV32-SOFT-NEXT:    .cfi_offset s1, -12
+; RV32-SOFT-NEXT:    .cfi_offset s2, -16
+; RV32-SOFT-NEXT:    .cfi_offset s3, -20
+; RV32-SOFT-NEXT:    .cfi_offset s4, -24
+; RV32-SOFT-NEXT:    lw a2, 0(a1)
+; RV32-SOFT-NEXT:    lw a3, 4(a1)
+; RV32-SOFT-NEXT:    lw s0, 8(a1)
+; RV32-SOFT-NEXT:    lw s1, 12(a1)
+; RV32-SOFT-NEXT:    mv s2, a0
+; RV32-SOFT-NEXT:    mv a0, a2
+; RV32-SOFT-NEXT:    mv a1, a3
+; RV32-SOFT-NEXT:    call fminimum_num
+; RV32-SOFT-NEXT:    mv s3, a0
+; RV32-SOFT-NEXT:    mv s4, a1
+; RV32-SOFT-NEXT:    mv a0, s0
+; RV32-SOFT-NEXT:    mv a1, s1
+; RV32-SOFT-NEXT:    mv a2, s0
+; RV32-SOFT-NEXT:    mv a3, s1
+; RV32-SOFT-NEXT:    call fminimum_num
+; RV32-SOFT-NEXT:    sw s3, 0(s2)
+; RV32-SOFT-NEXT:    sw s4, 4(s2)
+; RV32-SOFT-NEXT:    sw a0, 8(s2)
+; RV32-SOFT-NEXT:    sw a1, 12(s2)
+; RV32-SOFT-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    .cfi_restore ra
+; RV32-SOFT-NEXT:    .cfi_restore s0
+; RV32-SOFT-NEXT:    .cfi_restore s1
+; RV32-SOFT-NEXT:    .cfi_restore s2
+; RV32-SOFT-NEXT:    .cfi_restore s3
+; RV32-SOFT-NEXT:    .cfi_restore s4
+; RV32-SOFT-NEXT:    addi sp, sp, 32
+; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 0
+; RV32-SOFT-NEXT:    ret
   %z = call nnan <2 x double> @llvm.canonicalize.v2f64(<2 x double> %x)
   ret <2 x double> %z
 }
+
+define double @fcanonicalize_softfloat(double, double) unnamed_addr #0 {
+; RV64-SOFT-LABEL: fcanonicalize_softfloat:
+; RV64-SOFT:       # %bb.0: # %start
+; RV64-SOFT-NEXT:    addi sp, sp, -32
+; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 32
+; RV64-SOFT-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
+; RV64-SOFT-NEXT:    .cfi_offset ra, -8
+; RV64-SOFT-NEXT:    .cfi_offset s0, -16
+; RV64-SOFT-NEXT:    .cfi_offset s1, -24
+; RV64-SOFT-NEXT:    .cfi_offset s2, -32
+; RV64-SOFT-NEXT:    mv s0, a1
+; RV64-SOFT-NEXT:    mv s1, a0
+; RV64-SOFT-NEXT:    call __ltdf2
+; RV64-SOFT-NEXT:    srli s2, a0, 63
+; RV64-SOFT-NEXT:    mv a0, s1
+; RV64-SOFT-NEXT:    mv a1, s1
+; RV64-SOFT-NEXT:    call __unorddf2
+; RV64-SOFT-NEXT:    snez a0, a0
+; RV64-SOFT-NEXT:    or a0, a0, s2
+; RV64-SOFT-NEXT:    bnez a0, .LBB18_2
+; RV64-SOFT-NEXT:  # %bb.1: # %start
+; RV64-SOFT-NEXT:    mv s0, s1
+; RV64-SOFT-NEXT:  .LBB18_2: # %start
+; RV64-SOFT-NEXT:    mv a0, s0
+; RV64-SOFT-NEXT:    mv a1, s0
+; RV64-SOFT-NEXT:    call fminimum_num
+; RV64-SOFT-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
+; RV64-SOFT-NEXT:    .cfi_restore ra
+; RV64-SOFT-NEXT:    .cfi_restore s0
+; RV64-SOFT-NEXT:    .cfi_restore s1
+; RV64-SOFT-NEXT:    .cfi_restore s2
+; RV64-SOFT-NEXT:    addi sp, sp, 32
+; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 0
+; RV64-SOFT-NEXT:    ret
+;
+; CHECK-SOFT-RV64-LABEL: fcanonicalize_softfloat:
+; CHECK-SOFT-RV64:       # %bb.0: # %start
+; CHECK-SOFT-RV64-NEXT:    addi sp, sp, -32
+; CHECK-SOFT-RV64-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-SOFT-RV64-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset ra, -8
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset s0, -16
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset s1, -24
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset s2, -32
+; CHECK-SOFT-RV64-NEXT:    mv s0, a1
+; CHECK-SOFT-RV64-NEXT:    mv s1, a0
+; CHECK-SOFT-RV64-NEXT:    call __ltdf2
+; CHECK-SOFT-RV64-NEXT:    srli s2, a0, 63
+; CHECK-SOFT-RV64-NEXT:    mv a0, s1
+; CHECK-SOFT-RV64-NEXT:    mv a1, s1
+; CHECK-SOFT-RV64-NEXT:    call __unorddf2
+; CHECK-SOFT-RV64-NEXT:    snez a0, a0
+; CHECK-SOFT-RV64-NEXT:    or a0, a0, s2
+; CHECK-SOFT-RV64-NEXT:    bnez a0, .LBB18_2
+; CHECK-SOFT-RV64-NEXT:  # %bb.1: # %start
+; CHECK-SOFT-RV64-NEXT:    mv s0, s1
+; CHECK-SOFT-RV64-NEXT:  .LBB18_2: # %start
+; CHECK-SOFT-RV64-NEXT:    mv a0, s0
+; CHECK-SOFT-RV64-NEXT:    mv a1, s0
+; CHECK-SOFT-RV64-NEXT:    call fminimum_num
+; CHECK-SOFT-RV64-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore ra
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore s0
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore s1
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore s2
+; CHECK-SOFT-RV64-NEXT:    addi sp, sp, 32
+; CHECK-SOFT-RV64-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-SOFT-RV64-NEXT:    ret
+;
+; CHECK-FP16-RV64-LABEL: fcanonicalize_softfloat:
+; CHECK-FP16-RV64:       # %bb.0: # %start
+; CHECK-FP16-RV64-NEXT:    flt.d a0, fa0, fa1
+; CHECK-FP16-RV64-NEXT:    feq.d a1, fa0, fa0
+; CHECK-FP16-RV64-NEXT:    xori a1, a1, 1
+; CHECK-FP16-RV64-NEXT:    or a0, a1, a0
+; CHECK-FP16-RV64-NEXT:    bnez a0, .LBB18_2
+; CHECK-FP16-RV64-NEXT:  # %bb.1: # %start
+; CHECK-FP16-RV64-NEXT:    fmv.d fa1, fa0
+; CHECK-FP16-RV64-NEXT:  .LBB18_2: # %start
+; CHECK-FP16-RV64-NEXT:    fmin.d fa0, fa1, fa1
+; CHECK-FP16-RV64-NEXT:    ret
+;
+; CHECK-NOFP16-RV64-LABEL: fcanonicalize_softfloat:
+; CHECK-NOFP16-RV64:       # %bb.0: # %start
+; CHECK-NOFP16-RV64-NEXT:    flt.d a0, fa0, fa1
+; CHECK-NOFP16-RV64-NEXT:    feq.d a1, fa0, fa0
+; CHECK-NOFP16-RV64-NEXT:    xori a1, a1, 1
+; CHECK-NOFP16-RV64-NEXT:    or a0, a1, a0
+; CHECK-NOFP16-RV64-NEXT:    bnez a0, .LBB18_2
+; CHECK-NOFP16-RV64-NEXT:  # %bb.1: # %start
+; CHECK-NOFP16-RV64-NEXT:    fmv.d fa1, fa0
+; CHECK-NOFP16-RV64-NEXT:  .LBB18_2: # %start
+; CHECK-NOFP16-RV64-NEXT:    fmin.d fa0, fa1, fa1
+; CHECK-NOFP16-RV64-NEXT:    ret
+;
+; CHECK-SOFT-RV32-LABEL: fcanonicalize_softfloat:
+; CHECK-SOFT-RV32:       # %bb.0: # %start
+; CHECK-SOFT-RV32-NEXT:    addi sp, sp, -32
+; CHECK-SOFT-RV32-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-SOFT-RV32-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset ra, -4
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s0, -8
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s1, -12
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s2, -16
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s3, -20
+; CHECK-SOFT-RV32-NEXT:    .cfi_offset s4, -24
+; CHECK-SOFT-RV32-NEXT:    mv s0, a3
+; CHECK-SOFT-RV32-NEXT:    mv s1, a2
+; CHECK-SOFT-RV32-NEXT:    mv s2, a1
+; CHECK-SOFT-RV32-NEXT:    mv s3, a0
+; CHECK-SOFT-RV32-NEXT:    call __ltdf2
+; CHECK-SOFT-RV32-NEXT:    srli s4, a0, 31
+; CHECK-SOFT-RV32-NEXT:    mv a0, s3
+; CHECK-SOFT-RV32-NEXT:    mv a1, s2
+; CHECK-SOFT-RV32-NEXT:    mv a2, s3
+; CHECK-SOFT-RV32-NEXT:    mv a3, s2
+; CHECK-SOFT-RV32-NEXT:    call __unorddf2
+; CHECK-SOFT-RV32-NEXT:    snez a0, a0
+; CHECK-SOFT-RV32-NEXT:    or a0, a0, s4
+; CHECK-SOFT-RV32-NEXT:    bnez a0, .LBB18_2
+; CHECK-SOFT-RV32-NEXT:  # %bb.1: # %start
+; CHECK-SOFT-RV32-NEXT:    mv s1, s3
+; CHECK-SOFT-RV32-NEXT:    mv s0, s2
+; CHECK-SOFT-RV32-NEXT:  .LBB18_2: # %start
+; CHECK-SOFT-RV32-NEXT:    mv a0, s1
+; CHECK-SOFT-RV32-NEXT:    mv a1, s0
+; CHECK-SOFT-RV32-NEXT:    mv a2, s1
+; CHECK-SOFT-RV32-NEXT:    mv a3, s0
+; CHECK-SOFT-RV32-NEXT:    call fminimum_num
+; CHECK-SOFT-RV32-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore ra
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s0
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s1
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s2
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s3
+; CHECK-SOFT-RV32-NEXT:    .cfi_restore s4
+; CHECK-SOFT-RV32-NEXT:    addi sp, sp, 32
+; CHECK-SOFT-RV32-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-SOFT-RV32-NEXT:    ret
+;
+; CHECK-FP16-RV32-LABEL: fcanonicalize_softfloat:
+; CHECK-FP16-RV32:       # %bb.0: # %start
+; CHECK-FP16-RV32-NEXT:    flt.d a0, fa0, fa1
+; CHECK-FP16-RV32-NEXT:    feq.d a1, fa0, fa0
+; CHECK-FP16-RV32-NEXT:    xori a1, a1, 1
+; CHECK-FP16-RV32-NEXT:    or a0, a1, a0
+; CHECK-FP16-RV32-NEXT:    bnez a0, .LBB18_2
+; CHECK-FP16-RV32-NEXT:  # %bb.1: # %start
+; CHECK-FP16-RV32-NEXT:    fmv.d fa1, fa0
+; CHECK-FP16-RV32-NEXT:  .LBB18_2: # %start
+; CHECK-FP16-RV32-NEXT:    fmin.d fa0, fa1, fa1
+; CHECK-FP16-RV32-NEXT:    ret
+;
+; CHECK-NOFP16-RV32-LABEL: fcanonicalize_softfloat:
+; CHECK-NOFP16-RV32:       # %bb.0: # %start
+; CHECK-NOFP16-RV32-NEXT:    flt.d a0, fa0, fa1
+; CHECK-NOFP16-RV32-NEXT:    feq.d a1, fa0, fa0
+; CHECK-NOFP16-RV32-NEXT:    xori a1, a1, 1
+; CHECK-NOFP16-RV32-NEXT:    or a0, a1, a0
+; CHECK-NOFP16-RV32-NEXT:    bnez a0, .LBB18_2
+; CHECK-NOFP16-RV32-NEXT:  # %bb.1: # %start
+; CHECK-NOFP16-RV32-NEXT:    fmv.d fa1, fa0
+; CHECK-NOFP16-RV32-NEXT:  .LBB18_2: # %start
+; CHECK-NOFP16-RV32-NEXT:    fmin.d fa0, fa1, fa1
+; CHECK-NOFP16-RV32-NEXT:    ret
+; RV32-SOFT-LABEL: fcanonicalize_softfloat:
+; RV32-SOFT:       # %bb.0: # %start
+; RV32-SOFT-NEXT:    addi sp, sp, -32
+; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 32
+; RV32-SOFT-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
+; RV32-SOFT-NEXT:    .cfi_offset ra, -4
+; RV32-SOFT-NEXT:    .cfi_offset s0, -8
+; RV32-SOFT-NEXT:    .cfi_offset s1, -12
+; RV32-SOFT-NEXT:    .cfi_offset s2, -16
+; RV32-SOFT-NEXT:    .cfi_offset s3, -20
+; RV32-SOFT-NEXT:    .cfi_offset s4, -24
+; RV32-SOFT-NEXT:    mv s0, a3
+; RV32-SOFT-NEXT:    mv s1, a2
+; RV32-SOFT-NEXT:    mv s2, a1
+; RV32-SOFT-NEXT:    mv s3, a0
+; RV32-SOFT-NEXT:    call __ltdf2
+; RV32-SOFT-NEXT:    srli s4, a0, 31
+; RV32-SOFT-NEXT:    mv a0, s3
+; RV32-SOFT-NEXT:    mv a1, s2
+; RV32-SOFT-NEXT:    mv a2, s3
+; RV32-SOFT-NEXT:    mv a3, s2
+; RV32-SOFT-NEXT:    call __unorddf2
+; RV32-SOFT-NEXT:    snez a0, a0
+; RV32-SOFT-NEXT:    or a0, a0, s4
+; RV32-SOFT-NEXT:    bnez a0, .LBB18_2
+; RV32-SOFT-NEXT:  # %bb.1: # %start
+; RV32-SOFT-NEXT:    mv s1, s3
+; RV32-SOFT-NEXT:    mv s0, s2
+; RV32-SOFT-NEXT:  .LBB18_2: # %start
+; RV32-SOFT-NEXT:    mv a0, s1
+; RV32-SOFT-NEXT:    mv a1, s0
+; RV32-SOFT-NEXT:    mv a2, s1
+; RV32-SOFT-NEXT:    mv a3, s0
+; RV32-SOFT-NEXT:    call fminimum_num
+; RV32-SOFT-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
+; RV32-SOFT-NEXT:    .cfi_restore ra
+; RV32-SOFT-NEXT:    .cfi_restore s0
+; RV32-SOFT-NEXT:    .cfi_restore s1
+; RV32-SOFT-NEXT:    .cfi_restore s2
+; RV32-SOFT-NEXT:    .cfi_restore s3
+; RV32-SOFT-NEXT:    .cfi_restore s4
+; RV32-SOFT-NEXT:    addi sp, sp, 32
+; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 0
+; RV32-SOFT-NEXT:    ret
+start:
+  %2 = fcmp olt double %0, %1
+  %3 = fcmp uno double %0, 0.000000e+00
+  %or.cond.i.i = or i1 %3, %2
+  %4 = select i1 %or.cond.i.i, double %1, double %0
+  %5 = tail call double @llvm.canonicalize.f64(double %4) #2
+  ret double %5
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}

>From 08b567c65edbcf7b854724584e9379cb6fc0a874 Mon Sep 17 00:00:00 2001
From: Kevin Per <kevin.per at protonmail.com>
Date: Wed, 26 Nov 2025 08:32:36 +0100
Subject: [PATCH 3/5] [RISCV]: Removed auto

---
 llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index a162bfdc52189..4585de898e61d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -315,7 +315,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) {
 
 SDValue DAGTypeLegalizer::SoftenFloatRes_FCANONICALIZE(SDNode *N) {
   SDLoc dl(N);
-  auto Node = DAG.getNode(ISD::FMINIMUMNUM, dl, N->getValueType(0),
+  SDValue Node = DAG.getNode(ISD::FMINIMUMNUM, dl, N->getValueType(0),
                           N->getOperand(0), N->getOperand(0));
   return SoftenFloatRes_Binary(
       Node.getNode(),

>From 1e3a0361bd6485496b5d4831aef4f6d51f7712e3 Mon Sep 17 00:00:00 2001
From: Kevin Per <kevin.per at protonmail.com>
Date: Wed, 26 Nov 2025 10:29:26 +0100
Subject: [PATCH 4/5] [RISCV]: Use FMUL instead of fminimum_num

---
 .../SelectionDAG/LegalizeFloatTypes.cpp       |   21 +-
 llvm/test/CodeGen/RISCV/fp-fcanonicalize.ll   | 2355 ++---------------
 2 files changed, 237 insertions(+), 2139 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 4585de898e61d..1606ef0f52f4d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -315,13 +315,22 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) {
 
 SDValue DAGTypeLegalizer::SoftenFloatRes_FCANONICALIZE(SDNode *N) {
   SDLoc dl(N);
-  SDValue Node = DAG.getNode(ISD::FMINIMUMNUM, dl, N->getValueType(0),
-                          N->getOperand(0), N->getOperand(0));
+
+  // Create a constant 1.0, then soften it to integer and record the mapping.
+  SDValue CstFP = DAG.getConstantFP(1.0, dl, N->getValueType(0));
+  SDValue CstInt = SoftenFloatRes_ConstantFP(CstFP.getNode());
+
+  if (!SoftenedFloats[getTableId(CstFP)])
+    SetSoftenedFloat(CstFP, CstInt);
+
+  // Multiply the input by 1.0 to canonicalize it. We use `MorphNodeTo` to
+  // avoid constant folding, which happens with `DAG.getNode(ISD::FMUL, ...)`.
+  SDNode *Node =
+      DAG.MorphNodeTo(N, ISD::FMUL, DAG.getVTList(N->getValueType(0)),
+                      {N->getOperand(0), CstFP});
   return SoftenFloatRes_Binary(
-      Node.getNode(),
-      GetFPLibCall(N->getValueType(0), RTLIB::FMINIMUM_NUM_F32,
-                   RTLIB::FMINIMUM_NUM_F64, RTLIB::FMINIMUM_NUM_F80,
-                   RTLIB::FMINIMUM_NUM_F128, RTLIB::FMINIMUM_NUM_PPCF128));
+      Node, GetFPLibCall(N->getValueType(0), RTLIB::MUL_F32, RTLIB::MUL_F64,
+                         RTLIB::MUL_F80, RTLIB::MUL_F128, RTLIB::MUL_PPCF128));
 }
 
 SDValue DAGTypeLegalizer::SoftenFloatRes_FMINNUM(SDNode *N) {
diff --git a/llvm/test/CodeGen/RISCV/fp-fcanonicalize.ll b/llvm/test/CodeGen/RISCV/fp-fcanonicalize.ll
index 5b0b77156ebb0..8ec0a89fb25e7 100644
--- a/llvm/test/CodeGen/RISCV/fp-fcanonicalize.ll
+++ b/llvm/test/CodeGen/RISCV/fp-fcanonicalize.ll
@@ -11,24 +11,6 @@ declare float @llvm.fcanonicalize.f32(float)
 declare double @llvm.fcanonicalize.f64(double)
 
 define half @fcanonicalize_f16(half %x) {
-; RV64-SOFT-LABEL: fcanonicalize_f16:
-; RV64-SOFT:       # %bb.0:
-; RV64-SOFT-NEXT:    addi sp, sp, -16
-; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 16
-; RV64-SOFT-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    .cfi_offset ra, -8
-; RV64-SOFT-NEXT:    slli a0, a0, 48
-; RV64-SOFT-NEXT:    srli a0, a0, 48
-; RV64-SOFT-NEXT:    call __extendhfsf2
-; RV64-SOFT-NEXT:    mv a1, a0
-; RV64-SOFT-NEXT:    call fminimum_numf
-; RV64-SOFT-NEXT:    call __truncsfhf2
-; RV64-SOFT-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    .cfi_restore ra
-; RV64-SOFT-NEXT:    addi sp, sp, 16
-; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 0
-; RV64-SOFT-NEXT:    ret
-;
 ; CHECK-SOFT-RV64-LABEL: fcanonicalize_f16:
 ; CHECK-SOFT-RV64:       # %bb.0:
 ; CHECK-SOFT-RV64-NEXT:    addi sp, sp, -16
@@ -38,8 +20,8 @@ define half @fcanonicalize_f16(half %x) {
 ; CHECK-SOFT-RV64-NEXT:    slli a0, a0, 48
 ; CHECK-SOFT-RV64-NEXT:    srli a0, a0, 48
 ; CHECK-SOFT-RV64-NEXT:    call __extendhfsf2
-; CHECK-SOFT-RV64-NEXT:    mv a1, a0
-; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV64-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV64-NEXT:    call __truncsfhf2
 ; CHECK-SOFT-RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; CHECK-SOFT-RV64-NEXT:    .cfi_restore ra
@@ -80,8 +62,8 @@ define half @fcanonicalize_f16(half %x) {
 ; CHECK-SOFT-RV32-NEXT:    slli a0, a0, 16
 ; CHECK-SOFT-RV32-NEXT:    srli a0, a0, 16
 ; CHECK-SOFT-RV32-NEXT:    call __extendhfsf2
-; CHECK-SOFT-RV32-NEXT:    mv a1, a0
-; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV32-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV32-NEXT:    call __truncsfhf2
 ; CHECK-SOFT-RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; CHECK-SOFT-RV32-NEXT:    .cfi_restore ra
@@ -112,191 +94,11 @@ define half @fcanonicalize_f16(half %x) {
 ; CHECK-NOFP16-RV32-NEXT:    addi sp, sp, 16
 ; CHECK-NOFP16-RV32-NEXT:    .cfi_def_cfa_offset 0
 ; CHECK-NOFP16-RV32-NEXT:    ret
-; RV32-SOFT-LABEL: fcanonicalize_f16:
-; RV32-SOFT:       # %bb.0:
-; RV32-SOFT-NEXT:    addi sp, sp, -16
-; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 16
-; RV32-SOFT-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    .cfi_offset ra, -4
-; RV32-SOFT-NEXT:    slli a0, a0, 16
-; RV32-SOFT-NEXT:    srli a0, a0, 16
-; RV32-SOFT-NEXT:    call __extendhfsf2
-; RV32-SOFT-NEXT:    mv a1, a0
-; RV32-SOFT-NEXT:    call fminimum_numf
-; RV32-SOFT-NEXT:    call __truncsfhf2
-; RV32-SOFT-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    .cfi_restore ra
-; RV32-SOFT-NEXT:    addi sp, sp, 16
-; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 0
-; RV32-SOFT-NEXT:    ret
-  %z = call half @llvm.canonicalize.f16(half %x)
-  ret half %z
-}
-
-define half @fcanonicalize_f16_nnan(half %x) {
-; RV64-SOFT-LABEL: fcanonicalize_f16_nnan:
-; RV64-SOFT:       # %bb.0:
-; RV64-SOFT-NEXT:    addi sp, sp, -16
-; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 16
-; RV64-SOFT-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    .cfi_offset ra, -8
-; RV64-SOFT-NEXT:    slli a0, a0, 48
-; RV64-SOFT-NEXT:    srli a0, a0, 48
-; RV64-SOFT-NEXT:    call __extendhfsf2
-; RV64-SOFT-NEXT:    mv a1, a0
-; RV64-SOFT-NEXT:    call fminimum_numf
-; RV64-SOFT-NEXT:    call __truncsfhf2
-; RV64-SOFT-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    .cfi_restore ra
-; RV64-SOFT-NEXT:    addi sp, sp, 16
-; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 0
-; RV64-SOFT-NEXT:    ret
-;
-; CHECK-SOFT-RV64-LABEL: fcanonicalize_f16_nnan:
-; CHECK-SOFT-RV64:       # %bb.0:
-; CHECK-SOFT-RV64-NEXT:    addi sp, sp, -16
-; CHECK-SOFT-RV64-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-SOFT-RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
-; CHECK-SOFT-RV64-NEXT:    .cfi_offset ra, -8
-; CHECK-SOFT-RV64-NEXT:    slli a0, a0, 48
-; CHECK-SOFT-RV64-NEXT:    srli a0, a0, 48
-; CHECK-SOFT-RV64-NEXT:    call __extendhfsf2
-; CHECK-SOFT-RV64-NEXT:    mv a1, a0
-; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
-; CHECK-SOFT-RV64-NEXT:    call __truncsfhf2
-; CHECK-SOFT-RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
-; CHECK-SOFT-RV64-NEXT:    .cfi_restore ra
-; CHECK-SOFT-RV64-NEXT:    addi sp, sp, 16
-; CHECK-SOFT-RV64-NEXT:    .cfi_def_cfa_offset 0
-; CHECK-SOFT-RV64-NEXT:    ret
-;
-; CHECK-FP16-RV64-LABEL: fcanonicalize_f16_nnan:
-; CHECK-FP16-RV64:       # %bb.0:
-; CHECK-FP16-RV64-NEXT:    fmin.h fa0, fa0, fa0
-; CHECK-FP16-RV64-NEXT:    ret
-;
-; CHECK-NOFP16-RV64-LABEL: fcanonicalize_f16_nnan:
-; CHECK-NOFP16-RV64:       # %bb.0:
-; CHECK-NOFP16-RV64-NEXT:    addi sp, sp, -16
-; CHECK-NOFP16-RV64-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NOFP16-RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
-; CHECK-NOFP16-RV64-NEXT:    .cfi_offset ra, -8
-; CHECK-NOFP16-RV64-NEXT:    call __extendhfsf2
-; CHECK-NOFP16-RV64-NEXT:    fmin.s fa0, fa0, fa0
-; CHECK-NOFP16-RV64-NEXT:    call __truncsfhf2
-; CHECK-NOFP16-RV64-NEXT:    fmv.x.w a0, fa0
-; CHECK-NOFP16-RV64-NEXT:    lui a1, 1048560
-; CHECK-NOFP16-RV64-NEXT:    or a0, a0, a1
-; CHECK-NOFP16-RV64-NEXT:    fmv.w.x fa0, a0
-; CHECK-NOFP16-RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
-; CHECK-NOFP16-RV64-NEXT:    .cfi_restore ra
-; CHECK-NOFP16-RV64-NEXT:    addi sp, sp, 16
-; CHECK-NOFP16-RV64-NEXT:    .cfi_def_cfa_offset 0
-; CHECK-NOFP16-RV64-NEXT:    ret
-;
-; CHECK-SOFT-RV32-LABEL: fcanonicalize_f16_nnan:
-; CHECK-SOFT-RV32:       # %bb.0:
-; CHECK-SOFT-RV32-NEXT:    addi sp, sp, -16
-; CHECK-SOFT-RV32-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-SOFT-RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; CHECK-SOFT-RV32-NEXT:    .cfi_offset ra, -4
-; CHECK-SOFT-RV32-NEXT:    slli a0, a0, 16
-; CHECK-SOFT-RV32-NEXT:    srli a0, a0, 16
-; CHECK-SOFT-RV32-NEXT:    call __extendhfsf2
-; CHECK-SOFT-RV32-NEXT:    mv a1, a0
-; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
-; CHECK-SOFT-RV32-NEXT:    call __truncsfhf2
-; CHECK-SOFT-RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; CHECK-SOFT-RV32-NEXT:    .cfi_restore ra
-; CHECK-SOFT-RV32-NEXT:    addi sp, sp, 16
-; CHECK-SOFT-RV32-NEXT:    .cfi_def_cfa_offset 0
-; CHECK-SOFT-RV32-NEXT:    ret
-;
-; CHECK-FP16-RV32-LABEL: fcanonicalize_f16_nnan:
-; CHECK-FP16-RV32:       # %bb.0:
-; CHECK-FP16-RV32-NEXT:    fmin.h fa0, fa0, fa0
-; CHECK-FP16-RV32-NEXT:    ret
-;
-; CHECK-NOFP16-RV32-LABEL: fcanonicalize_f16_nnan:
-; CHECK-NOFP16-RV32:       # %bb.0:
-; CHECK-NOFP16-RV32-NEXT:    addi sp, sp, -16
-; CHECK-NOFP16-RV32-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NOFP16-RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; CHECK-NOFP16-RV32-NEXT:    .cfi_offset ra, -4
-; CHECK-NOFP16-RV32-NEXT:    call __extendhfsf2
-; CHECK-NOFP16-RV32-NEXT:    fmin.s fa0, fa0, fa0
-; CHECK-NOFP16-RV32-NEXT:    call __truncsfhf2
-; CHECK-NOFP16-RV32-NEXT:    fmv.x.w a0, fa0
-; CHECK-NOFP16-RV32-NEXT:    lui a1, 1048560
-; CHECK-NOFP16-RV32-NEXT:    or a0, a0, a1
-; CHECK-NOFP16-RV32-NEXT:    fmv.w.x fa0, a0
-; CHECK-NOFP16-RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; CHECK-NOFP16-RV32-NEXT:    .cfi_restore ra
-; CHECK-NOFP16-RV32-NEXT:    addi sp, sp, 16
-; CHECK-NOFP16-RV32-NEXT:    .cfi_def_cfa_offset 0
-; CHECK-NOFP16-RV32-NEXT:    ret
-; RV32-SOFT-LABEL: fcanonicalize_f16_nnan:
-; RV32-SOFT:       # %bb.0:
-; RV32-SOFT-NEXT:    addi sp, sp, -16
-; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 16
-; RV32-SOFT-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    .cfi_offset ra, -4
-; RV32-SOFT-NEXT:    slli a0, a0, 16
-; RV32-SOFT-NEXT:    srli a0, a0, 16
-; RV32-SOFT-NEXT:    call __extendhfsf2
-; RV32-SOFT-NEXT:    mv a1, a0
-; RV32-SOFT-NEXT:    call fminimum_numf
-; RV32-SOFT-NEXT:    call __truncsfhf2
-; RV32-SOFT-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    .cfi_restore ra
-; RV32-SOFT-NEXT:    addi sp, sp, 16
-; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 0
-; RV32-SOFT-NEXT:    ret
   %z = call nnan half @llvm.canonicalize.f16(half %x)
   ret half %z
 }
 
 define <2 x half> @fcanonicalize_v2f16(<2 x half> %x) {
-; RV64-SOFT-LABEL: fcanonicalize_v2f16:
-; RV64-SOFT:       # %bb.0:
-; RV64-SOFT-NEXT:    addi sp, sp, -32
-; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 32
-; RV64-SOFT-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    .cfi_offset ra, -8
-; RV64-SOFT-NEXT:    .cfi_offset s0, -16
-; RV64-SOFT-NEXT:    .cfi_offset s1, -24
-; RV64-SOFT-NEXT:    .cfi_offset s2, -32
-; RV64-SOFT-NEXT:    mv s0, a1
-; RV64-SOFT-NEXT:    lui a1, 16
-; RV64-SOFT-NEXT:    addi s2, a1, -1
-; RV64-SOFT-NEXT:    and a0, a0, s2
-; RV64-SOFT-NEXT:    call __extendhfsf2
-; RV64-SOFT-NEXT:    mv a1, a0
-; RV64-SOFT-NEXT:    call fminimum_numf
-; RV64-SOFT-NEXT:    call __truncsfhf2
-; RV64-SOFT-NEXT:    mv s1, a0
-; RV64-SOFT-NEXT:    and a0, s0, s2
-; RV64-SOFT-NEXT:    call __extendhfsf2
-; RV64-SOFT-NEXT:    mv a1, a0
-; RV64-SOFT-NEXT:    call fminimum_numf
-; RV64-SOFT-NEXT:    call __truncsfhf2
-; RV64-SOFT-NEXT:    mv a1, a0
-; RV64-SOFT-NEXT:    mv a0, s1
-; RV64-SOFT-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    .cfi_restore ra
-; RV64-SOFT-NEXT:    .cfi_restore s0
-; RV64-SOFT-NEXT:    .cfi_restore s1
-; RV64-SOFT-NEXT:    .cfi_restore s2
-; RV64-SOFT-NEXT:    addi sp, sp, 32
-; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 0
-; RV64-SOFT-NEXT:    ret
-;
 ; CHECK-SOFT-RV64-LABEL: fcanonicalize_v2f16:
 ; CHECK-SOFT-RV64:       # %bb.0:
 ; CHECK-SOFT-RV64-NEXT:    addi sp, sp, -32
@@ -314,14 +116,14 @@ define <2 x half> @fcanonicalize_v2f16(<2 x half> %x) {
 ; CHECK-SOFT-RV64-NEXT:    addi s2, a1, -1
 ; CHECK-SOFT-RV64-NEXT:    and a0, a0, s2
 ; CHECK-SOFT-RV64-NEXT:    call __extendhfsf2
-; CHECK-SOFT-RV64-NEXT:    mv a1, a0
-; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV64-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV64-NEXT:    call __truncsfhf2
 ; CHECK-SOFT-RV64-NEXT:    mv s1, a0
 ; CHECK-SOFT-RV64-NEXT:    and a0, s0, s2
 ; CHECK-SOFT-RV64-NEXT:    call __extendhfsf2
-; CHECK-SOFT-RV64-NEXT:    mv a1, a0
-; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV64-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV64-NEXT:    call __truncsfhf2
 ; CHECK-SOFT-RV64-NEXT:    mv a1, a0
 ; CHECK-SOFT-RV64-NEXT:    mv a0, s1
@@ -392,14 +194,14 @@ define <2 x half> @fcanonicalize_v2f16(<2 x half> %x) {
 ; CHECK-SOFT-RV32-NEXT:    addi s2, a1, -1
 ; CHECK-SOFT-RV32-NEXT:    and a0, a0, s2
 ; CHECK-SOFT-RV32-NEXT:    call __extendhfsf2
-; CHECK-SOFT-RV32-NEXT:    mv a1, a0
-; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV32-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV32-NEXT:    call __truncsfhf2
 ; CHECK-SOFT-RV32-NEXT:    mv s1, a0
 ; CHECK-SOFT-RV32-NEXT:    and a0, s0, s2
 ; CHECK-SOFT-RV32-NEXT:    call __extendhfsf2
-; CHECK-SOFT-RV32-NEXT:    mv a1, a0
-; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV32-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV32-NEXT:    call __truncsfhf2
 ; CHECK-SOFT-RV32-NEXT:    mv a1, a0
 ; CHECK-SOFT-RV32-NEXT:    mv a0, s1
@@ -452,90 +254,11 @@ define <2 x half> @fcanonicalize_v2f16(<2 x half> %x) {
 ; CHECK-NOFP16-RV32-NEXT:    addi sp, sp, 16
 ; CHECK-NOFP16-RV32-NEXT:    .cfi_def_cfa_offset 0
 ; CHECK-NOFP16-RV32-NEXT:    ret
-; RV32-SOFT-LABEL: fcanonicalize_v2f16:
-; RV32-SOFT:       # %bb.0:
-; RV32-SOFT-NEXT:    addi sp, sp, -16
-; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 16
-; RV32-SOFT-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    .cfi_offset ra, -4
-; RV32-SOFT-NEXT:    .cfi_offset s0, -8
-; RV32-SOFT-NEXT:    .cfi_offset s1, -12
-; RV32-SOFT-NEXT:    .cfi_offset s2, -16
-; RV32-SOFT-NEXT:    mv s0, a1
-; RV32-SOFT-NEXT:    lui a1, 16
-; RV32-SOFT-NEXT:    addi s2, a1, -1
-; RV32-SOFT-NEXT:    and a0, a0, s2
-; RV32-SOFT-NEXT:    call __extendhfsf2
-; RV32-SOFT-NEXT:    mv a1, a0
-; RV32-SOFT-NEXT:    call fminimum_numf
-; RV32-SOFT-NEXT:    call __truncsfhf2
-; RV32-SOFT-NEXT:    mv s1, a0
-; RV32-SOFT-NEXT:    and a0, s0, s2
-; RV32-SOFT-NEXT:    call __extendhfsf2
-; RV32-SOFT-NEXT:    mv a1, a0
-; RV32-SOFT-NEXT:    call fminimum_numf
-; RV32-SOFT-NEXT:    call __truncsfhf2
-; RV32-SOFT-NEXT:    mv a1, a0
-; RV32-SOFT-NEXT:    mv a0, s1
-; RV32-SOFT-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    .cfi_restore ra
-; RV32-SOFT-NEXT:    .cfi_restore s0
-; RV32-SOFT-NEXT:    .cfi_restore s1
-; RV32-SOFT-NEXT:    .cfi_restore s2
-; RV32-SOFT-NEXT:    addi sp, sp, 16
-; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 0
-; RV32-SOFT-NEXT:    ret
   %z = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %x)
   ret <2 x half> %z
 }
 
 define <2 x half> @fcanonicalize_v2f16_nnan(<2 x half> %x) {
-; RV64-SOFT-LABEL: fcanonicalize_v2f16_nnan:
-; RV64-SOFT:       # %bb.0:
-; RV64-SOFT-NEXT:    addi sp, sp, -32
-; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 32
-; RV64-SOFT-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    .cfi_offset ra, -8
-; RV64-SOFT-NEXT:    .cfi_offset s0, -16
-; RV64-SOFT-NEXT:    .cfi_offset s1, -24
-; RV64-SOFT-NEXT:    .cfi_offset s2, -32
-; RV64-SOFT-NEXT:    mv s0, a1
-; RV64-SOFT-NEXT:    lui a1, 16
-; RV64-SOFT-NEXT:    addi s2, a1, -1
-; RV64-SOFT-NEXT:    and a0, a0, s2
-; RV64-SOFT-NEXT:    call __extendhfsf2
-; RV64-SOFT-NEXT:    mv a1, a0
-; RV64-SOFT-NEXT:    call fminimum_numf
-; RV64-SOFT-NEXT:    call __truncsfhf2
-; RV64-SOFT-NEXT:    mv s1, a0
-; RV64-SOFT-NEXT:    and a0, s0, s2
-; RV64-SOFT-NEXT:    call __extendhfsf2
-; RV64-SOFT-NEXT:    mv a1, a0
-; RV64-SOFT-NEXT:    call fminimum_numf
-; RV64-SOFT-NEXT:    call __truncsfhf2
-; RV64-SOFT-NEXT:    mv a1, a0
-; RV64-SOFT-NEXT:    mv a0, s1
-; RV64-SOFT-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    .cfi_restore ra
-; RV64-SOFT-NEXT:    .cfi_restore s0
-; RV64-SOFT-NEXT:    .cfi_restore s1
-; RV64-SOFT-NEXT:    .cfi_restore s2
-; RV64-SOFT-NEXT:    addi sp, sp, 32
-; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 0
-; RV64-SOFT-NEXT:    ret
-;
 ; CHECK-SOFT-RV64-LABEL: fcanonicalize_v2f16_nnan:
 ; CHECK-SOFT-RV64:       # %bb.0:
 ; CHECK-SOFT-RV64-NEXT:    addi sp, sp, -32
@@ -553,14 +276,14 @@ define <2 x half> @fcanonicalize_v2f16_nnan(<2 x half> %x) {
 ; CHECK-SOFT-RV64-NEXT:    addi s2, a1, -1
 ; CHECK-SOFT-RV64-NEXT:    and a0, a0, s2
 ; CHECK-SOFT-RV64-NEXT:    call __extendhfsf2
-; CHECK-SOFT-RV64-NEXT:    mv a1, a0
-; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV64-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV64-NEXT:    call __truncsfhf2
 ; CHECK-SOFT-RV64-NEXT:    mv s1, a0
 ; CHECK-SOFT-RV64-NEXT:    and a0, s0, s2
 ; CHECK-SOFT-RV64-NEXT:    call __extendhfsf2
-; CHECK-SOFT-RV64-NEXT:    mv a1, a0
-; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV64-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV64-NEXT:    call __truncsfhf2
 ; CHECK-SOFT-RV64-NEXT:    mv a1, a0
 ; CHECK-SOFT-RV64-NEXT:    mv a0, s1
@@ -631,14 +354,14 @@ define <2 x half> @fcanonicalize_v2f16_nnan(<2 x half> %x) {
 ; CHECK-SOFT-RV32-NEXT:    addi s2, a1, -1
 ; CHECK-SOFT-RV32-NEXT:    and a0, a0, s2
 ; CHECK-SOFT-RV32-NEXT:    call __extendhfsf2
-; CHECK-SOFT-RV32-NEXT:    mv a1, a0
-; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV32-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV32-NEXT:    call __truncsfhf2
 ; CHECK-SOFT-RV32-NEXT:    mv s1, a0
 ; CHECK-SOFT-RV32-NEXT:    and a0, s0, s2
 ; CHECK-SOFT-RV32-NEXT:    call __extendhfsf2
-; CHECK-SOFT-RV32-NEXT:    mv a1, a0
-; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV32-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV32-NEXT:    call __truncsfhf2
 ; CHECK-SOFT-RV32-NEXT:    mv a1, a0
 ; CHECK-SOFT-RV32-NEXT:    mv a0, s1
@@ -691,114 +414,11 @@ define <2 x half> @fcanonicalize_v2f16_nnan(<2 x half> %x) {
 ; CHECK-NOFP16-RV32-NEXT:    addi sp, sp, 16
 ; CHECK-NOFP16-RV32-NEXT:    .cfi_def_cfa_offset 0
 ; CHECK-NOFP16-RV32-NEXT:    ret
-; RV32-SOFT-LABEL: fcanonicalize_v2f16_nnan:
-; RV32-SOFT:       # %bb.0:
-; RV32-SOFT-NEXT:    addi sp, sp, -16
-; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 16
-; RV32-SOFT-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    .cfi_offset ra, -4
-; RV32-SOFT-NEXT:    .cfi_offset s0, -8
-; RV32-SOFT-NEXT:    .cfi_offset s1, -12
-; RV32-SOFT-NEXT:    .cfi_offset s2, -16
-; RV32-SOFT-NEXT:    mv s0, a1
-; RV32-SOFT-NEXT:    lui a1, 16
-; RV32-SOFT-NEXT:    addi s2, a1, -1
-; RV32-SOFT-NEXT:    and a0, a0, s2
-; RV32-SOFT-NEXT:    call __extendhfsf2
-; RV32-SOFT-NEXT:    mv a1, a0
-; RV32-SOFT-NEXT:    call fminimum_numf
-; RV32-SOFT-NEXT:    call __truncsfhf2
-; RV32-SOFT-NEXT:    mv s1, a0
-; RV32-SOFT-NEXT:    and a0, s0, s2
-; RV32-SOFT-NEXT:    call __extendhfsf2
-; RV32-SOFT-NEXT:    mv a1, a0
-; RV32-SOFT-NEXT:    call fminimum_numf
-; RV32-SOFT-NEXT:    call __truncsfhf2
-; RV32-SOFT-NEXT:    mv a1, a0
-; RV32-SOFT-NEXT:    mv a0, s1
-; RV32-SOFT-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    .cfi_restore ra
-; RV32-SOFT-NEXT:    .cfi_restore s0
-; RV32-SOFT-NEXT:    .cfi_restore s1
-; RV32-SOFT-NEXT:    .cfi_restore s2
-; RV32-SOFT-NEXT:    addi sp, sp, 16
-; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 0
-; RV32-SOFT-NEXT:    ret
   %z = call nnan <2 x half> @llvm.canonicalize.v2f16(<2 x half> %x)
   ret <2 x half> %z
 }
 
 define <4 x half> @fcanonicalize_v4f16(<4 x half> %x) {
-; RV64-SOFT-LABEL: fcanonicalize_v4f16:
-; RV64-SOFT:       # %bb.0:
-; RV64-SOFT-NEXT:    addi sp, sp, -48
-; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 48
-; RV64-SOFT-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    sd s4, 0(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    .cfi_offset ra, -8
-; RV64-SOFT-NEXT:    .cfi_offset s0, -16
-; RV64-SOFT-NEXT:    .cfi_offset s1, -24
-; RV64-SOFT-NEXT:    .cfi_offset s2, -32
-; RV64-SOFT-NEXT:    .cfi_offset s3, -40
-; RV64-SOFT-NEXT:    .cfi_offset s4, -48
-; RV64-SOFT-NEXT:    lhu a2, 0(a1)
-; RV64-SOFT-NEXT:    lhu s1, 8(a1)
-; RV64-SOFT-NEXT:    lhu s2, 16(a1)
-; RV64-SOFT-NEXT:    lhu s3, 24(a1)
-; RV64-SOFT-NEXT:    mv s0, a0
-; RV64-SOFT-NEXT:    mv a0, a2
-; RV64-SOFT-NEXT:    call __extendhfsf2
-; RV64-SOFT-NEXT:    mv a1, a0
-; RV64-SOFT-NEXT:    call fminimum_numf
-; RV64-SOFT-NEXT:    call __truncsfhf2
-; RV64-SOFT-NEXT:    mv s4, a0
-; RV64-SOFT-NEXT:    mv a0, s1
-; RV64-SOFT-NEXT:    call __extendhfsf2
-; RV64-SOFT-NEXT:    mv a1, a0
-; RV64-SOFT-NEXT:    call fminimum_numf
-; RV64-SOFT-NEXT:    call __truncsfhf2
-; RV64-SOFT-NEXT:    mv s1, a0
-; RV64-SOFT-NEXT:    mv a0, s2
-; RV64-SOFT-NEXT:    call __extendhfsf2
-; RV64-SOFT-NEXT:    mv a1, a0
-; RV64-SOFT-NEXT:    call fminimum_numf
-; RV64-SOFT-NEXT:    call __truncsfhf2
-; RV64-SOFT-NEXT:    mv s2, a0
-; RV64-SOFT-NEXT:    mv a0, s3
-; RV64-SOFT-NEXT:    call __extendhfsf2
-; RV64-SOFT-NEXT:    mv a1, a0
-; RV64-SOFT-NEXT:    call fminimum_numf
-; RV64-SOFT-NEXT:    call __truncsfhf2
-; RV64-SOFT-NEXT:    sh s4, 0(s0)
-; RV64-SOFT-NEXT:    sh s1, 2(s0)
-; RV64-SOFT-NEXT:    sh s2, 4(s0)
-; RV64-SOFT-NEXT:    sh a0, 6(s0)
-; RV64-SOFT-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    ld s4, 0(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    .cfi_restore ra
-; RV64-SOFT-NEXT:    .cfi_restore s0
-; RV64-SOFT-NEXT:    .cfi_restore s1
-; RV64-SOFT-NEXT:    .cfi_restore s2
-; RV64-SOFT-NEXT:    .cfi_restore s3
-; RV64-SOFT-NEXT:    .cfi_restore s4
-; RV64-SOFT-NEXT:    addi sp, sp, 48
-; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 0
-; RV64-SOFT-NEXT:    ret
-;
 ; CHECK-SOFT-RV64-LABEL: fcanonicalize_v4f16:
 ; CHECK-SOFT-RV64:       # %bb.0:
 ; CHECK-SOFT-RV64-NEXT:    addi sp, sp, -48
@@ -822,26 +442,26 @@ define <4 x half> @fcanonicalize_v4f16(<4 x half> %x) {
 ; CHECK-SOFT-RV64-NEXT:    mv s0, a0
 ; CHECK-SOFT-RV64-NEXT:    mv a0, a2
 ; CHECK-SOFT-RV64-NEXT:    call __extendhfsf2
-; CHECK-SOFT-RV64-NEXT:    mv a1, a0
-; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV64-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV64-NEXT:    call __truncsfhf2
 ; CHECK-SOFT-RV64-NEXT:    mv s4, a0
 ; CHECK-SOFT-RV64-NEXT:    mv a0, s1
 ; CHECK-SOFT-RV64-NEXT:    call __extendhfsf2
-; CHECK-SOFT-RV64-NEXT:    mv a1, a0
-; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV64-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV64-NEXT:    call __truncsfhf2
 ; CHECK-SOFT-RV64-NEXT:    mv s1, a0
 ; CHECK-SOFT-RV64-NEXT:    mv a0, s2
 ; CHECK-SOFT-RV64-NEXT:    call __extendhfsf2
-; CHECK-SOFT-RV64-NEXT:    mv a1, a0
-; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV64-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV64-NEXT:    call __truncsfhf2
 ; CHECK-SOFT-RV64-NEXT:    mv s2, a0
 ; CHECK-SOFT-RV64-NEXT:    mv a0, s3
 ; CHECK-SOFT-RV64-NEXT:    call __extendhfsf2
-; CHECK-SOFT-RV64-NEXT:    mv a1, a0
-; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV64-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV64-NEXT:    call __truncsfhf2
 ; CHECK-SOFT-RV64-NEXT:    sh s4, 0(s0)
 ; CHECK-SOFT-RV64-NEXT:    sh s1, 2(s0)
@@ -970,26 +590,26 @@ define <4 x half> @fcanonicalize_v4f16(<4 x half> %x) {
 ; CHECK-SOFT-RV32-NEXT:    mv s0, a0
 ; CHECK-SOFT-RV32-NEXT:    mv a0, a2
 ; CHECK-SOFT-RV32-NEXT:    call __extendhfsf2
-; CHECK-SOFT-RV32-NEXT:    mv a1, a0
-; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV32-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV32-NEXT:    call __truncsfhf2
 ; CHECK-SOFT-RV32-NEXT:    mv s4, a0
 ; CHECK-SOFT-RV32-NEXT:    mv a0, s1
 ; CHECK-SOFT-RV32-NEXT:    call __extendhfsf2
-; CHECK-SOFT-RV32-NEXT:    mv a1, a0
-; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV32-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV32-NEXT:    call __truncsfhf2
 ; CHECK-SOFT-RV32-NEXT:    mv s1, a0
 ; CHECK-SOFT-RV32-NEXT:    mv a0, s2
 ; CHECK-SOFT-RV32-NEXT:    call __extendhfsf2
-; CHECK-SOFT-RV32-NEXT:    mv a1, a0
-; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV32-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV32-NEXT:    call __truncsfhf2
 ; CHECK-SOFT-RV32-NEXT:    mv s2, a0
 ; CHECK-SOFT-RV32-NEXT:    mv a0, s3
 ; CHECK-SOFT-RV32-NEXT:    call __extendhfsf2
-; CHECK-SOFT-RV32-NEXT:    mv a1, a0
-; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV32-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV32-NEXT:    call __truncsfhf2
 ; CHECK-SOFT-RV32-NEXT:    sh s4, 0(s0)
 ; CHECK-SOFT-RV32-NEXT:    sh s1, 2(s0)
@@ -1101,138 +721,11 @@ define <4 x half> @fcanonicalize_v4f16(<4 x half> %x) {
 ; CHECK-NOFP16-RV32-NEXT:    addi sp, sp, 64
 ; CHECK-NOFP16-RV32-NEXT:    .cfi_def_cfa_offset 0
 ; CHECK-NOFP16-RV32-NEXT:    ret
-; RV32-SOFT-LABEL: fcanonicalize_v4f16:
-; RV32-SOFT:       # %bb.0:
-; RV32-SOFT-NEXT:    addi sp, sp, -32
-; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 32
-; RV32-SOFT-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    .cfi_offset ra, -4
-; RV32-SOFT-NEXT:    .cfi_offset s0, -8
-; RV32-SOFT-NEXT:    .cfi_offset s1, -12
-; RV32-SOFT-NEXT:    .cfi_offset s2, -16
-; RV32-SOFT-NEXT:    .cfi_offset s3, -20
-; RV32-SOFT-NEXT:    .cfi_offset s4, -24
-; RV32-SOFT-NEXT:    lhu a2, 0(a1)
-; RV32-SOFT-NEXT:    lhu s1, 4(a1)
-; RV32-SOFT-NEXT:    lhu s2, 8(a1)
-; RV32-SOFT-NEXT:    lhu s3, 12(a1)
-; RV32-SOFT-NEXT:    mv s0, a0
-; RV32-SOFT-NEXT:    mv a0, a2
-; RV32-SOFT-NEXT:    call __extendhfsf2
-; RV32-SOFT-NEXT:    mv a1, a0
-; RV32-SOFT-NEXT:    call fminimum_numf
-; RV32-SOFT-NEXT:    call __truncsfhf2
-; RV32-SOFT-NEXT:    mv s4, a0
-; RV32-SOFT-NEXT:    mv a0, s1
-; RV32-SOFT-NEXT:    call __extendhfsf2
-; RV32-SOFT-NEXT:    mv a1, a0
-; RV32-SOFT-NEXT:    call fminimum_numf
-; RV32-SOFT-NEXT:    call __truncsfhf2
-; RV32-SOFT-NEXT:    mv s1, a0
-; RV32-SOFT-NEXT:    mv a0, s2
-; RV32-SOFT-NEXT:    call __extendhfsf2
-; RV32-SOFT-NEXT:    mv a1, a0
-; RV32-SOFT-NEXT:    call fminimum_numf
-; RV32-SOFT-NEXT:    call __truncsfhf2
-; RV32-SOFT-NEXT:    mv s2, a0
-; RV32-SOFT-NEXT:    mv a0, s3
-; RV32-SOFT-NEXT:    call __extendhfsf2
-; RV32-SOFT-NEXT:    mv a1, a0
-; RV32-SOFT-NEXT:    call fminimum_numf
-; RV32-SOFT-NEXT:    call __truncsfhf2
-; RV32-SOFT-NEXT:    sh s4, 0(s0)
-; RV32-SOFT-NEXT:    sh s1, 2(s0)
-; RV32-SOFT-NEXT:    sh s2, 4(s0)
-; RV32-SOFT-NEXT:    sh a0, 6(s0)
-; RV32-SOFT-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    .cfi_restore ra
-; RV32-SOFT-NEXT:    .cfi_restore s0
-; RV32-SOFT-NEXT:    .cfi_restore s1
-; RV32-SOFT-NEXT:    .cfi_restore s2
-; RV32-SOFT-NEXT:    .cfi_restore s3
-; RV32-SOFT-NEXT:    .cfi_restore s4
-; RV32-SOFT-NEXT:    addi sp, sp, 32
-; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 0
-; RV32-SOFT-NEXT:    ret
   %z = call <4 x half> @llvm.canonicalize.v4f16(<4 x half> %x)
   ret <4 x half> %z
 }
 
 define <4 x half> @fcanonicalize_v4f16_nnan(<4 x half> %x) {
-; RV64-SOFT-LABEL: fcanonicalize_v4f16_nnan:
-; RV64-SOFT:       # %bb.0:
-; RV64-SOFT-NEXT:    addi sp, sp, -48
-; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 48
-; RV64-SOFT-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    sd s4, 0(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    .cfi_offset ra, -8
-; RV64-SOFT-NEXT:    .cfi_offset s0, -16
-; RV64-SOFT-NEXT:    .cfi_offset s1, -24
-; RV64-SOFT-NEXT:    .cfi_offset s2, -32
-; RV64-SOFT-NEXT:    .cfi_offset s3, -40
-; RV64-SOFT-NEXT:    .cfi_offset s4, -48
-; RV64-SOFT-NEXT:    lhu a2, 0(a1)
-; RV64-SOFT-NEXT:    lhu s1, 8(a1)
-; RV64-SOFT-NEXT:    lhu s2, 16(a1)
-; RV64-SOFT-NEXT:    lhu s3, 24(a1)
-; RV64-SOFT-NEXT:    mv s0, a0
-; RV64-SOFT-NEXT:    mv a0, a2
-; RV64-SOFT-NEXT:    call __extendhfsf2
-; RV64-SOFT-NEXT:    mv a1, a0
-; RV64-SOFT-NEXT:    call fminimum_numf
-; RV64-SOFT-NEXT:    call __truncsfhf2
-; RV64-SOFT-NEXT:    mv s4, a0
-; RV64-SOFT-NEXT:    mv a0, s1
-; RV64-SOFT-NEXT:    call __extendhfsf2
-; RV64-SOFT-NEXT:    mv a1, a0
-; RV64-SOFT-NEXT:    call fminimum_numf
-; RV64-SOFT-NEXT:    call __truncsfhf2
-; RV64-SOFT-NEXT:    mv s1, a0
-; RV64-SOFT-NEXT:    mv a0, s2
-; RV64-SOFT-NEXT:    call __extendhfsf2
-; RV64-SOFT-NEXT:    mv a1, a0
-; RV64-SOFT-NEXT:    call fminimum_numf
-; RV64-SOFT-NEXT:    call __truncsfhf2
-; RV64-SOFT-NEXT:    mv s2, a0
-; RV64-SOFT-NEXT:    mv a0, s3
-; RV64-SOFT-NEXT:    call __extendhfsf2
-; RV64-SOFT-NEXT:    mv a1, a0
-; RV64-SOFT-NEXT:    call fminimum_numf
-; RV64-SOFT-NEXT:    call __truncsfhf2
-; RV64-SOFT-NEXT:    sh s4, 0(s0)
-; RV64-SOFT-NEXT:    sh s1, 2(s0)
-; RV64-SOFT-NEXT:    sh s2, 4(s0)
-; RV64-SOFT-NEXT:    sh a0, 6(s0)
-; RV64-SOFT-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    ld s4, 0(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    .cfi_restore ra
-; RV64-SOFT-NEXT:    .cfi_restore s0
-; RV64-SOFT-NEXT:    .cfi_restore s1
-; RV64-SOFT-NEXT:    .cfi_restore s2
-; RV64-SOFT-NEXT:    .cfi_restore s3
-; RV64-SOFT-NEXT:    .cfi_restore s4
-; RV64-SOFT-NEXT:    addi sp, sp, 48
-; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 0
-; RV64-SOFT-NEXT:    ret
-;
 ; CHECK-SOFT-RV64-LABEL: fcanonicalize_v4f16_nnan:
 ; CHECK-SOFT-RV64:       # %bb.0:
 ; CHECK-SOFT-RV64-NEXT:    addi sp, sp, -48
@@ -1256,26 +749,26 @@ define <4 x half> @fcanonicalize_v4f16_nnan(<4 x half> %x) {
 ; CHECK-SOFT-RV64-NEXT:    mv s0, a0
 ; CHECK-SOFT-RV64-NEXT:    mv a0, a2
 ; CHECK-SOFT-RV64-NEXT:    call __extendhfsf2
-; CHECK-SOFT-RV64-NEXT:    mv a1, a0
-; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV64-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV64-NEXT:    call __truncsfhf2
 ; CHECK-SOFT-RV64-NEXT:    mv s4, a0
 ; CHECK-SOFT-RV64-NEXT:    mv a0, s1
 ; CHECK-SOFT-RV64-NEXT:    call __extendhfsf2
-; CHECK-SOFT-RV64-NEXT:    mv a1, a0
-; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV64-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV64-NEXT:    call __truncsfhf2
 ; CHECK-SOFT-RV64-NEXT:    mv s1, a0
 ; CHECK-SOFT-RV64-NEXT:    mv a0, s2
 ; CHECK-SOFT-RV64-NEXT:    call __extendhfsf2
-; CHECK-SOFT-RV64-NEXT:    mv a1, a0
-; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV64-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV64-NEXT:    call __truncsfhf2
 ; CHECK-SOFT-RV64-NEXT:    mv s2, a0
 ; CHECK-SOFT-RV64-NEXT:    mv a0, s3
 ; CHECK-SOFT-RV64-NEXT:    call __extendhfsf2
-; CHECK-SOFT-RV64-NEXT:    mv a1, a0
-; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV64-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV64-NEXT:    call __truncsfhf2
 ; CHECK-SOFT-RV64-NEXT:    sh s4, 0(s0)
 ; CHECK-SOFT-RV64-NEXT:    sh s1, 2(s0)
@@ -1404,26 +897,26 @@ define <4 x half> @fcanonicalize_v4f16_nnan(<4 x half> %x) {
 ; CHECK-SOFT-RV32-NEXT:    mv s0, a0
 ; CHECK-SOFT-RV32-NEXT:    mv a0, a2
 ; CHECK-SOFT-RV32-NEXT:    call __extendhfsf2
-; CHECK-SOFT-RV32-NEXT:    mv a1, a0
-; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV32-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV32-NEXT:    call __truncsfhf2
 ; CHECK-SOFT-RV32-NEXT:    mv s4, a0
 ; CHECK-SOFT-RV32-NEXT:    mv a0, s1
 ; CHECK-SOFT-RV32-NEXT:    call __extendhfsf2
-; CHECK-SOFT-RV32-NEXT:    mv a1, a0
-; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV32-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV32-NEXT:    call __truncsfhf2
 ; CHECK-SOFT-RV32-NEXT:    mv s1, a0
 ; CHECK-SOFT-RV32-NEXT:    mv a0, s2
 ; CHECK-SOFT-RV32-NEXT:    call __extendhfsf2
-; CHECK-SOFT-RV32-NEXT:    mv a1, a0
-; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV32-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV32-NEXT:    call __truncsfhf2
 ; CHECK-SOFT-RV32-NEXT:    mv s2, a0
 ; CHECK-SOFT-RV32-NEXT:    mv a0, s3
 ; CHECK-SOFT-RV32-NEXT:    call __extendhfsf2
-; CHECK-SOFT-RV32-NEXT:    mv a1, a0
-; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV32-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV32-NEXT:    call __truncsfhf2
 ; CHECK-SOFT-RV32-NEXT:    sh s4, 0(s0)
 ; CHECK-SOFT-RV32-NEXT:    sh s1, 2(s0)
@@ -1535,186 +1028,11 @@ define <4 x half> @fcanonicalize_v4f16_nnan(<4 x half> %x) {
 ; CHECK-NOFP16-RV32-NEXT:    addi sp, sp, 64
 ; CHECK-NOFP16-RV32-NEXT:    .cfi_def_cfa_offset 0
 ; CHECK-NOFP16-RV32-NEXT:    ret
-; RV32-SOFT-LABEL: fcanonicalize_v4f16_nnan:
-; RV32-SOFT:       # %bb.0:
-; RV32-SOFT-NEXT:    addi sp, sp, -32
-; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 32
-; RV32-SOFT-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    .cfi_offset ra, -4
-; RV32-SOFT-NEXT:    .cfi_offset s0, -8
-; RV32-SOFT-NEXT:    .cfi_offset s1, -12
-; RV32-SOFT-NEXT:    .cfi_offset s2, -16
-; RV32-SOFT-NEXT:    .cfi_offset s3, -20
-; RV32-SOFT-NEXT:    .cfi_offset s4, -24
-; RV32-SOFT-NEXT:    lhu a2, 0(a1)
-; RV32-SOFT-NEXT:    lhu s1, 4(a1)
-; RV32-SOFT-NEXT:    lhu s2, 8(a1)
-; RV32-SOFT-NEXT:    lhu s3, 12(a1)
-; RV32-SOFT-NEXT:    mv s0, a0
-; RV32-SOFT-NEXT:    mv a0, a2
-; RV32-SOFT-NEXT:    call __extendhfsf2
-; RV32-SOFT-NEXT:    mv a1, a0
-; RV32-SOFT-NEXT:    call fminimum_numf
-; RV32-SOFT-NEXT:    call __truncsfhf2
-; RV32-SOFT-NEXT:    mv s4, a0
-; RV32-SOFT-NEXT:    mv a0, s1
-; RV32-SOFT-NEXT:    call __extendhfsf2
-; RV32-SOFT-NEXT:    mv a1, a0
-; RV32-SOFT-NEXT:    call fminimum_numf
-; RV32-SOFT-NEXT:    call __truncsfhf2
-; RV32-SOFT-NEXT:    mv s1, a0
-; RV32-SOFT-NEXT:    mv a0, s2
-; RV32-SOFT-NEXT:    call __extendhfsf2
-; RV32-SOFT-NEXT:    mv a1, a0
-; RV32-SOFT-NEXT:    call fminimum_numf
-; RV32-SOFT-NEXT:    call __truncsfhf2
-; RV32-SOFT-NEXT:    mv s2, a0
-; RV32-SOFT-NEXT:    mv a0, s3
-; RV32-SOFT-NEXT:    call __extendhfsf2
-; RV32-SOFT-NEXT:    mv a1, a0
-; RV32-SOFT-NEXT:    call fminimum_numf
-; RV32-SOFT-NEXT:    call __truncsfhf2
-; RV32-SOFT-NEXT:    sh s4, 0(s0)
-; RV32-SOFT-NEXT:    sh s1, 2(s0)
-; RV32-SOFT-NEXT:    sh s2, 4(s0)
-; RV32-SOFT-NEXT:    sh a0, 6(s0)
-; RV32-SOFT-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    .cfi_restore ra
-; RV32-SOFT-NEXT:    .cfi_restore s0
-; RV32-SOFT-NEXT:    .cfi_restore s1
-; RV32-SOFT-NEXT:    .cfi_restore s2
-; RV32-SOFT-NEXT:    .cfi_restore s3
-; RV32-SOFT-NEXT:    .cfi_restore s4
-; RV32-SOFT-NEXT:    addi sp, sp, 32
-; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 0
-; RV32-SOFT-NEXT:    ret
   %z = call nnan <4 x half> @llvm.canonicalize.v4f16(<4 x half> %x)
   ret <4 x half> %z
 }
 
 define <8 x half> @fcanonicalize_v8f16(<8 x half> %x) {
-; RV64-SOFT-LABEL: fcanonicalize_v8f16:
-; RV64-SOFT:       # %bb.0:
-; RV64-SOFT-NEXT:    addi sp, sp, -80
-; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 80
-; RV64-SOFT-NEXT:    sd ra, 72(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    sd s0, 64(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    sd s1, 56(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    sd s2, 48(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    sd s3, 40(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    sd s4, 32(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    sd s5, 24(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    sd s6, 16(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    sd s7, 8(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    sd s8, 0(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    .cfi_offset ra, -8
-; RV64-SOFT-NEXT:    .cfi_offset s0, -16
-; RV64-SOFT-NEXT:    .cfi_offset s1, -24
-; RV64-SOFT-NEXT:    .cfi_offset s2, -32
-; RV64-SOFT-NEXT:    .cfi_offset s3, -40
-; RV64-SOFT-NEXT:    .cfi_offset s4, -48
-; RV64-SOFT-NEXT:    .cfi_offset s5, -56
-; RV64-SOFT-NEXT:    .cfi_offset s6, -64
-; RV64-SOFT-NEXT:    .cfi_offset s7, -72
-; RV64-SOFT-NEXT:    .cfi_offset s8, -80
-; RV64-SOFT-NEXT:    lhu s7, 32(a1)
-; RV64-SOFT-NEXT:    lhu s5, 40(a1)
-; RV64-SOFT-NEXT:    lhu s3, 48(a1)
-; RV64-SOFT-NEXT:    lhu s1, 56(a1)
-; RV64-SOFT-NEXT:    lhu a2, 0(a1)
-; RV64-SOFT-NEXT:    lhu s4, 8(a1)
-; RV64-SOFT-NEXT:    lhu s6, 16(a1)
-; RV64-SOFT-NEXT:    lhu s8, 24(a1)
-; RV64-SOFT-NEXT:    mv s0, a0
-; RV64-SOFT-NEXT:    mv a0, a2
-; RV64-SOFT-NEXT:    call __extendhfsf2
-; RV64-SOFT-NEXT:    mv a1, a0
-; RV64-SOFT-NEXT:    call fminimum_numf
-; RV64-SOFT-NEXT:    call __truncsfhf2
-; RV64-SOFT-NEXT:    mv s2, a0
-; RV64-SOFT-NEXT:    mv a0, s4
-; RV64-SOFT-NEXT:    call __extendhfsf2
-; RV64-SOFT-NEXT:    mv a1, a0
-; RV64-SOFT-NEXT:    call fminimum_numf
-; RV64-SOFT-NEXT:    call __truncsfhf2
-; RV64-SOFT-NEXT:    mv s4, a0
-; RV64-SOFT-NEXT:    mv a0, s6
-; RV64-SOFT-NEXT:    call __extendhfsf2
-; RV64-SOFT-NEXT:    mv a1, a0
-; RV64-SOFT-NEXT:    call fminimum_numf
-; RV64-SOFT-NEXT:    call __truncsfhf2
-; RV64-SOFT-NEXT:    mv s6, a0
-; RV64-SOFT-NEXT:    mv a0, s8
-; RV64-SOFT-NEXT:    call __extendhfsf2
-; RV64-SOFT-NEXT:    mv a1, a0
-; RV64-SOFT-NEXT:    call fminimum_numf
-; RV64-SOFT-NEXT:    call __truncsfhf2
-; RV64-SOFT-NEXT:    mv s8, a0
-; RV64-SOFT-NEXT:    mv a0, s7
-; RV64-SOFT-NEXT:    call __extendhfsf2
-; RV64-SOFT-NEXT:    mv a1, a0
-; RV64-SOFT-NEXT:    call fminimum_numf
-; RV64-SOFT-NEXT:    call __truncsfhf2
-; RV64-SOFT-NEXT:    mv s7, a0
-; RV64-SOFT-NEXT:    mv a0, s5
-; RV64-SOFT-NEXT:    call __extendhfsf2
-; RV64-SOFT-NEXT:    mv a1, a0
-; RV64-SOFT-NEXT:    call fminimum_numf
-; RV64-SOFT-NEXT:    call __truncsfhf2
-; RV64-SOFT-NEXT:    mv s5, a0
-; RV64-SOFT-NEXT:    mv a0, s3
-; RV64-SOFT-NEXT:    call __extendhfsf2
-; RV64-SOFT-NEXT:    mv a1, a0
-; RV64-SOFT-NEXT:    call fminimum_numf
-; RV64-SOFT-NEXT:    call __truncsfhf2
-; RV64-SOFT-NEXT:    mv s3, a0
-; RV64-SOFT-NEXT:    mv a0, s1
-; RV64-SOFT-NEXT:    call __extendhfsf2
-; RV64-SOFT-NEXT:    mv a1, a0
-; RV64-SOFT-NEXT:    call fminimum_numf
-; RV64-SOFT-NEXT:    call __truncsfhf2
-; RV64-SOFT-NEXT:    sh s7, 8(s0)
-; RV64-SOFT-NEXT:    sh s5, 10(s0)
-; RV64-SOFT-NEXT:    sh s3, 12(s0)
-; RV64-SOFT-NEXT:    sh a0, 14(s0)
-; RV64-SOFT-NEXT:    sh s2, 0(s0)
-; RV64-SOFT-NEXT:    sh s4, 2(s0)
-; RV64-SOFT-NEXT:    sh s6, 4(s0)
-; RV64-SOFT-NEXT:    sh s8, 6(s0)
-; RV64-SOFT-NEXT:    ld ra, 72(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    ld s0, 64(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    ld s1, 56(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    ld s2, 48(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    ld s3, 40(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    ld s4, 32(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    ld s5, 24(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    ld s6, 16(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    ld s7, 8(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    ld s8, 0(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    .cfi_restore ra
-; RV64-SOFT-NEXT:    .cfi_restore s0
-; RV64-SOFT-NEXT:    .cfi_restore s1
-; RV64-SOFT-NEXT:    .cfi_restore s2
-; RV64-SOFT-NEXT:    .cfi_restore s3
-; RV64-SOFT-NEXT:    .cfi_restore s4
-; RV64-SOFT-NEXT:    .cfi_restore s5
-; RV64-SOFT-NEXT:    .cfi_restore s6
-; RV64-SOFT-NEXT:    .cfi_restore s7
-; RV64-SOFT-NEXT:    .cfi_restore s8
-; RV64-SOFT-NEXT:    addi sp, sp, 80
-; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 0
-; RV64-SOFT-NEXT:    ret
-;
 ; CHECK-SOFT-RV64-LABEL: fcanonicalize_v8f16:
 ; CHECK-SOFT-RV64:       # %bb.0:
 ; CHECK-SOFT-RV64-NEXT:    addi sp, sp, -80
@@ -1750,50 +1068,50 @@ define <8 x half> @fcanonicalize_v8f16(<8 x half> %x) {
 ; CHECK-SOFT-RV64-NEXT:    mv s0, a0
 ; CHECK-SOFT-RV64-NEXT:    mv a0, a2
 ; CHECK-SOFT-RV64-NEXT:    call __extendhfsf2
-; CHECK-SOFT-RV64-NEXT:    mv a1, a0
-; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV64-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV64-NEXT:    call __truncsfhf2
 ; CHECK-SOFT-RV64-NEXT:    mv s2, a0
 ; CHECK-SOFT-RV64-NEXT:    mv a0, s4
 ; CHECK-SOFT-RV64-NEXT:    call __extendhfsf2
-; CHECK-SOFT-RV64-NEXT:    mv a1, a0
-; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV64-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV64-NEXT:    call __truncsfhf2
 ; CHECK-SOFT-RV64-NEXT:    mv s4, a0
 ; CHECK-SOFT-RV64-NEXT:    mv a0, s6
 ; CHECK-SOFT-RV64-NEXT:    call __extendhfsf2
-; CHECK-SOFT-RV64-NEXT:    mv a1, a0
-; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV64-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV64-NEXT:    call __truncsfhf2
 ; CHECK-SOFT-RV64-NEXT:    mv s6, a0
 ; CHECK-SOFT-RV64-NEXT:    mv a0, s8
 ; CHECK-SOFT-RV64-NEXT:    call __extendhfsf2
-; CHECK-SOFT-RV64-NEXT:    mv a1, a0
-; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV64-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV64-NEXT:    call __truncsfhf2
 ; CHECK-SOFT-RV64-NEXT:    mv s8, a0
 ; CHECK-SOFT-RV64-NEXT:    mv a0, s7
 ; CHECK-SOFT-RV64-NEXT:    call __extendhfsf2
-; CHECK-SOFT-RV64-NEXT:    mv a1, a0
-; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV64-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV64-NEXT:    call __truncsfhf2
 ; CHECK-SOFT-RV64-NEXT:    mv s7, a0
 ; CHECK-SOFT-RV64-NEXT:    mv a0, s5
 ; CHECK-SOFT-RV64-NEXT:    call __extendhfsf2
-; CHECK-SOFT-RV64-NEXT:    mv a1, a0
-; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV64-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV64-NEXT:    call __truncsfhf2
 ; CHECK-SOFT-RV64-NEXT:    mv s5, a0
 ; CHECK-SOFT-RV64-NEXT:    mv a0, s3
 ; CHECK-SOFT-RV64-NEXT:    call __extendhfsf2
-; CHECK-SOFT-RV64-NEXT:    mv a1, a0
-; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV64-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV64-NEXT:    call __truncsfhf2
 ; CHECK-SOFT-RV64-NEXT:    mv s3, a0
 ; CHECK-SOFT-RV64-NEXT:    mv a0, s1
 ; CHECK-SOFT-RV64-NEXT:    call __extendhfsf2
-; CHECK-SOFT-RV64-NEXT:    mv a1, a0
-; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV64-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV64-NEXT:    call __truncsfhf2
 ; CHECK-SOFT-RV64-NEXT:    sh s7, 8(s0)
 ; CHECK-SOFT-RV64-NEXT:    sh s5, 10(s0)
@@ -2018,50 +1336,50 @@ define <8 x half> @fcanonicalize_v8f16(<8 x half> %x) {
 ; CHECK-SOFT-RV32-NEXT:    mv s0, a0
 ; CHECK-SOFT-RV32-NEXT:    mv a0, a2
 ; CHECK-SOFT-RV32-NEXT:    call __extendhfsf2
-; CHECK-SOFT-RV32-NEXT:    mv a1, a0
-; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV32-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV32-NEXT:    call __truncsfhf2
 ; CHECK-SOFT-RV32-NEXT:    mv s2, a0
 ; CHECK-SOFT-RV32-NEXT:    mv a0, s4
 ; CHECK-SOFT-RV32-NEXT:    call __extendhfsf2
-; CHECK-SOFT-RV32-NEXT:    mv a1, a0
-; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV32-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV32-NEXT:    call __truncsfhf2
 ; CHECK-SOFT-RV32-NEXT:    mv s4, a0
 ; CHECK-SOFT-RV32-NEXT:    mv a0, s6
 ; CHECK-SOFT-RV32-NEXT:    call __extendhfsf2
-; CHECK-SOFT-RV32-NEXT:    mv a1, a0
-; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV32-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV32-NEXT:    call __truncsfhf2
 ; CHECK-SOFT-RV32-NEXT:    mv s6, a0
 ; CHECK-SOFT-RV32-NEXT:    mv a0, s8
 ; CHECK-SOFT-RV32-NEXT:    call __extendhfsf2
-; CHECK-SOFT-RV32-NEXT:    mv a1, a0
-; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV32-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV32-NEXT:    call __truncsfhf2
 ; CHECK-SOFT-RV32-NEXT:    mv s8, a0
 ; CHECK-SOFT-RV32-NEXT:    mv a0, s7
 ; CHECK-SOFT-RV32-NEXT:    call __extendhfsf2
-; CHECK-SOFT-RV32-NEXT:    mv a1, a0
-; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV32-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV32-NEXT:    call __truncsfhf2
 ; CHECK-SOFT-RV32-NEXT:    mv s7, a0
 ; CHECK-SOFT-RV32-NEXT:    mv a0, s5
 ; CHECK-SOFT-RV32-NEXT:    call __extendhfsf2
-; CHECK-SOFT-RV32-NEXT:    mv a1, a0
-; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV32-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV32-NEXT:    call __truncsfhf2
 ; CHECK-SOFT-RV32-NEXT:    mv s5, a0
 ; CHECK-SOFT-RV32-NEXT:    mv a0, s3
 ; CHECK-SOFT-RV32-NEXT:    call __extendhfsf2
-; CHECK-SOFT-RV32-NEXT:    mv a1, a0
-; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV32-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV32-NEXT:    call __truncsfhf2
 ; CHECK-SOFT-RV32-NEXT:    mv s3, a0
 ; CHECK-SOFT-RV32-NEXT:    mv a0, s1
 ; CHECK-SOFT-RV32-NEXT:    call __extendhfsf2
-; CHECK-SOFT-RV32-NEXT:    mv a1, a0
-; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV32-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV32-NEXT:    call __truncsfhf2
 ; CHECK-SOFT-RV32-NEXT:    sh s7, 8(s0)
 ; CHECK-SOFT-RV32-NEXT:    sh s5, 10(s0)
@@ -2261,234 +1579,11 @@ define <8 x half> @fcanonicalize_v8f16(<8 x half> %x) {
 ; CHECK-NOFP16-RV32-NEXT:    addi sp, sp, 112
 ; CHECK-NOFP16-RV32-NEXT:    .cfi_def_cfa_offset 0
 ; CHECK-NOFP16-RV32-NEXT:    ret
-; RV32-SOFT-LABEL: fcanonicalize_v8f16:
-; RV32-SOFT:       # %bb.0:
-; RV32-SOFT-NEXT:    addi sp, sp, -48
-; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 48
-; RV32-SOFT-NEXT:    sw ra, 44(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s0, 40(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s1, 36(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s2, 32(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s3, 28(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s4, 24(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s5, 20(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s6, 16(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s7, 12(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s8, 8(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    .cfi_offset ra, -4
-; RV32-SOFT-NEXT:    .cfi_offset s0, -8
-; RV32-SOFT-NEXT:    .cfi_offset s1, -12
-; RV32-SOFT-NEXT:    .cfi_offset s2, -16
-; RV32-SOFT-NEXT:    .cfi_offset s3, -20
-; RV32-SOFT-NEXT:    .cfi_offset s4, -24
-; RV32-SOFT-NEXT:    .cfi_offset s5, -28
-; RV32-SOFT-NEXT:    .cfi_offset s6, -32
-; RV32-SOFT-NEXT:    .cfi_offset s7, -36
-; RV32-SOFT-NEXT:    .cfi_offset s8, -40
-; RV32-SOFT-NEXT:    lhu s7, 16(a1)
-; RV32-SOFT-NEXT:    lhu s5, 20(a1)
-; RV32-SOFT-NEXT:    lhu s3, 24(a1)
-; RV32-SOFT-NEXT:    lhu s1, 28(a1)
-; RV32-SOFT-NEXT:    lhu a2, 0(a1)
-; RV32-SOFT-NEXT:    lhu s4, 4(a1)
-; RV32-SOFT-NEXT:    lhu s6, 8(a1)
-; RV32-SOFT-NEXT:    lhu s8, 12(a1)
-; RV32-SOFT-NEXT:    mv s0, a0
-; RV32-SOFT-NEXT:    mv a0, a2
-; RV32-SOFT-NEXT:    call __extendhfsf2
-; RV32-SOFT-NEXT:    mv a1, a0
-; RV32-SOFT-NEXT:    call fminimum_numf
-; RV32-SOFT-NEXT:    call __truncsfhf2
-; RV32-SOFT-NEXT:    mv s2, a0
-; RV32-SOFT-NEXT:    mv a0, s4
-; RV32-SOFT-NEXT:    call __extendhfsf2
-; RV32-SOFT-NEXT:    mv a1, a0
-; RV32-SOFT-NEXT:    call fminimum_numf
-; RV32-SOFT-NEXT:    call __truncsfhf2
-; RV32-SOFT-NEXT:    mv s4, a0
-; RV32-SOFT-NEXT:    mv a0, s6
-; RV32-SOFT-NEXT:    call __extendhfsf2
-; RV32-SOFT-NEXT:    mv a1, a0
-; RV32-SOFT-NEXT:    call fminimum_numf
-; RV32-SOFT-NEXT:    call __truncsfhf2
-; RV32-SOFT-NEXT:    mv s6, a0
-; RV32-SOFT-NEXT:    mv a0, s8
-; RV32-SOFT-NEXT:    call __extendhfsf2
-; RV32-SOFT-NEXT:    mv a1, a0
-; RV32-SOFT-NEXT:    call fminimum_numf
-; RV32-SOFT-NEXT:    call __truncsfhf2
-; RV32-SOFT-NEXT:    mv s8, a0
-; RV32-SOFT-NEXT:    mv a0, s7
-; RV32-SOFT-NEXT:    call __extendhfsf2
-; RV32-SOFT-NEXT:    mv a1, a0
-; RV32-SOFT-NEXT:    call fminimum_numf
-; RV32-SOFT-NEXT:    call __truncsfhf2
-; RV32-SOFT-NEXT:    mv s7, a0
-; RV32-SOFT-NEXT:    mv a0, s5
-; RV32-SOFT-NEXT:    call __extendhfsf2
-; RV32-SOFT-NEXT:    mv a1, a0
-; RV32-SOFT-NEXT:    call fminimum_numf
-; RV32-SOFT-NEXT:    call __truncsfhf2
-; RV32-SOFT-NEXT:    mv s5, a0
-; RV32-SOFT-NEXT:    mv a0, s3
-; RV32-SOFT-NEXT:    call __extendhfsf2
-; RV32-SOFT-NEXT:    mv a1, a0
-; RV32-SOFT-NEXT:    call fminimum_numf
-; RV32-SOFT-NEXT:    call __truncsfhf2
-; RV32-SOFT-NEXT:    mv s3, a0
-; RV32-SOFT-NEXT:    mv a0, s1
-; RV32-SOFT-NEXT:    call __extendhfsf2
-; RV32-SOFT-NEXT:    mv a1, a0
-; RV32-SOFT-NEXT:    call fminimum_numf
-; RV32-SOFT-NEXT:    call __truncsfhf2
-; RV32-SOFT-NEXT:    sh s7, 8(s0)
-; RV32-SOFT-NEXT:    sh s5, 10(s0)
-; RV32-SOFT-NEXT:    sh s3, 12(s0)
-; RV32-SOFT-NEXT:    sh a0, 14(s0)
-; RV32-SOFT-NEXT:    sh s2, 0(s0)
-; RV32-SOFT-NEXT:    sh s4, 2(s0)
-; RV32-SOFT-NEXT:    sh s6, 4(s0)
-; RV32-SOFT-NEXT:    sh s8, 6(s0)
-; RV32-SOFT-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s0, 40(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s1, 36(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s2, 32(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s3, 28(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s4, 24(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s5, 20(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s6, 16(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s7, 12(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s8, 8(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    .cfi_restore ra
-; RV32-SOFT-NEXT:    .cfi_restore s0
-; RV32-SOFT-NEXT:    .cfi_restore s1
-; RV32-SOFT-NEXT:    .cfi_restore s2
-; RV32-SOFT-NEXT:    .cfi_restore s3
-; RV32-SOFT-NEXT:    .cfi_restore s4
-; RV32-SOFT-NEXT:    .cfi_restore s5
-; RV32-SOFT-NEXT:    .cfi_restore s6
-; RV32-SOFT-NEXT:    .cfi_restore s7
-; RV32-SOFT-NEXT:    .cfi_restore s8
-; RV32-SOFT-NEXT:    addi sp, sp, 48
-; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 0
-; RV32-SOFT-NEXT:    ret
   %z = call <8 x half> @llvm.canonicalize.v8f16(<8 x half> %x)
   ret <8 x half> %z
 }
 
 define <8 x half> @fcanonicalize_v8f16_nnan(<8 x half> %x) {
-; RV64-SOFT-LABEL: fcanonicalize_v8f16_nnan:
-; RV64-SOFT:       # %bb.0:
-; RV64-SOFT-NEXT:    addi sp, sp, -80
-; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 80
-; RV64-SOFT-NEXT:    sd ra, 72(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    sd s0, 64(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    sd s1, 56(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    sd s2, 48(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    sd s3, 40(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    sd s4, 32(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    sd s5, 24(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    sd s6, 16(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    sd s7, 8(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    sd s8, 0(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    .cfi_offset ra, -8
-; RV64-SOFT-NEXT:    .cfi_offset s0, -16
-; RV64-SOFT-NEXT:    .cfi_offset s1, -24
-; RV64-SOFT-NEXT:    .cfi_offset s2, -32
-; RV64-SOFT-NEXT:    .cfi_offset s3, -40
-; RV64-SOFT-NEXT:    .cfi_offset s4, -48
-; RV64-SOFT-NEXT:    .cfi_offset s5, -56
-; RV64-SOFT-NEXT:    .cfi_offset s6, -64
-; RV64-SOFT-NEXT:    .cfi_offset s7, -72
-; RV64-SOFT-NEXT:    .cfi_offset s8, -80
-; RV64-SOFT-NEXT:    lhu s7, 32(a1)
-; RV64-SOFT-NEXT:    lhu s5, 40(a1)
-; RV64-SOFT-NEXT:    lhu s3, 48(a1)
-; RV64-SOFT-NEXT:    lhu s1, 56(a1)
-; RV64-SOFT-NEXT:    lhu a2, 0(a1)
-; RV64-SOFT-NEXT:    lhu s4, 8(a1)
-; RV64-SOFT-NEXT:    lhu s6, 16(a1)
-; RV64-SOFT-NEXT:    lhu s8, 24(a1)
-; RV64-SOFT-NEXT:    mv s0, a0
-; RV64-SOFT-NEXT:    mv a0, a2
-; RV64-SOFT-NEXT:    call __extendhfsf2
-; RV64-SOFT-NEXT:    mv a1, a0
-; RV64-SOFT-NEXT:    call fminimum_numf
-; RV64-SOFT-NEXT:    call __truncsfhf2
-; RV64-SOFT-NEXT:    mv s2, a0
-; RV64-SOFT-NEXT:    mv a0, s4
-; RV64-SOFT-NEXT:    call __extendhfsf2
-; RV64-SOFT-NEXT:    mv a1, a0
-; RV64-SOFT-NEXT:    call fminimum_numf
-; RV64-SOFT-NEXT:    call __truncsfhf2
-; RV64-SOFT-NEXT:    mv s4, a0
-; RV64-SOFT-NEXT:    mv a0, s6
-; RV64-SOFT-NEXT:    call __extendhfsf2
-; RV64-SOFT-NEXT:    mv a1, a0
-; RV64-SOFT-NEXT:    call fminimum_numf
-; RV64-SOFT-NEXT:    call __truncsfhf2
-; RV64-SOFT-NEXT:    mv s6, a0
-; RV64-SOFT-NEXT:    mv a0, s8
-; RV64-SOFT-NEXT:    call __extendhfsf2
-; RV64-SOFT-NEXT:    mv a1, a0
-; RV64-SOFT-NEXT:    call fminimum_numf
-; RV64-SOFT-NEXT:    call __truncsfhf2
-; RV64-SOFT-NEXT:    mv s8, a0
-; RV64-SOFT-NEXT:    mv a0, s7
-; RV64-SOFT-NEXT:    call __extendhfsf2
-; RV64-SOFT-NEXT:    mv a1, a0
-; RV64-SOFT-NEXT:    call fminimum_numf
-; RV64-SOFT-NEXT:    call __truncsfhf2
-; RV64-SOFT-NEXT:    mv s7, a0
-; RV64-SOFT-NEXT:    mv a0, s5
-; RV64-SOFT-NEXT:    call __extendhfsf2
-; RV64-SOFT-NEXT:    mv a1, a0
-; RV64-SOFT-NEXT:    call fminimum_numf
-; RV64-SOFT-NEXT:    call __truncsfhf2
-; RV64-SOFT-NEXT:    mv s5, a0
-; RV64-SOFT-NEXT:    mv a0, s3
-; RV64-SOFT-NEXT:    call __extendhfsf2
-; RV64-SOFT-NEXT:    mv a1, a0
-; RV64-SOFT-NEXT:    call fminimum_numf
-; RV64-SOFT-NEXT:    call __truncsfhf2
-; RV64-SOFT-NEXT:    mv s3, a0
-; RV64-SOFT-NEXT:    mv a0, s1
-; RV64-SOFT-NEXT:    call __extendhfsf2
-; RV64-SOFT-NEXT:    mv a1, a0
-; RV64-SOFT-NEXT:    call fminimum_numf
-; RV64-SOFT-NEXT:    call __truncsfhf2
-; RV64-SOFT-NEXT:    sh s7, 8(s0)
-; RV64-SOFT-NEXT:    sh s5, 10(s0)
-; RV64-SOFT-NEXT:    sh s3, 12(s0)
-; RV64-SOFT-NEXT:    sh a0, 14(s0)
-; RV64-SOFT-NEXT:    sh s2, 0(s0)
-; RV64-SOFT-NEXT:    sh s4, 2(s0)
-; RV64-SOFT-NEXT:    sh s6, 4(s0)
-; RV64-SOFT-NEXT:    sh s8, 6(s0)
-; RV64-SOFT-NEXT:    ld ra, 72(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    ld s0, 64(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    ld s1, 56(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    ld s2, 48(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    ld s3, 40(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    ld s4, 32(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    ld s5, 24(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    ld s6, 16(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    ld s7, 8(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    ld s8, 0(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    .cfi_restore ra
-; RV64-SOFT-NEXT:    .cfi_restore s0
-; RV64-SOFT-NEXT:    .cfi_restore s1
-; RV64-SOFT-NEXT:    .cfi_restore s2
-; RV64-SOFT-NEXT:    .cfi_restore s3
-; RV64-SOFT-NEXT:    .cfi_restore s4
-; RV64-SOFT-NEXT:    .cfi_restore s5
-; RV64-SOFT-NEXT:    .cfi_restore s6
-; RV64-SOFT-NEXT:    .cfi_restore s7
-; RV64-SOFT-NEXT:    .cfi_restore s8
-; RV64-SOFT-NEXT:    addi sp, sp, 80
-; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 0
-; RV64-SOFT-NEXT:    ret
-;
 ; CHECK-SOFT-RV64-LABEL: fcanonicalize_v8f16_nnan:
 ; CHECK-SOFT-RV64:       # %bb.0:
 ; CHECK-SOFT-RV64-NEXT:    addi sp, sp, -80
@@ -2524,50 +1619,50 @@ define <8 x half> @fcanonicalize_v8f16_nnan(<8 x half> %x) {
 ; CHECK-SOFT-RV64-NEXT:    mv s0, a0
 ; CHECK-SOFT-RV64-NEXT:    mv a0, a2
 ; CHECK-SOFT-RV64-NEXT:    call __extendhfsf2
-; CHECK-SOFT-RV64-NEXT:    mv a1, a0
-; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV64-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV64-NEXT:    call __truncsfhf2
 ; CHECK-SOFT-RV64-NEXT:    mv s2, a0
 ; CHECK-SOFT-RV64-NEXT:    mv a0, s4
 ; CHECK-SOFT-RV64-NEXT:    call __extendhfsf2
-; CHECK-SOFT-RV64-NEXT:    mv a1, a0
-; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV64-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV64-NEXT:    call __truncsfhf2
 ; CHECK-SOFT-RV64-NEXT:    mv s4, a0
 ; CHECK-SOFT-RV64-NEXT:    mv a0, s6
 ; CHECK-SOFT-RV64-NEXT:    call __extendhfsf2
-; CHECK-SOFT-RV64-NEXT:    mv a1, a0
-; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV64-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV64-NEXT:    call __truncsfhf2
 ; CHECK-SOFT-RV64-NEXT:    mv s6, a0
 ; CHECK-SOFT-RV64-NEXT:    mv a0, s8
 ; CHECK-SOFT-RV64-NEXT:    call __extendhfsf2
-; CHECK-SOFT-RV64-NEXT:    mv a1, a0
-; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV64-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV64-NEXT:    call __truncsfhf2
 ; CHECK-SOFT-RV64-NEXT:    mv s8, a0
 ; CHECK-SOFT-RV64-NEXT:    mv a0, s7
 ; CHECK-SOFT-RV64-NEXT:    call __extendhfsf2
-; CHECK-SOFT-RV64-NEXT:    mv a1, a0
-; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV64-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV64-NEXT:    call __truncsfhf2
 ; CHECK-SOFT-RV64-NEXT:    mv s7, a0
 ; CHECK-SOFT-RV64-NEXT:    mv a0, s5
 ; CHECK-SOFT-RV64-NEXT:    call __extendhfsf2
-; CHECK-SOFT-RV64-NEXT:    mv a1, a0
-; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV64-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV64-NEXT:    call __truncsfhf2
 ; CHECK-SOFT-RV64-NEXT:    mv s5, a0
 ; CHECK-SOFT-RV64-NEXT:    mv a0, s3
 ; CHECK-SOFT-RV64-NEXT:    call __extendhfsf2
-; CHECK-SOFT-RV64-NEXT:    mv a1, a0
-; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV64-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV64-NEXT:    call __truncsfhf2
 ; CHECK-SOFT-RV64-NEXT:    mv s3, a0
 ; CHECK-SOFT-RV64-NEXT:    mv a0, s1
 ; CHECK-SOFT-RV64-NEXT:    call __extendhfsf2
-; CHECK-SOFT-RV64-NEXT:    mv a1, a0
-; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV64-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV64-NEXT:    call __truncsfhf2
 ; CHECK-SOFT-RV64-NEXT:    sh s7, 8(s0)
 ; CHECK-SOFT-RV64-NEXT:    sh s5, 10(s0)
@@ -2792,50 +1887,50 @@ define <8 x half> @fcanonicalize_v8f16_nnan(<8 x half> %x) {
 ; CHECK-SOFT-RV32-NEXT:    mv s0, a0
 ; CHECK-SOFT-RV32-NEXT:    mv a0, a2
 ; CHECK-SOFT-RV32-NEXT:    call __extendhfsf2
-; CHECK-SOFT-RV32-NEXT:    mv a1, a0
-; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV32-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV32-NEXT:    call __truncsfhf2
 ; CHECK-SOFT-RV32-NEXT:    mv s2, a0
 ; CHECK-SOFT-RV32-NEXT:    mv a0, s4
 ; CHECK-SOFT-RV32-NEXT:    call __extendhfsf2
-; CHECK-SOFT-RV32-NEXT:    mv a1, a0
-; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV32-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV32-NEXT:    call __truncsfhf2
 ; CHECK-SOFT-RV32-NEXT:    mv s4, a0
 ; CHECK-SOFT-RV32-NEXT:    mv a0, s6
 ; CHECK-SOFT-RV32-NEXT:    call __extendhfsf2
-; CHECK-SOFT-RV32-NEXT:    mv a1, a0
-; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV32-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV32-NEXT:    call __truncsfhf2
 ; CHECK-SOFT-RV32-NEXT:    mv s6, a0
 ; CHECK-SOFT-RV32-NEXT:    mv a0, s8
 ; CHECK-SOFT-RV32-NEXT:    call __extendhfsf2
-; CHECK-SOFT-RV32-NEXT:    mv a1, a0
-; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV32-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV32-NEXT:    call __truncsfhf2
 ; CHECK-SOFT-RV32-NEXT:    mv s8, a0
 ; CHECK-SOFT-RV32-NEXT:    mv a0, s7
 ; CHECK-SOFT-RV32-NEXT:    call __extendhfsf2
-; CHECK-SOFT-RV32-NEXT:    mv a1, a0
-; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV32-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV32-NEXT:    call __truncsfhf2
 ; CHECK-SOFT-RV32-NEXT:    mv s7, a0
 ; CHECK-SOFT-RV32-NEXT:    mv a0, s5
 ; CHECK-SOFT-RV32-NEXT:    call __extendhfsf2
-; CHECK-SOFT-RV32-NEXT:    mv a1, a0
-; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV32-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV32-NEXT:    call __truncsfhf2
 ; CHECK-SOFT-RV32-NEXT:    mv s5, a0
 ; CHECK-SOFT-RV32-NEXT:    mv a0, s3
 ; CHECK-SOFT-RV32-NEXT:    call __extendhfsf2
-; CHECK-SOFT-RV32-NEXT:    mv a1, a0
-; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV32-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV32-NEXT:    call __truncsfhf2
 ; CHECK-SOFT-RV32-NEXT:    mv s3, a0
 ; CHECK-SOFT-RV32-NEXT:    mv a0, s1
 ; CHECK-SOFT-RV32-NEXT:    call __extendhfsf2
-; CHECK-SOFT-RV32-NEXT:    mv a1, a0
-; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV32-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV32-NEXT:    call __truncsfhf2
 ; CHECK-SOFT-RV32-NEXT:    sh s7, 8(s0)
 ; CHECK-SOFT-RV32-NEXT:    sh s5, 10(s0)
@@ -3035,144 +2130,19 @@ define <8 x half> @fcanonicalize_v8f16_nnan(<8 x half> %x) {
 ; CHECK-NOFP16-RV32-NEXT:    addi sp, sp, 112
 ; CHECK-NOFP16-RV32-NEXT:    .cfi_def_cfa_offset 0
 ; CHECK-NOFP16-RV32-NEXT:    ret
-; RV32-SOFT-LABEL: fcanonicalize_v8f16_nnan:
-; RV32-SOFT:       # %bb.0:
-; RV32-SOFT-NEXT:    addi sp, sp, -48
-; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 48
-; RV32-SOFT-NEXT:    sw ra, 44(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s0, 40(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s1, 36(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s2, 32(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s3, 28(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s4, 24(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s5, 20(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s6, 16(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s7, 12(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s8, 8(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    .cfi_offset ra, -4
-; RV32-SOFT-NEXT:    .cfi_offset s0, -8
-; RV32-SOFT-NEXT:    .cfi_offset s1, -12
-; RV32-SOFT-NEXT:    .cfi_offset s2, -16
-; RV32-SOFT-NEXT:    .cfi_offset s3, -20
-; RV32-SOFT-NEXT:    .cfi_offset s4, -24
-; RV32-SOFT-NEXT:    .cfi_offset s5, -28
-; RV32-SOFT-NEXT:    .cfi_offset s6, -32
-; RV32-SOFT-NEXT:    .cfi_offset s7, -36
-; RV32-SOFT-NEXT:    .cfi_offset s8, -40
-; RV32-SOFT-NEXT:    lhu s7, 16(a1)
-; RV32-SOFT-NEXT:    lhu s5, 20(a1)
-; RV32-SOFT-NEXT:    lhu s3, 24(a1)
-; RV32-SOFT-NEXT:    lhu s1, 28(a1)
-; RV32-SOFT-NEXT:    lhu a2, 0(a1)
-; RV32-SOFT-NEXT:    lhu s4, 4(a1)
-; RV32-SOFT-NEXT:    lhu s6, 8(a1)
-; RV32-SOFT-NEXT:    lhu s8, 12(a1)
-; RV32-SOFT-NEXT:    mv s0, a0
-; RV32-SOFT-NEXT:    mv a0, a2
-; RV32-SOFT-NEXT:    call __extendhfsf2
-; RV32-SOFT-NEXT:    mv a1, a0
-; RV32-SOFT-NEXT:    call fminimum_numf
-; RV32-SOFT-NEXT:    call __truncsfhf2
-; RV32-SOFT-NEXT:    mv s2, a0
-; RV32-SOFT-NEXT:    mv a0, s4
-; RV32-SOFT-NEXT:    call __extendhfsf2
-; RV32-SOFT-NEXT:    mv a1, a0
-; RV32-SOFT-NEXT:    call fminimum_numf
-; RV32-SOFT-NEXT:    call __truncsfhf2
-; RV32-SOFT-NEXT:    mv s4, a0
-; RV32-SOFT-NEXT:    mv a0, s6
-; RV32-SOFT-NEXT:    call __extendhfsf2
-; RV32-SOFT-NEXT:    mv a1, a0
-; RV32-SOFT-NEXT:    call fminimum_numf
-; RV32-SOFT-NEXT:    call __truncsfhf2
-; RV32-SOFT-NEXT:    mv s6, a0
-; RV32-SOFT-NEXT:    mv a0, s8
-; RV32-SOFT-NEXT:    call __extendhfsf2
-; RV32-SOFT-NEXT:    mv a1, a0
-; RV32-SOFT-NEXT:    call fminimum_numf
-; RV32-SOFT-NEXT:    call __truncsfhf2
-; RV32-SOFT-NEXT:    mv s8, a0
-; RV32-SOFT-NEXT:    mv a0, s7
-; RV32-SOFT-NEXT:    call __extendhfsf2
-; RV32-SOFT-NEXT:    mv a1, a0
-; RV32-SOFT-NEXT:    call fminimum_numf
-; RV32-SOFT-NEXT:    call __truncsfhf2
-; RV32-SOFT-NEXT:    mv s7, a0
-; RV32-SOFT-NEXT:    mv a0, s5
-; RV32-SOFT-NEXT:    call __extendhfsf2
-; RV32-SOFT-NEXT:    mv a1, a0
-; RV32-SOFT-NEXT:    call fminimum_numf
-; RV32-SOFT-NEXT:    call __truncsfhf2
-; RV32-SOFT-NEXT:    mv s5, a0
-; RV32-SOFT-NEXT:    mv a0, s3
-; RV32-SOFT-NEXT:    call __extendhfsf2
-; RV32-SOFT-NEXT:    mv a1, a0
-; RV32-SOFT-NEXT:    call fminimum_numf
-; RV32-SOFT-NEXT:    call __truncsfhf2
-; RV32-SOFT-NEXT:    mv s3, a0
-; RV32-SOFT-NEXT:    mv a0, s1
-; RV32-SOFT-NEXT:    call __extendhfsf2
-; RV32-SOFT-NEXT:    mv a1, a0
-; RV32-SOFT-NEXT:    call fminimum_numf
-; RV32-SOFT-NEXT:    call __truncsfhf2
-; RV32-SOFT-NEXT:    sh s7, 8(s0)
-; RV32-SOFT-NEXT:    sh s5, 10(s0)
-; RV32-SOFT-NEXT:    sh s3, 12(s0)
-; RV32-SOFT-NEXT:    sh a0, 14(s0)
-; RV32-SOFT-NEXT:    sh s2, 0(s0)
-; RV32-SOFT-NEXT:    sh s4, 2(s0)
-; RV32-SOFT-NEXT:    sh s6, 4(s0)
-; RV32-SOFT-NEXT:    sh s8, 6(s0)
-; RV32-SOFT-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s0, 40(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s1, 36(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s2, 32(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s3, 28(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s4, 24(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s5, 20(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s6, 16(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s7, 12(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s8, 8(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    .cfi_restore ra
-; RV32-SOFT-NEXT:    .cfi_restore s0
-; RV32-SOFT-NEXT:    .cfi_restore s1
-; RV32-SOFT-NEXT:    .cfi_restore s2
-; RV32-SOFT-NEXT:    .cfi_restore s3
-; RV32-SOFT-NEXT:    .cfi_restore s4
-; RV32-SOFT-NEXT:    .cfi_restore s5
-; RV32-SOFT-NEXT:    .cfi_restore s6
-; RV32-SOFT-NEXT:    .cfi_restore s7
-; RV32-SOFT-NEXT:    .cfi_restore s8
-; RV32-SOFT-NEXT:    addi sp, sp, 48
-; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 0
-; RV32-SOFT-NEXT:    ret
   %z = call nnan <8 x half> @llvm.canonicalize.v8f16(<8 x half> %x)
   ret <8 x half> %z
 }
 
 define float @fcanonicalize_f32(float %x) {
-; RV64-SOFT-LABEL: fcanonicalize_f32:
-; RV64-SOFT:       # %bb.0:
-; RV64-SOFT-NEXT:    addi sp, sp, -16
-; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 16
-; RV64-SOFT-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    .cfi_offset ra, -8
-; RV64-SOFT-NEXT:    mv a1, a0
-; RV64-SOFT-NEXT:    call fminimum_numf
-; RV64-SOFT-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    .cfi_restore ra
-; RV64-SOFT-NEXT:    addi sp, sp, 16
-; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 0
-; RV64-SOFT-NEXT:    ret
-;
 ; CHECK-SOFT-RV64-LABEL: fcanonicalize_f32:
 ; CHECK-SOFT-RV64:       # %bb.0:
 ; CHECK-SOFT-RV64-NEXT:    addi sp, sp, -16
 ; CHECK-SOFT-RV64-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SOFT-RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
 ; CHECK-SOFT-RV64-NEXT:    .cfi_offset ra, -8
-; CHECK-SOFT-RV64-NEXT:    mv a1, a0
-; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV64-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; CHECK-SOFT-RV64-NEXT:    .cfi_restore ra
 ; CHECK-SOFT-RV64-NEXT:    addi sp, sp, 16
@@ -3195,8 +2165,8 @@ define float @fcanonicalize_f32(float %x) {
 ; CHECK-SOFT-RV32-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SOFT-RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; CHECK-SOFT-RV32-NEXT:    .cfi_offset ra, -4
-; CHECK-SOFT-RV32-NEXT:    mv a1, a0
-; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV32-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; CHECK-SOFT-RV32-NEXT:    .cfi_restore ra
 ; CHECK-SOFT-RV32-NEXT:    addi sp, sp, 16
@@ -3212,132 +2182,11 @@ define float @fcanonicalize_f32(float %x) {
 ; CHECK-NOFP16-RV32:       # %bb.0:
 ; CHECK-NOFP16-RV32-NEXT:    fmin.s fa0, fa0, fa0
 ; CHECK-NOFP16-RV32-NEXT:    ret
-; RV32-SOFT-LABEL: fcanonicalize_f32:
-; RV32-SOFT:       # %bb.0:
-; RV32-SOFT-NEXT:    addi sp, sp, -16
-; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 16
-; RV32-SOFT-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    .cfi_offset ra, -4
-; RV32-SOFT-NEXT:    mv a1, a0
-; RV32-SOFT-NEXT:    call fminimum_numf
-; RV32-SOFT-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    .cfi_restore ra
-; RV32-SOFT-NEXT:    addi sp, sp, 16
-; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 0
-; RV32-SOFT-NEXT:    ret
-  %z = call float @llvm.canonicalize.f32(float %x)
-  ret float %z
-}
-
-define float @fcanonicalize_f32_nnan(float %x) {
-; RV64-SOFT-LABEL: fcanonicalize_f32_nnan:
-; RV64-SOFT:       # %bb.0:
-; RV64-SOFT-NEXT:    addi sp, sp, -16
-; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 16
-; RV64-SOFT-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    .cfi_offset ra, -8
-; RV64-SOFT-NEXT:    mv a1, a0
-; RV64-SOFT-NEXT:    call fminimum_numf
-; RV64-SOFT-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    .cfi_restore ra
-; RV64-SOFT-NEXT:    addi sp, sp, 16
-; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 0
-; RV64-SOFT-NEXT:    ret
-;
-; CHECK-SOFT-RV64-LABEL: fcanonicalize_f32_nnan:
-; CHECK-SOFT-RV64:       # %bb.0:
-; CHECK-SOFT-RV64-NEXT:    addi sp, sp, -16
-; CHECK-SOFT-RV64-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-SOFT-RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
-; CHECK-SOFT-RV64-NEXT:    .cfi_offset ra, -8
-; CHECK-SOFT-RV64-NEXT:    mv a1, a0
-; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
-; CHECK-SOFT-RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
-; CHECK-SOFT-RV64-NEXT:    .cfi_restore ra
-; CHECK-SOFT-RV64-NEXT:    addi sp, sp, 16
-; CHECK-SOFT-RV64-NEXT:    .cfi_def_cfa_offset 0
-; CHECK-SOFT-RV64-NEXT:    ret
-;
-; CHECK-FP16-RV64-LABEL: fcanonicalize_f32_nnan:
-; CHECK-FP16-RV64:       # %bb.0:
-; CHECK-FP16-RV64-NEXT:    fmin.s fa0, fa0, fa0
-; CHECK-FP16-RV64-NEXT:    ret
-;
-; CHECK-NOFP16-RV64-LABEL: fcanonicalize_f32_nnan:
-; CHECK-NOFP16-RV64:       # %bb.0:
-; CHECK-NOFP16-RV64-NEXT:    fmin.s fa0, fa0, fa0
-; CHECK-NOFP16-RV64-NEXT:    ret
-;
-; CHECK-SOFT-RV32-LABEL: fcanonicalize_f32_nnan:
-; CHECK-SOFT-RV32:       # %bb.0:
-; CHECK-SOFT-RV32-NEXT:    addi sp, sp, -16
-; CHECK-SOFT-RV32-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-SOFT-RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; CHECK-SOFT-RV32-NEXT:    .cfi_offset ra, -4
-; CHECK-SOFT-RV32-NEXT:    mv a1, a0
-; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
-; CHECK-SOFT-RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; CHECK-SOFT-RV32-NEXT:    .cfi_restore ra
-; CHECK-SOFT-RV32-NEXT:    addi sp, sp, 16
-; CHECK-SOFT-RV32-NEXT:    .cfi_def_cfa_offset 0
-; CHECK-SOFT-RV32-NEXT:    ret
-;
-; CHECK-FP16-RV32-LABEL: fcanonicalize_f32_nnan:
-; CHECK-FP16-RV32:       # %bb.0:
-; CHECK-FP16-RV32-NEXT:    fmin.s fa0, fa0, fa0
-; CHECK-FP16-RV32-NEXT:    ret
-;
-; CHECK-NOFP16-RV32-LABEL: fcanonicalize_f32_nnan:
-; CHECK-NOFP16-RV32:       # %bb.0:
-; CHECK-NOFP16-RV32-NEXT:    fmin.s fa0, fa0, fa0
-; CHECK-NOFP16-RV32-NEXT:    ret
-; RV32-SOFT-LABEL: fcanonicalize_f32_nnan:
-; RV32-SOFT:       # %bb.0:
-; RV32-SOFT-NEXT:    addi sp, sp, -16
-; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 16
-; RV32-SOFT-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    .cfi_offset ra, -4
-; RV32-SOFT-NEXT:    mv a1, a0
-; RV32-SOFT-NEXT:    call fminimum_numf
-; RV32-SOFT-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    .cfi_restore ra
-; RV32-SOFT-NEXT:    addi sp, sp, 16
-; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 0
-; RV32-SOFT-NEXT:    ret
   %z = call nnan float @llvm.canonicalize.f32(float %x)
   ret float %z
 }
 
 define <2 x float> @fcanonicalize_v2f32(<2 x float> %x) {
-; RV64-SOFT-LABEL: fcanonicalize_v2f32:
-; RV64-SOFT:       # %bb.0:
-; RV64-SOFT-NEXT:    addi sp, sp, -32
-; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 32
-; RV64-SOFT-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    .cfi_offset ra, -8
-; RV64-SOFT-NEXT:    .cfi_offset s0, -16
-; RV64-SOFT-NEXT:    .cfi_offset s1, -24
-; RV64-SOFT-NEXT:    mv s0, a1
-; RV64-SOFT-NEXT:    mv a1, a0
-; RV64-SOFT-NEXT:    call fminimum_numf
-; RV64-SOFT-NEXT:    mv s1, a0
-; RV64-SOFT-NEXT:    mv a0, s0
-; RV64-SOFT-NEXT:    mv a1, s0
-; RV64-SOFT-NEXT:    call fminimum_numf
-; RV64-SOFT-NEXT:    mv a1, a0
-; RV64-SOFT-NEXT:    mv a0, s1
-; RV64-SOFT-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    .cfi_restore ra
-; RV64-SOFT-NEXT:    .cfi_restore s0
-; RV64-SOFT-NEXT:    .cfi_restore s1
-; RV64-SOFT-NEXT:    addi sp, sp, 32
-; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 0
-; RV64-SOFT-NEXT:    ret
-;
 ; CHECK-SOFT-RV64-LABEL: fcanonicalize_v2f32:
 ; CHECK-SOFT-RV64:       # %bb.0:
 ; CHECK-SOFT-RV64-NEXT:    addi sp, sp, -32
@@ -3349,12 +2198,12 @@ define <2 x float> @fcanonicalize_v2f32(<2 x float> %x) {
 ; CHECK-SOFT-RV64-NEXT:    .cfi_offset s0, -16
 ; CHECK-SOFT-RV64-NEXT:    .cfi_offset s1, -24
 ; CHECK-SOFT-RV64-NEXT:    mv s0, a1
-; CHECK-SOFT-RV64-NEXT:    mv a1, a0
-; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV64-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV64-NEXT:    mv s1, a0
+; CHECK-SOFT-RV64-NEXT:    lui a1, 260096
 ; CHECK-SOFT-RV64-NEXT:    mv a0, s0
-; CHECK-SOFT-RV64-NEXT:    mv a1, s0
-; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV64-NEXT:    mv a1, a0
 ; CHECK-SOFT-RV64-NEXT:    mv a0, s1
 ; CHECK-SOFT-RV64-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
@@ -3390,12 +2239,12 @@ define <2 x float> @fcanonicalize_v2f32(<2 x float> %x) {
 ; CHECK-SOFT-RV32-NEXT:    .cfi_offset s0, -8
 ; CHECK-SOFT-RV32-NEXT:    .cfi_offset s1, -12
 ; CHECK-SOFT-RV32-NEXT:    mv s0, a1
-; CHECK-SOFT-RV32-NEXT:    mv a1, a0
-; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV32-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV32-NEXT:    mv s1, a0
+; CHECK-SOFT-RV32-NEXT:    lui a1, 260096
 ; CHECK-SOFT-RV32-NEXT:    mv a0, s0
-; CHECK-SOFT-RV32-NEXT:    mv a1, s0
-; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV32-NEXT:    mv a1, a0
 ; CHECK-SOFT-RV32-NEXT:    mv a0, s1
 ; CHECK-SOFT-RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
@@ -3419,68 +2268,11 @@ define <2 x float> @fcanonicalize_v2f32(<2 x float> %x) {
 ; CHECK-NOFP16-RV32-NEXT:    fmin.s fa0, fa0, fa0
 ; CHECK-NOFP16-RV32-NEXT:    fmin.s fa1, fa1, fa1
 ; CHECK-NOFP16-RV32-NEXT:    ret
-; RV32-SOFT-LABEL: fcanonicalize_v2f32:
-; RV32-SOFT:       # %bb.0:
-; RV32-SOFT-NEXT:    addi sp, sp, -16
-; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 16
-; RV32-SOFT-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    .cfi_offset ra, -4
-; RV32-SOFT-NEXT:    .cfi_offset s0, -8
-; RV32-SOFT-NEXT:    .cfi_offset s1, -12
-; RV32-SOFT-NEXT:    mv s0, a1
-; RV32-SOFT-NEXT:    mv a1, a0
-; RV32-SOFT-NEXT:    call fminimum_numf
-; RV32-SOFT-NEXT:    mv s1, a0
-; RV32-SOFT-NEXT:    mv a0, s0
-; RV32-SOFT-NEXT:    mv a1, s0
-; RV32-SOFT-NEXT:    call fminimum_numf
-; RV32-SOFT-NEXT:    mv a1, a0
-; RV32-SOFT-NEXT:    mv a0, s1
-; RV32-SOFT-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    .cfi_restore ra
-; RV32-SOFT-NEXT:    .cfi_restore s0
-; RV32-SOFT-NEXT:    .cfi_restore s1
-; RV32-SOFT-NEXT:    addi sp, sp, 16
-; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 0
-; RV32-SOFT-NEXT:    ret
   %z = call <2 x float> @llvm.canonicalize.v2f32(<2 x float> %x)
   ret <2 x float> %z
 }
 
 define <2 x float> @fcanonicalize_v2f32_nnan(<2 x float> %x) {
-; RV64-SOFT-LABEL: fcanonicalize_v2f32_nnan:
-; RV64-SOFT:       # %bb.0:
-; RV64-SOFT-NEXT:    addi sp, sp, -32
-; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 32
-; RV64-SOFT-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    .cfi_offset ra, -8
-; RV64-SOFT-NEXT:    .cfi_offset s0, -16
-; RV64-SOFT-NEXT:    .cfi_offset s1, -24
-; RV64-SOFT-NEXT:    mv s0, a1
-; RV64-SOFT-NEXT:    mv a1, a0
-; RV64-SOFT-NEXT:    call fminimum_numf
-; RV64-SOFT-NEXT:    mv s1, a0
-; RV64-SOFT-NEXT:    mv a0, s0
-; RV64-SOFT-NEXT:    mv a1, s0
-; RV64-SOFT-NEXT:    call fminimum_numf
-; RV64-SOFT-NEXT:    mv a1, a0
-; RV64-SOFT-NEXT:    mv a0, s1
-; RV64-SOFT-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    .cfi_restore ra
-; RV64-SOFT-NEXT:    .cfi_restore s0
-; RV64-SOFT-NEXT:    .cfi_restore s1
-; RV64-SOFT-NEXT:    addi sp, sp, 32
-; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 0
-; RV64-SOFT-NEXT:    ret
-;
 ; CHECK-SOFT-RV64-LABEL: fcanonicalize_v2f32_nnan:
 ; CHECK-SOFT-RV64:       # %bb.0:
 ; CHECK-SOFT-RV64-NEXT:    addi sp, sp, -32
@@ -3492,12 +2284,12 @@ define <2 x float> @fcanonicalize_v2f32_nnan(<2 x float> %x) {
 ; CHECK-SOFT-RV64-NEXT:    .cfi_offset s0, -16
 ; CHECK-SOFT-RV64-NEXT:    .cfi_offset s1, -24
 ; CHECK-SOFT-RV64-NEXT:    mv s0, a1
-; CHECK-SOFT-RV64-NEXT:    mv a1, a0
-; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV64-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV64-NEXT:    mv s1, a0
+; CHECK-SOFT-RV64-NEXT:    lui a1, 260096
 ; CHECK-SOFT-RV64-NEXT:    mv a0, s0
-; CHECK-SOFT-RV64-NEXT:    mv a1, s0
-; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV64-NEXT:    mv a1, a0
 ; CHECK-SOFT-RV64-NEXT:    mv a0, s1
 ; CHECK-SOFT-RV64-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
@@ -3533,12 +2325,12 @@ define <2 x float> @fcanonicalize_v2f32_nnan(<2 x float> %x) {
 ; CHECK-SOFT-RV32-NEXT:    .cfi_offset s0, -8
 ; CHECK-SOFT-RV32-NEXT:    .cfi_offset s1, -12
 ; CHECK-SOFT-RV32-NEXT:    mv s0, a1
-; CHECK-SOFT-RV32-NEXT:    mv a1, a0
-; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    lui a1, 260096
+; CHECK-SOFT-RV32-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV32-NEXT:    mv s1, a0
+; CHECK-SOFT-RV32-NEXT:    lui a1, 260096
 ; CHECK-SOFT-RV32-NEXT:    mv a0, s0
-; CHECK-SOFT-RV32-NEXT:    mv a1, s0
-; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV32-NEXT:    mv a1, a0
 ; CHECK-SOFT-RV32-NEXT:    mv a0, s1
 ; CHECK-SOFT-RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
@@ -3562,95 +2354,11 @@ define <2 x float> @fcanonicalize_v2f32_nnan(<2 x float> %x) {
 ; CHECK-NOFP16-RV32-NEXT:    fmin.s fa0, fa0, fa0
 ; CHECK-NOFP16-RV32-NEXT:    fmin.s fa1, fa1, fa1
 ; CHECK-NOFP16-RV32-NEXT:    ret
-; RV32-SOFT-LABEL: fcanonicalize_v2f32_nnan:
-; RV32-SOFT:       # %bb.0:
-; RV32-SOFT-NEXT:    addi sp, sp, -16
-; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 16
-; RV32-SOFT-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    .cfi_offset ra, -4
-; RV32-SOFT-NEXT:    .cfi_offset s0, -8
-; RV32-SOFT-NEXT:    .cfi_offset s1, -12
-; RV32-SOFT-NEXT:    mv s0, a1
-; RV32-SOFT-NEXT:    mv a1, a0
-; RV32-SOFT-NEXT:    call fminimum_numf
-; RV32-SOFT-NEXT:    mv s1, a0
-; RV32-SOFT-NEXT:    mv a0, s0
-; RV32-SOFT-NEXT:    mv a1, s0
-; RV32-SOFT-NEXT:    call fminimum_numf
-; RV32-SOFT-NEXT:    mv a1, a0
-; RV32-SOFT-NEXT:    mv a0, s1
-; RV32-SOFT-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    .cfi_restore ra
-; RV32-SOFT-NEXT:    .cfi_restore s0
-; RV32-SOFT-NEXT:    .cfi_restore s1
-; RV32-SOFT-NEXT:    addi sp, sp, 16
-; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 0
-; RV32-SOFT-NEXT:    ret
   %z = call nnan <2 x float> @llvm.canonicalize.v2f32(<2 x float> %x)
   ret <2 x float> %z
 }
 
 define <4 x float> @fcanonicalize_v4f32(<4 x float> %x) {
-; RV64-SOFT-LABEL: fcanonicalize_v4f32:
-; RV64-SOFT:       # %bb.0:
-; RV64-SOFT-NEXT:    addi sp, sp, -48
-; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 48
-; RV64-SOFT-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    sd s4, 0(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    .cfi_offset ra, -8
-; RV64-SOFT-NEXT:    .cfi_offset s0, -16
-; RV64-SOFT-NEXT:    .cfi_offset s1, -24
-; RV64-SOFT-NEXT:    .cfi_offset s2, -32
-; RV64-SOFT-NEXT:    .cfi_offset s3, -40
-; RV64-SOFT-NEXT:    .cfi_offset s4, -48
-; RV64-SOFT-NEXT:    lw a2, 0(a1)
-; RV64-SOFT-NEXT:    lw s0, 8(a1)
-; RV64-SOFT-NEXT:    lw s1, 16(a1)
-; RV64-SOFT-NEXT:    lw s2, 24(a1)
-; RV64-SOFT-NEXT:    mv s3, a0
-; RV64-SOFT-NEXT:    mv a0, a2
-; RV64-SOFT-NEXT:    mv a1, a2
-; RV64-SOFT-NEXT:    call fminimum_numf
-; RV64-SOFT-NEXT:    mv s4, a0
-; RV64-SOFT-NEXT:    mv a0, s0
-; RV64-SOFT-NEXT:    mv a1, s0
-; RV64-SOFT-NEXT:    call fminimum_numf
-; RV64-SOFT-NEXT:    mv s0, a0
-; RV64-SOFT-NEXT:    mv a0, s1
-; RV64-SOFT-NEXT:    mv a1, s1
-; RV64-SOFT-NEXT:    call fminimum_numf
-; RV64-SOFT-NEXT:    mv s1, a0
-; RV64-SOFT-NEXT:    mv a0, s2
-; RV64-SOFT-NEXT:    mv a1, s2
-; RV64-SOFT-NEXT:    call fminimum_numf
-; RV64-SOFT-NEXT:    sw s4, 0(s3)
-; RV64-SOFT-NEXT:    sw s0, 4(s3)
-; RV64-SOFT-NEXT:    sw s1, 8(s3)
-; RV64-SOFT-NEXT:    sw a0, 12(s3)
-; RV64-SOFT-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    ld s4, 0(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    .cfi_restore ra
-; RV64-SOFT-NEXT:    .cfi_restore s0
-; RV64-SOFT-NEXT:    .cfi_restore s1
-; RV64-SOFT-NEXT:    .cfi_restore s2
-; RV64-SOFT-NEXT:    .cfi_restore s3
-; RV64-SOFT-NEXT:    .cfi_restore s4
-; RV64-SOFT-NEXT:    addi sp, sp, 48
-; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 0
-; RV64-SOFT-NEXT:    ret
-;
 ; CHECK-SOFT-RV64-LABEL: fcanonicalize_v4f32:
 ; CHECK-SOFT-RV64:       # %bb.0:
 ; CHECK-SOFT-RV64-NEXT:    addi sp, sp, -48
@@ -3672,21 +2380,21 @@ define <4 x float> @fcanonicalize_v4f32(<4 x float> %x) {
 ; CHECK-SOFT-RV64-NEXT:    lw s1, 16(a1)
 ; CHECK-SOFT-RV64-NEXT:    lw s2, 24(a1)
 ; CHECK-SOFT-RV64-NEXT:    mv s3, a0
+; CHECK-SOFT-RV64-NEXT:    lui a1, 260096
 ; CHECK-SOFT-RV64-NEXT:    mv a0, a2
-; CHECK-SOFT-RV64-NEXT:    mv a1, a2
-; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV64-NEXT:    mv s4, a0
+; CHECK-SOFT-RV64-NEXT:    lui a1, 260096
 ; CHECK-SOFT-RV64-NEXT:    mv a0, s0
-; CHECK-SOFT-RV64-NEXT:    mv a1, s0
-; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV64-NEXT:    mv s0, a0
+; CHECK-SOFT-RV64-NEXT:    lui a1, 260096
 ; CHECK-SOFT-RV64-NEXT:    mv a0, s1
-; CHECK-SOFT-RV64-NEXT:    mv a1, s1
-; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV64-NEXT:    mv s1, a0
+; CHECK-SOFT-RV64-NEXT:    lui a1, 260096
 ; CHECK-SOFT-RV64-NEXT:    mv a0, s2
-; CHECK-SOFT-RV64-NEXT:    mv a1, s2
-; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV64-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV64-NEXT:    sw s4, 0(s3)
 ; CHECK-SOFT-RV64-NEXT:    sw s0, 4(s3)
 ; CHECK-SOFT-RV64-NEXT:    sw s1, 8(s3)
@@ -3752,21 +2460,21 @@ define <4 x float> @fcanonicalize_v4f32(<4 x float> %x) {
 ; CHECK-SOFT-RV32-NEXT:    lw s1, 8(a1)
 ; CHECK-SOFT-RV32-NEXT:    lw s2, 12(a1)
 ; CHECK-SOFT-RV32-NEXT:    mv s3, a0
+; CHECK-SOFT-RV32-NEXT:    lui a1, 260096
 ; CHECK-SOFT-RV32-NEXT:    mv a0, a2
-; CHECK-SOFT-RV32-NEXT:    mv a1, a2
-; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV32-NEXT:    mv s4, a0
+; CHECK-SOFT-RV32-NEXT:    lui a1, 260096
 ; CHECK-SOFT-RV32-NEXT:    mv a0, s0
-; CHECK-SOFT-RV32-NEXT:    mv a1, s0
-; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV32-NEXT:    mv s0, a0
+; CHECK-SOFT-RV32-NEXT:    lui a1, 260096
 ; CHECK-SOFT-RV32-NEXT:    mv a0, s1
-; CHECK-SOFT-RV32-NEXT:    mv a1, s1
-; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV32-NEXT:    mv s1, a0
+; CHECK-SOFT-RV32-NEXT:    lui a1, 260096
 ; CHECK-SOFT-RV32-NEXT:    mv a0, s2
-; CHECK-SOFT-RV32-NEXT:    mv a1, s2
-; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
+; CHECK-SOFT-RV32-NEXT:    call __mulsf3
 ; CHECK-SOFT-RV32-NEXT:    sw s4, 0(s3)
 ; CHECK-SOFT-RV32-NEXT:    sw s0, 4(s3)
 ; CHECK-SOFT-RV32-NEXT:    sw s1, 8(s3)
@@ -3810,363 +2518,20 @@ define <4 x float> @fcanonicalize_v4f32(<4 x float> %x) {
 ; CHECK-NOFP16-RV32-NEXT:    fsw fa2, 8(a0)
 ; CHECK-NOFP16-RV32-NEXT:    fsw fa3, 12(a0)
 ; CHECK-NOFP16-RV32-NEXT:    ret
-; RV32-SOFT-LABEL: fcanonicalize_v4f32:
-; RV32-SOFT:       # %bb.0:
-; RV32-SOFT-NEXT:    addi sp, sp, -32
-; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 32
-; RV32-SOFT-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    .cfi_offset ra, -4
-; RV32-SOFT-NEXT:    .cfi_offset s0, -8
-; RV32-SOFT-NEXT:    .cfi_offset s1, -12
-; RV32-SOFT-NEXT:    .cfi_offset s2, -16
-; RV32-SOFT-NEXT:    .cfi_offset s3, -20
-; RV32-SOFT-NEXT:    .cfi_offset s4, -24
-; RV32-SOFT-NEXT:    lw a2, 0(a1)
-; RV32-SOFT-NEXT:    lw s0, 4(a1)
-; RV32-SOFT-NEXT:    lw s1, 8(a1)
-; RV32-SOFT-NEXT:    lw s2, 12(a1)
-; RV32-SOFT-NEXT:    mv s3, a0
-; RV32-SOFT-NEXT:    mv a0, a2
-; RV32-SOFT-NEXT:    mv a1, a2
-; RV32-SOFT-NEXT:    call fminimum_numf
-; RV32-SOFT-NEXT:    mv s4, a0
-; RV32-SOFT-NEXT:    mv a0, s0
-; RV32-SOFT-NEXT:    mv a1, s0
-; RV32-SOFT-NEXT:    call fminimum_numf
-; RV32-SOFT-NEXT:    mv s0, a0
-; RV32-SOFT-NEXT:    mv a0, s1
-; RV32-SOFT-NEXT:    mv a1, s1
-; RV32-SOFT-NEXT:    call fminimum_numf
-; RV32-SOFT-NEXT:    mv s1, a0
-; RV32-SOFT-NEXT:    mv a0, s2
-; RV32-SOFT-NEXT:    mv a1, s2
-; RV32-SOFT-NEXT:    call fminimum_numf
-; RV32-SOFT-NEXT:    sw s4, 0(s3)
-; RV32-SOFT-NEXT:    sw s0, 4(s3)
-; RV32-SOFT-NEXT:    sw s1, 8(s3)
-; RV32-SOFT-NEXT:    sw a0, 12(s3)
-; RV32-SOFT-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    .cfi_restore ra
-; RV32-SOFT-NEXT:    .cfi_restore s0
-; RV32-SOFT-NEXT:    .cfi_restore s1
-; RV32-SOFT-NEXT:    .cfi_restore s2
-; RV32-SOFT-NEXT:    .cfi_restore s3
-; RV32-SOFT-NEXT:    .cfi_restore s4
-; RV32-SOFT-NEXT:    addi sp, sp, 32
-; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 0
-; RV32-SOFT-NEXT:    ret
-  %z = call <4 x float> @llvm.canonicalize.v4f32(<4 x float> %x)
-  ret <4 x float> %z
-}
-
-define <4 x float> @fcanonicalize_v4f32_nnan(<4 x float> %x) {
-; RV64-SOFT-LABEL: fcanonicalize_v4f32_nnan:
-; RV64-SOFT:       # %bb.0:
-; RV64-SOFT-NEXT:    addi sp, sp, -48
-; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 48
-; RV64-SOFT-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    sd s4, 0(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    .cfi_offset ra, -8
-; RV64-SOFT-NEXT:    .cfi_offset s0, -16
-; RV64-SOFT-NEXT:    .cfi_offset s1, -24
-; RV64-SOFT-NEXT:    .cfi_offset s2, -32
-; RV64-SOFT-NEXT:    .cfi_offset s3, -40
-; RV64-SOFT-NEXT:    .cfi_offset s4, -48
-; RV64-SOFT-NEXT:    lw a2, 0(a1)
-; RV64-SOFT-NEXT:    lw s0, 8(a1)
-; RV64-SOFT-NEXT:    lw s1, 16(a1)
-; RV64-SOFT-NEXT:    lw s2, 24(a1)
-; RV64-SOFT-NEXT:    mv s3, a0
-; RV64-SOFT-NEXT:    mv a0, a2
-; RV64-SOFT-NEXT:    mv a1, a2
-; RV64-SOFT-NEXT:    call fminimum_numf
-; RV64-SOFT-NEXT:    mv s4, a0
-; RV64-SOFT-NEXT:    mv a0, s0
-; RV64-SOFT-NEXT:    mv a1, s0
-; RV64-SOFT-NEXT:    call fminimum_numf
-; RV64-SOFT-NEXT:    mv s0, a0
-; RV64-SOFT-NEXT:    mv a0, s1
-; RV64-SOFT-NEXT:    mv a1, s1
-; RV64-SOFT-NEXT:    call fminimum_numf
-; RV64-SOFT-NEXT:    mv s1, a0
-; RV64-SOFT-NEXT:    mv a0, s2
-; RV64-SOFT-NEXT:    mv a1, s2
-; RV64-SOFT-NEXT:    call fminimum_numf
-; RV64-SOFT-NEXT:    sw s4, 0(s3)
-; RV64-SOFT-NEXT:    sw s0, 4(s3)
-; RV64-SOFT-NEXT:    sw s1, 8(s3)
-; RV64-SOFT-NEXT:    sw a0, 12(s3)
-; RV64-SOFT-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    ld s4, 0(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    .cfi_restore ra
-; RV64-SOFT-NEXT:    .cfi_restore s0
-; RV64-SOFT-NEXT:    .cfi_restore s1
-; RV64-SOFT-NEXT:    .cfi_restore s2
-; RV64-SOFT-NEXT:    .cfi_restore s3
-; RV64-SOFT-NEXT:    .cfi_restore s4
-; RV64-SOFT-NEXT:    addi sp, sp, 48
-; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 0
-; RV64-SOFT-NEXT:    ret
-;
-; CHECK-SOFT-RV64-LABEL: fcanonicalize_v4f32_nnan:
-; CHECK-SOFT-RV64:       # %bb.0:
-; CHECK-SOFT-RV64-NEXT:    addi sp, sp, -48
-; CHECK-SOFT-RV64-NEXT:    .cfi_def_cfa_offset 48
-; CHECK-SOFT-RV64-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
-; CHECK-SOFT-RV64-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
-; CHECK-SOFT-RV64-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
-; CHECK-SOFT-RV64-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
-; CHECK-SOFT-RV64-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
-; CHECK-SOFT-RV64-NEXT:    sd s4, 0(sp) # 8-byte Folded Spill
-; CHECK-SOFT-RV64-NEXT:    .cfi_offset ra, -8
-; CHECK-SOFT-RV64-NEXT:    .cfi_offset s0, -16
-; CHECK-SOFT-RV64-NEXT:    .cfi_offset s1, -24
-; CHECK-SOFT-RV64-NEXT:    .cfi_offset s2, -32
-; CHECK-SOFT-RV64-NEXT:    .cfi_offset s3, -40
-; CHECK-SOFT-RV64-NEXT:    .cfi_offset s4, -48
-; CHECK-SOFT-RV64-NEXT:    lw a2, 0(a1)
-; CHECK-SOFT-RV64-NEXT:    lw s0, 8(a1)
-; CHECK-SOFT-RV64-NEXT:    lw s1, 16(a1)
-; CHECK-SOFT-RV64-NEXT:    lw s2, 24(a1)
-; CHECK-SOFT-RV64-NEXT:    mv s3, a0
-; CHECK-SOFT-RV64-NEXT:    mv a0, a2
-; CHECK-SOFT-RV64-NEXT:    mv a1, a2
-; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
-; CHECK-SOFT-RV64-NEXT:    mv s4, a0
-; CHECK-SOFT-RV64-NEXT:    mv a0, s0
-; CHECK-SOFT-RV64-NEXT:    mv a1, s0
-; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
-; CHECK-SOFT-RV64-NEXT:    mv s0, a0
-; CHECK-SOFT-RV64-NEXT:    mv a0, s1
-; CHECK-SOFT-RV64-NEXT:    mv a1, s1
-; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
-; CHECK-SOFT-RV64-NEXT:    mv s1, a0
-; CHECK-SOFT-RV64-NEXT:    mv a0, s2
-; CHECK-SOFT-RV64-NEXT:    mv a1, s2
-; CHECK-SOFT-RV64-NEXT:    call fminimum_numf
-; CHECK-SOFT-RV64-NEXT:    sw s4, 0(s3)
-; CHECK-SOFT-RV64-NEXT:    sw s0, 4(s3)
-; CHECK-SOFT-RV64-NEXT:    sw s1, 8(s3)
-; CHECK-SOFT-RV64-NEXT:    sw a0, 12(s3)
-; CHECK-SOFT-RV64-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
-; CHECK-SOFT-RV64-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
-; CHECK-SOFT-RV64-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; CHECK-SOFT-RV64-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; CHECK-SOFT-RV64-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
-; CHECK-SOFT-RV64-NEXT:    ld s4, 0(sp) # 8-byte Folded Reload
-; CHECK-SOFT-RV64-NEXT:    .cfi_restore ra
-; CHECK-SOFT-RV64-NEXT:    .cfi_restore s0
-; CHECK-SOFT-RV64-NEXT:    .cfi_restore s1
-; CHECK-SOFT-RV64-NEXT:    .cfi_restore s2
-; CHECK-SOFT-RV64-NEXT:    .cfi_restore s3
-; CHECK-SOFT-RV64-NEXT:    .cfi_restore s4
-; CHECK-SOFT-RV64-NEXT:    addi sp, sp, 48
-; CHECK-SOFT-RV64-NEXT:    .cfi_def_cfa_offset 0
-; CHECK-SOFT-RV64-NEXT:    ret
-;
-; CHECK-FP16-RV64-LABEL: fcanonicalize_v4f32_nnan:
-; CHECK-FP16-RV64:       # %bb.0:
-; CHECK-FP16-RV64-NEXT:    fmin.s fa5, fa0, fa0
-; CHECK-FP16-RV64-NEXT:    fmin.s fa4, fa1, fa1
-; CHECK-FP16-RV64-NEXT:    fmin.s fa2, fa2, fa2
-; CHECK-FP16-RV64-NEXT:    fmin.s fa3, fa3, fa3
-; CHECK-FP16-RV64-NEXT:    fsw fa5, 0(a0)
-; CHECK-FP16-RV64-NEXT:    fsw fa4, 4(a0)
-; CHECK-FP16-RV64-NEXT:    fsw fa2, 8(a0)
-; CHECK-FP16-RV64-NEXT:    fsw fa3, 12(a0)
-; CHECK-FP16-RV64-NEXT:    ret
-;
-; CHECK-NOFP16-RV64-LABEL: fcanonicalize_v4f32_nnan:
-; CHECK-NOFP16-RV64:       # %bb.0:
-; CHECK-NOFP16-RV64-NEXT:    fmin.s fa5, fa0, fa0
-; CHECK-NOFP16-RV64-NEXT:    fmin.s fa4, fa1, fa1
-; CHECK-NOFP16-RV64-NEXT:    fmin.s fa2, fa2, fa2
-; CHECK-NOFP16-RV64-NEXT:    fmin.s fa3, fa3, fa3
-; CHECK-NOFP16-RV64-NEXT:    fsw fa5, 0(a0)
-; CHECK-NOFP16-RV64-NEXT:    fsw fa4, 4(a0)
-; CHECK-NOFP16-RV64-NEXT:    fsw fa2, 8(a0)
-; CHECK-NOFP16-RV64-NEXT:    fsw fa3, 12(a0)
-; CHECK-NOFP16-RV64-NEXT:    ret
-;
-; CHECK-SOFT-RV32-LABEL: fcanonicalize_v4f32_nnan:
-; CHECK-SOFT-RV32:       # %bb.0:
-; CHECK-SOFT-RV32-NEXT:    addi sp, sp, -32
-; CHECK-SOFT-RV32-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-SOFT-RV32-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
-; CHECK-SOFT-RV32-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
-; CHECK-SOFT-RV32-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
-; CHECK-SOFT-RV32-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
-; CHECK-SOFT-RV32-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
-; CHECK-SOFT-RV32-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
-; CHECK-SOFT-RV32-NEXT:    .cfi_offset ra, -4
-; CHECK-SOFT-RV32-NEXT:    .cfi_offset s0, -8
-; CHECK-SOFT-RV32-NEXT:    .cfi_offset s1, -12
-; CHECK-SOFT-RV32-NEXT:    .cfi_offset s2, -16
-; CHECK-SOFT-RV32-NEXT:    .cfi_offset s3, -20
-; CHECK-SOFT-RV32-NEXT:    .cfi_offset s4, -24
-; CHECK-SOFT-RV32-NEXT:    lw a2, 0(a1)
-; CHECK-SOFT-RV32-NEXT:    lw s0, 4(a1)
-; CHECK-SOFT-RV32-NEXT:    lw s1, 8(a1)
-; CHECK-SOFT-RV32-NEXT:    lw s2, 12(a1)
-; CHECK-SOFT-RV32-NEXT:    mv s3, a0
-; CHECK-SOFT-RV32-NEXT:    mv a0, a2
-; CHECK-SOFT-RV32-NEXT:    mv a1, a2
-; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
-; CHECK-SOFT-RV32-NEXT:    mv s4, a0
-; CHECK-SOFT-RV32-NEXT:    mv a0, s0
-; CHECK-SOFT-RV32-NEXT:    mv a1, s0
-; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
-; CHECK-SOFT-RV32-NEXT:    mv s0, a0
-; CHECK-SOFT-RV32-NEXT:    mv a0, s1
-; CHECK-SOFT-RV32-NEXT:    mv a1, s1
-; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
-; CHECK-SOFT-RV32-NEXT:    mv s1, a0
-; CHECK-SOFT-RV32-NEXT:    mv a0, s2
-; CHECK-SOFT-RV32-NEXT:    mv a1, s2
-; CHECK-SOFT-RV32-NEXT:    call fminimum_numf
-; CHECK-SOFT-RV32-NEXT:    sw s4, 0(s3)
-; CHECK-SOFT-RV32-NEXT:    sw s0, 4(s3)
-; CHECK-SOFT-RV32-NEXT:    sw s1, 8(s3)
-; CHECK-SOFT-RV32-NEXT:    sw a0, 12(s3)
-; CHECK-SOFT-RV32-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
-; CHECK-SOFT-RV32-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
-; CHECK-SOFT-RV32-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; CHECK-SOFT-RV32-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; CHECK-SOFT-RV32-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; CHECK-SOFT-RV32-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
-; CHECK-SOFT-RV32-NEXT:    .cfi_restore ra
-; CHECK-SOFT-RV32-NEXT:    .cfi_restore s0
-; CHECK-SOFT-RV32-NEXT:    .cfi_restore s1
-; CHECK-SOFT-RV32-NEXT:    .cfi_restore s2
-; CHECK-SOFT-RV32-NEXT:    .cfi_restore s3
-; CHECK-SOFT-RV32-NEXT:    .cfi_restore s4
-; CHECK-SOFT-RV32-NEXT:    addi sp, sp, 32
-; CHECK-SOFT-RV32-NEXT:    .cfi_def_cfa_offset 0
-; CHECK-SOFT-RV32-NEXT:    ret
-;
-; CHECK-FP16-RV32-LABEL: fcanonicalize_v4f32_nnan:
-; CHECK-FP16-RV32:       # %bb.0:
-; CHECK-FP16-RV32-NEXT:    fmin.s fa5, fa0, fa0
-; CHECK-FP16-RV32-NEXT:    fmin.s fa4, fa1, fa1
-; CHECK-FP16-RV32-NEXT:    fmin.s fa2, fa2, fa2
-; CHECK-FP16-RV32-NEXT:    fmin.s fa3, fa3, fa3
-; CHECK-FP16-RV32-NEXT:    fsw fa5, 0(a0)
-; CHECK-FP16-RV32-NEXT:    fsw fa4, 4(a0)
-; CHECK-FP16-RV32-NEXT:    fsw fa2, 8(a0)
-; CHECK-FP16-RV32-NEXT:    fsw fa3, 12(a0)
-; CHECK-FP16-RV32-NEXT:    ret
-;
-; CHECK-NOFP16-RV32-LABEL: fcanonicalize_v4f32_nnan:
-; CHECK-NOFP16-RV32:       # %bb.0:
-; CHECK-NOFP16-RV32-NEXT:    fmin.s fa5, fa0, fa0
-; CHECK-NOFP16-RV32-NEXT:    fmin.s fa4, fa1, fa1
-; CHECK-NOFP16-RV32-NEXT:    fmin.s fa2, fa2, fa2
-; CHECK-NOFP16-RV32-NEXT:    fmin.s fa3, fa3, fa3
-; CHECK-NOFP16-RV32-NEXT:    fsw fa5, 0(a0)
-; CHECK-NOFP16-RV32-NEXT:    fsw fa4, 4(a0)
-; CHECK-NOFP16-RV32-NEXT:    fsw fa2, 8(a0)
-; CHECK-NOFP16-RV32-NEXT:    fsw fa3, 12(a0)
-; CHECK-NOFP16-RV32-NEXT:    ret
-; RV32-SOFT-LABEL: fcanonicalize_v4f32_nnan:
-; RV32-SOFT:       # %bb.0:
-; RV32-SOFT-NEXT:    addi sp, sp, -32
-; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 32
-; RV32-SOFT-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    .cfi_offset ra, -4
-; RV32-SOFT-NEXT:    .cfi_offset s0, -8
-; RV32-SOFT-NEXT:    .cfi_offset s1, -12
-; RV32-SOFT-NEXT:    .cfi_offset s2, -16
-; RV32-SOFT-NEXT:    .cfi_offset s3, -20
-; RV32-SOFT-NEXT:    .cfi_offset s4, -24
-; RV32-SOFT-NEXT:    lw a2, 0(a1)
-; RV32-SOFT-NEXT:    lw s0, 4(a1)
-; RV32-SOFT-NEXT:    lw s1, 8(a1)
-; RV32-SOFT-NEXT:    lw s2, 12(a1)
-; RV32-SOFT-NEXT:    mv s3, a0
-; RV32-SOFT-NEXT:    mv a0, a2
-; RV32-SOFT-NEXT:    mv a1, a2
-; RV32-SOFT-NEXT:    call fminimum_numf
-; RV32-SOFT-NEXT:    mv s4, a0
-; RV32-SOFT-NEXT:    mv a0, s0
-; RV32-SOFT-NEXT:    mv a1, s0
-; RV32-SOFT-NEXT:    call fminimum_numf
-; RV32-SOFT-NEXT:    mv s0, a0
-; RV32-SOFT-NEXT:    mv a0, s1
-; RV32-SOFT-NEXT:    mv a1, s1
-; RV32-SOFT-NEXT:    call fminimum_numf
-; RV32-SOFT-NEXT:    mv s1, a0
-; RV32-SOFT-NEXT:    mv a0, s2
-; RV32-SOFT-NEXT:    mv a1, s2
-; RV32-SOFT-NEXT:    call fminimum_numf
-; RV32-SOFT-NEXT:    sw s4, 0(s3)
-; RV32-SOFT-NEXT:    sw s0, 4(s3)
-; RV32-SOFT-NEXT:    sw s1, 8(s3)
-; RV32-SOFT-NEXT:    sw a0, 12(s3)
-; RV32-SOFT-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    .cfi_restore ra
-; RV32-SOFT-NEXT:    .cfi_restore s0
-; RV32-SOFT-NEXT:    .cfi_restore s1
-; RV32-SOFT-NEXT:    .cfi_restore s2
-; RV32-SOFT-NEXT:    .cfi_restore s3
-; RV32-SOFT-NEXT:    .cfi_restore s4
-; RV32-SOFT-NEXT:    addi sp, sp, 32
-; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 0
-; RV32-SOFT-NEXT:    ret
   %z = call nnan <4 x float> @llvm.canonicalize.v4f32(<4 x float> %x)
   ret <4 x float> %z
 }
 
 define double @fcanonicalize_f64(double %x) {
-; RV64-SOFT-LABEL: fcanonicalize_f64:
-; RV64-SOFT:       # %bb.0:
-; RV64-SOFT-NEXT:    addi sp, sp, -16
-; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 16
-; RV64-SOFT-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    .cfi_offset ra, -8
-; RV64-SOFT-NEXT:    mv a1, a0
-; RV64-SOFT-NEXT:    call fminimum_num
-; RV64-SOFT-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    .cfi_restore ra
-; RV64-SOFT-NEXT:    addi sp, sp, 16
-; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 0
-; RV64-SOFT-NEXT:    ret
-;
 ; CHECK-SOFT-RV64-LABEL: fcanonicalize_f64:
 ; CHECK-SOFT-RV64:       # %bb.0:
 ; CHECK-SOFT-RV64-NEXT:    addi sp, sp, -16
 ; CHECK-SOFT-RV64-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SOFT-RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
 ; CHECK-SOFT-RV64-NEXT:    .cfi_offset ra, -8
-; CHECK-SOFT-RV64-NEXT:    mv a1, a0
-; CHECK-SOFT-RV64-NEXT:    call fminimum_num
+; CHECK-SOFT-RV64-NEXT:    li a1, 1023
+; CHECK-SOFT-RV64-NEXT:    slli a1, a1, 52
+; CHECK-SOFT-RV64-NEXT:    call __muldf3
 ; CHECK-SOFT-RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; CHECK-SOFT-RV64-NEXT:    .cfi_restore ra
 ; CHECK-SOFT-RV64-NEXT:    addi sp, sp, 16
@@ -4189,9 +2554,9 @@ define double @fcanonicalize_f64(double %x) {
 ; CHECK-SOFT-RV32-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SOFT-RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; CHECK-SOFT-RV32-NEXT:    .cfi_offset ra, -4
-; CHECK-SOFT-RV32-NEXT:    mv a2, a0
-; CHECK-SOFT-RV32-NEXT:    mv a3, a1
-; CHECK-SOFT-RV32-NEXT:    call fminimum_num
+; CHECK-SOFT-RV32-NEXT:    lui a3, 261888
+; CHECK-SOFT-RV32-NEXT:    li a2, 0
+; CHECK-SOFT-RV32-NEXT:    call __muldf3
 ; CHECK-SOFT-RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; CHECK-SOFT-RV32-NEXT:    .cfi_restore ra
 ; CHECK-SOFT-RV32-NEXT:    addi sp, sp, 16
@@ -4207,47 +2572,20 @@ define double @fcanonicalize_f64(double %x) {
 ; CHECK-NOFP16-RV32:       # %bb.0:
 ; CHECK-NOFP16-RV32-NEXT:    fmin.d fa0, fa0, fa0
 ; CHECK-NOFP16-RV32-NEXT:    ret
-; RV32-SOFT-LABEL: fcanonicalize_f64:
-; RV32-SOFT:       # %bb.0:
-; RV32-SOFT-NEXT:    addi sp, sp, -16
-; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 16
-; RV32-SOFT-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    .cfi_offset ra, -4
-; RV32-SOFT-NEXT:    mv a2, a0
-; RV32-SOFT-NEXT:    mv a3, a1
-; RV32-SOFT-NEXT:    call fminimum_num
-; RV32-SOFT-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    .cfi_restore ra
-; RV32-SOFT-NEXT:    addi sp, sp, 16
-; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 0
-; RV32-SOFT-NEXT:    ret
   %z = call double @llvm.canonicalize.f64(double %x)
   ret double %z
 }
 
 define double @fcanonicalize_f64_nnan(double %x) {
-; RV64-SOFT-LABEL: fcanonicalize_f64_nnan:
-; RV64-SOFT:       # %bb.0:
-; RV64-SOFT-NEXT:    addi sp, sp, -16
-; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 16
-; RV64-SOFT-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    .cfi_offset ra, -8
-; RV64-SOFT-NEXT:    mv a1, a0
-; RV64-SOFT-NEXT:    call fminimum_num
-; RV64-SOFT-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    .cfi_restore ra
-; RV64-SOFT-NEXT:    addi sp, sp, 16
-; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 0
-; RV64-SOFT-NEXT:    ret
-;
 ; CHECK-SOFT-RV64-LABEL: fcanonicalize_f64_nnan:
 ; CHECK-SOFT-RV64:       # %bb.0:
 ; CHECK-SOFT-RV64-NEXT:    addi sp, sp, -16
 ; CHECK-SOFT-RV64-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SOFT-RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
 ; CHECK-SOFT-RV64-NEXT:    .cfi_offset ra, -8
-; CHECK-SOFT-RV64-NEXT:    mv a1, a0
-; CHECK-SOFT-RV64-NEXT:    call fminimum_num
+; CHECK-SOFT-RV64-NEXT:    li a1, 1023
+; CHECK-SOFT-RV64-NEXT:    slli a1, a1, 52
+; CHECK-SOFT-RV64-NEXT:    call __muldf3
 ; CHECK-SOFT-RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; CHECK-SOFT-RV64-NEXT:    .cfi_restore ra
 ; CHECK-SOFT-RV64-NEXT:    addi sp, sp, 16
@@ -4270,9 +2608,9 @@ define double @fcanonicalize_f64_nnan(double %x) {
 ; CHECK-SOFT-RV32-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SOFT-RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; CHECK-SOFT-RV32-NEXT:    .cfi_offset ra, -4
-; CHECK-SOFT-RV32-NEXT:    mv a2, a0
-; CHECK-SOFT-RV32-NEXT:    mv a3, a1
-; CHECK-SOFT-RV32-NEXT:    call fminimum_num
+; CHECK-SOFT-RV32-NEXT:    lui a3, 261888
+; CHECK-SOFT-RV32-NEXT:    li a2, 0
+; CHECK-SOFT-RV32-NEXT:    call __muldf3
 ; CHECK-SOFT-RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; CHECK-SOFT-RV32-NEXT:    .cfi_restore ra
 ; CHECK-SOFT-RV32-NEXT:    addi sp, sp, 16
@@ -4288,54 +2626,11 @@ define double @fcanonicalize_f64_nnan(double %x) {
 ; CHECK-NOFP16-RV32:       # %bb.0:
 ; CHECK-NOFP16-RV32-NEXT:    fmin.d fa0, fa0, fa0
 ; CHECK-NOFP16-RV32-NEXT:    ret
-; RV32-SOFT-LABEL: fcanonicalize_f64_nnan:
-; RV32-SOFT:       # %bb.0:
-; RV32-SOFT-NEXT:    addi sp, sp, -16
-; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 16
-; RV32-SOFT-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    .cfi_offset ra, -4
-; RV32-SOFT-NEXT:    mv a2, a0
-; RV32-SOFT-NEXT:    mv a3, a1
-; RV32-SOFT-NEXT:    call fminimum_num
-; RV32-SOFT-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    .cfi_restore ra
-; RV32-SOFT-NEXT:    addi sp, sp, 16
-; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 0
-; RV32-SOFT-NEXT:    ret
   %z = call nnan double @llvm.canonicalize.f64(double %x)
   ret double %z
 }
 
 define <2 x double> @fcanonicalize_v2f64(<2 x double> %x) {
-; RV64-SOFT-LABEL: fcanonicalize_v2f64:
-; RV64-SOFT:       # %bb.0:
-; RV64-SOFT-NEXT:    addi sp, sp, -32
-; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 32
-; RV64-SOFT-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    .cfi_offset ra, -8
-; RV64-SOFT-NEXT:    .cfi_offset s0, -16
-; RV64-SOFT-NEXT:    .cfi_offset s1, -24
-; RV64-SOFT-NEXT:    mv s0, a1
-; RV64-SOFT-NEXT:    mv a1, a0
-; RV64-SOFT-NEXT:    call fminimum_num
-; RV64-SOFT-NEXT:    mv s1, a0
-; RV64-SOFT-NEXT:    mv a0, s0
-; RV64-SOFT-NEXT:    mv a1, s0
-; RV64-SOFT-NEXT:    call fminimum_num
-; RV64-SOFT-NEXT:    mv a1, a0
-; RV64-SOFT-NEXT:    mv a0, s1
-; RV64-SOFT-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    .cfi_restore ra
-; RV64-SOFT-NEXT:    .cfi_restore s0
-; RV64-SOFT-NEXT:    .cfi_restore s1
-; RV64-SOFT-NEXT:    addi sp, sp, 32
-; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 0
-; RV64-SOFT-NEXT:    ret
-;
 ; CHECK-SOFT-RV64-LABEL: fcanonicalize_v2f64:
 ; CHECK-SOFT-RV64:       # %bb.0:
 ; CHECK-SOFT-RV64-NEXT:    addi sp, sp, -32
@@ -4343,24 +2638,30 @@ define <2 x double> @fcanonicalize_v2f64(<2 x double> %x) {
 ; CHECK-SOFT-RV64-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
 ; CHECK-SOFT-RV64-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
 ; CHECK-SOFT-RV64-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
 ; CHECK-SOFT-RV64-NEXT:    .cfi_offset ra, -8
 ; CHECK-SOFT-RV64-NEXT:    .cfi_offset s0, -16
 ; CHECK-SOFT-RV64-NEXT:    .cfi_offset s1, -24
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset s2, -32
 ; CHECK-SOFT-RV64-NEXT:    mv s0, a1
-; CHECK-SOFT-RV64-NEXT:    mv a1, a0
-; CHECK-SOFT-RV64-NEXT:    call fminimum_num
-; CHECK-SOFT-RV64-NEXT:    mv s1, a0
+; CHECK-SOFT-RV64-NEXT:    li s1, 1023
+; CHECK-SOFT-RV64-NEXT:    slli s1, s1, 52
+; CHECK-SOFT-RV64-NEXT:    mv a1, s1
+; CHECK-SOFT-RV64-NEXT:    call __muldf3
+; CHECK-SOFT-RV64-NEXT:    mv s2, a0
 ; CHECK-SOFT-RV64-NEXT:    mv a0, s0
-; CHECK-SOFT-RV64-NEXT:    mv a1, s0
-; CHECK-SOFT-RV64-NEXT:    call fminimum_num
+; CHECK-SOFT-RV64-NEXT:    mv a1, s1
+; CHECK-SOFT-RV64-NEXT:    call __muldf3
 ; CHECK-SOFT-RV64-NEXT:    mv a1, a0
-; CHECK-SOFT-RV64-NEXT:    mv a0, s1
+; CHECK-SOFT-RV64-NEXT:    mv a0, s2
 ; CHECK-SOFT-RV64-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
 ; CHECK-SOFT-RV64-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-SOFT-RV64-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
 ; CHECK-SOFT-RV64-NEXT:    .cfi_restore ra
 ; CHECK-SOFT-RV64-NEXT:    .cfi_restore s0
 ; CHECK-SOFT-RV64-NEXT:    .cfi_restore s1
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore s2
 ; CHECK-SOFT-RV64-NEXT:    addi sp, sp, 32
 ; CHECK-SOFT-RV64-NEXT:    .cfi_def_cfa_offset 0
 ; CHECK-SOFT-RV64-NEXT:    ret
@@ -4394,20 +2695,22 @@ define <2 x double> @fcanonicalize_v2f64(<2 x double> %x) {
 ; CHECK-SOFT-RV32-NEXT:    .cfi_offset s3, -20
 ; CHECK-SOFT-RV32-NEXT:    .cfi_offset s4, -24
 ; CHECK-SOFT-RV32-NEXT:    lw a2, 0(a1)
-; CHECK-SOFT-RV32-NEXT:    lw a3, 4(a1)
+; CHECK-SOFT-RV32-NEXT:    lw a4, 4(a1)
 ; CHECK-SOFT-RV32-NEXT:    lw s0, 8(a1)
 ; CHECK-SOFT-RV32-NEXT:    lw s1, 12(a1)
 ; CHECK-SOFT-RV32-NEXT:    mv s2, a0
+; CHECK-SOFT-RV32-NEXT:    lui a3, 261888
 ; CHECK-SOFT-RV32-NEXT:    mv a0, a2
-; CHECK-SOFT-RV32-NEXT:    mv a1, a3
-; CHECK-SOFT-RV32-NEXT:    call fminimum_num
+; CHECK-SOFT-RV32-NEXT:    mv a1, a4
+; CHECK-SOFT-RV32-NEXT:    li a2, 0
+; CHECK-SOFT-RV32-NEXT:    call __muldf3
 ; CHECK-SOFT-RV32-NEXT:    mv s3, a0
 ; CHECK-SOFT-RV32-NEXT:    mv s4, a1
+; CHECK-SOFT-RV32-NEXT:    lui a3, 261888
 ; CHECK-SOFT-RV32-NEXT:    mv a0, s0
 ; CHECK-SOFT-RV32-NEXT:    mv a1, s1
-; CHECK-SOFT-RV32-NEXT:    mv a2, s0
-; CHECK-SOFT-RV32-NEXT:    mv a3, s1
-; CHECK-SOFT-RV32-NEXT:    call fminimum_num
+; CHECK-SOFT-RV32-NEXT:    li a2, 0
+; CHECK-SOFT-RV32-NEXT:    call __muldf3
 ; CHECK-SOFT-RV32-NEXT:    sw s3, 0(s2)
 ; CHECK-SOFT-RV32-NEXT:    sw s4, 4(s2)
 ; CHECK-SOFT-RV32-NEXT:    sw a0, 8(s2)
@@ -4439,90 +2742,11 @@ define <2 x double> @fcanonicalize_v2f64(<2 x double> %x) {
 ; CHECK-NOFP16-RV32-NEXT:    fmin.d fa0, fa0, fa0
 ; CHECK-NOFP16-RV32-NEXT:    fmin.d fa1, fa1, fa1
 ; CHECK-NOFP16-RV32-NEXT:    ret
-; RV32-SOFT-LABEL: fcanonicalize_v2f64:
-; RV32-SOFT:       # %bb.0:
-; RV32-SOFT-NEXT:    addi sp, sp, -32
-; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 32
-; RV32-SOFT-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    .cfi_offset ra, -4
-; RV32-SOFT-NEXT:    .cfi_offset s0, -8
-; RV32-SOFT-NEXT:    .cfi_offset s1, -12
-; RV32-SOFT-NEXT:    .cfi_offset s2, -16
-; RV32-SOFT-NEXT:    .cfi_offset s3, -20
-; RV32-SOFT-NEXT:    .cfi_offset s4, -24
-; RV32-SOFT-NEXT:    lw a2, 0(a1)
-; RV32-SOFT-NEXT:    lw a3, 4(a1)
-; RV32-SOFT-NEXT:    lw s0, 8(a1)
-; RV32-SOFT-NEXT:    lw s1, 12(a1)
-; RV32-SOFT-NEXT:    mv s2, a0
-; RV32-SOFT-NEXT:    mv a0, a2
-; RV32-SOFT-NEXT:    mv a1, a3
-; RV32-SOFT-NEXT:    call fminimum_num
-; RV32-SOFT-NEXT:    mv s3, a0
-; RV32-SOFT-NEXT:    mv s4, a1
-; RV32-SOFT-NEXT:    mv a0, s0
-; RV32-SOFT-NEXT:    mv a1, s1
-; RV32-SOFT-NEXT:    mv a2, s0
-; RV32-SOFT-NEXT:    mv a3, s1
-; RV32-SOFT-NEXT:    call fminimum_num
-; RV32-SOFT-NEXT:    sw s3, 0(s2)
-; RV32-SOFT-NEXT:    sw s4, 4(s2)
-; RV32-SOFT-NEXT:    sw a0, 8(s2)
-; RV32-SOFT-NEXT:    sw a1, 12(s2)
-; RV32-SOFT-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    .cfi_restore ra
-; RV32-SOFT-NEXT:    .cfi_restore s0
-; RV32-SOFT-NEXT:    .cfi_restore s1
-; RV32-SOFT-NEXT:    .cfi_restore s2
-; RV32-SOFT-NEXT:    .cfi_restore s3
-; RV32-SOFT-NEXT:    .cfi_restore s4
-; RV32-SOFT-NEXT:    addi sp, sp, 32
-; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 0
-; RV32-SOFT-NEXT:    ret
   %z = call <2 x double> @llvm.canonicalize.v2f64(<2 x double> %x)
   ret <2 x double> %z
 }
 
 define <2 x double> @fcanonicalize_v2f64_nnan(<2 x double> %x) {
-; RV64-SOFT-LABEL: fcanonicalize_v2f64_nnan:
-; RV64-SOFT:       # %bb.0:
-; RV64-SOFT-NEXT:    addi sp, sp, -32
-; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 32
-; RV64-SOFT-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    .cfi_offset ra, -8
-; RV64-SOFT-NEXT:    .cfi_offset s0, -16
-; RV64-SOFT-NEXT:    .cfi_offset s1, -24
-; RV64-SOFT-NEXT:    mv s0, a1
-; RV64-SOFT-NEXT:    mv a1, a0
-; RV64-SOFT-NEXT:    call fminimum_num
-; RV64-SOFT-NEXT:    mv s1, a0
-; RV64-SOFT-NEXT:    mv a0, s0
-; RV64-SOFT-NEXT:    mv a1, s0
-; RV64-SOFT-NEXT:    call fminimum_num
-; RV64-SOFT-NEXT:    mv a1, a0
-; RV64-SOFT-NEXT:    mv a0, s1
-; RV64-SOFT-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    .cfi_restore ra
-; RV64-SOFT-NEXT:    .cfi_restore s0
-; RV64-SOFT-NEXT:    .cfi_restore s1
-; RV64-SOFT-NEXT:    addi sp, sp, 32
-; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 0
-; RV64-SOFT-NEXT:    ret
-;
 ; CHECK-SOFT-RV64-LABEL: fcanonicalize_v2f64_nnan:
 ; CHECK-SOFT-RV64:       # %bb.0:
 ; CHECK-SOFT-RV64-NEXT:    addi sp, sp, -32
@@ -4530,24 +2754,30 @@ define <2 x double> @fcanonicalize_v2f64_nnan(<2 x double> %x) {
 ; CHECK-SOFT-RV64-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
 ; CHECK-SOFT-RV64-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
 ; CHECK-SOFT-RV64-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; CHECK-SOFT-RV64-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
 ; CHECK-SOFT-RV64-NEXT:    .cfi_offset ra, -8
 ; CHECK-SOFT-RV64-NEXT:    .cfi_offset s0, -16
 ; CHECK-SOFT-RV64-NEXT:    .cfi_offset s1, -24
+; CHECK-SOFT-RV64-NEXT:    .cfi_offset s2, -32
 ; CHECK-SOFT-RV64-NEXT:    mv s0, a1
-; CHECK-SOFT-RV64-NEXT:    mv a1, a0
-; CHECK-SOFT-RV64-NEXT:    call fminimum_num
-; CHECK-SOFT-RV64-NEXT:    mv s1, a0
+; CHECK-SOFT-RV64-NEXT:    li s1, 1023
+; CHECK-SOFT-RV64-NEXT:    slli s1, s1, 52
+; CHECK-SOFT-RV64-NEXT:    mv a1, s1
+; CHECK-SOFT-RV64-NEXT:    call __muldf3
+; CHECK-SOFT-RV64-NEXT:    mv s2, a0
 ; CHECK-SOFT-RV64-NEXT:    mv a0, s0
-; CHECK-SOFT-RV64-NEXT:    mv a1, s0
-; CHECK-SOFT-RV64-NEXT:    call fminimum_num
+; CHECK-SOFT-RV64-NEXT:    mv a1, s1
+; CHECK-SOFT-RV64-NEXT:    call __muldf3
 ; CHECK-SOFT-RV64-NEXT:    mv a1, a0
-; CHECK-SOFT-RV64-NEXT:    mv a0, s1
+; CHECK-SOFT-RV64-NEXT:    mv a0, s2
 ; CHECK-SOFT-RV64-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
 ; CHECK-SOFT-RV64-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-SOFT-RV64-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; CHECK-SOFT-RV64-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
 ; CHECK-SOFT-RV64-NEXT:    .cfi_restore ra
 ; CHECK-SOFT-RV64-NEXT:    .cfi_restore s0
 ; CHECK-SOFT-RV64-NEXT:    .cfi_restore s1
+; CHECK-SOFT-RV64-NEXT:    .cfi_restore s2
 ; CHECK-SOFT-RV64-NEXT:    addi sp, sp, 32
 ; CHECK-SOFT-RV64-NEXT:    .cfi_def_cfa_offset 0
 ; CHECK-SOFT-RV64-NEXT:    ret
@@ -4581,20 +2811,22 @@ define <2 x double> @fcanonicalize_v2f64_nnan(<2 x double> %x) {
 ; CHECK-SOFT-RV32-NEXT:    .cfi_offset s3, -20
 ; CHECK-SOFT-RV32-NEXT:    .cfi_offset s4, -24
 ; CHECK-SOFT-RV32-NEXT:    lw a2, 0(a1)
-; CHECK-SOFT-RV32-NEXT:    lw a3, 4(a1)
+; CHECK-SOFT-RV32-NEXT:    lw a4, 4(a1)
 ; CHECK-SOFT-RV32-NEXT:    lw s0, 8(a1)
 ; CHECK-SOFT-RV32-NEXT:    lw s1, 12(a1)
 ; CHECK-SOFT-RV32-NEXT:    mv s2, a0
+; CHECK-SOFT-RV32-NEXT:    lui a3, 261888
 ; CHECK-SOFT-RV32-NEXT:    mv a0, a2
-; CHECK-SOFT-RV32-NEXT:    mv a1, a3
-; CHECK-SOFT-RV32-NEXT:    call fminimum_num
+; CHECK-SOFT-RV32-NEXT:    mv a1, a4
+; CHECK-SOFT-RV32-NEXT:    li a2, 0
+; CHECK-SOFT-RV32-NEXT:    call __muldf3
 ; CHECK-SOFT-RV32-NEXT:    mv s3, a0
 ; CHECK-SOFT-RV32-NEXT:    mv s4, a1
+; CHECK-SOFT-RV32-NEXT:    lui a3, 261888
 ; CHECK-SOFT-RV32-NEXT:    mv a0, s0
 ; CHECK-SOFT-RV32-NEXT:    mv a1, s1
-; CHECK-SOFT-RV32-NEXT:    mv a2, s0
-; CHECK-SOFT-RV32-NEXT:    mv a3, s1
-; CHECK-SOFT-RV32-NEXT:    call fminimum_num
+; CHECK-SOFT-RV32-NEXT:    li a2, 0
+; CHECK-SOFT-RV32-NEXT:    call __muldf3
 ; CHECK-SOFT-RV32-NEXT:    sw s3, 0(s2)
 ; CHECK-SOFT-RV32-NEXT:    sw s4, 4(s2)
 ; CHECK-SOFT-RV32-NEXT:    sw a0, 8(s2)
@@ -4626,101 +2858,11 @@ define <2 x double> @fcanonicalize_v2f64_nnan(<2 x double> %x) {
 ; CHECK-NOFP16-RV32-NEXT:    fmin.d fa0, fa0, fa0
 ; CHECK-NOFP16-RV32-NEXT:    fmin.d fa1, fa1, fa1
 ; CHECK-NOFP16-RV32-NEXT:    ret
-; RV32-SOFT-LABEL: fcanonicalize_v2f64_nnan:
-; RV32-SOFT:       # %bb.0:
-; RV32-SOFT-NEXT:    addi sp, sp, -32
-; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 32
-; RV32-SOFT-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    .cfi_offset ra, -4
-; RV32-SOFT-NEXT:    .cfi_offset s0, -8
-; RV32-SOFT-NEXT:    .cfi_offset s1, -12
-; RV32-SOFT-NEXT:    .cfi_offset s2, -16
-; RV32-SOFT-NEXT:    .cfi_offset s3, -20
-; RV32-SOFT-NEXT:    .cfi_offset s4, -24
-; RV32-SOFT-NEXT:    lw a2, 0(a1)
-; RV32-SOFT-NEXT:    lw a3, 4(a1)
-; RV32-SOFT-NEXT:    lw s0, 8(a1)
-; RV32-SOFT-NEXT:    lw s1, 12(a1)
-; RV32-SOFT-NEXT:    mv s2, a0
-; RV32-SOFT-NEXT:    mv a0, a2
-; RV32-SOFT-NEXT:    mv a1, a3
-; RV32-SOFT-NEXT:    call fminimum_num
-; RV32-SOFT-NEXT:    mv s3, a0
-; RV32-SOFT-NEXT:    mv s4, a1
-; RV32-SOFT-NEXT:    mv a0, s0
-; RV32-SOFT-NEXT:    mv a1, s1
-; RV32-SOFT-NEXT:    mv a2, s0
-; RV32-SOFT-NEXT:    mv a3, s1
-; RV32-SOFT-NEXT:    call fminimum_num
-; RV32-SOFT-NEXT:    sw s3, 0(s2)
-; RV32-SOFT-NEXT:    sw s4, 4(s2)
-; RV32-SOFT-NEXT:    sw a0, 8(s2)
-; RV32-SOFT-NEXT:    sw a1, 12(s2)
-; RV32-SOFT-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    .cfi_restore ra
-; RV32-SOFT-NEXT:    .cfi_restore s0
-; RV32-SOFT-NEXT:    .cfi_restore s1
-; RV32-SOFT-NEXT:    .cfi_restore s2
-; RV32-SOFT-NEXT:    .cfi_restore s3
-; RV32-SOFT-NEXT:    .cfi_restore s4
-; RV32-SOFT-NEXT:    addi sp, sp, 32
-; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 0
-; RV32-SOFT-NEXT:    ret
   %z = call nnan <2 x double> @llvm.canonicalize.v2f64(<2 x double> %x)
   ret <2 x double> %z
 }
 
 define double @fcanonicalize_softfloat(double, double) unnamed_addr #0 {
-; RV64-SOFT-LABEL: fcanonicalize_softfloat:
-; RV64-SOFT:       # %bb.0: # %start
-; RV64-SOFT-NEXT:    addi sp, sp, -32
-; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 32
-; RV64-SOFT-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
-; RV64-SOFT-NEXT:    .cfi_offset ra, -8
-; RV64-SOFT-NEXT:    .cfi_offset s0, -16
-; RV64-SOFT-NEXT:    .cfi_offset s1, -24
-; RV64-SOFT-NEXT:    .cfi_offset s2, -32
-; RV64-SOFT-NEXT:    mv s0, a1
-; RV64-SOFT-NEXT:    mv s1, a0
-; RV64-SOFT-NEXT:    call __ltdf2
-; RV64-SOFT-NEXT:    srli s2, a0, 63
-; RV64-SOFT-NEXT:    mv a0, s1
-; RV64-SOFT-NEXT:    mv a1, s1
-; RV64-SOFT-NEXT:    call __unorddf2
-; RV64-SOFT-NEXT:    snez a0, a0
-; RV64-SOFT-NEXT:    or a0, a0, s2
-; RV64-SOFT-NEXT:    bnez a0, .LBB18_2
-; RV64-SOFT-NEXT:  # %bb.1: # %start
-; RV64-SOFT-NEXT:    mv s0, s1
-; RV64-SOFT-NEXT:  .LBB18_2: # %start
-; RV64-SOFT-NEXT:    mv a0, s0
-; RV64-SOFT-NEXT:    mv a1, s0
-; RV64-SOFT-NEXT:    call fminimum_num
-; RV64-SOFT-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
-; RV64-SOFT-NEXT:    .cfi_restore ra
-; RV64-SOFT-NEXT:    .cfi_restore s0
-; RV64-SOFT-NEXT:    .cfi_restore s1
-; RV64-SOFT-NEXT:    .cfi_restore s2
-; RV64-SOFT-NEXT:    addi sp, sp, 32
-; RV64-SOFT-NEXT:    .cfi_def_cfa_offset 0
-; RV64-SOFT-NEXT:    ret
-;
 ; CHECK-SOFT-RV64-LABEL: fcanonicalize_softfloat:
 ; CHECK-SOFT-RV64:       # %bb.0: # %start
 ; CHECK-SOFT-RV64-NEXT:    addi sp, sp, -32
@@ -4742,13 +2884,14 @@ define double @fcanonicalize_softfloat(double, double) unnamed_addr #0 {
 ; CHECK-SOFT-RV64-NEXT:    call __unorddf2
 ; CHECK-SOFT-RV64-NEXT:    snez a0, a0
 ; CHECK-SOFT-RV64-NEXT:    or a0, a0, s2
-; CHECK-SOFT-RV64-NEXT:    bnez a0, .LBB18_2
+; CHECK-SOFT-RV64-NEXT:    bnez a0, .LBB15_2
 ; CHECK-SOFT-RV64-NEXT:  # %bb.1: # %start
 ; CHECK-SOFT-RV64-NEXT:    mv s0, s1
-; CHECK-SOFT-RV64-NEXT:  .LBB18_2: # %start
+; CHECK-SOFT-RV64-NEXT:  .LBB15_2: # %start
+; CHECK-SOFT-RV64-NEXT:    li a1, 1023
+; CHECK-SOFT-RV64-NEXT:    slli a1, a1, 52
 ; CHECK-SOFT-RV64-NEXT:    mv a0, s0
-; CHECK-SOFT-RV64-NEXT:    mv a1, s0
-; CHECK-SOFT-RV64-NEXT:    call fminimum_num
+; CHECK-SOFT-RV64-NEXT:    call __muldf3
 ; CHECK-SOFT-RV64-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
 ; CHECK-SOFT-RV64-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-SOFT-RV64-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
@@ -4767,10 +2910,10 @@ define double @fcanonicalize_softfloat(double, double) unnamed_addr #0 {
 ; CHECK-FP16-RV64-NEXT:    feq.d a1, fa0, fa0
 ; CHECK-FP16-RV64-NEXT:    xori a1, a1, 1
 ; CHECK-FP16-RV64-NEXT:    or a0, a1, a0
-; CHECK-FP16-RV64-NEXT:    bnez a0, .LBB18_2
+; CHECK-FP16-RV64-NEXT:    bnez a0, .LBB15_2
 ; CHECK-FP16-RV64-NEXT:  # %bb.1: # %start
 ; CHECK-FP16-RV64-NEXT:    fmv.d fa1, fa0
-; CHECK-FP16-RV64-NEXT:  .LBB18_2: # %start
+; CHECK-FP16-RV64-NEXT:  .LBB15_2: # %start
 ; CHECK-FP16-RV64-NEXT:    fmin.d fa0, fa1, fa1
 ; CHECK-FP16-RV64-NEXT:    ret
 ;
@@ -4780,10 +2923,10 @@ define double @fcanonicalize_softfloat(double, double) unnamed_addr #0 {
 ; CHECK-NOFP16-RV64-NEXT:    feq.d a1, fa0, fa0
 ; CHECK-NOFP16-RV64-NEXT:    xori a1, a1, 1
 ; CHECK-NOFP16-RV64-NEXT:    or a0, a1, a0
-; CHECK-NOFP16-RV64-NEXT:    bnez a0, .LBB18_2
+; CHECK-NOFP16-RV64-NEXT:    bnez a0, .LBB15_2
 ; CHECK-NOFP16-RV64-NEXT:  # %bb.1: # %start
 ; CHECK-NOFP16-RV64-NEXT:    fmv.d fa1, fa0
-; CHECK-NOFP16-RV64-NEXT:  .LBB18_2: # %start
+; CHECK-NOFP16-RV64-NEXT:  .LBB15_2: # %start
 ; CHECK-NOFP16-RV64-NEXT:    fmin.d fa0, fa1, fa1
 ; CHECK-NOFP16-RV64-NEXT:    ret
 ;
@@ -4816,16 +2959,16 @@ define double @fcanonicalize_softfloat(double, double) unnamed_addr #0 {
 ; CHECK-SOFT-RV32-NEXT:    call __unorddf2
 ; CHECK-SOFT-RV32-NEXT:    snez a0, a0
 ; CHECK-SOFT-RV32-NEXT:    or a0, a0, s4
-; CHECK-SOFT-RV32-NEXT:    bnez a0, .LBB18_2
+; CHECK-SOFT-RV32-NEXT:    bnez a0, .LBB15_2
 ; CHECK-SOFT-RV32-NEXT:  # %bb.1: # %start
 ; CHECK-SOFT-RV32-NEXT:    mv s1, s3
 ; CHECK-SOFT-RV32-NEXT:    mv s0, s2
-; CHECK-SOFT-RV32-NEXT:  .LBB18_2: # %start
+; CHECK-SOFT-RV32-NEXT:  .LBB15_2: # %start
+; CHECK-SOFT-RV32-NEXT:    lui a3, 261888
 ; CHECK-SOFT-RV32-NEXT:    mv a0, s1
 ; CHECK-SOFT-RV32-NEXT:    mv a1, s0
-; CHECK-SOFT-RV32-NEXT:    mv a2, s1
-; CHECK-SOFT-RV32-NEXT:    mv a3, s0
-; CHECK-SOFT-RV32-NEXT:    call fminimum_num
+; CHECK-SOFT-RV32-NEXT:    li a2, 0
+; CHECK-SOFT-RV32-NEXT:    call __muldf3
 ; CHECK-SOFT-RV32-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
 ; CHECK-SOFT-RV32-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; CHECK-SOFT-RV32-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
@@ -4848,10 +2991,10 @@ define double @fcanonicalize_softfloat(double, double) unnamed_addr #0 {
 ; CHECK-FP16-RV32-NEXT:    feq.d a1, fa0, fa0
 ; CHECK-FP16-RV32-NEXT:    xori a1, a1, 1
 ; CHECK-FP16-RV32-NEXT:    or a0, a1, a0
-; CHECK-FP16-RV32-NEXT:    bnez a0, .LBB18_2
+; CHECK-FP16-RV32-NEXT:    bnez a0, .LBB15_2
 ; CHECK-FP16-RV32-NEXT:  # %bb.1: # %start
 ; CHECK-FP16-RV32-NEXT:    fmv.d fa1, fa0
-; CHECK-FP16-RV32-NEXT:  .LBB18_2: # %start
+; CHECK-FP16-RV32-NEXT:  .LBB15_2: # %start
 ; CHECK-FP16-RV32-NEXT:    fmin.d fa0, fa1, fa1
 ; CHECK-FP16-RV32-NEXT:    ret
 ;
@@ -4861,66 +3004,12 @@ define double @fcanonicalize_softfloat(double, double) unnamed_addr #0 {
 ; CHECK-NOFP16-RV32-NEXT:    feq.d a1, fa0, fa0
 ; CHECK-NOFP16-RV32-NEXT:    xori a1, a1, 1
 ; CHECK-NOFP16-RV32-NEXT:    or a0, a1, a0
-; CHECK-NOFP16-RV32-NEXT:    bnez a0, .LBB18_2
+; CHECK-NOFP16-RV32-NEXT:    bnez a0, .LBB15_2
 ; CHECK-NOFP16-RV32-NEXT:  # %bb.1: # %start
 ; CHECK-NOFP16-RV32-NEXT:    fmv.d fa1, fa0
-; CHECK-NOFP16-RV32-NEXT:  .LBB18_2: # %start
+; CHECK-NOFP16-RV32-NEXT:  .LBB15_2: # %start
 ; CHECK-NOFP16-RV32-NEXT:    fmin.d fa0, fa1, fa1
 ; CHECK-NOFP16-RV32-NEXT:    ret
-; RV32-SOFT-LABEL: fcanonicalize_softfloat:
-; RV32-SOFT:       # %bb.0: # %start
-; RV32-SOFT-NEXT:    addi sp, sp, -32
-; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 32
-; RV32-SOFT-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
-; RV32-SOFT-NEXT:    .cfi_offset ra, -4
-; RV32-SOFT-NEXT:    .cfi_offset s0, -8
-; RV32-SOFT-NEXT:    .cfi_offset s1, -12
-; RV32-SOFT-NEXT:    .cfi_offset s2, -16
-; RV32-SOFT-NEXT:    .cfi_offset s3, -20
-; RV32-SOFT-NEXT:    .cfi_offset s4, -24
-; RV32-SOFT-NEXT:    mv s0, a3
-; RV32-SOFT-NEXT:    mv s1, a2
-; RV32-SOFT-NEXT:    mv s2, a1
-; RV32-SOFT-NEXT:    mv s3, a0
-; RV32-SOFT-NEXT:    call __ltdf2
-; RV32-SOFT-NEXT:    srli s4, a0, 31
-; RV32-SOFT-NEXT:    mv a0, s3
-; RV32-SOFT-NEXT:    mv a1, s2
-; RV32-SOFT-NEXT:    mv a2, s3
-; RV32-SOFT-NEXT:    mv a3, s2
-; RV32-SOFT-NEXT:    call __unorddf2
-; RV32-SOFT-NEXT:    snez a0, a0
-; RV32-SOFT-NEXT:    or a0, a0, s4
-; RV32-SOFT-NEXT:    bnez a0, .LBB18_2
-; RV32-SOFT-NEXT:  # %bb.1: # %start
-; RV32-SOFT-NEXT:    mv s1, s3
-; RV32-SOFT-NEXT:    mv s0, s2
-; RV32-SOFT-NEXT:  .LBB18_2: # %start
-; RV32-SOFT-NEXT:    mv a0, s1
-; RV32-SOFT-NEXT:    mv a1, s0
-; RV32-SOFT-NEXT:    mv a2, s1
-; RV32-SOFT-NEXT:    mv a3, s0
-; RV32-SOFT-NEXT:    call fminimum_num
-; RV32-SOFT-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
-; RV32-SOFT-NEXT:    .cfi_restore ra
-; RV32-SOFT-NEXT:    .cfi_restore s0
-; RV32-SOFT-NEXT:    .cfi_restore s1
-; RV32-SOFT-NEXT:    .cfi_restore s2
-; RV32-SOFT-NEXT:    .cfi_restore s3
-; RV32-SOFT-NEXT:    .cfi_restore s4
-; RV32-SOFT-NEXT:    addi sp, sp, 32
-; RV32-SOFT-NEXT:    .cfi_def_cfa_offset 0
-; RV32-SOFT-NEXT:    ret
 start:
   %2 = fcmp olt double %0, %1
   %3 = fcmp uno double %0, 0.000000e+00

>From 101a0cea7e8be5830811591568802d75d69f72d9 Mon Sep 17 00:00:00 2001
From: Kevin Per <kevin.per at protonmail.com>
Date: Wed, 3 Dec 2025 21:01:22 +0100
Subject: [PATCH 5/5] [DAG]: Copied fcanonicalize expansion to softening

---
 .../SelectionDAG/LegalizeFloatTypes.cpp       | 35 +++++++++++--------
 1 file changed, 20 insertions(+), 15 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 1606ef0f52f4d..801566c2cb6e2 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -316,21 +316,26 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) {
 SDValue DAGTypeLegalizer::SoftenFloatRes_FCANONICALIZE(SDNode *N) {
   SDLoc dl(N);
 
-  // Create a constant 1.0, then soften it to integer and record the mapping.
-  SDValue CstFP = DAG.getConstantFP(1.0, dl, N->getValueType(0));
-  SDValue CstInt = SoftenFloatRes_ConstantFP(CstFP.getNode());
-
-  if (!SoftenedFloats[getTableId(CstFP)])
-    SetSoftenedFloat(CstFP, CstInt);
-
-  // Multiply the input by 1.0 to canonicalize it. We use `MorphNodeTo` to
-  // avoid constant folding, which happens with `DAG.getNode(ISD::FMUL, ...)`.
-  SDNode *Node =
-      DAG.MorphNodeTo(N, ISD::FMUL, DAG.getVTList(N->getValueType(0)),
-                      {N->getOperand(0), CstFP});
-  return SoftenFloatRes_Binary(
-      Node, GetFPLibCall(N->getValueType(0), RTLIB::MUL_F32, RTLIB::MUL_F64,
-                         RTLIB::MUL_F80, RTLIB::MUL_F128, RTLIB::MUL_PPCF128));
+  // Soften llvm.canonicalize.f* as a multiplication by 1.0, as suggested in
+  // the LangRef:
+  // https://llvm.org/docs/LangRef.html#llvm-canonicalize-intrinsic.
+  // A STRICT_FMUL is used even outside a strict_fp context in order to
+  // guarantee that the canonicalization is not optimized away by later
+  // passes. The chain result it introduces is intentionally ignored, since
+  // no ordering requirement is intended here.
+
+  // Build the STRICT_FMUL of the operand with 1.0, chained to the entry node.
+  SDValue Operand = N->getOperand(0);
+  EVT VT = Operand.getValueType();
+  SDValue One = DAG.getConstantFP(1.0, dl, VT);
+  SDValue Chain = DAG.getEntryNode();
+  // Carry over the flags of the canonicalize node and additionally set
+  // NoFPExcept on the multiplication.
+  SDNodeFlags CanonicalizeFlags = N->getFlags();
+  CanonicalizeFlags.setNoFPExcept(true);
+  SDValue Mul = DAG.getNode(ISD::STRICT_FMUL, dl, {VT, MVT::Other},
+                            {Chain, Operand, One}, CanonicalizeFlags);
+  return BitConvertToInteger(Mul);
 }
 
 SDValue DAGTypeLegalizer::SoftenFloatRes_FMINNUM(SDNode *N) {



More information about the llvm-commits mailing list