[llvm] 380fd82 - [RISCV][test] Add non-zfbfmin RUN lines to bfloat-convert.ll

Alex Bradbury via llvm-commits llvm-commits@lists.llvm.org
Mon Aug 7 06:39:52 PDT 2023


Author: Alex Bradbury
Date: 2023-08-07T14:39:12+01:00
New Revision: 380fd8201d14a5b2f3f51fc954958bd85d663f5a

URL: https://github.com/llvm/llvm-project/commit/380fd8201d14a5b2f3f51fc954958bd85d663f5a
DIFF: https://github.com/llvm/llvm-project/commit/380fd8201d14a5b2f3f51fc954958bd85d663f5a.diff

LOG: [RISCV][test] Add non-zfbfmin RUN lines to bfloat-convert.ll

As requested in review for https://reviews.llvm.org/D156990

Additionally, this consistently uses the ilp32d/lp64d ABIs when the D
extension is enabled.
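
The new RV32ID/RV64ID lines exercise the software lowering used when Zfbfmin
is unavailable: an fpext from bfloat to float reduces to a 16-bit left shift
of the bit pattern (bfloat is the upper half of an IEEE-754 binary32), while
an fptrunc to bfloat falls back to the __truncsfbf2 libcall. A rough C sketch
of the widening step (illustrative only; the helper name is an assumption,
not something from the test):

    #include <stdint.h>
    #include <string.h>

    /* Illustrative sketch of the bfloat -> float widening the RV32ID/RV64ID
       check lines perform with slli + fmv.w.x: place the 16 bfloat bits in
       the high half of a 32-bit word and reinterpret it as a float. */
    static float bf16_bits_to_float(uint16_t bits) {
        uint32_t w = (uint32_t)bits << 16; /* matches "slli a0, a0, 16" */
        float f;
        memcpy(&f, &w, sizeof f);          /* matches "fmv.w.x fa5, a0" */
        return f;
    }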

Added: 
    

Modified: 
    llvm/test/CodeGen/RISCV/bfloat-convert.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/RISCV/bfloat-convert.ll b/llvm/test/CodeGen/RISCV/bfloat-convert.ll
index a1b1f3e000b4ff..3aacfb4236b1ff 100644
--- a/llvm/test/CodeGen/RISCV/bfloat-convert.ll
+++ b/llvm/test/CodeGen/RISCV/bfloat-convert.ll
@@ -1,62 +1,120 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
 ; RUN: llc -mtriple=riscv32 -mattr=+experimental-zfbfmin -verify-machineinstrs \
-; RUN:   -target-abi ilp32f < %s | FileCheck -check-prefixes=CHECK32,RV32IZFBFMIN %s
+; RUN:   -target-abi ilp32f < %s | FileCheck -check-prefixes=CHECK32ZFBFMIN,RV32IZFBFMIN %s
 ; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-zfbfmin -verify-machineinstrs \
-; RUN:   -target-abi ilp32d < %s | FileCheck -check-prefixes=CHECK32,R32IDZFBFMIN %s
+; RUN:   -target-abi ilp32d < %s | FileCheck -check-prefixes=CHECK32ZFBFMIN,R32IDZFBFMIN %s
+; RUN: llc -mtriple=riscv32 -mattr=+d -verify-machineinstrs \
+; RUN:   -target-abi ilp32d < %s | FileCheck -check-prefixes=RV32ID %s
 ; RUN: llc -mtriple=riscv64 -mattr=+experimental-zfbfmin -verify-machineinstrs \
-; RUN:   -target-abi lp64f < %s | FileCheck -check-prefixes=CHECK64,RV64IZFBFMIN %s
+; RUN:   -target-abi lp64f < %s | FileCheck -check-prefixes=CHECK64ZFBFMIN,RV64IZFBFMIN %s
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfbfmin -verify-machineinstrs \
-; RUN:   -target-abi lp64f < %s | FileCheck -check-prefixes=CHECK64,RV64IDZFBFMIN %s
+; RUN:   -target-abi lp64d < %s | FileCheck -check-prefixes=CHECK64ZFBFMIN,RV64IDZFBFMIN %s
+; RUN: llc -mtriple=riscv64 -mattr=+d -verify-machineinstrs \
+; RUN:   -target-abi lp64d < %s | FileCheck -check-prefixes=RV64ID %s
 
 ; These tests descend from float-arith.ll, where each function was targeted at
 ; a particular RISC-V FPU instruction.
 
 define i16 @fcvt_si_bf16(bfloat %a) nounwind {
-; CHECK32-LABEL: fcvt_si_bf16:
-; CHECK32:       # %bb.0:
-; CHECK32-NEXT:    fcvt.s.bf16 fa5, fa0, rne
-; CHECK32-NEXT:    fcvt.w.s a0, fa5, rtz
-; CHECK32-NEXT:    ret
-;
-; CHECK64-LABEL: fcvt_si_bf16:
-; CHECK64:       # %bb.0:
-; CHECK64-NEXT:    fcvt.s.bf16 fa5, fa0, rne
-; CHECK64-NEXT:    fcvt.l.s a0, fa5, rtz
-; CHECK64-NEXT:    ret
+; CHECK32ZFBFMIN-LABEL: fcvt_si_bf16:
+; CHECK32ZFBFMIN:       # %bb.0:
+; CHECK32ZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa0, rne
+; CHECK32ZFBFMIN-NEXT:    fcvt.w.s a0, fa5, rtz
+; CHECK32ZFBFMIN-NEXT:    ret
+;
+; RV32ID-LABEL: fcvt_si_bf16:
+; RV32ID:       # %bb.0:
+; RV32ID-NEXT:    fmv.x.w a0, fa0
+; RV32ID-NEXT:    slli a0, a0, 16
+; RV32ID-NEXT:    fmv.w.x fa5, a0
+; RV32ID-NEXT:    fcvt.w.s a0, fa5, rtz
+; RV32ID-NEXT:    ret
+;
+; CHECK64ZFBFMIN-LABEL: fcvt_si_bf16:
+; CHECK64ZFBFMIN:       # %bb.0:
+; CHECK64ZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa0, rne
+; CHECK64ZFBFMIN-NEXT:    fcvt.l.s a0, fa5, rtz
+; CHECK64ZFBFMIN-NEXT:    ret
+;
+; RV64ID-LABEL: fcvt_si_bf16:
+; RV64ID:       # %bb.0:
+; RV64ID-NEXT:    fmv.x.w a0, fa0
+; RV64ID-NEXT:    slli a0, a0, 48
+; RV64ID-NEXT:    srli a0, a0, 48
+; RV64ID-NEXT:    slli a0, a0, 16
+; RV64ID-NEXT:    fmv.w.x fa5, a0
+; RV64ID-NEXT:    fcvt.l.s a0, fa5, rtz
+; RV64ID-NEXT:    ret
   %1 = fptosi bfloat %a to i16
   ret i16 %1
 }
 
 define i16 @fcvt_si_bf16_sat(bfloat %a) nounwind {
-; CHECK32-LABEL: fcvt_si_bf16_sat:
-; CHECK32:       # %bb.0: # %start
-; CHECK32-NEXT:    fcvt.s.bf16 fa5, fa0
-; CHECK32-NEXT:    feq.s a0, fa5, fa5
-; CHECK32-NEXT:    neg a0, a0
-; CHECK32-NEXT:    lui a1, %hi(.LCPI1_0)
-; CHECK32-NEXT:    flw fa4, %lo(.LCPI1_0)(a1)
-; CHECK32-NEXT:    lui a1, 815104
-; CHECK32-NEXT:    fmv.w.x fa3, a1
-; CHECK32-NEXT:    fmax.s fa5, fa5, fa3
-; CHECK32-NEXT:    fmin.s fa5, fa5, fa4
-; CHECK32-NEXT:    fcvt.w.s a1, fa5, rtz
-; CHECK32-NEXT:    and a0, a0, a1
-; CHECK32-NEXT:    ret
-;
-; CHECK64-LABEL: fcvt_si_bf16_sat:
-; CHECK64:       # %bb.0: # %start
-; CHECK64-NEXT:    fcvt.s.bf16 fa5, fa0
-; CHECK64-NEXT:    feq.s a0, fa5, fa5
-; CHECK64-NEXT:    lui a1, %hi(.LCPI1_0)
-; CHECK64-NEXT:    flw fa4, %lo(.LCPI1_0)(a1)
-; CHECK64-NEXT:    lui a1, 815104
-; CHECK64-NEXT:    fmv.w.x fa3, a1
-; CHECK64-NEXT:    fmax.s fa5, fa5, fa3
-; CHECK64-NEXT:    neg a0, a0
-; CHECK64-NEXT:    fmin.s fa5, fa5, fa4
-; CHECK64-NEXT:    fcvt.l.s a1, fa5, rtz
-; CHECK64-NEXT:    and a0, a0, a1
-; CHECK64-NEXT:    ret
+; CHECK32ZFBFMIN-LABEL: fcvt_si_bf16_sat:
+; CHECK32ZFBFMIN:       # %bb.0: # %start
+; CHECK32ZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK32ZFBFMIN-NEXT:    feq.s a0, fa5, fa5
+; CHECK32ZFBFMIN-NEXT:    neg a0, a0
+; CHECK32ZFBFMIN-NEXT:    lui a1, %hi(.LCPI1_0)
+; CHECK32ZFBFMIN-NEXT:    flw fa4, %lo(.LCPI1_0)(a1)
+; CHECK32ZFBFMIN-NEXT:    lui a1, 815104
+; CHECK32ZFBFMIN-NEXT:    fmv.w.x fa3, a1
+; CHECK32ZFBFMIN-NEXT:    fmax.s fa5, fa5, fa3
+; CHECK32ZFBFMIN-NEXT:    fmin.s fa5, fa5, fa4
+; CHECK32ZFBFMIN-NEXT:    fcvt.w.s a1, fa5, rtz
+; CHECK32ZFBFMIN-NEXT:    and a0, a0, a1
+; CHECK32ZFBFMIN-NEXT:    ret
+;
+; RV32ID-LABEL: fcvt_si_bf16_sat:
+; RV32ID:       # %bb.0: # %start
+; RV32ID-NEXT:    fmv.x.w a0, fa0
+; RV32ID-NEXT:    slli a0, a0, 16
+; RV32ID-NEXT:    fmv.w.x fa5, a0
+; RV32ID-NEXT:    feq.s a0, fa5, fa5
+; RV32ID-NEXT:    lui a1, %hi(.LCPI1_0)
+; RV32ID-NEXT:    flw fa4, %lo(.LCPI1_0)(a1)
+; RV32ID-NEXT:    lui a1, 815104
+; RV32ID-NEXT:    fmv.w.x fa3, a1
+; RV32ID-NEXT:    fmax.s fa5, fa5, fa3
+; RV32ID-NEXT:    neg a0, a0
+; RV32ID-NEXT:    fmin.s fa5, fa5, fa4
+; RV32ID-NEXT:    fcvt.w.s a1, fa5, rtz
+; RV32ID-NEXT:    and a0, a0, a1
+; RV32ID-NEXT:    ret
+;
+; CHECK64ZFBFMIN-LABEL: fcvt_si_bf16_sat:
+; CHECK64ZFBFMIN:       # %bb.0: # %start
+; CHECK64ZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK64ZFBFMIN-NEXT:    feq.s a0, fa5, fa5
+; CHECK64ZFBFMIN-NEXT:    lui a1, %hi(.LCPI1_0)
+; CHECK64ZFBFMIN-NEXT:    flw fa4, %lo(.LCPI1_0)(a1)
+; CHECK64ZFBFMIN-NEXT:    lui a1, 815104
+; CHECK64ZFBFMIN-NEXT:    fmv.w.x fa3, a1
+; CHECK64ZFBFMIN-NEXT:    fmax.s fa5, fa5, fa3
+; CHECK64ZFBFMIN-NEXT:    neg a0, a0
+; CHECK64ZFBFMIN-NEXT:    fmin.s fa5, fa5, fa4
+; CHECK64ZFBFMIN-NEXT:    fcvt.l.s a1, fa5, rtz
+; CHECK64ZFBFMIN-NEXT:    and a0, a0, a1
+; CHECK64ZFBFMIN-NEXT:    ret
+;
+; RV64ID-LABEL: fcvt_si_bf16_sat:
+; RV64ID:       # %bb.0: # %start
+; RV64ID-NEXT:    fmv.x.w a0, fa0
+; RV64ID-NEXT:    slli a0, a0, 48
+; RV64ID-NEXT:    srli a0, a0, 48
+; RV64ID-NEXT:    slli a0, a0, 16
+; RV64ID-NEXT:    fmv.w.x fa5, a0
+; RV64ID-NEXT:    feq.s a0, fa5, fa5
+; RV64ID-NEXT:    lui a1, %hi(.LCPI1_0)
+; RV64ID-NEXT:    flw fa4, %lo(.LCPI1_0)(a1)
+; RV64ID-NEXT:    lui a1, 815104
+; RV64ID-NEXT:    fmv.w.x fa3, a1
+; RV64ID-NEXT:    fmax.s fa5, fa5, fa3
+; RV64ID-NEXT:    neg a0, a0
+; RV64ID-NEXT:    fmin.s fa5, fa5, fa4
+; RV64ID-NEXT:    fcvt.l.s a1, fa5, rtz
+; RV64ID-NEXT:    and a0, a0, a1
+; RV64ID-NEXT:    ret
 start:
   %0 = tail call i16 @llvm.fptosi.sat.i16.bf16(bfloat %a)
   ret i16 %0
@@ -64,43 +122,89 @@ start:
 declare i16 @llvm.fptosi.sat.i16.bf16(bfloat)
 
 define i16 @fcvt_ui_bf16(bfloat %a) nounwind {
-; CHECK32-LABEL: fcvt_ui_bf16:
-; CHECK32:       # %bb.0:
-; CHECK32-NEXT:    fcvt.s.bf16 fa5, fa0, rne
-; CHECK32-NEXT:    fcvt.wu.s a0, fa5, rtz
-; CHECK32-NEXT:    ret
-;
-; CHECK64-LABEL: fcvt_ui_bf16:
-; CHECK64:       # %bb.0:
-; CHECK64-NEXT:    fcvt.s.bf16 fa5, fa0, rne
-; CHECK64-NEXT:    fcvt.lu.s a0, fa5, rtz
-; CHECK64-NEXT:    ret
+; CHECK32ZFBFMIN-LABEL: fcvt_ui_bf16:
+; CHECK32ZFBFMIN:       # %bb.0:
+; CHECK32ZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa0, rne
+; CHECK32ZFBFMIN-NEXT:    fcvt.wu.s a0, fa5, rtz
+; CHECK32ZFBFMIN-NEXT:    ret
+;
+; RV32ID-LABEL: fcvt_ui_bf16:
+; RV32ID:       # %bb.0:
+; RV32ID-NEXT:    fmv.x.w a0, fa0
+; RV32ID-NEXT:    slli a0, a0, 16
+; RV32ID-NEXT:    fmv.w.x fa5, a0
+; RV32ID-NEXT:    fcvt.wu.s a0, fa5, rtz
+; RV32ID-NEXT:    ret
+;
+; CHECK64ZFBFMIN-LABEL: fcvt_ui_bf16:
+; CHECK64ZFBFMIN:       # %bb.0:
+; CHECK64ZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa0, rne
+; CHECK64ZFBFMIN-NEXT:    fcvt.lu.s a0, fa5, rtz
+; CHECK64ZFBFMIN-NEXT:    ret
+;
+; RV64ID-LABEL: fcvt_ui_bf16:
+; RV64ID:       # %bb.0:
+; RV64ID-NEXT:    fmv.x.w a0, fa0
+; RV64ID-NEXT:    slli a0, a0, 48
+; RV64ID-NEXT:    srli a0, a0, 48
+; RV64ID-NEXT:    slli a0, a0, 16
+; RV64ID-NEXT:    fmv.w.x fa5, a0
+; RV64ID-NEXT:    fcvt.lu.s a0, fa5, rtz
+; RV64ID-NEXT:    ret
   %1 = fptoui bfloat %a to i16
   ret i16 %1
 }
 
 define i16 @fcvt_ui_bf16_sat(bfloat %a) nounwind {
-; CHECK32-LABEL: fcvt_ui_bf16_sat:
-; CHECK32:       # %bb.0: # %start
-; CHECK32-NEXT:    lui a0, %hi(.LCPI3_0)
-; CHECK32-NEXT:    flw fa5, %lo(.LCPI3_0)(a0)
-; CHECK32-NEXT:    fcvt.s.bf16 fa4, fa0
-; CHECK32-NEXT:    fmv.w.x fa3, zero
-; CHECK32-NEXT:    fmax.s fa4, fa4, fa3
-; CHECK32-NEXT:    fmin.s fa5, fa4, fa5
-; CHECK32-NEXT:    fcvt.wu.s a0, fa5, rtz
-; CHECK32-NEXT:    ret
-;
-; CHECK64-LABEL: fcvt_ui_bf16_sat:
-; CHECK64:       # %bb.0: # %start
-; CHECK64-NEXT:    lui a0, %hi(.LCPI3_0)
-; CHECK64-NEXT:    flw fa5, %lo(.LCPI3_0)(a0)
-; CHECK64-NEXT:    fcvt.s.bf16 fa4, fa0
-; CHECK64-NEXT:    fmv.w.x fa3, zero
-; CHECK64-NEXT:    fmax.s fa4, fa4, fa3
-; CHECK64-NEXT:    fmin.s fa5, fa4, fa5
-; CHECK64-NEXT:    fcvt.lu.s a0, fa5, rtz
-; CHECK64-NEXT:    ret
+; CHECK32ZFBFMIN-LABEL: fcvt_ui_bf16_sat:
+; CHECK32ZFBFMIN:       # %bb.0: # %start
+; CHECK32ZFBFMIN-NEXT:    lui a0, %hi(.LCPI3_0)
+; CHECK32ZFBFMIN-NEXT:    flw fa5, %lo(.LCPI3_0)(a0)
+; CHECK32ZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa0
+; CHECK32ZFBFMIN-NEXT:    fmv.w.x fa3, zero
+; CHECK32ZFBFMIN-NEXT:    fmax.s fa4, fa4, fa3
+; CHECK32ZFBFMIN-NEXT:    fmin.s fa5, fa4, fa5
+; CHECK32ZFBFMIN-NEXT:    fcvt.wu.s a0, fa5, rtz
+; CHECK32ZFBFMIN-NEXT:    ret
+;
+; RV32ID-LABEL: fcvt_ui_bf16_sat:
+; RV32ID:       # %bb.0: # %start
+; RV32ID-NEXT:    fmv.x.w a0, fa0
+; RV32ID-NEXT:    slli a0, a0, 16
+; RV32ID-NEXT:    lui a1, %hi(.LCPI3_0)
+; RV32ID-NEXT:    flw fa5, %lo(.LCPI3_0)(a1)
+; RV32ID-NEXT:    fmv.w.x fa4, a0
+; RV32ID-NEXT:    fmv.w.x fa3, zero
+; RV32ID-NEXT:    fmax.s fa4, fa4, fa3
+; RV32ID-NEXT:    fmin.s fa5, fa4, fa5
+; RV32ID-NEXT:    fcvt.wu.s a0, fa5, rtz
+; RV32ID-NEXT:    ret
+;
+; CHECK64ZFBFMIN-LABEL: fcvt_ui_bf16_sat:
+; CHECK64ZFBFMIN:       # %bb.0: # %start
+; CHECK64ZFBFMIN-NEXT:    lui a0, %hi(.LCPI3_0)
+; CHECK64ZFBFMIN-NEXT:    flw fa5, %lo(.LCPI3_0)(a0)
+; CHECK64ZFBFMIN-NEXT:    fcvt.s.bf16 fa4, fa0
+; CHECK64ZFBFMIN-NEXT:    fmv.w.x fa3, zero
+; CHECK64ZFBFMIN-NEXT:    fmax.s fa4, fa4, fa3
+; CHECK64ZFBFMIN-NEXT:    fmin.s fa5, fa4, fa5
+; CHECK64ZFBFMIN-NEXT:    fcvt.lu.s a0, fa5, rtz
+; CHECK64ZFBFMIN-NEXT:    ret
+;
+; RV64ID-LABEL: fcvt_ui_bf16_sat:
+; RV64ID:       # %bb.0: # %start
+; RV64ID-NEXT:    lui a0, %hi(.LCPI3_0)
+; RV64ID-NEXT:    flw fa5, %lo(.LCPI3_0)(a0)
+; RV64ID-NEXT:    fmv.x.w a0, fa0
+; RV64ID-NEXT:    slli a0, a0, 48
+; RV64ID-NEXT:    srli a0, a0, 48
+; RV64ID-NEXT:    slli a0, a0, 16
+; RV64ID-NEXT:    fmv.w.x fa4, a0
+; RV64ID-NEXT:    fmv.w.x fa3, zero
+; RV64ID-NEXT:    fmax.s fa4, fa4, fa3
+; RV64ID-NEXT:    fmin.s fa5, fa4, fa5
+; RV64ID-NEXT:    fcvt.lu.s a0, fa5, rtz
+; RV64ID-NEXT:    ret
 start:
   %0 = tail call i16 @llvm.fptoui.sat.i16.bf16(bfloat %a)
   ret i16 %0
@@ -108,41 +212,85 @@ start:
 declare i16 @llvm.fptoui.sat.i16.bf16(bfloat)
 
 define i32 @fcvt_w_bf16(bfloat %a) nounwind {
-; CHECK32-LABEL: fcvt_w_bf16:
-; CHECK32:       # %bb.0:
-; CHECK32-NEXT:    fcvt.s.bf16 fa5, fa0, rne
-; CHECK32-NEXT:    fcvt.w.s a0, fa5, rtz
-; CHECK32-NEXT:    ret
-;
-; CHECK64-LABEL: fcvt_w_bf16:
-; CHECK64:       # %bb.0:
-; CHECK64-NEXT:    fcvt.s.bf16 fa5, fa0
-; CHECK64-NEXT:    fcvt.w.s a0, fa5, rtz
-; CHECK64-NEXT:    ret
+; CHECK32ZFBFMIN-LABEL: fcvt_w_bf16:
+; CHECK32ZFBFMIN:       # %bb.0:
+; CHECK32ZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa0, rne
+; CHECK32ZFBFMIN-NEXT:    fcvt.w.s a0, fa5, rtz
+; CHECK32ZFBFMIN-NEXT:    ret
+;
+; RV32ID-LABEL: fcvt_w_bf16:
+; RV32ID:       # %bb.0:
+; RV32ID-NEXT:    fmv.x.w a0, fa0
+; RV32ID-NEXT:    slli a0, a0, 16
+; RV32ID-NEXT:    fmv.w.x fa5, a0
+; RV32ID-NEXT:    fcvt.w.s a0, fa5, rtz
+; RV32ID-NEXT:    ret
+;
+; CHECK64ZFBFMIN-LABEL: fcvt_w_bf16:
+; CHECK64ZFBFMIN:       # %bb.0:
+; CHECK64ZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK64ZFBFMIN-NEXT:    fcvt.w.s a0, fa5, rtz
+; CHECK64ZFBFMIN-NEXT:    ret
+;
+; RV64ID-LABEL: fcvt_w_bf16:
+; RV64ID:       # %bb.0:
+; RV64ID-NEXT:    fmv.x.w a0, fa0
+; RV64ID-NEXT:    slli a0, a0, 48
+; RV64ID-NEXT:    srli a0, a0, 48
+; RV64ID-NEXT:    slli a0, a0, 16
+; RV64ID-NEXT:    fmv.w.x fa5, a0
+; RV64ID-NEXT:    fcvt.l.s a0, fa5, rtz
+; RV64ID-NEXT:    ret
   %1 = fptosi bfloat %a to i32
   ret i32 %1
 }
 
 define i32 @fcvt_w_bf16_sat(bfloat %a) nounwind {
-; CHECK32-LABEL: fcvt_w_bf16_sat:
-; CHECK32:       # %bb.0: # %start
-; CHECK32-NEXT:    fcvt.s.bf16 fa5, fa0
-; CHECK32-NEXT:    fcvt.w.s a0, fa5, rtz
-; CHECK32-NEXT:    feq.s a1, fa5, fa5
-; CHECK32-NEXT:    seqz a1, a1
-; CHECK32-NEXT:    addi a1, a1, -1
-; CHECK32-NEXT:    and a0, a1, a0
-; CHECK32-NEXT:    ret
-;
-; CHECK64-LABEL: fcvt_w_bf16_sat:
-; CHECK64:       # %bb.0: # %start
-; CHECK64-NEXT:    fcvt.s.bf16 fa5, fa0
-; CHECK64-NEXT:    fcvt.w.s a0, fa5, rtz
-; CHECK64-NEXT:    feq.s a1, fa5, fa5
-; CHECK64-NEXT:    seqz a1, a1
-; CHECK64-NEXT:    addi a1, a1, -1
-; CHECK64-NEXT:    and a0, a1, a0
-; CHECK64-NEXT:    ret
+; CHECK32ZFBFMIN-LABEL: fcvt_w_bf16_sat:
+; CHECK32ZFBFMIN:       # %bb.0: # %start
+; CHECK32ZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK32ZFBFMIN-NEXT:    fcvt.w.s a0, fa5, rtz
+; CHECK32ZFBFMIN-NEXT:    feq.s a1, fa5, fa5
+; CHECK32ZFBFMIN-NEXT:    seqz a1, a1
+; CHECK32ZFBFMIN-NEXT:    addi a1, a1, -1
+; CHECK32ZFBFMIN-NEXT:    and a0, a1, a0
+; CHECK32ZFBFMIN-NEXT:    ret
+;
+; RV32ID-LABEL: fcvt_w_bf16_sat:
+; RV32ID:       # %bb.0: # %start
+; RV32ID-NEXT:    fmv.x.w a0, fa0
+; RV32ID-NEXT:    slli a0, a0, 16
+; RV32ID-NEXT:    fmv.w.x fa5, a0
+; RV32ID-NEXT:    fcvt.w.s a0, fa5, rtz
+; RV32ID-NEXT:    feq.s a1, fa5, fa5
+; RV32ID-NEXT:    seqz a1, a1
+; RV32ID-NEXT:    addi a1, a1, -1
+; RV32ID-NEXT:    and a0, a1, a0
+; RV32ID-NEXT:    ret
+;
+; CHECK64ZFBFMIN-LABEL: fcvt_w_bf16_sat:
+; CHECK64ZFBFMIN:       # %bb.0: # %start
+; CHECK64ZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK64ZFBFMIN-NEXT:    fcvt.w.s a0, fa5, rtz
+; CHECK64ZFBFMIN-NEXT:    feq.s a1, fa5, fa5
+; CHECK64ZFBFMIN-NEXT:    seqz a1, a1
+; CHECK64ZFBFMIN-NEXT:    addi a1, a1, -1
+; CHECK64ZFBFMIN-NEXT:    and a0, a1, a0
+; CHECK64ZFBFMIN-NEXT:    ret
+;
+; RV64ID-LABEL: fcvt_w_bf16_sat:
+; RV64ID:       # %bb.0: # %start
+; RV64ID-NEXT:    fmv.x.w a0, fa0
+; RV64ID-NEXT:    slli a0, a0, 48
+; RV64ID-NEXT:    srli a0, a0, 48
+; RV64ID-NEXT:    slli a0, a0, 16
+; RV64ID-NEXT:    fmv.w.x fa5, a0
+; RV64ID-NEXT:    fcvt.w.s a0, fa5, rtz
+; RV64ID-NEXT:    feq.s a1, fa5, fa5
+; RV64ID-NEXT:    seqz a1, a1
+; RV64ID-NEXT:    addi a1, a1, -1
+; RV64ID-NEXT:    and a0, a1, a0
+; RV64ID-NEXT:    ret
 start:
   %0 = tail call i32 @llvm.fptosi.sat.i32.bf16(bfloat %a)
   ret i32 %0
@@ -150,37 +298,77 @@ start:
 declare i32 @llvm.fptosi.sat.i32.bf16(bfloat)
 
 define i32 @fcvt_wu_bf16(bfloat %a) nounwind {
-; CHECK32-LABEL: fcvt_wu_bf16:
-; CHECK32:       # %bb.0:
-; CHECK32-NEXT:    fcvt.s.bf16 fa5, fa0, rne
-; CHECK32-NEXT:    fcvt.wu.s a0, fa5, rtz
-; CHECK32-NEXT:    ret
-;
-; CHECK64-LABEL: fcvt_wu_bf16:
-; CHECK64:       # %bb.0:
-; CHECK64-NEXT:    fcvt.s.bf16 fa5, fa0
-; CHECK64-NEXT:    fcvt.wu.s a0, fa5, rtz
-; CHECK64-NEXT:    ret
+; CHECK32ZFBFMIN-LABEL: fcvt_wu_bf16:
+; CHECK32ZFBFMIN:       # %bb.0:
+; CHECK32ZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa0, rne
+; CHECK32ZFBFMIN-NEXT:    fcvt.wu.s a0, fa5, rtz
+; CHECK32ZFBFMIN-NEXT:    ret
+;
+; RV32ID-LABEL: fcvt_wu_bf16:
+; RV32ID:       # %bb.0:
+; RV32ID-NEXT:    fmv.x.w a0, fa0
+; RV32ID-NEXT:    slli a0, a0, 16
+; RV32ID-NEXT:    fmv.w.x fa5, a0
+; RV32ID-NEXT:    fcvt.wu.s a0, fa5, rtz
+; RV32ID-NEXT:    ret
+;
+; CHECK64ZFBFMIN-LABEL: fcvt_wu_bf16:
+; CHECK64ZFBFMIN:       # %bb.0:
+; CHECK64ZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK64ZFBFMIN-NEXT:    fcvt.wu.s a0, fa5, rtz
+; CHECK64ZFBFMIN-NEXT:    ret
+;
+; RV64ID-LABEL: fcvt_wu_bf16:
+; RV64ID:       # %bb.0:
+; RV64ID-NEXT:    fmv.x.w a0, fa0
+; RV64ID-NEXT:    slli a0, a0, 48
+; RV64ID-NEXT:    srli a0, a0, 48
+; RV64ID-NEXT:    slli a0, a0, 16
+; RV64ID-NEXT:    fmv.w.x fa5, a0
+; RV64ID-NEXT:    fcvt.lu.s a0, fa5, rtz
+; RV64ID-NEXT:    ret
   %1 = fptoui bfloat %a to i32
   ret i32 %1
 }
 
 define i32 @fcvt_wu_bf16_multiple_use(bfloat %x, ptr %y) nounwind {
-; CHECK32-LABEL: fcvt_wu_bf16_multiple_use:
-; CHECK32:       # %bb.0:
-; CHECK32-NEXT:    fcvt.s.bf16 fa5, fa0, rne
-; CHECK32-NEXT:    fcvt.wu.s a0, fa5, rtz
-; CHECK32-NEXT:    seqz a1, a0
-; CHECK32-NEXT:    add a0, a0, a1
-; CHECK32-NEXT:    ret
-;
-; CHECK64-LABEL: fcvt_wu_bf16_multiple_use:
-; CHECK64:       # %bb.0:
-; CHECK64-NEXT:    fcvt.s.bf16 fa5, fa0
-; CHECK64-NEXT:    fcvt.wu.s a0, fa5, rtz
-; CHECK64-NEXT:    seqz a1, a0
-; CHECK64-NEXT:    add a0, a0, a1
-; CHECK64-NEXT:    ret
+; CHECK32ZFBFMIN-LABEL: fcvt_wu_bf16_multiple_use:
+; CHECK32ZFBFMIN:       # %bb.0:
+; CHECK32ZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa0, rne
+; CHECK32ZFBFMIN-NEXT:    fcvt.wu.s a0, fa5, rtz
+; CHECK32ZFBFMIN-NEXT:    seqz a1, a0
+; CHECK32ZFBFMIN-NEXT:    add a0, a0, a1
+; CHECK32ZFBFMIN-NEXT:    ret
+;
+; RV32ID-LABEL: fcvt_wu_bf16_multiple_use:
+; RV32ID:       # %bb.0:
+; RV32ID-NEXT:    fmv.x.w a0, fa0
+; RV32ID-NEXT:    slli a0, a0, 16
+; RV32ID-NEXT:    fmv.w.x fa5, a0
+; RV32ID-NEXT:    fcvt.wu.s a0, fa5, rtz
+; RV32ID-NEXT:    seqz a1, a0
+; RV32ID-NEXT:    add a0, a0, a1
+; RV32ID-NEXT:    ret
+;
+; CHECK64ZFBFMIN-LABEL: fcvt_wu_bf16_multiple_use:
+; CHECK64ZFBFMIN:       # %bb.0:
+; CHECK64ZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK64ZFBFMIN-NEXT:    fcvt.wu.s a0, fa5, rtz
+; CHECK64ZFBFMIN-NEXT:    seqz a1, a0
+; CHECK64ZFBFMIN-NEXT:    add a0, a0, a1
+; CHECK64ZFBFMIN-NEXT:    ret
+;
+; RV64ID-LABEL: fcvt_wu_bf16_multiple_use:
+; RV64ID:       # %bb.0:
+; RV64ID-NEXT:    fmv.x.w a0, fa0
+; RV64ID-NEXT:    slli a0, a0, 48
+; RV64ID-NEXT:    srli a0, a0, 48
+; RV64ID-NEXT:    slli a0, a0, 16
+; RV64ID-NEXT:    fmv.w.x fa5, a0
+; RV64ID-NEXT:    fcvt.lu.s a0, fa5, rtz
+; RV64ID-NEXT:    seqz a1, a0
+; RV64ID-NEXT:    add a0, a0, a1
+; RV64ID-NEXT:    ret
   %a = fptoui bfloat %x to i32
   %b = icmp eq i32 %a, 0
   %c = select i1 %b, i32 1, i32 %a
@@ -188,34 +376,62 @@ define i32 @fcvt_wu_bf16_multiple_use(bfloat %x, ptr %y) nounwind {
 }
 
 define i32 @fcvt_wu_bf16_sat(bfloat %a) nounwind {
-; CHECK32-LABEL: fcvt_wu_bf16_sat:
-; CHECK32:       # %bb.0: # %start
-; CHECK32-NEXT:    fcvt.s.bf16 fa5, fa0
-; CHECK32-NEXT:    fcvt.wu.s a0, fa5, rtz
-; CHECK32-NEXT:    feq.s a1, fa5, fa5
-; CHECK32-NEXT:    seqz a1, a1
-; CHECK32-NEXT:    addi a1, a1, -1
-; CHECK32-NEXT:    and a0, a1, a0
-; CHECK32-NEXT:    ret
-;
-; CHECK64-LABEL: fcvt_wu_bf16_sat:
-; CHECK64:       # %bb.0: # %start
-; CHECK64-NEXT:    fcvt.s.bf16 fa5, fa0
-; CHECK64-NEXT:    fcvt.wu.s a0, fa5, rtz
-; CHECK64-NEXT:    feq.s a1, fa5, fa5
-; CHECK64-NEXT:    seqz a1, a1
-; CHECK64-NEXT:    addiw a1, a1, -1
-; CHECK64-NEXT:    and a0, a0, a1
-; CHECK64-NEXT:    slli a0, a0, 32
-; CHECK64-NEXT:    srli a0, a0, 32
-; CHECK64-NEXT:    ret
+; CHECK32ZFBFMIN-LABEL: fcvt_wu_bf16_sat:
+; CHECK32ZFBFMIN:       # %bb.0: # %start
+; CHECK32ZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK32ZFBFMIN-NEXT:    fcvt.wu.s a0, fa5, rtz
+; CHECK32ZFBFMIN-NEXT:    feq.s a1, fa5, fa5
+; CHECK32ZFBFMIN-NEXT:    seqz a1, a1
+; CHECK32ZFBFMIN-NEXT:    addi a1, a1, -1
+; CHECK32ZFBFMIN-NEXT:    and a0, a1, a0
+; CHECK32ZFBFMIN-NEXT:    ret
+;
+; RV32ID-LABEL: fcvt_wu_bf16_sat:
+; RV32ID:       # %bb.0: # %start
+; RV32ID-NEXT:    fmv.x.w a0, fa0
+; RV32ID-NEXT:    slli a0, a0, 16
+; RV32ID-NEXT:    fmv.w.x fa5, a0
+; RV32ID-NEXT:    fcvt.wu.s a0, fa5, rtz
+; RV32ID-NEXT:    feq.s a1, fa5, fa5
+; RV32ID-NEXT:    seqz a1, a1
+; RV32ID-NEXT:    addi a1, a1, -1
+; RV32ID-NEXT:    and a0, a1, a0
+; RV32ID-NEXT:    ret
+;
+; CHECK64ZFBFMIN-LABEL: fcvt_wu_bf16_sat:
+; CHECK64ZFBFMIN:       # %bb.0: # %start
+; CHECK64ZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK64ZFBFMIN-NEXT:    fcvt.wu.s a0, fa5, rtz
+; CHECK64ZFBFMIN-NEXT:    feq.s a1, fa5, fa5
+; CHECK64ZFBFMIN-NEXT:    seqz a1, a1
+; CHECK64ZFBFMIN-NEXT:    addiw a1, a1, -1
+; CHECK64ZFBFMIN-NEXT:    and a0, a0, a1
+; CHECK64ZFBFMIN-NEXT:    slli a0, a0, 32
+; CHECK64ZFBFMIN-NEXT:    srli a0, a0, 32
+; CHECK64ZFBFMIN-NEXT:    ret
+;
+; RV64ID-LABEL: fcvt_wu_bf16_sat:
+; RV64ID:       # %bb.0: # %start
+; RV64ID-NEXT:    fmv.x.w a0, fa0
+; RV64ID-NEXT:    slli a0, a0, 48
+; RV64ID-NEXT:    srli a0, a0, 48
+; RV64ID-NEXT:    slli a0, a0, 16
+; RV64ID-NEXT:    fmv.w.x fa5, a0
+; RV64ID-NEXT:    fcvt.wu.s a0, fa5, rtz
+; RV64ID-NEXT:    feq.s a1, fa5, fa5
+; RV64ID-NEXT:    seqz a1, a1
+; RV64ID-NEXT:    addiw a1, a1, -1
+; RV64ID-NEXT:    and a0, a0, a1
+; RV64ID-NEXT:    slli a0, a0, 32
+; RV64ID-NEXT:    srli a0, a0, 32
+; RV64ID-NEXT:    ret
 start:
   %0 = tail call i32 @llvm.fptoui.sat.i32.bf16(bfloat %a)
   ret i32 %0
 }
 declare i32 @llvm.fptoui.sat.i32.bf16(bfloat)
 
-; TODO: The following tests error on rv32.
+; TODO: The following tests error on rv32 with zfbfmin enabled.
 
 ; define i64 @fcvt_l_bf16(bfloat %a) nounwind {
 ;   %1 = fptosi bfloat %a to i64
@@ -242,151 +458,387 @@ declare i32 @llvm.fptoui.sat.i32.bf16(bfloat)
 ; declare i64 @llvm.fptoui.sat.i64.bf16(bfloat)
 
 define bfloat @fcvt_bf16_si(i16 %a) nounwind {
-; CHECK32-LABEL: fcvt_bf16_si:
-; CHECK32:       # %bb.0:
-; CHECK32-NEXT:    slli a0, a0, 16
-; CHECK32-NEXT:    srai a0, a0, 16
-; CHECK32-NEXT:    fcvt.s.w fa5, a0
-; CHECK32-NEXT:    fcvt.bf16.s fa0, fa5
-; CHECK32-NEXT:    ret
-;
-; CHECK64-LABEL: fcvt_bf16_si:
-; CHECK64:       # %bb.0:
-; CHECK64-NEXT:    slli a0, a0, 48
-; CHECK64-NEXT:    srai a0, a0, 48
-; CHECK64-NEXT:    fcvt.s.l fa5, a0
-; CHECK64-NEXT:    fcvt.bf16.s fa0, fa5
-; CHECK64-NEXT:    ret
+; CHECK32ZFBFMIN-LABEL: fcvt_bf16_si:
+; CHECK32ZFBFMIN:       # %bb.0:
+; CHECK32ZFBFMIN-NEXT:    slli a0, a0, 16
+; CHECK32ZFBFMIN-NEXT:    srai a0, a0, 16
+; CHECK32ZFBFMIN-NEXT:    fcvt.s.w fa5, a0
+; CHECK32ZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
+; CHECK32ZFBFMIN-NEXT:    ret
+;
+; RV32ID-LABEL: fcvt_bf16_si:
+; RV32ID:       # %bb.0:
+; RV32ID-NEXT:    addi sp, sp, -16
+; RV32ID-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32ID-NEXT:    slli a0, a0, 16
+; RV32ID-NEXT:    srai a0, a0, 16
+; RV32ID-NEXT:    fcvt.s.w fa0, a0
+; RV32ID-NEXT:    call __truncsfbf2@plt
+; RV32ID-NEXT:    fmv.x.w a0, fa0
+; RV32ID-NEXT:    lui a1, 1048560
+; RV32ID-NEXT:    or a0, a0, a1
+; RV32ID-NEXT:    fmv.w.x fa0, a0
+; RV32ID-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32ID-NEXT:    addi sp, sp, 16
+; RV32ID-NEXT:    ret
+;
+; CHECK64ZFBFMIN-LABEL: fcvt_bf16_si:
+; CHECK64ZFBFMIN:       # %bb.0:
+; CHECK64ZFBFMIN-NEXT:    slli a0, a0, 48
+; CHECK64ZFBFMIN-NEXT:    srai a0, a0, 48
+; CHECK64ZFBFMIN-NEXT:    fcvt.s.l fa5, a0
+; CHECK64ZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
+; CHECK64ZFBFMIN-NEXT:    ret
+;
+; RV64ID-LABEL: fcvt_bf16_si:
+; RV64ID:       # %bb.0:
+; RV64ID-NEXT:    addi sp, sp, -16
+; RV64ID-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64ID-NEXT:    slli a0, a0, 48
+; RV64ID-NEXT:    srai a0, a0, 48
+; RV64ID-NEXT:    fcvt.s.w fa0, a0
+; RV64ID-NEXT:    call __truncsfbf2@plt
+; RV64ID-NEXT:    fmv.x.w a0, fa0
+; RV64ID-NEXT:    lui a1, 1048560
+; RV64ID-NEXT:    or a0, a0, a1
+; RV64ID-NEXT:    fmv.w.x fa0, a0
+; RV64ID-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64ID-NEXT:    addi sp, sp, 16
+; RV64ID-NEXT:    ret
   %1 = sitofp i16 %a to bfloat
   ret bfloat %1
 }
 
 define bfloat @fcvt_bf16_si_signext(i16 signext %a) nounwind {
-; CHECK32-LABEL: fcvt_bf16_si_signext:
-; CHECK32:       # %bb.0:
-; CHECK32-NEXT:    fcvt.s.w fa5, a0
-; CHECK32-NEXT:    fcvt.bf16.s fa0, fa5
-; CHECK32-NEXT:    ret
-;
-; CHECK64-LABEL: fcvt_bf16_si_signext:
-; CHECK64:       # %bb.0:
-; CHECK64-NEXT:    fcvt.s.l fa5, a0
-; CHECK64-NEXT:    fcvt.bf16.s fa0, fa5
-; CHECK64-NEXT:    ret
+; CHECK32ZFBFMIN-LABEL: fcvt_bf16_si_signext:
+; CHECK32ZFBFMIN:       # %bb.0:
+; CHECK32ZFBFMIN-NEXT:    fcvt.s.w fa5, a0
+; CHECK32ZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
+; CHECK32ZFBFMIN-NEXT:    ret
+;
+; RV32ID-LABEL: fcvt_bf16_si_signext:
+; RV32ID:       # %bb.0:
+; RV32ID-NEXT:    addi sp, sp, -16
+; RV32ID-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32ID-NEXT:    fcvt.s.w fa0, a0
+; RV32ID-NEXT:    call __truncsfbf2@plt
+; RV32ID-NEXT:    fmv.x.w a0, fa0
+; RV32ID-NEXT:    lui a1, 1048560
+; RV32ID-NEXT:    or a0, a0, a1
+; RV32ID-NEXT:    fmv.w.x fa0, a0
+; RV32ID-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32ID-NEXT:    addi sp, sp, 16
+; RV32ID-NEXT:    ret
+;
+; CHECK64ZFBFMIN-LABEL: fcvt_bf16_si_signext:
+; CHECK64ZFBFMIN:       # %bb.0:
+; CHECK64ZFBFMIN-NEXT:    fcvt.s.l fa5, a0
+; CHECK64ZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
+; CHECK64ZFBFMIN-NEXT:    ret
+;
+; RV64ID-LABEL: fcvt_bf16_si_signext:
+; RV64ID:       # %bb.0:
+; RV64ID-NEXT:    addi sp, sp, -16
+; RV64ID-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64ID-NEXT:    fcvt.s.w fa0, a0
+; RV64ID-NEXT:    call __truncsfbf2@plt
+; RV64ID-NEXT:    fmv.x.w a0, fa0
+; RV64ID-NEXT:    lui a1, 1048560
+; RV64ID-NEXT:    or a0, a0, a1
+; RV64ID-NEXT:    fmv.w.x fa0, a0
+; RV64ID-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64ID-NEXT:    addi sp, sp, 16
+; RV64ID-NEXT:    ret
   %1 = sitofp i16 %a to bfloat
   ret bfloat %1
 }
 
 define bfloat @fcvt_bf16_ui(i16 %a) nounwind {
-; CHECK32-LABEL: fcvt_bf16_ui:
-; CHECK32:       # %bb.0:
-; CHECK32-NEXT:    slli a0, a0, 16
-; CHECK32-NEXT:    srli a0, a0, 16
-; CHECK32-NEXT:    fcvt.s.wu fa5, a0
-; CHECK32-NEXT:    fcvt.bf16.s fa0, fa5
-; CHECK32-NEXT:    ret
-;
-; CHECK64-LABEL: fcvt_bf16_ui:
-; CHECK64:       # %bb.0:
-; CHECK64-NEXT:    slli a0, a0, 48
-; CHECK64-NEXT:    srli a0, a0, 48
-; CHECK64-NEXT:    fcvt.s.lu fa5, a0
-; CHECK64-NEXT:    fcvt.bf16.s fa0, fa5
-; CHECK64-NEXT:    ret
+; CHECK32ZFBFMIN-LABEL: fcvt_bf16_ui:
+; CHECK32ZFBFMIN:       # %bb.0:
+; CHECK32ZFBFMIN-NEXT:    slli a0, a0, 16
+; CHECK32ZFBFMIN-NEXT:    srli a0, a0, 16
+; CHECK32ZFBFMIN-NEXT:    fcvt.s.wu fa5, a0
+; CHECK32ZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
+; CHECK32ZFBFMIN-NEXT:    ret
+;
+; RV32ID-LABEL: fcvt_bf16_ui:
+; RV32ID:       # %bb.0:
+; RV32ID-NEXT:    addi sp, sp, -16
+; RV32ID-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32ID-NEXT:    slli a0, a0, 16
+; RV32ID-NEXT:    srli a0, a0, 16
+; RV32ID-NEXT:    fcvt.s.wu fa0, a0
+; RV32ID-NEXT:    call __truncsfbf2@plt
+; RV32ID-NEXT:    fmv.x.w a0, fa0
+; RV32ID-NEXT:    lui a1, 1048560
+; RV32ID-NEXT:    or a0, a0, a1
+; RV32ID-NEXT:    fmv.w.x fa0, a0
+; RV32ID-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32ID-NEXT:    addi sp, sp, 16
+; RV32ID-NEXT:    ret
+;
+; CHECK64ZFBFMIN-LABEL: fcvt_bf16_ui:
+; CHECK64ZFBFMIN:       # %bb.0:
+; CHECK64ZFBFMIN-NEXT:    slli a0, a0, 48
+; CHECK64ZFBFMIN-NEXT:    srli a0, a0, 48
+; CHECK64ZFBFMIN-NEXT:    fcvt.s.lu fa5, a0
+; CHECK64ZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
+; CHECK64ZFBFMIN-NEXT:    ret
+;
+; RV64ID-LABEL: fcvt_bf16_ui:
+; RV64ID:       # %bb.0:
+; RV64ID-NEXT:    addi sp, sp, -16
+; RV64ID-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64ID-NEXT:    slli a0, a0, 48
+; RV64ID-NEXT:    srli a0, a0, 48
+; RV64ID-NEXT:    fcvt.s.wu fa0, a0
+; RV64ID-NEXT:    call __truncsfbf2@plt
+; RV64ID-NEXT:    fmv.x.w a0, fa0
+; RV64ID-NEXT:    lui a1, 1048560
+; RV64ID-NEXT:    or a0, a0, a1
+; RV64ID-NEXT:    fmv.w.x fa0, a0
+; RV64ID-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64ID-NEXT:    addi sp, sp, 16
+; RV64ID-NEXT:    ret
   %1 = uitofp i16 %a to bfloat
   ret bfloat %1
 }
 
 define bfloat @fcvt_bf16_ui_zeroext(i16 zeroext %a) nounwind {
-; CHECK32-LABEL: fcvt_bf16_ui_zeroext:
-; CHECK32:       # %bb.0:
-; CHECK32-NEXT:    fcvt.s.wu fa5, a0
-; CHECK32-NEXT:    fcvt.bf16.s fa0, fa5
-; CHECK32-NEXT:    ret
-;
-; CHECK64-LABEL: fcvt_bf16_ui_zeroext:
-; CHECK64:       # %bb.0:
-; CHECK64-NEXT:    fcvt.s.lu fa5, a0
-; CHECK64-NEXT:    fcvt.bf16.s fa0, fa5
-; CHECK64-NEXT:    ret
+; CHECK32ZFBFMIN-LABEL: fcvt_bf16_ui_zeroext:
+; CHECK32ZFBFMIN:       # %bb.0:
+; CHECK32ZFBFMIN-NEXT:    fcvt.s.wu fa5, a0
+; CHECK32ZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
+; CHECK32ZFBFMIN-NEXT:    ret
+;
+; RV32ID-LABEL: fcvt_bf16_ui_zeroext:
+; RV32ID:       # %bb.0:
+; RV32ID-NEXT:    addi sp, sp, -16
+; RV32ID-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32ID-NEXT:    fcvt.s.wu fa0, a0
+; RV32ID-NEXT:    call __truncsfbf2@plt
+; RV32ID-NEXT:    fmv.x.w a0, fa0
+; RV32ID-NEXT:    lui a1, 1048560
+; RV32ID-NEXT:    or a0, a0, a1
+; RV32ID-NEXT:    fmv.w.x fa0, a0
+; RV32ID-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32ID-NEXT:    addi sp, sp, 16
+; RV32ID-NEXT:    ret
+;
+; CHECK64ZFBFMIN-LABEL: fcvt_bf16_ui_zeroext:
+; CHECK64ZFBFMIN:       # %bb.0:
+; CHECK64ZFBFMIN-NEXT:    fcvt.s.lu fa5, a0
+; CHECK64ZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
+; CHECK64ZFBFMIN-NEXT:    ret
+;
+; RV64ID-LABEL: fcvt_bf16_ui_zeroext:
+; RV64ID:       # %bb.0:
+; RV64ID-NEXT:    addi sp, sp, -16
+; RV64ID-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64ID-NEXT:    fcvt.s.wu fa0, a0
+; RV64ID-NEXT:    call __truncsfbf2@plt
+; RV64ID-NEXT:    fmv.x.w a0, fa0
+; RV64ID-NEXT:    lui a1, 1048560
+; RV64ID-NEXT:    or a0, a0, a1
+; RV64ID-NEXT:    fmv.w.x fa0, a0
+; RV64ID-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64ID-NEXT:    addi sp, sp, 16
+; RV64ID-NEXT:    ret
   %1 = uitofp i16 %a to bfloat
   ret bfloat %1
 }
 
 define bfloat @fcvt_bf16_w(i32 %a) nounwind {
-; CHECK32-LABEL: fcvt_bf16_w:
-; CHECK32:       # %bb.0:
-; CHECK32-NEXT:    fcvt.s.w fa5, a0
-; CHECK32-NEXT:    fcvt.bf16.s fa0, fa5
-; CHECK32-NEXT:    ret
-;
-; CHECK64-LABEL: fcvt_bf16_w:
-; CHECK64:       # %bb.0:
-; CHECK64-NEXT:    sext.w a0, a0
-; CHECK64-NEXT:    fcvt.s.l fa5, a0
-; CHECK64-NEXT:    fcvt.bf16.s fa0, fa5
-; CHECK64-NEXT:    ret
+; CHECK32ZFBFMIN-LABEL: fcvt_bf16_w:
+; CHECK32ZFBFMIN:       # %bb.0:
+; CHECK32ZFBFMIN-NEXT:    fcvt.s.w fa5, a0
+; CHECK32ZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
+; CHECK32ZFBFMIN-NEXT:    ret
+;
+; RV32ID-LABEL: fcvt_bf16_w:
+; RV32ID:       # %bb.0:
+; RV32ID-NEXT:    addi sp, sp, -16
+; RV32ID-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32ID-NEXT:    fcvt.s.w fa0, a0
+; RV32ID-NEXT:    call __truncsfbf2@plt
+; RV32ID-NEXT:    fmv.x.w a0, fa0
+; RV32ID-NEXT:    lui a1, 1048560
+; RV32ID-NEXT:    or a0, a0, a1
+; RV32ID-NEXT:    fmv.w.x fa0, a0
+; RV32ID-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32ID-NEXT:    addi sp, sp, 16
+; RV32ID-NEXT:    ret
+;
+; CHECK64ZFBFMIN-LABEL: fcvt_bf16_w:
+; CHECK64ZFBFMIN:       # %bb.0:
+; CHECK64ZFBFMIN-NEXT:    sext.w a0, a0
+; CHECK64ZFBFMIN-NEXT:    fcvt.s.l fa5, a0
+; CHECK64ZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
+; CHECK64ZFBFMIN-NEXT:    ret
+;
+; RV64ID-LABEL: fcvt_bf16_w:
+; RV64ID:       # %bb.0:
+; RV64ID-NEXT:    addi sp, sp, -16
+; RV64ID-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64ID-NEXT:    fcvt.s.w fa0, a0
+; RV64ID-NEXT:    call __truncsfbf2@plt
+; RV64ID-NEXT:    fmv.x.w a0, fa0
+; RV64ID-NEXT:    lui a1, 1048560
+; RV64ID-NEXT:    or a0, a0, a1
+; RV64ID-NEXT:    fmv.w.x fa0, a0
+; RV64ID-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64ID-NEXT:    addi sp, sp, 16
+; RV64ID-NEXT:    ret
   %1 = sitofp i32 %a to bfloat
   ret bfloat %1
 }
 
 define bfloat @fcvt_bf16_w_load(ptr %p) nounwind {
-; CHECK32-LABEL: fcvt_bf16_w_load:
-; CHECK32:       # %bb.0:
-; CHECK32-NEXT:    lw a0, 0(a0)
-; CHECK32-NEXT:    fcvt.s.w fa5, a0
-; CHECK32-NEXT:    fcvt.bf16.s fa0, fa5
-; CHECK32-NEXT:    ret
-;
-; CHECK64-LABEL: fcvt_bf16_w_load:
-; CHECK64:       # %bb.0:
-; CHECK64-NEXT:    lw a0, 0(a0)
-; CHECK64-NEXT:    fcvt.s.l fa5, a0
-; CHECK64-NEXT:    fcvt.bf16.s fa0, fa5
-; CHECK64-NEXT:    ret
+; CHECK32ZFBFMIN-LABEL: fcvt_bf16_w_load:
+; CHECK32ZFBFMIN:       # %bb.0:
+; CHECK32ZFBFMIN-NEXT:    lw a0, 0(a0)
+; CHECK32ZFBFMIN-NEXT:    fcvt.s.w fa5, a0
+; CHECK32ZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
+; CHECK32ZFBFMIN-NEXT:    ret
+;
+; RV32ID-LABEL: fcvt_bf16_w_load:
+; RV32ID:       # %bb.0:
+; RV32ID-NEXT:    addi sp, sp, -16
+; RV32ID-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32ID-NEXT:    lw a0, 0(a0)
+; RV32ID-NEXT:    fcvt.s.w fa0, a0
+; RV32ID-NEXT:    call __truncsfbf2@plt
+; RV32ID-NEXT:    fmv.x.w a0, fa0
+; RV32ID-NEXT:    lui a1, 1048560
+; RV32ID-NEXT:    or a0, a0, a1
+; RV32ID-NEXT:    fmv.w.x fa0, a0
+; RV32ID-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32ID-NEXT:    addi sp, sp, 16
+; RV32ID-NEXT:    ret
+;
+; CHECK64ZFBFMIN-LABEL: fcvt_bf16_w_load:
+; CHECK64ZFBFMIN:       # %bb.0:
+; CHECK64ZFBFMIN-NEXT:    lw a0, 0(a0)
+; CHECK64ZFBFMIN-NEXT:    fcvt.s.l fa5, a0
+; CHECK64ZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
+; CHECK64ZFBFMIN-NEXT:    ret
+;
+; RV64ID-LABEL: fcvt_bf16_w_load:
+; RV64ID:       # %bb.0:
+; RV64ID-NEXT:    addi sp, sp, -16
+; RV64ID-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64ID-NEXT:    lw a0, 0(a0)
+; RV64ID-NEXT:    fcvt.s.w fa0, a0
+; RV64ID-NEXT:    call __truncsfbf2@plt
+; RV64ID-NEXT:    fmv.x.w a0, fa0
+; RV64ID-NEXT:    lui a1, 1048560
+; RV64ID-NEXT:    or a0, a0, a1
+; RV64ID-NEXT:    fmv.w.x fa0, a0
+; RV64ID-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64ID-NEXT:    addi sp, sp, 16
+; RV64ID-NEXT:    ret
   %a = load i32, ptr %p
   %1 = sitofp i32 %a to bfloat
   ret bfloat %1
 }
 
 define bfloat @fcvt_bf16_wu(i32 %a) nounwind {
-; CHECK32-LABEL: fcvt_bf16_wu:
-; CHECK32:       # %bb.0:
-; CHECK32-NEXT:    fcvt.s.wu fa5, a0
-; CHECK32-NEXT:    fcvt.bf16.s fa0, fa5
-; CHECK32-NEXT:    ret
-;
-; CHECK64-LABEL: fcvt_bf16_wu:
-; CHECK64:       # %bb.0:
-; CHECK64-NEXT:    slli a0, a0, 32
-; CHECK64-NEXT:    srli a0, a0, 32
-; CHECK64-NEXT:    fcvt.s.lu fa5, a0
-; CHECK64-NEXT:    fcvt.bf16.s fa0, fa5
-; CHECK64-NEXT:    ret
+; CHECK32ZFBFMIN-LABEL: fcvt_bf16_wu:
+; CHECK32ZFBFMIN:       # %bb.0:
+; CHECK32ZFBFMIN-NEXT:    fcvt.s.wu fa5, a0
+; CHECK32ZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
+; CHECK32ZFBFMIN-NEXT:    ret
+;
+; RV32ID-LABEL: fcvt_bf16_wu:
+; RV32ID:       # %bb.0:
+; RV32ID-NEXT:    addi sp, sp, -16
+; RV32ID-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32ID-NEXT:    fcvt.s.wu fa0, a0
+; RV32ID-NEXT:    call __truncsfbf2@plt
+; RV32ID-NEXT:    fmv.x.w a0, fa0
+; RV32ID-NEXT:    lui a1, 1048560
+; RV32ID-NEXT:    or a0, a0, a1
+; RV32ID-NEXT:    fmv.w.x fa0, a0
+; RV32ID-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32ID-NEXT:    addi sp, sp, 16
+; RV32ID-NEXT:    ret
+;
+; CHECK64ZFBFMIN-LABEL: fcvt_bf16_wu:
+; CHECK64ZFBFMIN:       # %bb.0:
+; CHECK64ZFBFMIN-NEXT:    slli a0, a0, 32
+; CHECK64ZFBFMIN-NEXT:    srli a0, a0, 32
+; CHECK64ZFBFMIN-NEXT:    fcvt.s.lu fa5, a0
+; CHECK64ZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
+; CHECK64ZFBFMIN-NEXT:    ret
+;
+; RV64ID-LABEL: fcvt_bf16_wu:
+; RV64ID:       # %bb.0:
+; RV64ID-NEXT:    addi sp, sp, -16
+; RV64ID-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64ID-NEXT:    fcvt.s.wu fa0, a0
+; RV64ID-NEXT:    call __truncsfbf2@plt
+; RV64ID-NEXT:    fmv.x.w a0, fa0
+; RV64ID-NEXT:    lui a1, 1048560
+; RV64ID-NEXT:    or a0, a0, a1
+; RV64ID-NEXT:    fmv.w.x fa0, a0
+; RV64ID-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64ID-NEXT:    addi sp, sp, 16
+; RV64ID-NEXT:    ret
   %1 = uitofp i32 %a to bfloat
   ret bfloat %1
 }
 
 define bfloat @fcvt_bf16_wu_load(ptr %p) nounwind {
-; CHECK32-LABEL: fcvt_bf16_wu_load:
-; CHECK32:       # %bb.0:
-; CHECK32-NEXT:    lw a0, 0(a0)
-; CHECK32-NEXT:    fcvt.s.wu fa5, a0
-; CHECK32-NEXT:    fcvt.bf16.s fa0, fa5
-; CHECK32-NEXT:    ret
-;
-; CHECK64-LABEL: fcvt_bf16_wu_load:
-; CHECK64:       # %bb.0:
-; CHECK64-NEXT:    lwu a0, 0(a0)
-; CHECK64-NEXT:    fcvt.s.lu fa5, a0
-; CHECK64-NEXT:    fcvt.bf16.s fa0, fa5
-; CHECK64-NEXT:    ret
+; CHECK32ZFBFMIN-LABEL: fcvt_bf16_wu_load:
+; CHECK32ZFBFMIN:       # %bb.0:
+; CHECK32ZFBFMIN-NEXT:    lw a0, 0(a0)
+; CHECK32ZFBFMIN-NEXT:    fcvt.s.wu fa5, a0
+; CHECK32ZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
+; CHECK32ZFBFMIN-NEXT:    ret
+;
+; RV32ID-LABEL: fcvt_bf16_wu_load:
+; RV32ID:       # %bb.0:
+; RV32ID-NEXT:    addi sp, sp, -16
+; RV32ID-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32ID-NEXT:    lw a0, 0(a0)
+; RV32ID-NEXT:    fcvt.s.wu fa0, a0
+; RV32ID-NEXT:    call __truncsfbf2@plt
+; RV32ID-NEXT:    fmv.x.w a0, fa0
+; RV32ID-NEXT:    lui a1, 1048560
+; RV32ID-NEXT:    or a0, a0, a1
+; RV32ID-NEXT:    fmv.w.x fa0, a0
+; RV32ID-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32ID-NEXT:    addi sp, sp, 16
+; RV32ID-NEXT:    ret
+;
+; CHECK64ZFBFMIN-LABEL: fcvt_bf16_wu_load:
+; CHECK64ZFBFMIN:       # %bb.0:
+; CHECK64ZFBFMIN-NEXT:    lwu a0, 0(a0)
+; CHECK64ZFBFMIN-NEXT:    fcvt.s.lu fa5, a0
+; CHECK64ZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
+; CHECK64ZFBFMIN-NEXT:    ret
+;
+; RV64ID-LABEL: fcvt_bf16_wu_load:
+; RV64ID:       # %bb.0:
+; RV64ID-NEXT:    addi sp, sp, -16
+; RV64ID-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64ID-NEXT:    lwu a0, 0(a0)
+; RV64ID-NEXT:    fcvt.s.wu fa0, a0
+; RV64ID-NEXT:    call __truncsfbf2@plt
+; RV64ID-NEXT:    fmv.x.w a0, fa0
+; RV64ID-NEXT:    lui a1, 1048560
+; RV64ID-NEXT:    or a0, a0, a1
+; RV64ID-NEXT:    fmv.w.x fa0, a0
+; RV64ID-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64ID-NEXT:    addi sp, sp, 16
+; RV64ID-NEXT:    ret
   %a = load i32, ptr %p
   %1 = uitofp i32 %a to bfloat
   ret bfloat %1
 }
 
-; TODO: The following tests error on rv32.
+; TODO: The following tests error on rv32 with zfbfmin enabled.
 
 ; define bfloat @fcvt_bf16_l(i64 %a) nounwind {
 ;   %1 = sitofp i64 %a to bfloat
@@ -399,29 +851,71 @@ define bfloat @fcvt_bf16_wu_load(ptr %p) nounwind {
 ; }
 
 define bfloat @fcvt_bf16_s(float %a) nounwind {
-; CHECK32-LABEL: fcvt_bf16_s:
-; CHECK32:       # %bb.0:
-; CHECK32-NEXT:    fcvt.bf16.s fa0, fa0
-; CHECK32-NEXT:    ret
-;
-; CHECK64-LABEL: fcvt_bf16_s:
-; CHECK64:       # %bb.0:
-; CHECK64-NEXT:    fcvt.bf16.s fa0, fa0
-; CHECK64-NEXT:    ret
+; CHECK32ZFBFMIN-LABEL: fcvt_bf16_s:
+; CHECK32ZFBFMIN:       # %bb.0:
+; CHECK32ZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa0
+; CHECK32ZFBFMIN-NEXT:    ret
+;
+; RV32ID-LABEL: fcvt_bf16_s:
+; RV32ID:       # %bb.0:
+; RV32ID-NEXT:    addi sp, sp, -16
+; RV32ID-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32ID-NEXT:    call __truncsfbf2@plt
+; RV32ID-NEXT:    fmv.x.w a0, fa0
+; RV32ID-NEXT:    lui a1, 1048560
+; RV32ID-NEXT:    or a0, a0, a1
+; RV32ID-NEXT:    fmv.w.x fa0, a0
+; RV32ID-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32ID-NEXT:    addi sp, sp, 16
+; RV32ID-NEXT:    ret
+;
+; CHECK64ZFBFMIN-LABEL: fcvt_bf16_s:
+; CHECK64ZFBFMIN:       # %bb.0:
+; CHECK64ZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa0
+; CHECK64ZFBFMIN-NEXT:    ret
+;
+; RV64ID-LABEL: fcvt_bf16_s:
+; RV64ID:       # %bb.0:
+; RV64ID-NEXT:    addi sp, sp, -16
+; RV64ID-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64ID-NEXT:    call __truncsfbf2@plt
+; RV64ID-NEXT:    fmv.x.w a0, fa0
+; RV64ID-NEXT:    lui a1, 1048560
+; RV64ID-NEXT:    or a0, a0, a1
+; RV64ID-NEXT:    fmv.w.x fa0, a0
+; RV64ID-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64ID-NEXT:    addi sp, sp, 16
+; RV64ID-NEXT:    ret
   %1 = fptrunc float %a to bfloat
   ret bfloat %1
 }
 
 define float @fcvt_s_bf16(bfloat %a) nounwind {
-; CHECK32-LABEL: fcvt_s_bf16:
-; CHECK32:       # %bb.0:
-; CHECK32-NEXT:    fcvt.s.bf16 fa0, fa0
-; CHECK32-NEXT:    ret
-;
-; CHECK64-LABEL: fcvt_s_bf16:
-; CHECK64:       # %bb.0:
-; CHECK64-NEXT:    fcvt.s.bf16 fa0, fa0
-; CHECK64-NEXT:    ret
+; CHECK32ZFBFMIN-LABEL: fcvt_s_bf16:
+; CHECK32ZFBFMIN:       # %bb.0:
+; CHECK32ZFBFMIN-NEXT:    fcvt.s.bf16 fa0, fa0
+; CHECK32ZFBFMIN-NEXT:    ret
+;
+; RV32ID-LABEL: fcvt_s_bf16:
+; RV32ID:       # %bb.0:
+; RV32ID-NEXT:    fmv.x.w a0, fa0
+; RV32ID-NEXT:    slli a0, a0, 16
+; RV32ID-NEXT:    fmv.w.x fa0, a0
+; RV32ID-NEXT:    ret
+;
+; CHECK64ZFBFMIN-LABEL: fcvt_s_bf16:
+; CHECK64ZFBFMIN:       # %bb.0:
+; CHECK64ZFBFMIN-NEXT:    fcvt.s.bf16 fa0, fa0
+; CHECK64ZFBFMIN-NEXT:    ret
+;
+; RV64ID-LABEL: fcvt_s_bf16:
+; RV64ID:       # %bb.0:
+; RV64ID-NEXT:    fmv.x.w a0, fa0
+; RV64ID-NEXT:    slli a0, a0, 48
+; RV64ID-NEXT:    srli a0, a0, 48
+; RV64ID-NEXT:    slli a0, a0, 16
+; RV64ID-NEXT:    fmv.w.x fa0, a0
+; RV64ID-NEXT:    ret
   %1 = fpext bfloat %a to float
   ret float %1
 }
@@ -442,6 +936,19 @@ define bfloat @fcvt_bf16_d(double %a) nounwind {
 ; R32IDZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
 ; R32IDZFBFMIN-NEXT:    ret
 ;
+; RV32ID-LABEL: fcvt_bf16_d:
+; RV32ID:       # %bb.0:
+; RV32ID-NEXT:    addi sp, sp, -16
+; RV32ID-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32ID-NEXT:    call __truncdfbf2@plt
+; RV32ID-NEXT:    fmv.x.w a0, fa0
+; RV32ID-NEXT:    lui a1, 1048560
+; RV32ID-NEXT:    or a0, a0, a1
+; RV32ID-NEXT:    fmv.w.x fa0, a0
+; RV32ID-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32ID-NEXT:    addi sp, sp, 16
+; RV32ID-NEXT:    ret
+;
 ; RV64IZFBFMIN-LABEL: fcvt_bf16_d:
 ; RV64IZFBFMIN:       # %bb.0:
 ; RV64IZFBFMIN-NEXT:    addi sp, sp, -16
@@ -453,10 +960,22 @@ define bfloat @fcvt_bf16_d(double %a) nounwind {
 ;
 ; RV64IDZFBFMIN-LABEL: fcvt_bf16_d:
 ; RV64IDZFBFMIN:       # %bb.0:
-; RV64IDZFBFMIN-NEXT:    fmv.d.x fa5, a0
-; RV64IDZFBFMIN-NEXT:    fcvt.s.d fa5, fa5
+; RV64IDZFBFMIN-NEXT:    fcvt.s.d fa5, fa0
 ; RV64IDZFBFMIN-NEXT:    fcvt.bf16.s fa0, fa5
 ; RV64IDZFBFMIN-NEXT:    ret
+;
+; RV64ID-LABEL: fcvt_bf16_d:
+; RV64ID:       # %bb.0:
+; RV64ID-NEXT:    addi sp, sp, -16
+; RV64ID-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64ID-NEXT:    call __truncdfbf2@plt
+; RV64ID-NEXT:    fmv.x.w a0, fa0
+; RV64ID-NEXT:    lui a1, 1048560
+; RV64ID-NEXT:    or a0, a0, a1
+; RV64ID-NEXT:    fmv.w.x fa0, a0
+; RV64ID-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64ID-NEXT:    addi sp, sp, 16
+; RV64ID-NEXT:    ret
   %1 = fptrunc double %a to bfloat
   ret bfloat %1
 }
@@ -478,6 +997,14 @@ define double @fcvt_d_bf16(bfloat %a) nounwind {
 ; R32IDZFBFMIN-NEXT:    fcvt.d.s fa0, fa5
 ; R32IDZFBFMIN-NEXT:    ret
 ;
+; RV32ID-LABEL: fcvt_d_bf16:
+; RV32ID:       # %bb.0:
+; RV32ID-NEXT:    fmv.x.w a0, fa0
+; RV32ID-NEXT:    slli a0, a0, 16
+; RV32ID-NEXT:    fmv.w.x fa5, a0
+; RV32ID-NEXT:    fcvt.d.s fa0, fa5
+; RV32ID-NEXT:    ret
+;
 ; RV64IZFBFMIN-LABEL: fcvt_d_bf16:
 ; RV64IZFBFMIN:       # %bb.0:
 ; RV64IZFBFMIN-NEXT:    addi sp, sp, -16
@@ -491,57 +1018,128 @@ define double @fcvt_d_bf16(bfloat %a) nounwind {
 ; RV64IDZFBFMIN-LABEL: fcvt_d_bf16:
 ; RV64IDZFBFMIN:       # %bb.0:
 ; RV64IDZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa0
-; RV64IDZFBFMIN-NEXT:    fcvt.d.s fa5, fa5
-; RV64IDZFBFMIN-NEXT:    fmv.x.d a0, fa5
+; RV64IDZFBFMIN-NEXT:    fcvt.d.s fa0, fa5
 ; RV64IDZFBFMIN-NEXT:    ret
+;
+; RV64ID-LABEL: fcvt_d_bf16:
+; RV64ID:       # %bb.0:
+; RV64ID-NEXT:    fmv.x.w a0, fa0
+; RV64ID-NEXT:    slli a0, a0, 48
+; RV64ID-NEXT:    srli a0, a0, 48
+; RV64ID-NEXT:    slli a0, a0, 16
+; RV64ID-NEXT:    fmv.w.x fa5, a0
+; RV64ID-NEXT:    fcvt.d.s fa0, fa5
+; RV64ID-NEXT:    ret
   %1 = fpext bfloat %a to double
   ret double %1
 }
 
 define bfloat @bitcast_bf16_i16(i16 %a) nounwind {
-; CHECK32-LABEL: bitcast_bf16_i16:
-; CHECK32:       # %bb.0:
-; CHECK32-NEXT:    fmv.h.x fa0, a0
-; CHECK32-NEXT:    ret
-;
-; CHECK64-LABEL: bitcast_bf16_i16:
-; CHECK64:       # %bb.0:
-; CHECK64-NEXT:    fmv.h.x fa0, a0
-; CHECK64-NEXT:    ret
+; CHECK32ZFBFMIN-LABEL: bitcast_bf16_i16:
+; CHECK32ZFBFMIN:       # %bb.0:
+; CHECK32ZFBFMIN-NEXT:    fmv.h.x fa0, a0
+; CHECK32ZFBFMIN-NEXT:    ret
+;
+; RV32ID-LABEL: bitcast_bf16_i16:
+; RV32ID:       # %bb.0:
+; RV32ID-NEXT:    lui a1, 1048560
+; RV32ID-NEXT:    or a0, a0, a1
+; RV32ID-NEXT:    fmv.w.x fa0, a0
+; RV32ID-NEXT:    ret
+;
+; CHECK64ZFBFMIN-LABEL: bitcast_bf16_i16:
+; CHECK64ZFBFMIN:       # %bb.0:
+; CHECK64ZFBFMIN-NEXT:    fmv.h.x fa0, a0
+; CHECK64ZFBFMIN-NEXT:    ret
+;
+; RV64ID-LABEL: bitcast_bf16_i16:
+; RV64ID:       # %bb.0:
+; RV64ID-NEXT:    lui a1, 1048560
+; RV64ID-NEXT:    or a0, a0, a1
+; RV64ID-NEXT:    fmv.w.x fa0, a0
+; RV64ID-NEXT:    ret
   %1 = bitcast i16 %a to bfloat
   ret bfloat %1
 }
 
 define i16 @bitcast_i16_bf16(bfloat %a) nounwind {
-; CHECK32-LABEL: bitcast_i16_bf16:
-; CHECK32:       # %bb.0:
-; CHECK32-NEXT:    fmv.x.h a0, fa0
-; CHECK32-NEXT:    ret
-;
-; CHECK64-LABEL: bitcast_i16_bf16:
-; CHECK64:       # %bb.0:
-; CHECK64-NEXT:    fmv.x.h a0, fa0
-; CHECK64-NEXT:    ret
+; CHECK32ZFBFMIN-LABEL: bitcast_i16_bf16:
+; CHECK32ZFBFMIN:       # %bb.0:
+; CHECK32ZFBFMIN-NEXT:    fmv.x.h a0, fa0
+; CHECK32ZFBFMIN-NEXT:    ret
+;
+; RV32ID-LABEL: bitcast_i16_bf16:
+; RV32ID:       # %bb.0:
+; RV32ID-NEXT:    fmv.x.w a0, fa0
+; RV32ID-NEXT:    ret
+;
+; CHECK64ZFBFMIN-LABEL: bitcast_i16_bf16:
+; CHECK64ZFBFMIN:       # %bb.0:
+; CHECK64ZFBFMIN-NEXT:    fmv.x.h a0, fa0
+; CHECK64ZFBFMIN-NEXT:    ret
+;
+; RV64ID-LABEL: bitcast_i16_bf16:
+; RV64ID:       # %bb.0:
+; RV64ID-NEXT:    fmv.x.w a0, fa0
+; RV64ID-NEXT:    ret
   %1 = bitcast bfloat %a to i16
   ret i16 %1
 }
 
 define signext i32 @fcvt_bf16_w_demanded_bits(i32 signext %0, ptr %1) nounwind {
-; CHECK32-LABEL: fcvt_bf16_w_demanded_bits:
-; CHECK32:       # %bb.0:
-; CHECK32-NEXT:    addi a0, a0, 1
-; CHECK32-NEXT:    fcvt.s.w fa5, a0
-; CHECK32-NEXT:    fcvt.bf16.s fa5, fa5
-; CHECK32-NEXT:    fsh fa5, 0(a1)
-; CHECK32-NEXT:    ret
-;
-; CHECK64-LABEL: fcvt_bf16_w_demanded_bits:
-; CHECK64:       # %bb.0:
-; CHECK64-NEXT:    addiw a0, a0, 1
-; CHECK64-NEXT:    fcvt.s.l fa5, a0
-; CHECK64-NEXT:    fcvt.bf16.s fa5, fa5
-; CHECK64-NEXT:    fsh fa5, 0(a1)
-; CHECK64-NEXT:    ret
+; CHECK32ZFBFMIN-LABEL: fcvt_bf16_w_demanded_bits:
+; CHECK32ZFBFMIN:       # %bb.0:
+; CHECK32ZFBFMIN-NEXT:    addi a0, a0, 1
+; CHECK32ZFBFMIN-NEXT:    fcvt.s.w fa5, a0
+; CHECK32ZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
+; CHECK32ZFBFMIN-NEXT:    fsh fa5, 0(a1)
+; CHECK32ZFBFMIN-NEXT:    ret
+;
+; RV32ID-LABEL: fcvt_bf16_w_demanded_bits:
+; RV32ID:       # %bb.0:
+; RV32ID-NEXT:    addi sp, sp, -16
+; RV32ID-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32ID-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32ID-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
+; RV32ID-NEXT:    mv s0, a1
+; RV32ID-NEXT:    addi s1, a0, 1
+; RV32ID-NEXT:    fcvt.s.w fa0, s1
+; RV32ID-NEXT:    call __truncsfbf2@plt
+; RV32ID-NEXT:    fmv.x.w a0, fa0
+; RV32ID-NEXT:    sh a0, 0(s0)
+; RV32ID-NEXT:    mv a0, s1
+; RV32ID-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32ID-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32ID-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; RV32ID-NEXT:    addi sp, sp, 16
+; RV32ID-NEXT:    ret
+;
+; CHECK64ZFBFMIN-LABEL: fcvt_bf16_w_demanded_bits:
+; CHECK64ZFBFMIN:       # %bb.0:
+; CHECK64ZFBFMIN-NEXT:    addiw a0, a0, 1
+; CHECK64ZFBFMIN-NEXT:    fcvt.s.l fa5, a0
+; CHECK64ZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
+; CHECK64ZFBFMIN-NEXT:    fsh fa5, 0(a1)
+; CHECK64ZFBFMIN-NEXT:    ret
+;
+; RV64ID-LABEL: fcvt_bf16_w_demanded_bits:
+; RV64ID:       # %bb.0:
+; RV64ID-NEXT:    addi sp, sp, -32
+; RV64ID-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64ID-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; RV64ID-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; RV64ID-NEXT:    mv s0, a1
+; RV64ID-NEXT:    addiw s1, a0, 1
+; RV64ID-NEXT:    fcvt.s.w fa0, s1
+; RV64ID-NEXT:    call __truncsfbf2@plt
+; RV64ID-NEXT:    fmv.x.w a0, fa0
+; RV64ID-NEXT:    sh a0, 0(s0)
+; RV64ID-NEXT:    mv a0, s1
+; RV64ID-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64ID-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64ID-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64ID-NEXT:    addi sp, sp, 32
+; RV64ID-NEXT:    ret
   %3 = add i32 %0, 1
   %4 = sitofp i32 %3 to bfloat
   store bfloat %4, ptr %1, align 2
@@ -549,23 +1147,61 @@ define signext i32 @fcvt_bf16_w_demanded_bits(i32 signext %0, ptr %1) nounwind {
 }
 
 define signext i32 @fcvt_bf16_wu_demanded_bits(i32 signext %0, ptr %1) nounwind {
-; CHECK32-LABEL: fcvt_bf16_wu_demanded_bits:
-; CHECK32:       # %bb.0:
-; CHECK32-NEXT:    addi a0, a0, 1
-; CHECK32-NEXT:    fcvt.s.wu fa5, a0
-; CHECK32-NEXT:    fcvt.bf16.s fa5, fa5
-; CHECK32-NEXT:    fsh fa5, 0(a1)
-; CHECK32-NEXT:    ret
-;
-; CHECK64-LABEL: fcvt_bf16_wu_demanded_bits:
-; CHECK64:       # %bb.0:
-; CHECK64-NEXT:    addiw a0, a0, 1
-; CHECK64-NEXT:    slli a2, a0, 32
-; CHECK64-NEXT:    srli a2, a2, 32
-; CHECK64-NEXT:    fcvt.s.lu fa5, a2
-; CHECK64-NEXT:    fcvt.bf16.s fa5, fa5
-; CHECK64-NEXT:    fsh fa5, 0(a1)
-; CHECK64-NEXT:    ret
+; CHECK32ZFBFMIN-LABEL: fcvt_bf16_wu_demanded_bits:
+; CHECK32ZFBFMIN:       # %bb.0:
+; CHECK32ZFBFMIN-NEXT:    addi a0, a0, 1
+; CHECK32ZFBFMIN-NEXT:    fcvt.s.wu fa5, a0
+; CHECK32ZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
+; CHECK32ZFBFMIN-NEXT:    fsh fa5, 0(a1)
+; CHECK32ZFBFMIN-NEXT:    ret
+;
+; RV32ID-LABEL: fcvt_bf16_wu_demanded_bits:
+; RV32ID:       # %bb.0:
+; RV32ID-NEXT:    addi sp, sp, -16
+; RV32ID-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32ID-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32ID-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
+; RV32ID-NEXT:    mv s0, a1
+; RV32ID-NEXT:    addi s1, a0, 1
+; RV32ID-NEXT:    fcvt.s.wu fa0, s1
+; RV32ID-NEXT:    call __truncsfbf2@plt
+; RV32ID-NEXT:    fmv.x.w a0, fa0
+; RV32ID-NEXT:    sh a0, 0(s0)
+; RV32ID-NEXT:    mv a0, s1
+; RV32ID-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32ID-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32ID-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; RV32ID-NEXT:    addi sp, sp, 16
+; RV32ID-NEXT:    ret
+;
+; CHECK64ZFBFMIN-LABEL: fcvt_bf16_wu_demanded_bits:
+; CHECK64ZFBFMIN:       # %bb.0:
+; CHECK64ZFBFMIN-NEXT:    addiw a0, a0, 1
+; CHECK64ZFBFMIN-NEXT:    slli a2, a0, 32
+; CHECK64ZFBFMIN-NEXT:    srli a2, a2, 32
+; CHECK64ZFBFMIN-NEXT:    fcvt.s.lu fa5, a2
+; CHECK64ZFBFMIN-NEXT:    fcvt.bf16.s fa5, fa5
+; CHECK64ZFBFMIN-NEXT:    fsh fa5, 0(a1)
+; CHECK64ZFBFMIN-NEXT:    ret
+;
+; RV64ID-LABEL: fcvt_bf16_wu_demanded_bits:
+; RV64ID:       # %bb.0:
+; RV64ID-NEXT:    addi sp, sp, -32
+; RV64ID-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64ID-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; RV64ID-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; RV64ID-NEXT:    mv s0, a1
+; RV64ID-NEXT:    addiw s1, a0, 1
+; RV64ID-NEXT:    fcvt.s.wu fa0, s1
+; RV64ID-NEXT:    call __truncsfbf2@plt
+; RV64ID-NEXT:    fmv.x.w a0, fa0
+; RV64ID-NEXT:    sh a0, 0(s0)
+; RV64ID-NEXT:    mv a0, s1
+; RV64ID-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64ID-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64ID-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64ID-NEXT:    addi sp, sp, 32
+; RV64ID-NEXT:    ret
   %3 = add i32 %0, 1
   %4 = uitofp i32 %3 to bfloat
   store bfloat %4, ptr %1, align 2
@@ -573,51 +1209,105 @@ define signext i32 @fcvt_bf16_wu_demanded_bits(i32 signext %0, ptr %1) nounwind
 }
 
 define signext i8 @fcvt_w_s_i8(bfloat %a) nounwind {
-; CHECK32-LABEL: fcvt_w_s_i8:
-; CHECK32:       # %bb.0:
-; CHECK32-NEXT:    fcvt.s.bf16 fa5, fa0, rne
-; CHECK32-NEXT:    fcvt.w.s a0, fa5, rtz
-; CHECK32-NEXT:    ret
-;
-; CHECK64-LABEL: fcvt_w_s_i8:
-; CHECK64:       # %bb.0:
-; CHECK64-NEXT:    fcvt.s.bf16 fa5, fa0, rne
-; CHECK64-NEXT:    fcvt.l.s a0, fa5, rtz
-; CHECK64-NEXT:    ret
+; CHECK32ZFBFMIN-LABEL: fcvt_w_s_i8:
+; CHECK32ZFBFMIN:       # %bb.0:
+; CHECK32ZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa0, rne
+; CHECK32ZFBFMIN-NEXT:    fcvt.w.s a0, fa5, rtz
+; CHECK32ZFBFMIN-NEXT:    ret
+;
+; RV32ID-LABEL: fcvt_w_s_i8:
+; RV32ID:       # %bb.0:
+; RV32ID-NEXT:    fmv.x.w a0, fa0
+; RV32ID-NEXT:    slli a0, a0, 16
+; RV32ID-NEXT:    fmv.w.x fa5, a0
+; RV32ID-NEXT:    fcvt.w.s a0, fa5, rtz
+; RV32ID-NEXT:    ret
+;
+; CHECK64ZFBFMIN-LABEL: fcvt_w_s_i8:
+; CHECK64ZFBFMIN:       # %bb.0:
+; CHECK64ZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa0, rne
+; CHECK64ZFBFMIN-NEXT:    fcvt.l.s a0, fa5, rtz
+; CHECK64ZFBFMIN-NEXT:    ret
+;
+; RV64ID-LABEL: fcvt_w_s_i8:
+; RV64ID:       # %bb.0:
+; RV64ID-NEXT:    fmv.x.w a0, fa0
+; RV64ID-NEXT:    slli a0, a0, 48
+; RV64ID-NEXT:    srli a0, a0, 48
+; RV64ID-NEXT:    slli a0, a0, 16
+; RV64ID-NEXT:    fmv.w.x fa5, a0
+; RV64ID-NEXT:    fcvt.l.s a0, fa5, rtz
+; RV64ID-NEXT:    ret
   %1 = fptosi bfloat %a to i8
   ret i8 %1
 }
 
 define signext i8 @fcvt_w_s_sat_i8(bfloat %a) nounwind {
-; CHECK32-LABEL: fcvt_w_s_sat_i8:
-; CHECK32:       # %bb.0: # %start
-; CHECK32-NEXT:    fcvt.s.bf16 fa5, fa0
-; CHECK32-NEXT:    feq.s a0, fa5, fa5
-; CHECK32-NEXT:    neg a0, a0
-; CHECK32-NEXT:    lui a1, 798720
-; CHECK32-NEXT:    fmv.w.x fa4, a1
-; CHECK32-NEXT:    fmax.s fa5, fa5, fa4
-; CHECK32-NEXT:    lui a1, 274400
-; CHECK32-NEXT:    fmv.w.x fa4, a1
-; CHECK32-NEXT:    fmin.s fa5, fa5, fa4
-; CHECK32-NEXT:    fcvt.w.s a1, fa5, rtz
-; CHECK32-NEXT:    and a0, a0, a1
-; CHECK32-NEXT:    ret
-;
-; CHECK64-LABEL: fcvt_w_s_sat_i8:
-; CHECK64:       # %bb.0: # %start
-; CHECK64-NEXT:    fcvt.s.bf16 fa5, fa0
-; CHECK64-NEXT:    feq.s a0, fa5, fa5
-; CHECK64-NEXT:    neg a0, a0
-; CHECK64-NEXT:    lui a1, 798720
-; CHECK64-NEXT:    fmv.w.x fa4, a1
-; CHECK64-NEXT:    fmax.s fa5, fa5, fa4
-; CHECK64-NEXT:    lui a1, 274400
-; CHECK64-NEXT:    fmv.w.x fa4, a1
-; CHECK64-NEXT:    fmin.s fa5, fa5, fa4
-; CHECK64-NEXT:    fcvt.l.s a1, fa5, rtz
-; CHECK64-NEXT:    and a0, a0, a1
-; CHECK64-NEXT:    ret
+; CHECK32ZFBFMIN-LABEL: fcvt_w_s_sat_i8:
+; CHECK32ZFBFMIN:       # %bb.0: # %start
+; CHECK32ZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK32ZFBFMIN-NEXT:    feq.s a0, fa5, fa5
+; CHECK32ZFBFMIN-NEXT:    neg a0, a0
+; CHECK32ZFBFMIN-NEXT:    lui a1, 798720
+; CHECK32ZFBFMIN-NEXT:    fmv.w.x fa4, a1
+; CHECK32ZFBFMIN-NEXT:    fmax.s fa5, fa5, fa4
+; CHECK32ZFBFMIN-NEXT:    lui a1, 274400
+; CHECK32ZFBFMIN-NEXT:    fmv.w.x fa4, a1
+; CHECK32ZFBFMIN-NEXT:    fmin.s fa5, fa5, fa4
+; CHECK32ZFBFMIN-NEXT:    fcvt.w.s a1, fa5, rtz
+; CHECK32ZFBFMIN-NEXT:    and a0, a0, a1
+; CHECK32ZFBFMIN-NEXT:    ret
+;
+; RV32ID-LABEL: fcvt_w_s_sat_i8:
+; RV32ID:       # %bb.0: # %start
+; RV32ID-NEXT:    fmv.x.w a0, fa0
+; RV32ID-NEXT:    slli a0, a0, 16
+; RV32ID-NEXT:    fmv.w.x fa5, a0
+; RV32ID-NEXT:    feq.s a0, fa5, fa5
+; RV32ID-NEXT:    neg a0, a0
+; RV32ID-NEXT:    lui a1, 798720
+; RV32ID-NEXT:    fmv.w.x fa4, a1
+; RV32ID-NEXT:    fmax.s fa5, fa5, fa4
+; RV32ID-NEXT:    lui a1, 274400
+; RV32ID-NEXT:    fmv.w.x fa4, a1
+; RV32ID-NEXT:    fmin.s fa5, fa5, fa4
+; RV32ID-NEXT:    fcvt.w.s a1, fa5, rtz
+; RV32ID-NEXT:    and a0, a0, a1
+; RV32ID-NEXT:    ret
+;
+; CHECK64ZFBFMIN-LABEL: fcvt_w_s_sat_i8:
+; CHECK64ZFBFMIN:       # %bb.0: # %start
+; CHECK64ZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK64ZFBFMIN-NEXT:    feq.s a0, fa5, fa5
+; CHECK64ZFBFMIN-NEXT:    neg a0, a0
+; CHECK64ZFBFMIN-NEXT:    lui a1, 798720
+; CHECK64ZFBFMIN-NEXT:    fmv.w.x fa4, a1
+; CHECK64ZFBFMIN-NEXT:    fmax.s fa5, fa5, fa4
+; CHECK64ZFBFMIN-NEXT:    lui a1, 274400
+; CHECK64ZFBFMIN-NEXT:    fmv.w.x fa4, a1
+; CHECK64ZFBFMIN-NEXT:    fmin.s fa5, fa5, fa4
+; CHECK64ZFBFMIN-NEXT:    fcvt.l.s a1, fa5, rtz
+; CHECK64ZFBFMIN-NEXT:    and a0, a0, a1
+; CHECK64ZFBFMIN-NEXT:    ret
+;
+; RV64ID-LABEL: fcvt_w_s_sat_i8:
+; RV64ID:       # %bb.0: # %start
+; RV64ID-NEXT:    fmv.x.w a0, fa0
+; RV64ID-NEXT:    slli a0, a0, 48
+; RV64ID-NEXT:    srli a0, a0, 48
+; RV64ID-NEXT:    slli a0, a0, 16
+; RV64ID-NEXT:    fmv.w.x fa5, a0
+; RV64ID-NEXT:    feq.s a0, fa5, fa5
+; RV64ID-NEXT:    neg a0, a0
+; RV64ID-NEXT:    lui a1, 798720
+; RV64ID-NEXT:    fmv.w.x fa4, a1
+; RV64ID-NEXT:    fmax.s fa5, fa5, fa4
+; RV64ID-NEXT:    lui a1, 274400
+; RV64ID-NEXT:    fmv.w.x fa4, a1
+; RV64ID-NEXT:    fmin.s fa5, fa5, fa4
+; RV64ID-NEXT:    fcvt.l.s a1, fa5, rtz
+; RV64ID-NEXT:    and a0, a0, a1
+; RV64ID-NEXT:    ret
 start:
   %0 = tail call i8 @llvm.fptosi.sat.i8.bf16(bfloat %a)
   ret i8 %0
@@ -625,43 +1315,89 @@ start:
 declare i8 @llvm.fptosi.sat.i8.bf16(bfloat)
 
 define zeroext i8 @fcvt_wu_s_i8(bfloat %a) nounwind {
-; CHECK32-LABEL: fcvt_wu_s_i8:
-; CHECK32:       # %bb.0:
-; CHECK32-NEXT:    fcvt.s.bf16 fa5, fa0, rne
-; CHECK32-NEXT:    fcvt.wu.s a0, fa5, rtz
-; CHECK32-NEXT:    ret
-;
-; CHECK64-LABEL: fcvt_wu_s_i8:
-; CHECK64:       # %bb.0:
-; CHECK64-NEXT:    fcvt.s.bf16 fa5, fa0, rne
-; CHECK64-NEXT:    fcvt.lu.s a0, fa5, rtz
-; CHECK64-NEXT:    ret
+; CHECK32ZFBFMIN-LABEL: fcvt_wu_s_i8:
+; CHECK32ZFBFMIN:       # %bb.0:
+; CHECK32ZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa0, rne
+; CHECK32ZFBFMIN-NEXT:    fcvt.wu.s a0, fa5, rtz
+; CHECK32ZFBFMIN-NEXT:    ret
+;
+; RV32ID-LABEL: fcvt_wu_s_i8:
+; RV32ID:       # %bb.0:
+; RV32ID-NEXT:    fmv.x.w a0, fa0
+; RV32ID-NEXT:    slli a0, a0, 16
+; RV32ID-NEXT:    fmv.w.x fa5, a0
+; RV32ID-NEXT:    fcvt.wu.s a0, fa5, rtz
+; RV32ID-NEXT:    ret
+;
+; CHECK64ZFBFMIN-LABEL: fcvt_wu_s_i8:
+; CHECK64ZFBFMIN:       # %bb.0:
+; CHECK64ZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa0, rne
+; CHECK64ZFBFMIN-NEXT:    fcvt.lu.s a0, fa5, rtz
+; CHECK64ZFBFMIN-NEXT:    ret
+;
+; RV64ID-LABEL: fcvt_wu_s_i8:
+; RV64ID:       # %bb.0:
+; RV64ID-NEXT:    fmv.x.w a0, fa0
+; RV64ID-NEXT:    slli a0, a0, 48
+; RV64ID-NEXT:    srli a0, a0, 48
+; RV64ID-NEXT:    slli a0, a0, 16
+; RV64ID-NEXT:    fmv.w.x fa5, a0
+; RV64ID-NEXT:    fcvt.lu.s a0, fa5, rtz
+; RV64ID-NEXT:    ret
   %1 = fptoui bfloat %a to i8
   ret i8 %1
 }
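On the new RV32ID/RV64ID RUN lines there is no fcvt.s.bf16, so the bfloat
argument is widened in integer registers instead: bfloat16 is the high half of
a binary32, so fmv.x.w / slli 16 / fmv.w.x reproduces the exact float value.
On RV64 the extra slli 48 / srli 48 pair first zero-extends the 16 payload
bits so the shift by 16 starts from a clean word. A hedged C equivalent of
that expansion:

#include <stdint.h>
#include <string.h>

/* bfloat16 -> float: place the 16 bfloat bits in the upper half of a
   32-bit word and reinterpret. This is exact, since bfloat16 is a
   truncated binary32. */
static float bf16_to_float(uint16_t bits) {
    uint32_t w = (uint32_t)bits << 16; /* slli a0, a0, 16 */
    float f;
    memcpy(&f, &w, sizeof f);          /* fmv.w.x fa5, a0 */
    return f;
}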
 
 define zeroext i8 @fcvt_wu_s_sat_i8(bfloat %a) nounwind {
-; CHECK32-LABEL: fcvt_wu_s_sat_i8:
-; CHECK32:       # %bb.0: # %start
-; CHECK32-NEXT:    fcvt.s.bf16 fa5, fa0
-; CHECK32-NEXT:    fmv.w.x fa4, zero
-; CHECK32-NEXT:    fmax.s fa5, fa5, fa4
-; CHECK32-NEXT:    lui a0, 276464
-; CHECK32-NEXT:    fmv.w.x fa4, a0
-; CHECK32-NEXT:    fmin.s fa5, fa5, fa4
-; CHECK32-NEXT:    fcvt.wu.s a0, fa5, rtz
-; CHECK32-NEXT:    ret
-;
-; CHECK64-LABEL: fcvt_wu_s_sat_i8:
-; CHECK64:       # %bb.0: # %start
-; CHECK64-NEXT:    fcvt.s.bf16 fa5, fa0
-; CHECK64-NEXT:    fmv.w.x fa4, zero
-; CHECK64-NEXT:    fmax.s fa5, fa5, fa4
-; CHECK64-NEXT:    lui a0, 276464
-; CHECK64-NEXT:    fmv.w.x fa4, a0
-; CHECK64-NEXT:    fmin.s fa5, fa5, fa4
-; CHECK64-NEXT:    fcvt.lu.s a0, fa5, rtz
-; CHECK64-NEXT:    ret
+; CHECK32ZFBFMIN-LABEL: fcvt_wu_s_sat_i8:
+; CHECK32ZFBFMIN:       # %bb.0: # %start
+; CHECK32ZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK32ZFBFMIN-NEXT:    fmv.w.x fa4, zero
+; CHECK32ZFBFMIN-NEXT:    fmax.s fa5, fa5, fa4
+; CHECK32ZFBFMIN-NEXT:    lui a0, 276464
+; CHECK32ZFBFMIN-NEXT:    fmv.w.x fa4, a0
+; CHECK32ZFBFMIN-NEXT:    fmin.s fa5, fa5, fa4
+; CHECK32ZFBFMIN-NEXT:    fcvt.wu.s a0, fa5, rtz
+; CHECK32ZFBFMIN-NEXT:    ret
+;
+; RV32ID-LABEL: fcvt_wu_s_sat_i8:
+; RV32ID:       # %bb.0: # %start
+; RV32ID-NEXT:    fmv.x.w a0, fa0
+; RV32ID-NEXT:    slli a0, a0, 16
+; RV32ID-NEXT:    fmv.w.x fa5, a0
+; RV32ID-NEXT:    fmv.w.x fa4, zero
+; RV32ID-NEXT:    fmax.s fa5, fa5, fa4
+; RV32ID-NEXT:    lui a0, 276464
+; RV32ID-NEXT:    fmv.w.x fa4, a0
+; RV32ID-NEXT:    fmin.s fa5, fa5, fa4
+; RV32ID-NEXT:    fcvt.wu.s a0, fa5, rtz
+; RV32ID-NEXT:    ret
+;
+; CHECK64ZFBFMIN-LABEL: fcvt_wu_s_sat_i8:
+; CHECK64ZFBFMIN:       # %bb.0: # %start
+; CHECK64ZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK64ZFBFMIN-NEXT:    fmv.w.x fa4, zero
+; CHECK64ZFBFMIN-NEXT:    fmax.s fa5, fa5, fa4
+; CHECK64ZFBFMIN-NEXT:    lui a0, 276464
+; CHECK64ZFBFMIN-NEXT:    fmv.w.x fa4, a0
+; CHECK64ZFBFMIN-NEXT:    fmin.s fa5, fa5, fa4
+; CHECK64ZFBFMIN-NEXT:    fcvt.lu.s a0, fa5, rtz
+; CHECK64ZFBFMIN-NEXT:    ret
+;
+; RV64ID-LABEL: fcvt_wu_s_sat_i8:
+; RV64ID:       # %bb.0: # %start
+; RV64ID-NEXT:    fmv.x.w a0, fa0
+; RV64ID-NEXT:    slli a0, a0, 48
+; RV64ID-NEXT:    srli a0, a0, 48
+; RV64ID-NEXT:    slli a0, a0, 16
+; RV64ID-NEXT:    fmv.w.x fa5, a0
+; RV64ID-NEXT:    fmv.w.x fa4, zero
+; RV64ID-NEXT:    fmax.s fa5, fa5, fa4
+; RV64ID-NEXT:    lui a0, 276464
+; RV64ID-NEXT:    fmv.w.x fa4, a0
+; RV64ID-NEXT:    fmin.s fa5, fa5, fa4
+; RV64ID-NEXT:    fcvt.lu.s a0, fa5, rtz
+; RV64ID-NEXT:    ret
 start:
   %0 = tail call i8 @llvm.fptoui.sat.i8.bf16(bfloat %a)
   ret i8 %0
@@ -669,52 +1405,106 @@ start:
 declare i8 @llvm.fptoui.sat.i8.bf16(bfloat)
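Note that the llvm.fptoui.sat.i8 lowering above gets away without a feq-based
NaN check: fmax.s against +0.0 already maps NaN to 0 (RISC-V fmax returns the
non-NaN operand), and fmin.s then clamps to 255.0 before the convert. A C
sketch of the same clamp-then-convert shape, assuming the usual fmaxf/fminf
NaN semantics:

#include <math.h>
#include <stdint.h>

/* Saturating float -> u8 without a separate NaN test: NaN falls out of
   fmaxf as 0.0f, and the clamped value then converts in range. */
static uint8_t fptoui_sat_u8(float x) {
    float lo = fmaxf(x, 0.0f);    /* fmv.w.x fa4, zero ; fmax.s */
    float hi = fminf(lo, 255.0f); /* lui a0, 276464   ; fmin.s  */
    return (uint8_t)hi;           /* fcvt.wu.s a0, fa5, rtz     */
}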
 
 define zeroext i32 @fcvt_wu_bf16_sat_zext(bfloat %a) nounwind {
-; CHECK32-LABEL: fcvt_wu_bf16_sat_zext:
-; CHECK32:       # %bb.0: # %start
-; CHECK32-NEXT:    fcvt.s.bf16 fa5, fa0
-; CHECK32-NEXT:    fcvt.wu.s a0, fa5, rtz
-; CHECK32-NEXT:    feq.s a1, fa5, fa5
-; CHECK32-NEXT:    seqz a1, a1
-; CHECK32-NEXT:    addi a1, a1, -1
-; CHECK32-NEXT:    and a0, a1, a0
-; CHECK32-NEXT:    ret
-;
-; CHECK64-LABEL: fcvt_wu_bf16_sat_zext:
-; CHECK64:       # %bb.0: # %start
-; CHECK64-NEXT:    fcvt.s.bf16 fa5, fa0
-; CHECK64-NEXT:    fcvt.wu.s a0, fa5, rtz
-; CHECK64-NEXT:    feq.s a1, fa5, fa5
-; CHECK64-NEXT:    seqz a1, a1
-; CHECK64-NEXT:    addiw a1, a1, -1
-; CHECK64-NEXT:    and a0, a0, a1
-; CHECK64-NEXT:    slli a0, a0, 32
-; CHECK64-NEXT:    srli a0, a0, 32
-; CHECK64-NEXT:    ret
+; CHECK32ZFBFMIN-LABEL: fcvt_wu_bf16_sat_zext:
+; CHECK32ZFBFMIN:       # %bb.0: # %start
+; CHECK32ZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK32ZFBFMIN-NEXT:    fcvt.wu.s a0, fa5, rtz
+; CHECK32ZFBFMIN-NEXT:    feq.s a1, fa5, fa5
+; CHECK32ZFBFMIN-NEXT:    seqz a1, a1
+; CHECK32ZFBFMIN-NEXT:    addi a1, a1, -1
+; CHECK32ZFBFMIN-NEXT:    and a0, a1, a0
+; CHECK32ZFBFMIN-NEXT:    ret
+;
+; RV32ID-LABEL: fcvt_wu_bf16_sat_zext:
+; RV32ID:       # %bb.0: # %start
+; RV32ID-NEXT:    fmv.x.w a0, fa0
+; RV32ID-NEXT:    slli a0, a0, 16
+; RV32ID-NEXT:    fmv.w.x fa5, a0
+; RV32ID-NEXT:    fcvt.wu.s a0, fa5, rtz
+; RV32ID-NEXT:    feq.s a1, fa5, fa5
+; RV32ID-NEXT:    seqz a1, a1
+; RV32ID-NEXT:    addi a1, a1, -1
+; RV32ID-NEXT:    and a0, a1, a0
+; RV32ID-NEXT:    ret
+;
+; CHECK64ZFBFMIN-LABEL: fcvt_wu_bf16_sat_zext:
+; CHECK64ZFBFMIN:       # %bb.0: # %start
+; CHECK64ZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK64ZFBFMIN-NEXT:    fcvt.wu.s a0, fa5, rtz
+; CHECK64ZFBFMIN-NEXT:    feq.s a1, fa5, fa5
+; CHECK64ZFBFMIN-NEXT:    seqz a1, a1
+; CHECK64ZFBFMIN-NEXT:    addiw a1, a1, -1
+; CHECK64ZFBFMIN-NEXT:    and a0, a0, a1
+; CHECK64ZFBFMIN-NEXT:    slli a0, a0, 32
+; CHECK64ZFBFMIN-NEXT:    srli a0, a0, 32
+; CHECK64ZFBFMIN-NEXT:    ret
+;
+; RV64ID-LABEL: fcvt_wu_bf16_sat_zext:
+; RV64ID:       # %bb.0: # %start
+; RV64ID-NEXT:    fmv.x.w a0, fa0
+; RV64ID-NEXT:    slli a0, a0, 48
+; RV64ID-NEXT:    srli a0, a0, 48
+; RV64ID-NEXT:    slli a0, a0, 16
+; RV64ID-NEXT:    fmv.w.x fa5, a0
+; RV64ID-NEXT:    fcvt.wu.s a0, fa5, rtz
+; RV64ID-NEXT:    feq.s a1, fa5, fa5
+; RV64ID-NEXT:    seqz a1, a1
+; RV64ID-NEXT:    addiw a1, a1, -1
+; RV64ID-NEXT:    and a0, a0, a1
+; RV64ID-NEXT:    slli a0, a0, 32
+; RV64ID-NEXT:    srli a0, a0, 32
+; RV64ID-NEXT:    ret
 start:
   %0 = tail call i32 @llvm.fptoui.sat.i32.bf16(bfloat %a)
   ret i32 %0
 }
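For the i32-wide saturating converts, fcvt.wu.s/fcvt.w.s already saturate to
the full integer range, so only NaN needs patching up: feq.s x, x is 0 exactly
for NaN, and the seqz / addi -1 pair turns that into an all-zero or all-ones
mask that is ANDed into the result. A small C sketch of that mask trick:

#include <stdint.h>

/* NaN -> 0 fixup applied after a saturating convert: (x == x) is false
   only for NaN; 0u - ordered is all-ones when ordered, zero for NaN. */
static uint32_t mask_nan_to_zero(float x, uint32_t converted) {
    uint32_t ordered = (x == x);  /* feq.s a1, fa5, fa5        */
    uint32_t mask = 0u - ordered; /* seqz a1, a1 ; addi a1, -1 */
    return converted & mask;      /* and a0, a1, a0            */
}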
 
 define signext i32 @fcvt_w_bf16_sat_sext(bfloat %a) nounwind {
-; CHECK32-LABEL: fcvt_w_bf16_sat_sext:
-; CHECK32:       # %bb.0: # %start
-; CHECK32-NEXT:    fcvt.s.bf16 fa5, fa0
-; CHECK32-NEXT:    fcvt.w.s a0, fa5, rtz
-; CHECK32-NEXT:    feq.s a1, fa5, fa5
-; CHECK32-NEXT:    seqz a1, a1
-; CHECK32-NEXT:    addi a1, a1, -1
-; CHECK32-NEXT:    and a0, a1, a0
-; CHECK32-NEXT:    ret
-;
-; CHECK64-LABEL: fcvt_w_bf16_sat_sext:
-; CHECK64:       # %bb.0: # %start
-; CHECK64-NEXT:    fcvt.s.bf16 fa5, fa0
-; CHECK64-NEXT:    fcvt.w.s a0, fa5, rtz
-; CHECK64-NEXT:    feq.s a1, fa5, fa5
-; CHECK64-NEXT:    seqz a1, a1
-; CHECK64-NEXT:    addi a1, a1, -1
-; CHECK64-NEXT:    and a0, a1, a0
-; CHECK64-NEXT:    ret
+; CHECK32ZFBFMIN-LABEL: fcvt_w_bf16_sat_sext:
+; CHECK32ZFBFMIN:       # %bb.0: # %start
+; CHECK32ZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK32ZFBFMIN-NEXT:    fcvt.w.s a0, fa5, rtz
+; CHECK32ZFBFMIN-NEXT:    feq.s a1, fa5, fa5
+; CHECK32ZFBFMIN-NEXT:    seqz a1, a1
+; CHECK32ZFBFMIN-NEXT:    addi a1, a1, -1
+; CHECK32ZFBFMIN-NEXT:    and a0, a1, a0
+; CHECK32ZFBFMIN-NEXT:    ret
+;
+; RV32ID-LABEL: fcvt_w_bf16_sat_sext:
+; RV32ID:       # %bb.0: # %start
+; RV32ID-NEXT:    fmv.x.w a0, fa0
+; RV32ID-NEXT:    slli a0, a0, 16
+; RV32ID-NEXT:    fmv.w.x fa5, a0
+; RV32ID-NEXT:    fcvt.w.s a0, fa5, rtz
+; RV32ID-NEXT:    feq.s a1, fa5, fa5
+; RV32ID-NEXT:    seqz a1, a1
+; RV32ID-NEXT:    addi a1, a1, -1
+; RV32ID-NEXT:    and a0, a1, a0
+; RV32ID-NEXT:    ret
+;
+; CHECK64ZFBFMIN-LABEL: fcvt_w_bf16_sat_sext:
+; CHECK64ZFBFMIN:       # %bb.0: # %start
+; CHECK64ZFBFMIN-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK64ZFBFMIN-NEXT:    fcvt.w.s a0, fa5, rtz
+; CHECK64ZFBFMIN-NEXT:    feq.s a1, fa5, fa5
+; CHECK64ZFBFMIN-NEXT:    seqz a1, a1
+; CHECK64ZFBFMIN-NEXT:    addi a1, a1, -1
+; CHECK64ZFBFMIN-NEXT:    and a0, a1, a0
+; CHECK64ZFBFMIN-NEXT:    ret
+;
+; RV64ID-LABEL: fcvt_w_bf16_sat_sext:
+; RV64ID:       # %bb.0: # %start
+; RV64ID-NEXT:    fmv.x.w a0, fa0
+; RV64ID-NEXT:    slli a0, a0, 48
+; RV64ID-NEXT:    srli a0, a0, 48
+; RV64ID-NEXT:    slli a0, a0, 16
+; RV64ID-NEXT:    fmv.w.x fa5, a0
+; RV64ID-NEXT:    fcvt.w.s a0, fa5, rtz
+; RV64ID-NEXT:    feq.s a1, fa5, fa5
+; RV64ID-NEXT:    seqz a1, a1
+; RV64ID-NEXT:    addi a1, a1, -1
+; RV64ID-NEXT:    and a0, a1, a0
+; RV64ID-NEXT:    ret
 start:
   %0 = tail call i32 @llvm.fptosi.sat.i32.bf16(bfloat %a)
   ret i32 %0
