[llvm] [RISCV] Lower fixed-length mload/mstore for zvfhmin/zvfbfmin (PR #115145)

Philip Reames via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 6 07:28:56 PST 2024


================
@@ -450,29 +2106,2892 @@ define void @masked_load_v64f32(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
 }
 declare <64 x float> @llvm.masked.load.v64f32(ptr, i32, <64 x i1>, <64 x float>)
 
+define void @masked_load_v128bf16(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
+; RV32-LABEL: masked_load_v128bf16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -512
+; RV32-NEXT:    sw ra, 508(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s0, 504(sp) # 4-byte Folded Spill
+; RV32-NEXT:    addi s0, sp, 512
+; RV32-NEXT:    andi sp, sp, -128
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vle16.v v16, (a1)
+; RV32-NEXT:    addi a1, a1, 128
+; RV32-NEXT:    vle16.v v8, (a1)
+; RV32-NEXT:    addi a1, sp, 256
+; RV32-NEXT:    vse16.v v16, (a1)
+; RV32-NEXT:    addi a1, sp, 128
+; RV32-NEXT:    vse16.v v8, (a1)
+; RV32-NEXT:    lh a1, 320(sp)
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa5
+; RV32-NEXT:    fmv.w.x fa5, zero
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 96(sp)
+; RV32-NEXT:    lh a1, 318(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 95(sp)
+; RV32-NEXT:    lh a1, 316(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 94(sp)
+; RV32-NEXT:    lh a1, 314(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 93(sp)
+; RV32-NEXT:    lh a1, 312(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 92(sp)
+; RV32-NEXT:    lh a1, 310(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 91(sp)
+; RV32-NEXT:    lh a1, 308(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 90(sp)
+; RV32-NEXT:    lh a1, 306(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 89(sp)
+; RV32-NEXT:    lh a1, 304(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 88(sp)
+; RV32-NEXT:    lh a1, 302(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 87(sp)
+; RV32-NEXT:    lh a1, 300(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 86(sp)
+; RV32-NEXT:    lh a1, 298(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 85(sp)
+; RV32-NEXT:    lh a1, 296(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 84(sp)
+; RV32-NEXT:    lh a1, 294(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 83(sp)
+; RV32-NEXT:    lh a1, 292(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 82(sp)
+; RV32-NEXT:    lh a1, 290(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 81(sp)
+; RV32-NEXT:    lh a1, 288(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    vmv.x.s a4, v16
+; RV32-NEXT:    fmv.h.x fa4, a4
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a4, fa4, fa5
+; RV32-NEXT:    sb a4, 64(sp)
+; RV32-NEXT:    sb a1, 80(sp)
+; RV32-NEXT:    lh a1, 354(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 113(sp)
+; RV32-NEXT:    lh a1, 352(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 112(sp)
+; RV32-NEXT:    lh a1, 350(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 111(sp)
+; RV32-NEXT:    lh a1, 348(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 110(sp)
+; RV32-NEXT:    lh a1, 346(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 109(sp)
+; RV32-NEXT:    lh a1, 344(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 108(sp)
+; RV32-NEXT:    lh a1, 342(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 107(sp)
+; RV32-NEXT:    lh a1, 340(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 106(sp)
+; RV32-NEXT:    lh a1, 338(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 105(sp)
+; RV32-NEXT:    lh a1, 336(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 104(sp)
+; RV32-NEXT:    lh a1, 334(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 103(sp)
+; RV32-NEXT:    lh a1, 332(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 102(sp)
+; RV32-NEXT:    lh a1, 330(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 101(sp)
+; RV32-NEXT:    lh a1, 328(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 100(sp)
+; RV32-NEXT:    lh a1, 326(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 99(sp)
+; RV32-NEXT:    lh a1, 324(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 98(sp)
+; RV32-NEXT:    lh a1, 322(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT:    vslidedown.vi v10, v16, 7
+; RV32-NEXT:    vmv.x.s a4, v10
+; RV32-NEXT:    fmv.h.x fa4, a4
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a4, fa4, fa5
+; RV32-NEXT:    vslidedown.vi v10, v16, 6
+; RV32-NEXT:    vmv.x.s a5, v10
+; RV32-NEXT:    fmv.h.x fa4, a5
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a5, fa4, fa5
+; RV32-NEXT:    vslidedown.vi v10, v16, 5
+; RV32-NEXT:    vmv.x.s a6, v10
+; RV32-NEXT:    fmv.h.x fa4, a6
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a6, fa4, fa5
+; RV32-NEXT:    sb a6, 69(sp)
+; RV32-NEXT:    sb a5, 70(sp)
+; RV32-NEXT:    sb a4, 71(sp)
+; RV32-NEXT:    sb a1, 97(sp)
+; RV32-NEXT:    vslidedown.vi v10, v16, 4
+; RV32-NEXT:    vmv.x.s a1, v10
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    vslidedown.vi v10, v16, 3
+; RV32-NEXT:    vmv.x.s a4, v10
+; RV32-NEXT:    fmv.h.x fa4, a4
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a4, fa4, fa5
+; RV32-NEXT:    vslidedown.vi v10, v16, 2
+; RV32-NEXT:    vmv.x.s a5, v10
+; RV32-NEXT:    fmv.h.x fa4, a5
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a5, fa4, fa5
+; RV32-NEXT:    vslidedown.vi v10, v16, 1
+; RV32-NEXT:    vmv.x.s a6, v10
+; RV32-NEXT:    fmv.h.x fa4, a6
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a6, fa4, fa5
+; RV32-NEXT:    sb a6, 65(sp)
+; RV32-NEXT:    sb a5, 66(sp)
+; RV32-NEXT:    sb a4, 67(sp)
+; RV32-NEXT:    sb a1, 68(sp)
+; RV32-NEXT:    lh a1, 382(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 127(sp)
+; RV32-NEXT:    lh a1, 380(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 126(sp)
+; RV32-NEXT:    lh a1, 378(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 125(sp)
+; RV32-NEXT:    lh a1, 376(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 124(sp)
+; RV32-NEXT:    lh a1, 374(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 123(sp)
+; RV32-NEXT:    lh a1, 372(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 122(sp)
+; RV32-NEXT:    lh a1, 370(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 121(sp)
+; RV32-NEXT:    lh a1, 368(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 120(sp)
+; RV32-NEXT:    lh a1, 366(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 119(sp)
+; RV32-NEXT:    lh a1, 364(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 118(sp)
+; RV32-NEXT:    lh a1, 362(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 117(sp)
+; RV32-NEXT:    lh a1, 360(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 116(sp)
+; RV32-NEXT:    lh a1, 358(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 115(sp)
+; RV32-NEXT:    lh a1, 356(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 114(sp)
+; RV32-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-NEXT:    vslidedown.vi v10, v16, 15
+; RV32-NEXT:    vmv.x.s a1, v10
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    vslidedown.vi v10, v16, 14
+; RV32-NEXT:    vmv.x.s a4, v10
+; RV32-NEXT:    fmv.h.x fa4, a4
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a4, fa4, fa5
+; RV32-NEXT:    vslidedown.vi v10, v16, 13
+; RV32-NEXT:    vmv.x.s a5, v10
+; RV32-NEXT:    fmv.h.x fa4, a5
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a5, fa4, fa5
+; RV32-NEXT:    vslidedown.vi v10, v16, 12
+; RV32-NEXT:    vmv.x.s a6, v10
+; RV32-NEXT:    fmv.h.x fa4, a6
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a6, fa4, fa5
+; RV32-NEXT:    sb a6, 76(sp)
+; RV32-NEXT:    sb a5, 77(sp)
+; RV32-NEXT:    sb a4, 78(sp)
+; RV32-NEXT:    sb a1, 79(sp)
+; RV32-NEXT:    vslidedown.vi v10, v16, 11
+; RV32-NEXT:    vmv.x.s a1, v10
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    vslidedown.vi v10, v16, 10
+; RV32-NEXT:    vmv.x.s a4, v10
+; RV32-NEXT:    fmv.h.x fa4, a4
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a4, fa4, fa5
+; RV32-NEXT:    vslidedown.vi v10, v16, 9
+; RV32-NEXT:    vmv.x.s a5, v10
+; RV32-NEXT:    fmv.h.x fa4, a5
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a5, fa4, fa5
+; RV32-NEXT:    vslidedown.vi v10, v16, 8
+; RV32-NEXT:    vmv.x.s a6, v10
+; RV32-NEXT:    fmv.h.x fa4, a6
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a6, fa4, fa5
+; RV32-NEXT:    sb a6, 72(sp)
+; RV32-NEXT:    sb a5, 73(sp)
+; RV32-NEXT:    sb a4, 74(sp)
+; RV32-NEXT:    sb a1, 75(sp)
+; RV32-NEXT:    lh a1, 192(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 32(sp)
+; RV32-NEXT:    lh a1, 190(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 31(sp)
+; RV32-NEXT:    lh a1, 188(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 30(sp)
+; RV32-NEXT:    lh a1, 186(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 29(sp)
+; RV32-NEXT:    lh a1, 184(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 28(sp)
+; RV32-NEXT:    lh a1, 182(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 27(sp)
+; RV32-NEXT:    lh a1, 180(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 26(sp)
+; RV32-NEXT:    lh a1, 178(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 25(sp)
+; RV32-NEXT:    lh a1, 176(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 24(sp)
+; RV32-NEXT:    lh a1, 174(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 23(sp)
+; RV32-NEXT:    lh a1, 172(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 22(sp)
+; RV32-NEXT:    lh a1, 170(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 21(sp)
+; RV32-NEXT:    lh a1, 168(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 20(sp)
+; RV32-NEXT:    lh a1, 166(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 19(sp)
+; RV32-NEXT:    lh a1, 164(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 18(sp)
+; RV32-NEXT:    lh a1, 162(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 17(sp)
+; RV32-NEXT:    lh a1, 160(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    vmv.x.s a4, v8
+; RV32-NEXT:    fmv.h.x fa4, a4
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a4, fa4, fa5
+; RV32-NEXT:    sb a4, 0(sp)
+; RV32-NEXT:    sb a1, 16(sp)
+; RV32-NEXT:    lh a1, 226(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 49(sp)
+; RV32-NEXT:    lh a1, 224(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 48(sp)
+; RV32-NEXT:    lh a1, 222(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 47(sp)
+; RV32-NEXT:    lh a1, 220(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 46(sp)
+; RV32-NEXT:    lh a1, 218(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 45(sp)
+; RV32-NEXT:    lh a1, 216(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 44(sp)
+; RV32-NEXT:    lh a1, 214(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 43(sp)
+; RV32-NEXT:    lh a1, 212(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 42(sp)
+; RV32-NEXT:    lh a1, 210(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 41(sp)
+; RV32-NEXT:    lh a1, 208(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 40(sp)
+; RV32-NEXT:    lh a1, 206(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 39(sp)
+; RV32-NEXT:    lh a1, 204(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 38(sp)
+; RV32-NEXT:    lh a1, 202(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 37(sp)
+; RV32-NEXT:    lh a1, 200(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 36(sp)
+; RV32-NEXT:    lh a1, 198(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 35(sp)
+; RV32-NEXT:    lh a1, 196(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 34(sp)
+; RV32-NEXT:    lh a1, 194(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT:    vslidedown.vi v10, v8, 7
+; RV32-NEXT:    vmv.x.s a4, v10
+; RV32-NEXT:    fmv.h.x fa4, a4
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a4, fa4, fa5
+; RV32-NEXT:    vslidedown.vi v10, v8, 6
+; RV32-NEXT:    vmv.x.s a5, v10
+; RV32-NEXT:    fmv.h.x fa4, a5
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a5, fa4, fa5
+; RV32-NEXT:    vslidedown.vi v10, v8, 5
+; RV32-NEXT:    vmv.x.s a6, v10
+; RV32-NEXT:    fmv.h.x fa4, a6
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a6, fa4, fa5
+; RV32-NEXT:    sb a6, 5(sp)
+; RV32-NEXT:    sb a5, 6(sp)
+; RV32-NEXT:    sb a4, 7(sp)
+; RV32-NEXT:    sb a1, 33(sp)
+; RV32-NEXT:    vslidedown.vi v10, v8, 4
+; RV32-NEXT:    vmv.x.s a1, v10
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    vslidedown.vi v10, v8, 3
+; RV32-NEXT:    vmv.x.s a4, v10
+; RV32-NEXT:    fmv.h.x fa4, a4
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a4, fa4, fa5
+; RV32-NEXT:    vslidedown.vi v10, v8, 2
+; RV32-NEXT:    vmv.x.s a5, v10
+; RV32-NEXT:    fmv.h.x fa4, a5
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a5, fa4, fa5
+; RV32-NEXT:    vslidedown.vi v10, v8, 1
+; RV32-NEXT:    vmv.x.s a6, v10
+; RV32-NEXT:    fmv.h.x fa4, a6
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a6, fa4, fa5
+; RV32-NEXT:    sb a6, 1(sp)
+; RV32-NEXT:    sb a5, 2(sp)
+; RV32-NEXT:    sb a4, 3(sp)
+; RV32-NEXT:    sb a1, 4(sp)
+; RV32-NEXT:    lh a1, 254(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 63(sp)
+; RV32-NEXT:    lh a1, 252(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 62(sp)
+; RV32-NEXT:    lh a1, 250(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 61(sp)
+; RV32-NEXT:    lh a1, 248(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 60(sp)
+; RV32-NEXT:    lh a1, 246(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 59(sp)
+; RV32-NEXT:    lh a1, 244(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 58(sp)
+; RV32-NEXT:    lh a1, 242(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 57(sp)
+; RV32-NEXT:    lh a1, 240(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 56(sp)
+; RV32-NEXT:    lh a1, 238(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 55(sp)
+; RV32-NEXT:    lh a1, 236(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 54(sp)
+; RV32-NEXT:    lh a1, 234(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 53(sp)
+; RV32-NEXT:    lh a1, 232(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 52(sp)
+; RV32-NEXT:    lh a1, 230(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 51(sp)
+; RV32-NEXT:    lh a1, 228(sp)
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    sb a1, 50(sp)
+; RV32-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-NEXT:    vslidedown.vi v10, v8, 15
+; RV32-NEXT:    vmv.x.s a1, v10
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    vslidedown.vi v10, v8, 14
+; RV32-NEXT:    vmv.x.s a4, v10
+; RV32-NEXT:    fmv.h.x fa4, a4
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a4, fa4, fa5
+; RV32-NEXT:    vslidedown.vi v10, v8, 13
+; RV32-NEXT:    vmv.x.s a5, v10
+; RV32-NEXT:    fmv.h.x fa4, a5
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a5, fa4, fa5
+; RV32-NEXT:    vslidedown.vi v10, v8, 12
+; RV32-NEXT:    vmv.x.s a6, v10
+; RV32-NEXT:    fmv.h.x fa4, a6
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a6, fa4, fa5
+; RV32-NEXT:    sb a6, 12(sp)
+; RV32-NEXT:    sb a5, 13(sp)
+; RV32-NEXT:    sb a4, 14(sp)
+; RV32-NEXT:    sb a1, 15(sp)
+; RV32-NEXT:    vslidedown.vi v10, v8, 11
+; RV32-NEXT:    vmv.x.s a1, v10
+; RV32-NEXT:    fmv.h.x fa4, a1
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a1, fa4, fa5
+; RV32-NEXT:    vslidedown.vi v10, v8, 10
+; RV32-NEXT:    vmv.x.s a4, v10
+; RV32-NEXT:    fmv.h.x fa4, a4
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a4, fa4, fa5
+; RV32-NEXT:    vslidedown.vi v10, v8, 9
+; RV32-NEXT:    vmv.x.s a5, v10
+; RV32-NEXT:    fmv.h.x fa4, a5
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a5, fa4, fa5
+; RV32-NEXT:    vslidedown.vi v8, v8, 8
+; RV32-NEXT:    vmv.x.s a6, v8
+; RV32-NEXT:    fmv.h.x fa4, a6
+; RV32-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV32-NEXT:    feq.s a6, fa4, fa5
+; RV32-NEXT:    sb a6, 8(sp)
+; RV32-NEXT:    sb a5, 9(sp)
+; RV32-NEXT:    sb a4, 10(sp)
+; RV32-NEXT:    sb a1, 11(sp)
+; RV32-NEXT:    addi a1, sp, 64
+; RV32-NEXT:    vsetvli zero, a3, e8, m4, ta, ma
+; RV32-NEXT:    vle8.v v8, (a1)
+; RV32-NEXT:    mv a1, sp
+; RV32-NEXT:    vle8.v v12, (a1)
+; RV32-NEXT:    vand.vi v16, v8, 1
+; RV32-NEXT:    vmsne.vi v8, v16, 0
+; RV32-NEXT:    vand.vi v12, v12, 1
+; RV32-NEXT:    vmsne.vi v0, v12, 0
+; RV32-NEXT:    addi a1, a0, 128
+; RV32-NEXT:    vle16.v v16, (a1), v0.t
+; RV32-NEXT:    vmv1r.v v0, v8
+; RV32-NEXT:    vle16.v v8, (a0), v0.t
+; RV32-NEXT:    vse16.v v8, (a2)
+; RV32-NEXT:    addi a0, a2, 128
+; RV32-NEXT:    vse16.v v16, (a0)
+; RV32-NEXT:    addi sp, s0, -512
+; RV32-NEXT:    lw ra, 508(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s0, 504(sp) # 4-byte Folded Reload
+; RV32-NEXT:    addi sp, sp, 512
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: masked_load_v128bf16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -512
+; RV64-NEXT:    sd ra, 504(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s0, 496(sp) # 8-byte Folded Spill
+; RV64-NEXT:    addi s0, sp, 512
+; RV64-NEXT:    andi sp, sp, -128
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vle16.v v16, (a1)
+; RV64-NEXT:    addi a1, a1, 128
+; RV64-NEXT:    vle16.v v8, (a1)
+; RV64-NEXT:    addi a1, sp, 256
+; RV64-NEXT:    vse16.v v16, (a1)
+; RV64-NEXT:    addi a1, sp, 128
+; RV64-NEXT:    vse16.v v8, (a1)
+; RV64-NEXT:    lh a1, 320(sp)
+; RV64-NEXT:    fmv.h.x fa5, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa5
+; RV64-NEXT:    fmv.w.x fa5, zero
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 96(sp)
+; RV64-NEXT:    lh a1, 318(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 95(sp)
+; RV64-NEXT:    lh a1, 316(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 94(sp)
+; RV64-NEXT:    lh a1, 314(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 93(sp)
+; RV64-NEXT:    lh a1, 312(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 92(sp)
+; RV64-NEXT:    lh a1, 310(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 91(sp)
+; RV64-NEXT:    lh a1, 308(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 90(sp)
+; RV64-NEXT:    lh a1, 306(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 89(sp)
+; RV64-NEXT:    lh a1, 304(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 88(sp)
+; RV64-NEXT:    lh a1, 302(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 87(sp)
+; RV64-NEXT:    lh a1, 300(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 86(sp)
+; RV64-NEXT:    lh a1, 298(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 85(sp)
+; RV64-NEXT:    lh a1, 296(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 84(sp)
+; RV64-NEXT:    lh a1, 294(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 83(sp)
+; RV64-NEXT:    lh a1, 292(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 82(sp)
+; RV64-NEXT:    lh a1, 290(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 81(sp)
+; RV64-NEXT:    lh a1, 288(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    vmv.x.s a4, v16
+; RV64-NEXT:    fmv.h.x fa4, a4
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a4, fa4, fa5
+; RV64-NEXT:    sb a4, 64(sp)
+; RV64-NEXT:    sb a1, 80(sp)
+; RV64-NEXT:    lh a1, 354(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 113(sp)
+; RV64-NEXT:    lh a1, 352(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 112(sp)
+; RV64-NEXT:    lh a1, 350(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 111(sp)
+; RV64-NEXT:    lh a1, 348(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 110(sp)
+; RV64-NEXT:    lh a1, 346(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 109(sp)
+; RV64-NEXT:    lh a1, 344(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 108(sp)
+; RV64-NEXT:    lh a1, 342(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 107(sp)
+; RV64-NEXT:    lh a1, 340(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 106(sp)
+; RV64-NEXT:    lh a1, 338(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 105(sp)
+; RV64-NEXT:    lh a1, 336(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 104(sp)
+; RV64-NEXT:    lh a1, 334(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 103(sp)
+; RV64-NEXT:    lh a1, 332(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 102(sp)
+; RV64-NEXT:    lh a1, 330(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 101(sp)
+; RV64-NEXT:    lh a1, 328(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 100(sp)
+; RV64-NEXT:    lh a1, 326(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 99(sp)
+; RV64-NEXT:    lh a1, 324(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 98(sp)
+; RV64-NEXT:    lh a1, 322(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT:    vslidedown.vi v10, v16, 7
+; RV64-NEXT:    vmv.x.s a4, v10
+; RV64-NEXT:    fmv.h.x fa4, a4
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a4, fa4, fa5
+; RV64-NEXT:    vslidedown.vi v10, v16, 6
+; RV64-NEXT:    vmv.x.s a5, v10
+; RV64-NEXT:    fmv.h.x fa4, a5
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a5, fa4, fa5
+; RV64-NEXT:    vslidedown.vi v10, v16, 5
+; RV64-NEXT:    vmv.x.s a6, v10
+; RV64-NEXT:    fmv.h.x fa4, a6
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a6, fa4, fa5
+; RV64-NEXT:    sb a6, 69(sp)
+; RV64-NEXT:    sb a5, 70(sp)
+; RV64-NEXT:    sb a4, 71(sp)
+; RV64-NEXT:    sb a1, 97(sp)
+; RV64-NEXT:    vslidedown.vi v10, v16, 4
+; RV64-NEXT:    vmv.x.s a1, v10
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    vslidedown.vi v10, v16, 3
+; RV64-NEXT:    vmv.x.s a4, v10
+; RV64-NEXT:    fmv.h.x fa4, a4
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a4, fa4, fa5
+; RV64-NEXT:    vslidedown.vi v10, v16, 2
+; RV64-NEXT:    vmv.x.s a5, v10
+; RV64-NEXT:    fmv.h.x fa4, a5
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a5, fa4, fa5
+; RV64-NEXT:    vslidedown.vi v10, v16, 1
+; RV64-NEXT:    vmv.x.s a6, v10
+; RV64-NEXT:    fmv.h.x fa4, a6
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a6, fa4, fa5
+; RV64-NEXT:    sb a6, 65(sp)
+; RV64-NEXT:    sb a5, 66(sp)
+; RV64-NEXT:    sb a4, 67(sp)
+; RV64-NEXT:    sb a1, 68(sp)
+; RV64-NEXT:    lh a1, 382(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 127(sp)
+; RV64-NEXT:    lh a1, 380(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 126(sp)
+; RV64-NEXT:    lh a1, 378(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 125(sp)
+; RV64-NEXT:    lh a1, 376(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 124(sp)
+; RV64-NEXT:    lh a1, 374(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 123(sp)
+; RV64-NEXT:    lh a1, 372(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 122(sp)
+; RV64-NEXT:    lh a1, 370(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 121(sp)
+; RV64-NEXT:    lh a1, 368(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 120(sp)
+; RV64-NEXT:    lh a1, 366(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 119(sp)
+; RV64-NEXT:    lh a1, 364(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 118(sp)
+; RV64-NEXT:    lh a1, 362(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 117(sp)
+; RV64-NEXT:    lh a1, 360(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 116(sp)
+; RV64-NEXT:    lh a1, 358(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 115(sp)
+; RV64-NEXT:    lh a1, 356(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 114(sp)
+; RV64-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-NEXT:    vslidedown.vi v10, v16, 15
+; RV64-NEXT:    vmv.x.s a1, v10
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    vslidedown.vi v10, v16, 14
+; RV64-NEXT:    vmv.x.s a4, v10
+; RV64-NEXT:    fmv.h.x fa4, a4
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a4, fa4, fa5
+; RV64-NEXT:    vslidedown.vi v10, v16, 13
+; RV64-NEXT:    vmv.x.s a5, v10
+; RV64-NEXT:    fmv.h.x fa4, a5
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a5, fa4, fa5
+; RV64-NEXT:    vslidedown.vi v10, v16, 12
+; RV64-NEXT:    vmv.x.s a6, v10
+; RV64-NEXT:    fmv.h.x fa4, a6
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a6, fa4, fa5
+; RV64-NEXT:    sb a6, 76(sp)
+; RV64-NEXT:    sb a5, 77(sp)
+; RV64-NEXT:    sb a4, 78(sp)
+; RV64-NEXT:    sb a1, 79(sp)
+; RV64-NEXT:    vslidedown.vi v10, v16, 11
+; RV64-NEXT:    vmv.x.s a1, v10
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    vslidedown.vi v10, v16, 10
+; RV64-NEXT:    vmv.x.s a4, v10
+; RV64-NEXT:    fmv.h.x fa4, a4
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a4, fa4, fa5
+; RV64-NEXT:    vslidedown.vi v10, v16, 9
+; RV64-NEXT:    vmv.x.s a5, v10
+; RV64-NEXT:    fmv.h.x fa4, a5
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a5, fa4, fa5
+; RV64-NEXT:    vslidedown.vi v10, v16, 8
+; RV64-NEXT:    vmv.x.s a6, v10
+; RV64-NEXT:    fmv.h.x fa4, a6
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a6, fa4, fa5
+; RV64-NEXT:    sb a6, 72(sp)
+; RV64-NEXT:    sb a5, 73(sp)
+; RV64-NEXT:    sb a4, 74(sp)
+; RV64-NEXT:    sb a1, 75(sp)
+; RV64-NEXT:    lh a1, 192(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 32(sp)
+; RV64-NEXT:    lh a1, 190(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 31(sp)
+; RV64-NEXT:    lh a1, 188(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 30(sp)
+; RV64-NEXT:    lh a1, 186(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 29(sp)
+; RV64-NEXT:    lh a1, 184(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 28(sp)
+; RV64-NEXT:    lh a1, 182(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 27(sp)
+; RV64-NEXT:    lh a1, 180(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 26(sp)
+; RV64-NEXT:    lh a1, 178(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 25(sp)
+; RV64-NEXT:    lh a1, 176(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 24(sp)
+; RV64-NEXT:    lh a1, 174(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 23(sp)
+; RV64-NEXT:    lh a1, 172(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 22(sp)
+; RV64-NEXT:    lh a1, 170(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 21(sp)
+; RV64-NEXT:    lh a1, 168(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 20(sp)
+; RV64-NEXT:    lh a1, 166(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 19(sp)
+; RV64-NEXT:    lh a1, 164(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 18(sp)
+; RV64-NEXT:    lh a1, 162(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 17(sp)
+; RV64-NEXT:    lh a1, 160(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    vmv.x.s a4, v8
+; RV64-NEXT:    fmv.h.x fa4, a4
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a4, fa4, fa5
+; RV64-NEXT:    sb a4, 0(sp)
+; RV64-NEXT:    sb a1, 16(sp)
+; RV64-NEXT:    lh a1, 226(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 49(sp)
+; RV64-NEXT:    lh a1, 224(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 48(sp)
+; RV64-NEXT:    lh a1, 222(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 47(sp)
+; RV64-NEXT:    lh a1, 220(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 46(sp)
+; RV64-NEXT:    lh a1, 218(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 45(sp)
+; RV64-NEXT:    lh a1, 216(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 44(sp)
+; RV64-NEXT:    lh a1, 214(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 43(sp)
+; RV64-NEXT:    lh a1, 212(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 42(sp)
+; RV64-NEXT:    lh a1, 210(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 41(sp)
+; RV64-NEXT:    lh a1, 208(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 40(sp)
+; RV64-NEXT:    lh a1, 206(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 39(sp)
+; RV64-NEXT:    lh a1, 204(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 38(sp)
+; RV64-NEXT:    lh a1, 202(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 37(sp)
+; RV64-NEXT:    lh a1, 200(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 36(sp)
+; RV64-NEXT:    lh a1, 198(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 35(sp)
+; RV64-NEXT:    lh a1, 196(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 34(sp)
+; RV64-NEXT:    lh a1, 194(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT:    vslidedown.vi v10, v8, 7
+; RV64-NEXT:    vmv.x.s a4, v10
+; RV64-NEXT:    fmv.h.x fa4, a4
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a4, fa4, fa5
+; RV64-NEXT:    vslidedown.vi v10, v8, 6
+; RV64-NEXT:    vmv.x.s a5, v10
+; RV64-NEXT:    fmv.h.x fa4, a5
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a5, fa4, fa5
+; RV64-NEXT:    vslidedown.vi v10, v8, 5
+; RV64-NEXT:    vmv.x.s a6, v10
+; RV64-NEXT:    fmv.h.x fa4, a6
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a6, fa4, fa5
+; RV64-NEXT:    sb a6, 5(sp)
+; RV64-NEXT:    sb a5, 6(sp)
+; RV64-NEXT:    sb a4, 7(sp)
+; RV64-NEXT:    sb a1, 33(sp)
+; RV64-NEXT:    vslidedown.vi v10, v8, 4
+; RV64-NEXT:    vmv.x.s a1, v10
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    vslidedown.vi v10, v8, 3
+; RV64-NEXT:    vmv.x.s a4, v10
+; RV64-NEXT:    fmv.h.x fa4, a4
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a4, fa4, fa5
+; RV64-NEXT:    vslidedown.vi v10, v8, 2
+; RV64-NEXT:    vmv.x.s a5, v10
+; RV64-NEXT:    fmv.h.x fa4, a5
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a5, fa4, fa5
+; RV64-NEXT:    vslidedown.vi v10, v8, 1
+; RV64-NEXT:    vmv.x.s a6, v10
+; RV64-NEXT:    fmv.h.x fa4, a6
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a6, fa4, fa5
+; RV64-NEXT:    sb a6, 1(sp)
+; RV64-NEXT:    sb a5, 2(sp)
+; RV64-NEXT:    sb a4, 3(sp)
+; RV64-NEXT:    sb a1, 4(sp)
+; RV64-NEXT:    lh a1, 254(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 63(sp)
+; RV64-NEXT:    lh a1, 252(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 62(sp)
+; RV64-NEXT:    lh a1, 250(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 61(sp)
+; RV64-NEXT:    lh a1, 248(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 60(sp)
+; RV64-NEXT:    lh a1, 246(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 59(sp)
+; RV64-NEXT:    lh a1, 244(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 58(sp)
+; RV64-NEXT:    lh a1, 242(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 57(sp)
+; RV64-NEXT:    lh a1, 240(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 56(sp)
+; RV64-NEXT:    lh a1, 238(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 55(sp)
+; RV64-NEXT:    lh a1, 236(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 54(sp)
+; RV64-NEXT:    lh a1, 234(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 53(sp)
+; RV64-NEXT:    lh a1, 232(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 52(sp)
+; RV64-NEXT:    lh a1, 230(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 51(sp)
+; RV64-NEXT:    lh a1, 228(sp)
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    sb a1, 50(sp)
+; RV64-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-NEXT:    vslidedown.vi v10, v8, 15
+; RV64-NEXT:    vmv.x.s a1, v10
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    vslidedown.vi v10, v8, 14
+; RV64-NEXT:    vmv.x.s a4, v10
+; RV64-NEXT:    fmv.h.x fa4, a4
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a4, fa4, fa5
+; RV64-NEXT:    vslidedown.vi v10, v8, 13
+; RV64-NEXT:    vmv.x.s a5, v10
+; RV64-NEXT:    fmv.h.x fa4, a5
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a5, fa4, fa5
+; RV64-NEXT:    vslidedown.vi v10, v8, 12
+; RV64-NEXT:    vmv.x.s a6, v10
+; RV64-NEXT:    fmv.h.x fa4, a6
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a6, fa4, fa5
+; RV64-NEXT:    sb a6, 12(sp)
+; RV64-NEXT:    sb a5, 13(sp)
+; RV64-NEXT:    sb a4, 14(sp)
+; RV64-NEXT:    sb a1, 15(sp)
+; RV64-NEXT:    vslidedown.vi v10, v8, 11
+; RV64-NEXT:    vmv.x.s a1, v10
+; RV64-NEXT:    fmv.h.x fa4, a1
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a1, fa4, fa5
+; RV64-NEXT:    vslidedown.vi v10, v8, 10
+; RV64-NEXT:    vmv.x.s a4, v10
+; RV64-NEXT:    fmv.h.x fa4, a4
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a4, fa4, fa5
+; RV64-NEXT:    vslidedown.vi v10, v8, 9
+; RV64-NEXT:    vmv.x.s a5, v10
+; RV64-NEXT:    fmv.h.x fa4, a5
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a5, fa4, fa5
+; RV64-NEXT:    vslidedown.vi v8, v8, 8
+; RV64-NEXT:    vmv.x.s a6, v8
+; RV64-NEXT:    fmv.h.x fa4, a6
+; RV64-NEXT:    fcvt.s.bf16 fa4, fa4
+; RV64-NEXT:    feq.s a6, fa4, fa5
+; RV64-NEXT:    sb a6, 8(sp)
+; RV64-NEXT:    sb a5, 9(sp)
+; RV64-NEXT:    sb a4, 10(sp)
+; RV64-NEXT:    sb a1, 11(sp)
+; RV64-NEXT:    addi a1, sp, 64
+; RV64-NEXT:    vsetvli zero, a3, e8, m4, ta, ma
+; RV64-NEXT:    vle8.v v8, (a1)
+; RV64-NEXT:    mv a1, sp
+; RV64-NEXT:    vle8.v v12, (a1)
+; RV64-NEXT:    vand.vi v16, v8, 1
+; RV64-NEXT:    vmsne.vi v8, v16, 0
+; RV64-NEXT:    vand.vi v12, v12, 1
+; RV64-NEXT:    vmsne.vi v0, v12, 0
+; RV64-NEXT:    addi a1, a0, 128
+; RV64-NEXT:    vle16.v v16, (a1), v0.t
+; RV64-NEXT:    vmv1r.v v0, v8
+; RV64-NEXT:    vle16.v v8, (a0), v0.t
+; RV64-NEXT:    vse16.v v8, (a2)
+; RV64-NEXT:    addi a0, a2, 128
+; RV64-NEXT:    vse16.v v16, (a0)
+; RV64-NEXT:    addi sp, s0, -512
+; RV64-NEXT:    ld ra, 504(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s0, 496(sp) # 8-byte Folded Reload
+; RV64-NEXT:    addi sp, sp, 512
+; RV64-NEXT:    ret
+  %m = load <128 x bfloat>, ptr %m_ptr
+  %mask = fcmp oeq <128 x bfloat> %m, zeroinitializer
+  %load = call <128 x bfloat> @llvm.masked.load.v128bf16(ptr %a, i32 8, <128 x i1> %mask, <128 x bfloat> undef)
+  store <128 x bfloat> %load, ptr %res_ptr
+  ret void
+}
+declare <128 x bfloat> @llvm.masked.load.v128bf16(ptr, i32, <128 x i1>, <128 x bfloat>)
+
 define void @masked_load_v128f16(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
-; CHECK-LABEL: masked_load_v128f16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a3, a1, 128
-; CHECK-NEXT:    li a4, 64
-; CHECK-NEXT:    vsetvli zero, a4, e16, m8, ta, ma
-; CHECK-NEXT:    vle16.v v16, (a1)
-; CHECK-NEXT:    vle16.v v24, (a3)
-; CHECK-NEXT:    fmv.h.x fa5, zero
-; CHECK-NEXT:    vmfeq.vf v8, v16, fa5
-; CHECK-NEXT:    vmfeq.vf v0, v24, fa5
-; CHECK-NEXT:    addi a1, a0, 128
-; CHECK-NEXT:    vle16.v v16, (a1), v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vle16.v v8, (a0), v0.t
-; CHECK-NEXT:    vse16.v v8, (a2)
-; CHECK-NEXT:    addi a0, a2, 128
-; CHECK-NEXT:    vse16.v v16, (a0)
-; CHECK-NEXT:    ret
+; ZVFH-LABEL: masked_load_v128f16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    addi a3, a1, 128
+; ZVFH-NEXT:    li a4, 64
+; ZVFH-NEXT:    vsetvli zero, a4, e16, m8, ta, ma
+; ZVFH-NEXT:    vle16.v v16, (a1)
+; ZVFH-NEXT:    vle16.v v24, (a3)
+; ZVFH-NEXT:    fmv.h.x fa5, zero
+; ZVFH-NEXT:    vmfeq.vf v8, v16, fa5
+; ZVFH-NEXT:    vmfeq.vf v0, v24, fa5
+; ZVFH-NEXT:    addi a1, a0, 128
+; ZVFH-NEXT:    vle16.v v16, (a1), v0.t
+; ZVFH-NEXT:    vmv1r.v v0, v8
+; ZVFH-NEXT:    vle16.v v8, (a0), v0.t
+; ZVFH-NEXT:    vse16.v v8, (a2)
+; ZVFH-NEXT:    addi a0, a2, 128
+; ZVFH-NEXT:    vse16.v v16, (a0)
+; ZVFH-NEXT:    ret
+;
+; RV32-ZVFHMIN-LABEL: masked_load_v128f16:
+; RV32-ZVFHMIN:       # %bb.0:
+; RV32-ZVFHMIN-NEXT:    addi sp, sp, -512
+; RV32-ZVFHMIN-NEXT:    sw ra, 508(sp) # 4-byte Folded Spill
+; RV32-ZVFHMIN-NEXT:    sw s0, 504(sp) # 4-byte Folded Spill
+; RV32-ZVFHMIN-NEXT:    addi s0, sp, 512
+; RV32-ZVFHMIN-NEXT:    andi sp, sp, -128
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vle16.v v16, (a1)
+; RV32-ZVFHMIN-NEXT:    addi a1, a1, 128
+; RV32-ZVFHMIN-NEXT:    vle16.v v8, (a1)
+; RV32-ZVFHMIN-NEXT:    addi a1, sp, 256
+; RV32-ZVFHMIN-NEXT:    vse16.v v16, (a1)
+; RV32-ZVFHMIN-NEXT:    addi a1, sp, 128
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a1)
+; RV32-ZVFHMIN-NEXT:    lh a1, 320(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa5
+; RV32-ZVFHMIN-NEXT:    fmv.w.x fa5, zero
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 96(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 318(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 95(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 316(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 94(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 314(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 93(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 312(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 92(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 310(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 91(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 308(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 90(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 306(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 89(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 304(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 88(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 302(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 87(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 300(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 86(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 298(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 85(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 296(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 84(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 294(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 83(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 292(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 82(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 290(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 81(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 288(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a4, v16
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a4
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a4, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a4, 64(sp)
+; RV32-ZVFHMIN-NEXT:    sb a1, 80(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 354(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 113(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 352(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 112(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 350(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 111(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 348(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 110(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 346(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 109(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 344(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 108(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 342(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 107(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 340(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 106(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 338(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 105(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 336(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 104(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 334(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 103(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 332(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 102(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 330(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 101(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 328(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 100(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 326(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 99(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 324(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 98(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 322(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v10, v16, 7
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a4, v10
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a4
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a4, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v10, v16, 6
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a5, v10
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a5
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a5, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v10, v16, 5
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a6, v10
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a6
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a6, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a6, 69(sp)
+; RV32-ZVFHMIN-NEXT:    sb a5, 70(sp)
+; RV32-ZVFHMIN-NEXT:    sb a4, 71(sp)
+; RV32-ZVFHMIN-NEXT:    sb a1, 97(sp)
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v10, v16, 4
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a1, v10
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v10, v16, 3
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a4, v10
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a4
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a4, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v10, v16, 2
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a5, v10
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a5
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a5, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v10, v16, 1
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a6, v10
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a6
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a6, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a6, 65(sp)
+; RV32-ZVFHMIN-NEXT:    sb a5, 66(sp)
+; RV32-ZVFHMIN-NEXT:    sb a4, 67(sp)
+; RV32-ZVFHMIN-NEXT:    sb a1, 68(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 382(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 127(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 380(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 126(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 378(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 125(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 376(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 124(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 374(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 123(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 372(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 122(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 370(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 121(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 368(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 120(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 366(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 119(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 364(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 118(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 362(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 117(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 360(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 116(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 358(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 115(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 356(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 114(sp)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v10, v16, 15
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a1, v10
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v10, v16, 14
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a4, v10
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a4
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a4, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v10, v16, 13
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a5, v10
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a5
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a5, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v10, v16, 12
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a6, v10
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a6
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a6, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a6, 76(sp)
+; RV32-ZVFHMIN-NEXT:    sb a5, 77(sp)
+; RV32-ZVFHMIN-NEXT:    sb a4, 78(sp)
+; RV32-ZVFHMIN-NEXT:    sb a1, 79(sp)
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v10, v16, 11
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a1, v10
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v10, v16, 10
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a4, v10
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a4
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a4, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v10, v16, 9
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a5, v10
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a5
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a5, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v10, v16, 8
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a6, v10
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a6
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a6, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a6, 72(sp)
+; RV32-ZVFHMIN-NEXT:    sb a5, 73(sp)
+; RV32-ZVFHMIN-NEXT:    sb a4, 74(sp)
+; RV32-ZVFHMIN-NEXT:    sb a1, 75(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 192(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 32(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 190(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 31(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 188(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 30(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 186(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 29(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 184(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 28(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 182(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 27(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 180(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 26(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 178(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 25(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 176(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 24(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 174(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 23(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 172(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 22(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 170(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 21(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 168(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 20(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 166(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 19(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 164(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 18(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 162(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 17(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 160(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a4, v8
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a4
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a4, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a4, 0(sp)
+; RV32-ZVFHMIN-NEXT:    sb a1, 16(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 226(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 49(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 224(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 48(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 222(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 47(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 220(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 46(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 218(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 45(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 216(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 44(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 214(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 43(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 212(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 42(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 210(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 41(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 208(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 40(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 206(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 39(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 204(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 38(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 202(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 37(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 200(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 36(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 198(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 35(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 196(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 34(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 194(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 7
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a4, v10
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a4
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a4, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 6
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a5, v10
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a5
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a5, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 5
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a6, v10
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a6
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a6, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a6, 5(sp)
+; RV32-ZVFHMIN-NEXT:    sb a5, 6(sp)
+; RV32-ZVFHMIN-NEXT:    sb a4, 7(sp)
+; RV32-ZVFHMIN-NEXT:    sb a1, 33(sp)
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 4
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a1, v10
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 3
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a4, v10
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a4
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a4, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 2
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a5, v10
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a5
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a5, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 1
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a6, v10
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a6
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a6, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a6, 1(sp)
+; RV32-ZVFHMIN-NEXT:    sb a5, 2(sp)
+; RV32-ZVFHMIN-NEXT:    sb a4, 3(sp)
+; RV32-ZVFHMIN-NEXT:    sb a1, 4(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 254(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 63(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 252(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 62(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 250(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 61(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 248(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 60(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 246(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 59(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 244(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 58(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 242(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 57(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 240(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 56(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 238(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 55(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 236(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 54(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 234(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 53(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 232(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 52(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 230(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 51(sp)
+; RV32-ZVFHMIN-NEXT:    lh a1, 228(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a1, 50(sp)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 15
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a1, v10
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 14
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a4, v10
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a4
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a4, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 13
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a5, v10
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a5
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a5, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 12
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a6, v10
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a6
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a6, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a6, 12(sp)
+; RV32-ZVFHMIN-NEXT:    sb a5, 13(sp)
+; RV32-ZVFHMIN-NEXT:    sb a4, 14(sp)
+; RV32-ZVFHMIN-NEXT:    sb a1, 15(sp)
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 11
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a1, v10
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 10
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a4, v10
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a4
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a4, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 9
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a5, v10
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a5
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a5, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 8
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a6, v8
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa4, a6
+; RV32-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV32-ZVFHMIN-NEXT:    feq.s a6, fa4, fa5
+; RV32-ZVFHMIN-NEXT:    sb a6, 8(sp)
+; RV32-ZVFHMIN-NEXT:    sb a5, 9(sp)
+; RV32-ZVFHMIN-NEXT:    sb a4, 10(sp)
+; RV32-ZVFHMIN-NEXT:    sb a1, 11(sp)
+; RV32-ZVFHMIN-NEXT:    addi a1, sp, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e8, m4, ta, ma
+; RV32-ZVFHMIN-NEXT:    vle8.v v8, (a1)
+; RV32-ZVFHMIN-NEXT:    mv a1, sp
+; RV32-ZVFHMIN-NEXT:    vle8.v v12, (a1)
+; RV32-ZVFHMIN-NEXT:    vand.vi v16, v8, 1
+; RV32-ZVFHMIN-NEXT:    vmsne.vi v8, v16, 0
+; RV32-ZVFHMIN-NEXT:    vand.vi v12, v12, 1
+; RV32-ZVFHMIN-NEXT:    vmsne.vi v0, v12, 0
+; RV32-ZVFHMIN-NEXT:    addi a1, a0, 128
+; RV32-ZVFHMIN-NEXT:    vle16.v v16, (a1), v0.t
+; RV32-ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; RV32-ZVFHMIN-NEXT:    vle16.v v8, (a0), v0.t
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a2)
+; RV32-ZVFHMIN-NEXT:    addi a0, a2, 128
+; RV32-ZVFHMIN-NEXT:    vse16.v v16, (a0)
+; RV32-ZVFHMIN-NEXT:    addi sp, s0, -512
+; RV32-ZVFHMIN-NEXT:    lw ra, 508(sp) # 4-byte Folded Reload
+; RV32-ZVFHMIN-NEXT:    lw s0, 504(sp) # 4-byte Folded Reload
+; RV32-ZVFHMIN-NEXT:    addi sp, sp, 512
+; RV32-ZVFHMIN-NEXT:    ret
+;
+; RV64-ZVFHMIN-LABEL: masked_load_v128f16:
+; RV64-ZVFHMIN:       # %bb.0:
+; RV64-ZVFHMIN-NEXT:    addi sp, sp, -512
+; RV64-ZVFHMIN-NEXT:    sd ra, 504(sp) # 8-byte Folded Spill
+; RV64-ZVFHMIN-NEXT:    sd s0, 496(sp) # 8-byte Folded Spill
+; RV64-ZVFHMIN-NEXT:    addi s0, sp, 512
+; RV64-ZVFHMIN-NEXT:    andi sp, sp, -128
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vle16.v v16, (a1)
+; RV64-ZVFHMIN-NEXT:    addi a1, a1, 128
+; RV64-ZVFHMIN-NEXT:    vle16.v v8, (a1)
+; RV64-ZVFHMIN-NEXT:    addi a1, sp, 256
+; RV64-ZVFHMIN-NEXT:    vse16.v v16, (a1)
+; RV64-ZVFHMIN-NEXT:    addi a1, sp, 128
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a1)
+; RV64-ZVFHMIN-NEXT:    lh a1, 320(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa5
+; RV64-ZVFHMIN-NEXT:    fmv.w.x fa5, zero
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 96(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 318(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 95(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 316(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 94(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 314(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 93(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 312(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 92(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 310(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 91(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 308(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 90(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 306(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 89(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 304(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 88(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 302(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 87(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 300(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 86(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 298(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 85(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 296(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 84(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 294(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 83(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 292(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 82(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 290(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 81(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 288(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a4, v16
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a4
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a4, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a4, 64(sp)
+; RV64-ZVFHMIN-NEXT:    sb a1, 80(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 354(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 113(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 352(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 112(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 350(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 111(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 348(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 110(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 346(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 109(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 344(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 108(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 342(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 107(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 340(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 106(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 338(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 105(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 336(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 104(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 334(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 103(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 332(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 102(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 330(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 101(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 328(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 100(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 326(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 99(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 324(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 98(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 322(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v10, v16, 7
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a4, v10
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a4
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a4, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v10, v16, 6
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a5, v10
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a5
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a5, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v10, v16, 5
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a6, v10
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a6
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a6, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a6, 69(sp)
+; RV64-ZVFHMIN-NEXT:    sb a5, 70(sp)
+; RV64-ZVFHMIN-NEXT:    sb a4, 71(sp)
+; RV64-ZVFHMIN-NEXT:    sb a1, 97(sp)
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v10, v16, 4
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a1, v10
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v10, v16, 3
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a4, v10
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a4
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a4, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v10, v16, 2
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a5, v10
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a5
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a5, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v10, v16, 1
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a6, v10
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a6
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a6, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a6, 65(sp)
+; RV64-ZVFHMIN-NEXT:    sb a5, 66(sp)
+; RV64-ZVFHMIN-NEXT:    sb a4, 67(sp)
+; RV64-ZVFHMIN-NEXT:    sb a1, 68(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 382(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 127(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 380(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 126(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 378(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 125(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 376(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 124(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 374(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 123(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 372(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 122(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 370(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 121(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 368(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 120(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 366(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 119(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 364(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 118(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 362(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 117(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 360(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 116(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 358(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 115(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 356(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 114(sp)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v10, v16, 15
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a1, v10
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v10, v16, 14
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a4, v10
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a4
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a4, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v10, v16, 13
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a5, v10
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a5
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a5, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v10, v16, 12
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a6, v10
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a6
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a6, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a6, 76(sp)
+; RV64-ZVFHMIN-NEXT:    sb a5, 77(sp)
+; RV64-ZVFHMIN-NEXT:    sb a4, 78(sp)
+; RV64-ZVFHMIN-NEXT:    sb a1, 79(sp)
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v10, v16, 11
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a1, v10
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v10, v16, 10
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a4, v10
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a4
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a4, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v10, v16, 9
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a5, v10
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a5
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a5, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v10, v16, 8
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a6, v10
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a6
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a6, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a6, 72(sp)
+; RV64-ZVFHMIN-NEXT:    sb a5, 73(sp)
+; RV64-ZVFHMIN-NEXT:    sb a4, 74(sp)
+; RV64-ZVFHMIN-NEXT:    sb a1, 75(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 192(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 32(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 190(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 31(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 188(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 30(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 186(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 29(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 184(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 28(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 182(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 27(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 180(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 26(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 178(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 25(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 176(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 24(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 174(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 23(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 172(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 22(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 170(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 21(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 168(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 20(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 166(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 19(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 164(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 18(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 162(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 17(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 160(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a4, v8
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a4
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a4, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a4, 0(sp)
+; RV64-ZVFHMIN-NEXT:    sb a1, 16(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 226(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 49(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 224(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 48(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 222(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 47(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 220(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 46(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 218(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 45(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 216(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 44(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 214(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 43(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 212(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 42(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 210(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 41(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 208(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 40(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 206(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 39(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 204(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 38(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 202(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 37(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 200(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 36(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 198(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 35(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 196(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 34(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 194(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 7
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a4, v10
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a4
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a4, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 6
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a5, v10
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a5
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a5, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 5
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a6, v10
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a6
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a6, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a6, 5(sp)
+; RV64-ZVFHMIN-NEXT:    sb a5, 6(sp)
+; RV64-ZVFHMIN-NEXT:    sb a4, 7(sp)
+; RV64-ZVFHMIN-NEXT:    sb a1, 33(sp)
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 4
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a1, v10
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 3
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a4, v10
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a4
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a4, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 2
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a5, v10
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a5
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a5, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 1
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a6, v10
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a6
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a6, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a6, 1(sp)
+; RV64-ZVFHMIN-NEXT:    sb a5, 2(sp)
+; RV64-ZVFHMIN-NEXT:    sb a4, 3(sp)
+; RV64-ZVFHMIN-NEXT:    sb a1, 4(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 254(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 63(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 252(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 62(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 250(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 61(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 248(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 60(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 246(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 59(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 244(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 58(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 242(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 57(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 240(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 56(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 238(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 55(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 236(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 54(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 234(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 53(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 232(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 52(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 230(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 51(sp)
+; RV64-ZVFHMIN-NEXT:    lh a1, 228(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a1, 50(sp)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 15
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a1, v10
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 14
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a4, v10
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a4
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a4, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 13
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a5, v10
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a5
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a5, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 12
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a6, v10
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a6
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a6, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a6, 12(sp)
+; RV64-ZVFHMIN-NEXT:    sb a5, 13(sp)
+; RV64-ZVFHMIN-NEXT:    sb a4, 14(sp)
+; RV64-ZVFHMIN-NEXT:    sb a1, 15(sp)
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 11
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a1, v10
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a1
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a1, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 10
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a4, v10
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a4
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a4, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 9
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a5, v10
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a5
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a5, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 8
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a6, v8
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa4, a6
+; RV64-ZVFHMIN-NEXT:    fcvt.s.h fa4, fa4
+; RV64-ZVFHMIN-NEXT:    feq.s a6, fa4, fa5
+; RV64-ZVFHMIN-NEXT:    sb a6, 8(sp)
+; RV64-ZVFHMIN-NEXT:    sb a5, 9(sp)
+; RV64-ZVFHMIN-NEXT:    sb a4, 10(sp)
+; RV64-ZVFHMIN-NEXT:    sb a1, 11(sp)
+; RV64-ZVFHMIN-NEXT:    addi a1, sp, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e8, m4, ta, ma
+; RV64-ZVFHMIN-NEXT:    vle8.v v8, (a1)
+; RV64-ZVFHMIN-NEXT:    mv a1, sp
+; RV64-ZVFHMIN-NEXT:    vle8.v v12, (a1)
+; RV64-ZVFHMIN-NEXT:    vand.vi v16, v8, 1
+; RV64-ZVFHMIN-NEXT:    vmsne.vi v8, v16, 0
+; RV64-ZVFHMIN-NEXT:    vand.vi v12, v12, 1
+; RV64-ZVFHMIN-NEXT:    vmsne.vi v0, v12, 0
+; RV64-ZVFHMIN-NEXT:    addi a1, a0, 128
+; RV64-ZVFHMIN-NEXT:    vle16.v v16, (a1), v0.t
+; RV64-ZVFHMIN-NEXT:    vmv1r.v v0, v8
+; RV64-ZVFHMIN-NEXT:    vle16.v v8, (a0), v0.t
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a2)
+; RV64-ZVFHMIN-NEXT:    addi a0, a2, 128
+; RV64-ZVFHMIN-NEXT:    vse16.v v16, (a0)
+; RV64-ZVFHMIN-NEXT:    addi sp, s0, -512
+; RV64-ZVFHMIN-NEXT:    ld ra, 504(sp) # 8-byte Folded Reload
+; RV64-ZVFHMIN-NEXT:    ld s0, 496(sp) # 8-byte Folded Reload
+; RV64-ZVFHMIN-NEXT:    addi sp, sp, 512
+; RV64-ZVFHMIN-NEXT:    ret
   %m = load <128 x half>, ptr %m_ptr
----------------
preames wrote:

This is more of a comment on the test structure than anything else, and definitely non-blocking.

Most of the CHECK lines in these files appear to come from the mask generation, not the actual masked load or store.  Maybe you should just load a mask from memory or pass one in as a param instead?  Testing the mask generation is covered (presumably) in other test files, and doesn't need to be duplicated here as well.

https://github.com/llvm/llvm-project/pull/115145


More information about the llvm-commits mailing list