[llvm] 7cb6677 - [RISCV] Rework fixed-length masked load/store tests. NFC

Luke Lau via llvm-commits <llvm-commits at lists.llvm.org>
Wed Nov 6 18:38:35 PST 2024


Author: Luke Lau
Date: 2024-11-07T10:38:21+08:00
New Revision: 7cb66772e23c2208bb920e826661af244790735f

URL: https://github.com/llvm/llvm-project/commit/7cb66772e23c2208bb920e826661af244790735f
DIFF: https://github.com/llvm/llvm-project/commit/7cb66772e23c2208bb920e826661af244790735f.diff

LOG: [RISCV] Rework fixed-length masked load/store tests. NFC

Pass the mask and vector operands in directly as arguments instead of
loading them from memory, and return the results directly. Also add tests
for zvfhmin and zvfbfmin.
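
To illustrate the rework, here is the before/after shape of one load test,
taken from the diff below. The old tests derived the mask by loading a
vector from %m_ptr and comparing it against zero, then stored the result:

  define void @masked_load_v1f16(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
    ; Materialize the mask in-test, then store the masked-load result.
    %m = load <1 x half>, ptr %m_ptr
    %mask = fcmp oeq <1 x half> %m, zeroinitializer
    %load = call <1 x half> @llvm.masked.load.v1f16(ptr %a, i32 8, <1 x i1> %mask, <1 x half> undef)
    store <1 x half> %load, ptr %res_ptr
    ret void
  }

The reworked tests take the mask directly as an <N x i1> argument and return
the loaded vector, so the checks exercise only the masked load itself:

  define <1 x half> @masked_load_v1f16(ptr %a, <1 x i1> %mask) {
    ; The <1 x i1> mask argument arrives in v0, per the vector calling convention.
    %load = call <1 x half> @llvm.masked.load.v1f16(ptr %a, i32 8, <1 x i1> %mask, <1 x half> undef)
    ret <1 x half> %load
  }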

Added: 
    

Modified: 
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-fp.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-fp.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll

Removed: 
    


################################################################################
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-fp.ll
index f1d300b300a646..e13c5032f97eeb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-fp.ll
@@ -1,478 +1,11923 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv32 -mattr=+v,+zvfh,+zvfbfmin,+zfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV32,RV32-ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfh,+zvfbfmin,+zfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV64,RV64-ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+v,+zvfhmin,+zvfbfmin,+zfhmin,+zfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV32,RV32-ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfhmin,+zvfbfmin,+zfhmin,+zfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV64,RV64-ZVFHMIN
 
-define void @masked_load_v1f16(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
-; CHECK-LABEL: masked_load_v1f16:
+define <1 x bfloat> @masked_load_v1bf16(ptr %a, <1 x i1> %mask) {
+; CHECK-LABEL: masked_load_v1bf16:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
+; CHECK-NEXT:    vfirst.m a1, v0
+; CHECK-NEXT:    # implicit-def: $v8
+; CHECK-NEXT:    bnez a1, .LBB0_2
+; CHECK-NEXT:  # %bb.1: # %cond.load
 ; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-NEXT:    vle16.v v8, (a1)
-; CHECK-NEXT:    fmv.h.x fa5, zero
-; CHECK-NEXT:    vmfeq.vf v0, v8, fa5
-; CHECK-NEXT:    vle16.v v8, (a0), v0.t
-; CHECK-NEXT:    vse16.v v8, (a2)
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:  .LBB0_2: # %else
 ; CHECK-NEXT:    ret
-  %m = load <1 x half>, ptr %m_ptr
-  %mask = fcmp oeq <1 x half> %m, zeroinitializer
+  %load = call <1 x bfloat> @llvm.masked.load.v1bf16(ptr %a, i32 8, <1 x i1> %mask, <1 x bfloat> undef)
+  ret <1 x bfloat> %load
+}
+
+define <1 x half> @masked_load_v1f16(ptr %a, <1 x i1> %mask) {
+; ZVFH-LABEL: masked_load_v1f16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
+; ZVFH-NEXT:    vle16.v v8, (a0), v0.t
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: masked_load_v1f16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
+; ZVFHMIN-NEXT:    vfirst.m a1, v0
+; ZVFHMIN-NEXT:    # implicit-def: $v8
+; ZVFHMIN-NEXT:    bnez a1, .LBB1_2
+; ZVFHMIN-NEXT:  # %bb.1: # %cond.load
+; ZVFHMIN-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vle16.v v8, (a0)
+; ZVFHMIN-NEXT:  .LBB1_2: # %else
+; ZVFHMIN-NEXT:    ret
   %load = call <1 x half> @llvm.masked.load.v1f16(ptr %a, i32 8, <1 x i1> %mask, <1 x half> undef)
-  store <1 x half> %load, ptr %res_ptr
-  ret void
+  ret <1 x half> %load
 }
-declare <1 x half> @llvm.masked.load.v1f16(ptr, i32, <1 x i1>, <1 x half>)
 
-define void @masked_load_v1f32(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
+define <1 x float> @masked_load_v1f32(ptr %a, <1 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v1f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a1)
-; CHECK-NEXT:    fmv.w.x fa5, zero
-; CHECK-NEXT:    vmfeq.vf v0, v8, fa5
 ; CHECK-NEXT:    vle32.v v8, (a0), v0.t
-; CHECK-NEXT:    vse32.v v8, (a2)
 ; CHECK-NEXT:    ret
-  %m = load <1 x float>, ptr %m_ptr
-  %mask = fcmp oeq <1 x float> %m, zeroinitializer
   %load = call <1 x float> @llvm.masked.load.v1f32(ptr %a, i32 8, <1 x i1> %mask, <1 x float> undef)
-  store <1 x float> %load, ptr %res_ptr
-  ret void
+  ret <1 x float> %load
 }
-declare <1 x float> @llvm.masked.load.v1f32(ptr, i32, <1 x i1>, <1 x float>)
 
-define void @masked_load_v1f64(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
-; RV32-LABEL: masked_load_v1f64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT:    vle64.v v8, (a1)
-; RV32-NEXT:    fcvt.d.w fa5, zero
-; RV32-NEXT:    vmfeq.vf v0, v8, fa5
-; RV32-NEXT:    vle64.v v8, (a0), v0.t
-; RV32-NEXT:    vse64.v v8, (a2)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: masked_load_v1f64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT:    vle64.v v8, (a1)
-; RV64-NEXT:    fmv.d.x fa5, zero
-; RV64-NEXT:    vmfeq.vf v0, v8, fa5
-; RV64-NEXT:    vle64.v v8, (a0), v0.t
-; RV64-NEXT:    vse64.v v8, (a2)
-; RV64-NEXT:    ret
-  %m = load <1 x double>, ptr %m_ptr
-  %mask = fcmp oeq <1 x double> %m, zeroinitializer
+define <1 x double> @masked_load_v1f64(ptr %a, <1 x i1> %mask) {
+; CHECK-LABEL: masked_load_v1f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
   %load = call <1 x double> @llvm.masked.load.v1f64(ptr %a, i32 8, <1 x i1> %mask, <1 x double> undef)
-  store <1 x double> %load, ptr %res_ptr
-  ret void
+  ret <1 x double> %load
 }
-declare <1 x double> @llvm.masked.load.v1f64(ptr, i32, <1 x i1>, <1 x double>)
 
-define void @masked_load_v2f16(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
-; CHECK-LABEL: masked_load_v2f16:
+define <2 x bfloat> @masked_load_v2bf16(ptr %a, <2 x i1> %mask) {
+; CHECK-LABEL: masked_load_v2bf16:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv.x.s a1, v0
+; CHECK-NEXT:    andi a2, a1, 1
+; CHECK-NEXT:    # implicit-def: $v8
+; CHECK-NEXT:    bnez a2, .LBB4_3
+; CHECK-NEXT:  # %bb.1: # %else
+; CHECK-NEXT:    andi a1, a1, 2
+; CHECK-NEXT:    bnez a1, .LBB4_4
+; CHECK-NEXT:  .LBB4_2: # %else2
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB4_3: # %cond.load
+; CHECK-NEXT:    lh a2, 0(a0)
+; CHECK-NEXT:    fmv.x.h a3, fa5
 ; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT:    vle16.v v8, (a1)
-; CHECK-NEXT:    fmv.h.x fa5, zero
-; CHECK-NEXT:    vmfeq.vf v0, v8, fa5
-; CHECK-NEXT:    vle16.v v8, (a0), v0.t
-; CHECK-NEXT:    vse16.v v8, (a2)
+; CHECK-NEXT:    vmv.v.x v8, a2
+; CHECK-NEXT:    vslide1down.vx v8, v8, a3
+; CHECK-NEXT:    andi a1, a1, 2
+; CHECK-NEXT:    beqz a1, .LBB4_2
+; CHECK-NEXT:  .LBB4_4: # %cond.load1
+; CHECK-NEXT:    lh a0, 2(a0)
+; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT:    vmv.s.x v9, a0
+; CHECK-NEXT:    vslideup.vi v8, v9, 1
 ; CHECK-NEXT:    ret
-  %m = load <2 x half>, ptr %m_ptr
-  %mask = fcmp oeq <2 x half> %m, zeroinitializer
+  %load = call <2 x bfloat> @llvm.masked.load.v2bf16(ptr %a, i32 8, <2 x i1> %mask, <2 x bfloat> undef)
+  ret <2 x bfloat> %load
+}
+
+define <2 x half> @masked_load_v2f16(ptr %a, <2 x i1> %mask) {
+; ZVFH-LABEL: masked_load_v2f16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFH-NEXT:    vle16.v v8, (a0), v0.t
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: masked_load_v2f16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; ZVFHMIN-NEXT:    vmv.x.s a1, v0
+; ZVFHMIN-NEXT:    andi a2, a1, 1
+; ZVFHMIN-NEXT:    # implicit-def: $v8
+; ZVFHMIN-NEXT:    bnez a2, .LBB5_3
+; ZVFHMIN-NEXT:  # %bb.1: # %else
+; ZVFHMIN-NEXT:    andi a1, a1, 2
+; ZVFHMIN-NEXT:    bnez a1, .LBB5_4
+; ZVFHMIN-NEXT:  .LBB5_2: # %else2
+; ZVFHMIN-NEXT:    ret
+; ZVFHMIN-NEXT:  .LBB5_3: # %cond.load
+; ZVFHMIN-NEXT:    lh a2, 0(a0)
+; ZVFHMIN-NEXT:    fmv.x.h a3, fa5
+; ZVFHMIN-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v8, a2
+; ZVFHMIN-NEXT:    vslide1down.vx v8, v8, a3
+; ZVFHMIN-NEXT:    andi a1, a1, 2
+; ZVFHMIN-NEXT:    beqz a1, .LBB5_2
+; ZVFHMIN-NEXT:  .LBB5_4: # %cond.load1
+; ZVFHMIN-NEXT:    lh a0, 2(a0)
+; ZVFHMIN-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vmv.s.x v9, a0
+; ZVFHMIN-NEXT:    vslideup.vi v8, v9, 1
+; ZVFHMIN-NEXT:    ret
   %load = call <2 x half> @llvm.masked.load.v2f16(ptr %a, i32 8, <2 x i1> %mask, <2 x half> undef)
-  store <2 x half> %load, ptr %res_ptr
-  ret void
+  ret <2 x half> %load
 }
-declare <2 x half> @llvm.masked.load.v2f16(ptr, i32, <2 x i1>, <2 x half>)
 
-define void @masked_load_v2f32(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
+define <2 x float> @masked_load_v2f32(ptr %a, <2 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v2f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a1)
-; CHECK-NEXT:    fmv.w.x fa5, zero
-; CHECK-NEXT:    vmfeq.vf v0, v8, fa5
 ; CHECK-NEXT:    vle32.v v8, (a0), v0.t
-; CHECK-NEXT:    vse32.v v8, (a2)
 ; CHECK-NEXT:    ret
-  %m = load <2 x float>, ptr %m_ptr
-  %mask = fcmp oeq <2 x float> %m, zeroinitializer
   %load = call <2 x float> @llvm.masked.load.v2f32(ptr %a, i32 8, <2 x i1> %mask, <2 x float> undef)
-  store <2 x float> %load, ptr %res_ptr
-  ret void
+  ret <2 x float> %load
 }
-declare <2 x float> @llvm.masked.load.v2f32(ptr, i32, <2 x i1>, <2 x float>)
 
-define void @masked_load_v2f64(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
-; RV32-LABEL: masked_load_v2f64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT:    vle64.v v8, (a1)
-; RV32-NEXT:    fcvt.d.w fa5, zero
-; RV32-NEXT:    vmfeq.vf v0, v8, fa5
-; RV32-NEXT:    vle64.v v8, (a0), v0.t
-; RV32-NEXT:    vse64.v v8, (a2)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: masked_load_v2f64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV64-NEXT:    vle64.v v8, (a1)
-; RV64-NEXT:    fmv.d.x fa5, zero
-; RV64-NEXT:    vmfeq.vf v0, v8, fa5
-; RV64-NEXT:    vle64.v v8, (a0), v0.t
-; RV64-NEXT:    vse64.v v8, (a2)
-; RV64-NEXT:    ret
-  %m = load <2 x double>, ptr %m_ptr
-  %mask = fcmp oeq <2 x double> %m, zeroinitializer
+define <2 x double> @masked_load_v2f64(ptr %a, <2 x i1> %mask) {
+; CHECK-LABEL: masked_load_v2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
   %load = call <2 x double> @llvm.masked.load.v2f64(ptr %a, i32 8, <2 x i1> %mask, <2 x double> undef)
-  store <2 x double> %load, ptr %res_ptr
-  ret void
+  ret <2 x double> %load
 }
-declare <2 x double> @llvm.masked.load.v2f64(ptr, i32, <2 x i1>, <2 x double>)
 
-define void @masked_load_v4f16(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
-; CHECK-LABEL: masked_load_v4f16:
+define <4 x bfloat> @masked_load_v4bf16(ptr %a, <4 x i1> %mask) {
+; CHECK-LABEL: masked_load_v4bf16:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv.x.s a1, v0
+; CHECK-NEXT:    andi a2, a1, 1
+; CHECK-NEXT:    # implicit-def: $v8
+; CHECK-NEXT:    bnez a2, .LBB8_5
+; CHECK-NEXT:  # %bb.1: # %else
+; CHECK-NEXT:    andi a2, a1, 2
+; CHECK-NEXT:    bnez a2, .LBB8_6
+; CHECK-NEXT:  .LBB8_2: # %else2
+; CHECK-NEXT:    andi a2, a1, 4
+; CHECK-NEXT:    bnez a2, .LBB8_7
+; CHECK-NEXT:  .LBB8_3: # %else5
+; CHECK-NEXT:    andi a1, a1, 8
+; CHECK-NEXT:    bnez a1, .LBB8_8
+; CHECK-NEXT:  .LBB8_4: # %else8
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB8_5: # %cond.load
+; CHECK-NEXT:    lh a2, 0(a0)
+; CHECK-NEXT:    fmv.x.h a3, fa5
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT:    vmv.v.x v8, a3
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, tu, ma
+; CHECK-NEXT:    vmv.s.x v8, a2
+; CHECK-NEXT:    andi a2, a1, 2
+; CHECK-NEXT:    beqz a2, .LBB8_2
+; CHECK-NEXT:  .LBB8_6: # %cond.load1
+; CHECK-NEXT:    lh a2, 2(a0)
+; CHECK-NEXT:    vsetivli zero, 2, e16, mf2, tu, ma
+; CHECK-NEXT:    vmv.s.x v9, a2
+; CHECK-NEXT:    vslideup.vi v8, v9, 1
+; CHECK-NEXT:    andi a2, a1, 4
+; CHECK-NEXT:    beqz a2, .LBB8_3
+; CHECK-NEXT:  .LBB8_7: # %cond.load4
+; CHECK-NEXT:    lh a2, 4(a0)
+; CHECK-NEXT:    vsetivli zero, 3, e16, mf2, tu, ma
+; CHECK-NEXT:    vmv.s.x v9, a2
+; CHECK-NEXT:    vslideup.vi v8, v9, 2
+; CHECK-NEXT:    andi a1, a1, 8
+; CHECK-NEXT:    beqz a1, .LBB8_4
+; CHECK-NEXT:  .LBB8_8: # %cond.load7
+; CHECK-NEXT:    lh a0, 6(a0)
 ; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT:    vle16.v v8, (a1)
-; CHECK-NEXT:    fmv.h.x fa5, zero
-; CHECK-NEXT:    vmfeq.vf v0, v8, fa5
-; CHECK-NEXT:    vle16.v v8, (a0), v0.t
-; CHECK-NEXT:    vse16.v v8, (a2)
+; CHECK-NEXT:    vmv.s.x v9, a0
+; CHECK-NEXT:    vslideup.vi v8, v9, 3
 ; CHECK-NEXT:    ret
-  %m = load <4 x half>, ptr %m_ptr
-  %mask = fcmp oeq <4 x half> %m, zeroinitializer
+  %load = call <4 x bfloat> @llvm.masked.load.v4bf16(ptr %a, i32 8, <4 x i1> %mask, <4 x bfloat> undef)
+  ret <4 x bfloat> %load
+}
+
+define <4 x half> @masked_load_v4f16(ptr %a, <4 x i1> %mask) {
+; ZVFH-LABEL: masked_load_v4f16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFH-NEXT:    vle16.v v8, (a0), v0.t
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: masked_load_v4f16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; ZVFHMIN-NEXT:    vmv.x.s a1, v0
+; ZVFHMIN-NEXT:    andi a2, a1, 1
+; ZVFHMIN-NEXT:    # implicit-def: $v8
+; ZVFHMIN-NEXT:    bnez a2, .LBB9_5
+; ZVFHMIN-NEXT:  # %bb.1: # %else
+; ZVFHMIN-NEXT:    andi a2, a1, 2
+; ZVFHMIN-NEXT:    bnez a2, .LBB9_6
+; ZVFHMIN-NEXT:  .LBB9_2: # %else2
+; ZVFHMIN-NEXT:    andi a2, a1, 4
+; ZVFHMIN-NEXT:    bnez a2, .LBB9_7
+; ZVFHMIN-NEXT:  .LBB9_3: # %else5
+; ZVFHMIN-NEXT:    andi a1, a1, 8
+; ZVFHMIN-NEXT:    bnez a1, .LBB9_8
+; ZVFHMIN-NEXT:  .LBB9_4: # %else8
+; ZVFHMIN-NEXT:    ret
+; ZVFHMIN-NEXT:  .LBB9_5: # %cond.load
+; ZVFHMIN-NEXT:    lh a2, 0(a0)
+; ZVFHMIN-NEXT:    fmv.x.h a3, fa5
+; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v8, a3
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, tu, ma
+; ZVFHMIN-NEXT:    vmv.s.x v8, a2
+; ZVFHMIN-NEXT:    andi a2, a1, 2
+; ZVFHMIN-NEXT:    beqz a2, .LBB9_2
+; ZVFHMIN-NEXT:  .LBB9_6: # %cond.load1
+; ZVFHMIN-NEXT:    lh a2, 2(a0)
+; ZVFHMIN-NEXT:    vsetivli zero, 2, e16, mf2, tu, ma
+; ZVFHMIN-NEXT:    vmv.s.x v9, a2
+; ZVFHMIN-NEXT:    vslideup.vi v8, v9, 1
+; ZVFHMIN-NEXT:    andi a2, a1, 4
+; ZVFHMIN-NEXT:    beqz a2, .LBB9_3
+; ZVFHMIN-NEXT:  .LBB9_7: # %cond.load4
+; ZVFHMIN-NEXT:    lh a2, 4(a0)
+; ZVFHMIN-NEXT:    vsetivli zero, 3, e16, mf2, tu, ma
+; ZVFHMIN-NEXT:    vmv.s.x v9, a2
+; ZVFHMIN-NEXT:    vslideup.vi v8, v9, 2
+; ZVFHMIN-NEXT:    andi a1, a1, 8
+; ZVFHMIN-NEXT:    beqz a1, .LBB9_4
+; ZVFHMIN-NEXT:  .LBB9_8: # %cond.load7
+; ZVFHMIN-NEXT:    lh a0, 6(a0)
+; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vmv.s.x v9, a0
+; ZVFHMIN-NEXT:    vslideup.vi v8, v9, 3
+; ZVFHMIN-NEXT:    ret
   %load = call <4 x half> @llvm.masked.load.v4f16(ptr %a, i32 8, <4 x i1> %mask, <4 x half> undef)
-  store <4 x half> %load, ptr %res_ptr
-  ret void
+  ret <4 x half> %load
 }
-declare <4 x half> @llvm.masked.load.v4f16(ptr, i32, <4 x i1>, <4 x half>)
 
-define void @masked_load_v4f32(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
+define <4 x float> @masked_load_v4f32(ptr %a, <4 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a1)
-; CHECK-NEXT:    fmv.w.x fa5, zero
-; CHECK-NEXT:    vmfeq.vf v0, v8, fa5
 ; CHECK-NEXT:    vle32.v v8, (a0), v0.t
-; CHECK-NEXT:    vse32.v v8, (a2)
 ; CHECK-NEXT:    ret
-  %m = load <4 x float>, ptr %m_ptr
-  %mask = fcmp oeq <4 x float> %m, zeroinitializer
   %load = call <4 x float> @llvm.masked.load.v4f32(ptr %a, i32 8, <4 x i1> %mask, <4 x float> undef)
-  store <4 x float> %load, ptr %res_ptr
-  ret void
+  ret <4 x float> %load
 }
-declare <4 x float> @llvm.masked.load.v4f32(ptr, i32, <4 x i1>, <4 x float>)
 
-define void @masked_load_v4f64(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
-; RV32-LABEL: masked_load_v4f64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT:    vle64.v v8, (a1)
-; RV32-NEXT:    fcvt.d.w fa5, zero
-; RV32-NEXT:    vmfeq.vf v0, v8, fa5
-; RV32-NEXT:    vle64.v v8, (a0), v0.t
-; RV32-NEXT:    vse64.v v8, (a2)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: masked_load_v4f64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV64-NEXT:    vle64.v v8, (a1)
-; RV64-NEXT:    fmv.d.x fa5, zero
-; RV64-NEXT:    vmfeq.vf v0, v8, fa5
-; RV64-NEXT:    vle64.v v8, (a0), v0.t
-; RV64-NEXT:    vse64.v v8, (a2)
-; RV64-NEXT:    ret
-  %m = load <4 x double>, ptr %m_ptr
-  %mask = fcmp oeq <4 x double> %m, zeroinitializer
+define <4 x double> @masked_load_v4f64(ptr %a, <4 x i1> %mask) {
+; CHECK-LABEL: masked_load_v4f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
   %load = call <4 x double> @llvm.masked.load.v4f64(ptr %a, i32 8, <4 x i1> %mask, <4 x double> undef)
-  store <4 x double> %load, ptr %res_ptr
-  ret void
+  ret <4 x double> %load
 }
-declare <4 x double> @llvm.masked.load.v4f64(ptr, i32, <4 x i1>, <4 x double>)
 
-define void @masked_load_v8f16(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
-; CHECK-LABEL: masked_load_v8f16:
+define <8 x bfloat> @masked_load_v8bf16(ptr %a, <8 x i1> %mask) {
+; CHECK-LABEL: masked_load_v8bf16:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv.x.s a1, v0
+; CHECK-NEXT:    andi a2, a1, 1
+; CHECK-NEXT:    # implicit-def: $v8
+; CHECK-NEXT:    bnez a2, .LBB12_9
+; CHECK-NEXT:  # %bb.1: # %else
+; CHECK-NEXT:    andi a2, a1, 2
+; CHECK-NEXT:    bnez a2, .LBB12_10
+; CHECK-NEXT:  .LBB12_2: # %else2
+; CHECK-NEXT:    andi a2, a1, 4
+; CHECK-NEXT:    bnez a2, .LBB12_11
+; CHECK-NEXT:  .LBB12_3: # %else5
+; CHECK-NEXT:    andi a2, a1, 8
+; CHECK-NEXT:    bnez a2, .LBB12_12
+; CHECK-NEXT:  .LBB12_4: # %else8
+; CHECK-NEXT:    andi a2, a1, 16
+; CHECK-NEXT:    bnez a2, .LBB12_13
+; CHECK-NEXT:  .LBB12_5: # %else11
+; CHECK-NEXT:    andi a2, a1, 32
+; CHECK-NEXT:    bnez a2, .LBB12_14
+; CHECK-NEXT:  .LBB12_6: # %else14
+; CHECK-NEXT:    andi a2, a1, 64
+; CHECK-NEXT:    bnez a2, .LBB12_15
+; CHECK-NEXT:  .LBB12_7: # %else17
+; CHECK-NEXT:    andi a1, a1, -128
+; CHECK-NEXT:    bnez a1, .LBB12_16
+; CHECK-NEXT:  .LBB12_8: # %else20
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB12_9: # %cond.load
+; CHECK-NEXT:    lh a2, 0(a0)
+; CHECK-NEXT:    fmv.x.h a3, fa5
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vmv.v.x v8, a3
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, tu, ma
+; CHECK-NEXT:    vmv.s.x v8, a2
+; CHECK-NEXT:    andi a2, a1, 2
+; CHECK-NEXT:    beqz a2, .LBB12_2
+; CHECK-NEXT:  .LBB12_10: # %cond.load1
+; CHECK-NEXT:    lh a2, 2(a0)
+; CHECK-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
+; CHECK-NEXT:    vmv.s.x v9, a2
+; CHECK-NEXT:    vslideup.vi v8, v9, 1
+; CHECK-NEXT:    andi a2, a1, 4
+; CHECK-NEXT:    beqz a2, .LBB12_3
+; CHECK-NEXT:  .LBB12_11: # %cond.load4
+; CHECK-NEXT:    lh a2, 4(a0)
+; CHECK-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
+; CHECK-NEXT:    vmv.s.x v9, a2
+; CHECK-NEXT:    vslideup.vi v8, v9, 2
+; CHECK-NEXT:    andi a2, a1, 8
+; CHECK-NEXT:    beqz a2, .LBB12_4
+; CHECK-NEXT:  .LBB12_12: # %cond.load7
+; CHECK-NEXT:    lh a2, 6(a0)
+; CHECK-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
+; CHECK-NEXT:    vmv.s.x v9, a2
+; CHECK-NEXT:    vslideup.vi v8, v9, 3
+; CHECK-NEXT:    andi a2, a1, 16
+; CHECK-NEXT:    beqz a2, .LBB12_5
+; CHECK-NEXT:  .LBB12_13: # %cond.load10
+; CHECK-NEXT:    lh a2, 8(a0)
+; CHECK-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
+; CHECK-NEXT:    vmv.s.x v9, a2
+; CHECK-NEXT:    vslideup.vi v8, v9, 4
+; CHECK-NEXT:    andi a2, a1, 32
+; CHECK-NEXT:    beqz a2, .LBB12_6
+; CHECK-NEXT:  .LBB12_14: # %cond.load13
+; CHECK-NEXT:    lh a2, 10(a0)
+; CHECK-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
+; CHECK-NEXT:    vmv.s.x v9, a2
+; CHECK-NEXT:    vslideup.vi v8, v9, 5
+; CHECK-NEXT:    andi a2, a1, 64
+; CHECK-NEXT:    beqz a2, .LBB12_7
+; CHECK-NEXT:  .LBB12_15: # %cond.load16
+; CHECK-NEXT:    lh a2, 12(a0)
+; CHECK-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
+; CHECK-NEXT:    vmv.s.x v9, a2
+; CHECK-NEXT:    vslideup.vi v8, v9, 6
+; CHECK-NEXT:    andi a1, a1, -128
+; CHECK-NEXT:    beqz a1, .LBB12_8
+; CHECK-NEXT:  .LBB12_16: # %cond.load19
+; CHECK-NEXT:    lh a0, 14(a0)
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v8, (a1)
-; CHECK-NEXT:    fmv.h.x fa5, zero
-; CHECK-NEXT:    vmfeq.vf v0, v8, fa5
-; CHECK-NEXT:    vle16.v v8, (a0), v0.t
-; CHECK-NEXT:    vse16.v v8, (a2)
+; CHECK-NEXT:    vmv.s.x v9, a0
+; CHECK-NEXT:    vslideup.vi v8, v9, 7
 ; CHECK-NEXT:    ret
-  %m = load <8 x half>, ptr %m_ptr
-  %mask = fcmp oeq <8 x half> %m, zeroinitializer
+  %load = call <8 x bfloat> @llvm.masked.load.v8bf16(ptr %a, i32 8, <8 x i1> %mask, <8 x bfloat> undef)
+  ret <8 x bfloat> %load
+}
+
+define <8 x half> @masked_load_v8f16(ptr %a, <8 x i1> %mask) {
+; ZVFH-LABEL: masked_load_v8f16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; ZVFH-NEXT:    vle16.v v8, (a0), v0.t
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: masked_load_v8f16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; ZVFHMIN-NEXT:    vmv.x.s a1, v0
+; ZVFHMIN-NEXT:    andi a2, a1, 1
+; ZVFHMIN-NEXT:    # implicit-def: $v8
+; ZVFHMIN-NEXT:    bnez a2, .LBB13_9
+; ZVFHMIN-NEXT:  # %bb.1: # %else
+; ZVFHMIN-NEXT:    andi a2, a1, 2
+; ZVFHMIN-NEXT:    bnez a2, .LBB13_10
+; ZVFHMIN-NEXT:  .LBB13_2: # %else2
+; ZVFHMIN-NEXT:    andi a2, a1, 4
+; ZVFHMIN-NEXT:    bnez a2, .LBB13_11
+; ZVFHMIN-NEXT:  .LBB13_3: # %else5
+; ZVFHMIN-NEXT:    andi a2, a1, 8
+; ZVFHMIN-NEXT:    bnez a2, .LBB13_12
+; ZVFHMIN-NEXT:  .LBB13_4: # %else8
+; ZVFHMIN-NEXT:    andi a2, a1, 16
+; ZVFHMIN-NEXT:    bnez a2, .LBB13_13
+; ZVFHMIN-NEXT:  .LBB13_5: # %else11
+; ZVFHMIN-NEXT:    andi a2, a1, 32
+; ZVFHMIN-NEXT:    bnez a2, .LBB13_14
+; ZVFHMIN-NEXT:  .LBB13_6: # %else14
+; ZVFHMIN-NEXT:    andi a2, a1, 64
+; ZVFHMIN-NEXT:    bnez a2, .LBB13_15
+; ZVFHMIN-NEXT:  .LBB13_7: # %else17
+; ZVFHMIN-NEXT:    andi a1, a1, -128
+; ZVFHMIN-NEXT:    bnez a1, .LBB13_16
+; ZVFHMIN-NEXT:  .LBB13_8: # %else20
+; ZVFHMIN-NEXT:    ret
+; ZVFHMIN-NEXT:  .LBB13_9: # %cond.load
+; ZVFHMIN-NEXT:    lh a2, 0(a0)
+; ZVFHMIN-NEXT:    fmv.x.h a3, fa5
+; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vmv.v.x v8, a3
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, tu, ma
+; ZVFHMIN-NEXT:    vmv.s.x v8, a2
+; ZVFHMIN-NEXT:    andi a2, a1, 2
+; ZVFHMIN-NEXT:    beqz a2, .LBB13_2
+; ZVFHMIN-NEXT:  .LBB13_10: # %cond.load1
+; ZVFHMIN-NEXT:    lh a2, 2(a0)
+; ZVFHMIN-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
+; ZVFHMIN-NEXT:    vmv.s.x v9, a2
+; ZVFHMIN-NEXT:    vslideup.vi v8, v9, 1
+; ZVFHMIN-NEXT:    andi a2, a1, 4
+; ZVFHMIN-NEXT:    beqz a2, .LBB13_3
+; ZVFHMIN-NEXT:  .LBB13_11: # %cond.load4
+; ZVFHMIN-NEXT:    lh a2, 4(a0)
+; ZVFHMIN-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
+; ZVFHMIN-NEXT:    vmv.s.x v9, a2
+; ZVFHMIN-NEXT:    vslideup.vi v8, v9, 2
+; ZVFHMIN-NEXT:    andi a2, a1, 8
+; ZVFHMIN-NEXT:    beqz a2, .LBB13_4
+; ZVFHMIN-NEXT:  .LBB13_12: # %cond.load7
+; ZVFHMIN-NEXT:    lh a2, 6(a0)
+; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
+; ZVFHMIN-NEXT:    vmv.s.x v9, a2
+; ZVFHMIN-NEXT:    vslideup.vi v8, v9, 3
+; ZVFHMIN-NEXT:    andi a2, a1, 16
+; ZVFHMIN-NEXT:    beqz a2, .LBB13_5
+; ZVFHMIN-NEXT:  .LBB13_13: # %cond.load10
+; ZVFHMIN-NEXT:    lh a2, 8(a0)
+; ZVFHMIN-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
+; ZVFHMIN-NEXT:    vmv.s.x v9, a2
+; ZVFHMIN-NEXT:    vslideup.vi v8, v9, 4
+; ZVFHMIN-NEXT:    andi a2, a1, 32
+; ZVFHMIN-NEXT:    beqz a2, .LBB13_6
+; ZVFHMIN-NEXT:  .LBB13_14: # %cond.load13
+; ZVFHMIN-NEXT:    lh a2, 10(a0)
+; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
+; ZVFHMIN-NEXT:    vmv.s.x v9, a2
+; ZVFHMIN-NEXT:    vslideup.vi v8, v9, 5
+; ZVFHMIN-NEXT:    andi a2, a1, 64
+; ZVFHMIN-NEXT:    beqz a2, .LBB13_7
+; ZVFHMIN-NEXT:  .LBB13_15: # %cond.load16
+; ZVFHMIN-NEXT:    lh a2, 12(a0)
+; ZVFHMIN-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
+; ZVFHMIN-NEXT:    vmv.s.x v9, a2
+; ZVFHMIN-NEXT:    vslideup.vi v8, v9, 6
+; ZVFHMIN-NEXT:    andi a1, a1, -128
+; ZVFHMIN-NEXT:    beqz a1, .LBB13_8
+; ZVFHMIN-NEXT:  .LBB13_16: # %cond.load19
+; ZVFHMIN-NEXT:    lh a0, 14(a0)
+; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vmv.s.x v9, a0
+; ZVFHMIN-NEXT:    vslideup.vi v8, v9, 7
+; ZVFHMIN-NEXT:    ret
   %load = call <8 x half> @llvm.masked.load.v8f16(ptr %a, i32 8, <8 x i1> %mask, <8 x half> undef)
-  store <8 x half> %load, ptr %res_ptr
-  ret void
+  ret <8 x half> %load
 }
-declare <8 x half> @llvm.masked.load.v8f16(ptr, i32, <8 x i1>, <8 x half>)
 
-define void @masked_load_v8f32(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
+define <8 x float> @masked_load_v8f32(ptr %a, <8 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a1)
-; CHECK-NEXT:    fmv.w.x fa5, zero
-; CHECK-NEXT:    vmfeq.vf v0, v8, fa5
 ; CHECK-NEXT:    vle32.v v8, (a0), v0.t
-; CHECK-NEXT:    vse32.v v8, (a2)
 ; CHECK-NEXT:    ret
-  %m = load <8 x float>, ptr %m_ptr
-  %mask = fcmp oeq <8 x float> %m, zeroinitializer
   %load = call <8 x float> @llvm.masked.load.v8f32(ptr %a, i32 8, <8 x i1> %mask, <8 x float> undef)
-  store <8 x float> %load, ptr %res_ptr
-  ret void
+  ret <8 x float> %load
+}
+
+define <8 x double> @masked_load_v8f64(ptr %a, <8 x i1> %mask) {
+; CHECK-LABEL: masked_load_v8f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+  %load = call <8 x double> @llvm.masked.load.v8f64(ptr %a, i32 8, <8 x i1> %mask, <8 x double> undef)
+  ret <8 x double> %load
 }
-declare <8 x float> @llvm.masked.load.v8f32(ptr, i32, <8 x i1>, <8 x float>)
 
-define void @masked_load_v8f64(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
-; RV32-LABEL: masked_load_v8f64:
+define <16 x bfloat> @masked_load_v16bf16(ptr %a, <16 x i1> %mask) {
+; RV32-LABEL: masked_load_v16bf16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vle64.v v8, (a1)
-; RV32-NEXT:    fcvt.d.w fa5, zero
-; RV32-NEXT:    vmfeq.vf v0, v8, fa5
-; RV32-NEXT:    vle64.v v8, (a0), v0.t
-; RV32-NEXT:    vse64.v v8, (a2)
+; RV32-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT:    vmv.x.s a1, v0
+; RV32-NEXT:    andi a2, a1, 1
+; RV32-NEXT:    # implicit-def: $v8m2
+; RV32-NEXT:    bnez a2, .LBB16_19
+; RV32-NEXT:  # %bb.1: # %else
+; RV32-NEXT:    andi a2, a1, 2
+; RV32-NEXT:    bnez a2, .LBB16_20
+; RV32-NEXT:  .LBB16_2: # %else2
+; RV32-NEXT:    andi a2, a1, 4
+; RV32-NEXT:    bnez a2, .LBB16_21
+; RV32-NEXT:  .LBB16_3: # %else5
+; RV32-NEXT:    andi a2, a1, 8
+; RV32-NEXT:    bnez a2, .LBB16_22
+; RV32-NEXT:  .LBB16_4: # %else8
+; RV32-NEXT:    andi a2, a1, 16
+; RV32-NEXT:    bnez a2, .LBB16_23
+; RV32-NEXT:  .LBB16_5: # %else11
+; RV32-NEXT:    andi a2, a1, 32
+; RV32-NEXT:    bnez a2, .LBB16_24
+; RV32-NEXT:  .LBB16_6: # %else14
+; RV32-NEXT:    andi a2, a1, 64
+; RV32-NEXT:    bnez a2, .LBB16_25
+; RV32-NEXT:  .LBB16_7: # %else17
+; RV32-NEXT:    andi a2, a1, 128
+; RV32-NEXT:    bnez a2, .LBB16_26
+; RV32-NEXT:  .LBB16_8: # %else20
+; RV32-NEXT:    andi a2, a1, 256
+; RV32-NEXT:    bnez a2, .LBB16_27
+; RV32-NEXT:  .LBB16_9: # %else23
+; RV32-NEXT:    andi a2, a1, 512
+; RV32-NEXT:    bnez a2, .LBB16_28
+; RV32-NEXT:  .LBB16_10: # %else26
+; RV32-NEXT:    andi a2, a1, 1024
+; RV32-NEXT:    bnez a2, .LBB16_29
+; RV32-NEXT:  .LBB16_11: # %else29
+; RV32-NEXT:    slli a2, a1, 20
+; RV32-NEXT:    bltz a2, .LBB16_30
+; RV32-NEXT:  .LBB16_12: # %else32
+; RV32-NEXT:    slli a2, a1, 19
+; RV32-NEXT:    bltz a2, .LBB16_31
+; RV32-NEXT:  .LBB16_13: # %else35
+; RV32-NEXT:    slli a2, a1, 18
+; RV32-NEXT:    bltz a2, .LBB16_32
+; RV32-NEXT:  .LBB16_14: # %else38
+; RV32-NEXT:    slli a2, a1, 17
+; RV32-NEXT:    bgez a2, .LBB16_16
+; RV32-NEXT:  .LBB16_15: # %cond.load40
+; RV32-NEXT:    lh a2, 28(a0)
+; RV32-NEXT:    vsetivli zero, 15, e16, m2, tu, ma
+; RV32-NEXT:    vmv.s.x v10, a2
+; RV32-NEXT:    vslideup.vi v8, v10, 14
+; RV32-NEXT:  .LBB16_16: # %else41
+; RV32-NEXT:    lui a2, 1048568
+; RV32-NEXT:    and a1, a1, a2
+; RV32-NEXT:    beqz a1, .LBB16_18
+; RV32-NEXT:  # %bb.17: # %cond.load43
+; RV32-NEXT:    lh a0, 30(a0)
+; RV32-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; RV32-NEXT:    vmv.s.x v10, a0
+; RV32-NEXT:    vslideup.vi v8, v10, 15
+; RV32-NEXT:  .LBB16_18: # %else44
 ; RV32-NEXT:    ret
+; RV32-NEXT:  .LBB16_19: # %cond.load
+; RV32-NEXT:    lh a2, 0(a0)
+; RV32-NEXT:    fmv.x.h a3, fa5
+; RV32-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; RV32-NEXT:    vmv.v.x v8, a3
+; RV32-NEXT:    vsetvli zero, zero, e16, m2, tu, ma
+; RV32-NEXT:    vmv.s.x v8, a2
+; RV32-NEXT:    andi a2, a1, 2
+; RV32-NEXT:    beqz a2, .LBB16_2
+; RV32-NEXT:  .LBB16_20: # %cond.load1
+; RV32-NEXT:    lh a2, 2(a0)
+; RV32-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
+; RV32-NEXT:    vmv.s.x v10, a2
+; RV32-NEXT:    vslideup.vi v8, v10, 1
+; RV32-NEXT:    andi a2, a1, 4
+; RV32-NEXT:    beqz a2, .LBB16_3
+; RV32-NEXT:  .LBB16_21: # %cond.load4
+; RV32-NEXT:    lh a2, 4(a0)
+; RV32-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
+; RV32-NEXT:    vmv.s.x v10, a2
+; RV32-NEXT:    vslideup.vi v8, v10, 2
+; RV32-NEXT:    andi a2, a1, 8
+; RV32-NEXT:    beqz a2, .LBB16_4
+; RV32-NEXT:  .LBB16_22: # %cond.load7
+; RV32-NEXT:    lh a2, 6(a0)
+; RV32-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
+; RV32-NEXT:    vmv.s.x v10, a2
+; RV32-NEXT:    vslideup.vi v8, v10, 3
+; RV32-NEXT:    andi a2, a1, 16
+; RV32-NEXT:    beqz a2, .LBB16_5
+; RV32-NEXT:  .LBB16_23: # %cond.load10
+; RV32-NEXT:    lh a2, 8(a0)
+; RV32-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
+; RV32-NEXT:    vmv.s.x v10, a2
+; RV32-NEXT:    vslideup.vi v8, v10, 4
+; RV32-NEXT:    andi a2, a1, 32
+; RV32-NEXT:    beqz a2, .LBB16_6
+; RV32-NEXT:  .LBB16_24: # %cond.load13
+; RV32-NEXT:    lh a2, 10(a0)
+; RV32-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
+; RV32-NEXT:    vmv.s.x v10, a2
+; RV32-NEXT:    vslideup.vi v8, v10, 5
+; RV32-NEXT:    andi a2, a1, 64
+; RV32-NEXT:    beqz a2, .LBB16_7
+; RV32-NEXT:  .LBB16_25: # %cond.load16
+; RV32-NEXT:    lh a2, 12(a0)
+; RV32-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
+; RV32-NEXT:    vmv.s.x v10, a2
+; RV32-NEXT:    vslideup.vi v8, v10, 6
+; RV32-NEXT:    andi a2, a1, 128
+; RV32-NEXT:    beqz a2, .LBB16_8
+; RV32-NEXT:  .LBB16_26: # %cond.load19
+; RV32-NEXT:    lh a2, 14(a0)
+; RV32-NEXT:    vsetivli zero, 8, e16, m1, tu, ma
+; RV32-NEXT:    vmv.s.x v10, a2
+; RV32-NEXT:    vslideup.vi v8, v10, 7
+; RV32-NEXT:    andi a2, a1, 256
+; RV32-NEXT:    beqz a2, .LBB16_9
+; RV32-NEXT:  .LBB16_27: # %cond.load22
+; RV32-NEXT:    lh a2, 16(a0)
+; RV32-NEXT:    vsetivli zero, 9, e16, m2, tu, ma
+; RV32-NEXT:    vmv.s.x v10, a2
+; RV32-NEXT:    vslideup.vi v8, v10, 8
+; RV32-NEXT:    andi a2, a1, 512
+; RV32-NEXT:    beqz a2, .LBB16_10
+; RV32-NEXT:  .LBB16_28: # %cond.load25
+; RV32-NEXT:    lh a2, 18(a0)
+; RV32-NEXT:    vsetivli zero, 10, e16, m2, tu, ma
+; RV32-NEXT:    vmv.s.x v10, a2
+; RV32-NEXT:    vslideup.vi v8, v10, 9
+; RV32-NEXT:    andi a2, a1, 1024
+; RV32-NEXT:    beqz a2, .LBB16_11
+; RV32-NEXT:  .LBB16_29: # %cond.load28
+; RV32-NEXT:    lh a2, 20(a0)
+; RV32-NEXT:    vsetivli zero, 11, e16, m2, tu, ma
+; RV32-NEXT:    vmv.s.x v10, a2
+; RV32-NEXT:    vslideup.vi v8, v10, 10
+; RV32-NEXT:    slli a2, a1, 20
+; RV32-NEXT:    bgez a2, .LBB16_12
+; RV32-NEXT:  .LBB16_30: # %cond.load31
+; RV32-NEXT:    lh a2, 22(a0)
+; RV32-NEXT:    vsetivli zero, 12, e16, m2, tu, ma
+; RV32-NEXT:    vmv.s.x v10, a2
+; RV32-NEXT:    vslideup.vi v8, v10, 11
+; RV32-NEXT:    slli a2, a1, 19
+; RV32-NEXT:    bgez a2, .LBB16_13
+; RV32-NEXT:  .LBB16_31: # %cond.load34
+; RV32-NEXT:    lh a2, 24(a0)
+; RV32-NEXT:    vsetivli zero, 13, e16, m2, tu, ma
+; RV32-NEXT:    vmv.s.x v10, a2
+; RV32-NEXT:    vslideup.vi v8, v10, 12
+; RV32-NEXT:    slli a2, a1, 18
+; RV32-NEXT:    bgez a2, .LBB16_14
+; RV32-NEXT:  .LBB16_32: # %cond.load37
+; RV32-NEXT:    lh a2, 26(a0)
+; RV32-NEXT:    vsetivli zero, 14, e16, m2, tu, ma
+; RV32-NEXT:    vmv.s.x v10, a2
+; RV32-NEXT:    vslideup.vi v8, v10, 13
+; RV32-NEXT:    slli a2, a1, 17
+; RV32-NEXT:    bltz a2, .LBB16_15
+; RV32-NEXT:    j .LBB16_16
 ;
-; RV64-LABEL: masked_load_v8f64:
+; RV64-LABEL: masked_load_v16bf16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT:    vle64.v v8, (a1)
-; RV64-NEXT:    fmv.d.x fa5, zero
-; RV64-NEXT:    vmfeq.vf v0, v8, fa5
-; RV64-NEXT:    vle64.v v8, (a0), v0.t
-; RV64-NEXT:    vse64.v v8, (a2)
+; RV64-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT:    vmv.x.s a1, v0
+; RV64-NEXT:    andi a2, a1, 1
+; RV64-NEXT:    # implicit-def: $v8m2
+; RV64-NEXT:    bnez a2, .LBB16_19
+; RV64-NEXT:  # %bb.1: # %else
+; RV64-NEXT:    andi a2, a1, 2
+; RV64-NEXT:    bnez a2, .LBB16_20
+; RV64-NEXT:  .LBB16_2: # %else2
+; RV64-NEXT:    andi a2, a1, 4
+; RV64-NEXT:    bnez a2, .LBB16_21
+; RV64-NEXT:  .LBB16_3: # %else5
+; RV64-NEXT:    andi a2, a1, 8
+; RV64-NEXT:    bnez a2, .LBB16_22
+; RV64-NEXT:  .LBB16_4: # %else8
+; RV64-NEXT:    andi a2, a1, 16
+; RV64-NEXT:    bnez a2, .LBB16_23
+; RV64-NEXT:  .LBB16_5: # %else11
+; RV64-NEXT:    andi a2, a1, 32
+; RV64-NEXT:    bnez a2, .LBB16_24
+; RV64-NEXT:  .LBB16_6: # %else14
+; RV64-NEXT:    andi a2, a1, 64
+; RV64-NEXT:    bnez a2, .LBB16_25
+; RV64-NEXT:  .LBB16_7: # %else17
+; RV64-NEXT:    andi a2, a1, 128
+; RV64-NEXT:    bnez a2, .LBB16_26
+; RV64-NEXT:  .LBB16_8: # %else20
+; RV64-NEXT:    andi a2, a1, 256
+; RV64-NEXT:    bnez a2, .LBB16_27
+; RV64-NEXT:  .LBB16_9: # %else23
+; RV64-NEXT:    andi a2, a1, 512
+; RV64-NEXT:    bnez a2, .LBB16_28
+; RV64-NEXT:  .LBB16_10: # %else26
+; RV64-NEXT:    andi a2, a1, 1024
+; RV64-NEXT:    bnez a2, .LBB16_29
+; RV64-NEXT:  .LBB16_11: # %else29
+; RV64-NEXT:    slli a2, a1, 52
+; RV64-NEXT:    bltz a2, .LBB16_30
+; RV64-NEXT:  .LBB16_12: # %else32
+; RV64-NEXT:    slli a2, a1, 51
+; RV64-NEXT:    bltz a2, .LBB16_31
+; RV64-NEXT:  .LBB16_13: # %else35
+; RV64-NEXT:    slli a2, a1, 50
+; RV64-NEXT:    bltz a2, .LBB16_32
+; RV64-NEXT:  .LBB16_14: # %else38
+; RV64-NEXT:    slli a2, a1, 49
+; RV64-NEXT:    bgez a2, .LBB16_16
+; RV64-NEXT:  .LBB16_15: # %cond.load40
+; RV64-NEXT:    lh a2, 28(a0)
+; RV64-NEXT:    vsetivli zero, 15, e16, m2, tu, ma
+; RV64-NEXT:    vmv.s.x v10, a2
+; RV64-NEXT:    vslideup.vi v8, v10, 14
+; RV64-NEXT:  .LBB16_16: # %else41
+; RV64-NEXT:    lui a2, 1048568
+; RV64-NEXT:    and a1, a1, a2
+; RV64-NEXT:    beqz a1, .LBB16_18
+; RV64-NEXT:  # %bb.17: # %cond.load43
+; RV64-NEXT:    lh a0, 30(a0)
+; RV64-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; RV64-NEXT:    vmv.s.x v10, a0
+; RV64-NEXT:    vslideup.vi v8, v10, 15
+; RV64-NEXT:  .LBB16_18: # %else44
 ; RV64-NEXT:    ret
-  %m = load <8 x double>, ptr %m_ptr
-  %mask = fcmp oeq <8 x double> %m, zeroinitializer
-  %load = call <8 x double> @llvm.masked.load.v8f64(ptr %a, i32 8, <8 x i1> %mask, <8 x double> undef)
-  store <8 x double> %load, ptr %res_ptr
-  ret void
+; RV64-NEXT:  .LBB16_19: # %cond.load
+; RV64-NEXT:    lh a2, 0(a0)
+; RV64-NEXT:    fmv.x.h a3, fa5
+; RV64-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; RV64-NEXT:    vmv.v.x v8, a3
+; RV64-NEXT:    vsetvli zero, zero, e16, m2, tu, ma
+; RV64-NEXT:    vmv.s.x v8, a2
+; RV64-NEXT:    andi a2, a1, 2
+; RV64-NEXT:    beqz a2, .LBB16_2
+; RV64-NEXT:  .LBB16_20: # %cond.load1
+; RV64-NEXT:    lh a2, 2(a0)
+; RV64-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
+; RV64-NEXT:    vmv.s.x v10, a2
+; RV64-NEXT:    vslideup.vi v8, v10, 1
+; RV64-NEXT:    andi a2, a1, 4
+; RV64-NEXT:    beqz a2, .LBB16_3
+; RV64-NEXT:  .LBB16_21: # %cond.load4
+; RV64-NEXT:    lh a2, 4(a0)
+; RV64-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
+; RV64-NEXT:    vmv.s.x v10, a2
+; RV64-NEXT:    vslideup.vi v8, v10, 2
+; RV64-NEXT:    andi a2, a1, 8
+; RV64-NEXT:    beqz a2, .LBB16_4
+; RV64-NEXT:  .LBB16_22: # %cond.load7
+; RV64-NEXT:    lh a2, 6(a0)
+; RV64-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
+; RV64-NEXT:    vmv.s.x v10, a2
+; RV64-NEXT:    vslideup.vi v8, v10, 3
+; RV64-NEXT:    andi a2, a1, 16
+; RV64-NEXT:    beqz a2, .LBB16_5
+; RV64-NEXT:  .LBB16_23: # %cond.load10
+; RV64-NEXT:    lh a2, 8(a0)
+; RV64-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
+; RV64-NEXT:    vmv.s.x v10, a2
+; RV64-NEXT:    vslideup.vi v8, v10, 4
+; RV64-NEXT:    andi a2, a1, 32
+; RV64-NEXT:    beqz a2, .LBB16_6
+; RV64-NEXT:  .LBB16_24: # %cond.load13
+; RV64-NEXT:    lh a2, 10(a0)
+; RV64-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
+; RV64-NEXT:    vmv.s.x v10, a2
+; RV64-NEXT:    vslideup.vi v8, v10, 5
+; RV64-NEXT:    andi a2, a1, 64
+; RV64-NEXT:    beqz a2, .LBB16_7
+; RV64-NEXT:  .LBB16_25: # %cond.load16
+; RV64-NEXT:    lh a2, 12(a0)
+; RV64-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
+; RV64-NEXT:    vmv.s.x v10, a2
+; RV64-NEXT:    vslideup.vi v8, v10, 6
+; RV64-NEXT:    andi a2, a1, 128
+; RV64-NEXT:    beqz a2, .LBB16_8
+; RV64-NEXT:  .LBB16_26: # %cond.load19
+; RV64-NEXT:    lh a2, 14(a0)
+; RV64-NEXT:    vsetivli zero, 8, e16, m1, tu, ma
+; RV64-NEXT:    vmv.s.x v10, a2
+; RV64-NEXT:    vslideup.vi v8, v10, 7
+; RV64-NEXT:    andi a2, a1, 256
+; RV64-NEXT:    beqz a2, .LBB16_9
+; RV64-NEXT:  .LBB16_27: # %cond.load22
+; RV64-NEXT:    lh a2, 16(a0)
+; RV64-NEXT:    vsetivli zero, 9, e16, m2, tu, ma
+; RV64-NEXT:    vmv.s.x v10, a2
+; RV64-NEXT:    vslideup.vi v8, v10, 8
+; RV64-NEXT:    andi a2, a1, 512
+; RV64-NEXT:    beqz a2, .LBB16_10
+; RV64-NEXT:  .LBB16_28: # %cond.load25
+; RV64-NEXT:    lh a2, 18(a0)
+; RV64-NEXT:    vsetivli zero, 10, e16, m2, tu, ma
+; RV64-NEXT:    vmv.s.x v10, a2
+; RV64-NEXT:    vslideup.vi v8, v10, 9
+; RV64-NEXT:    andi a2, a1, 1024
+; RV64-NEXT:    beqz a2, .LBB16_11
+; RV64-NEXT:  .LBB16_29: # %cond.load28
+; RV64-NEXT:    lh a2, 20(a0)
+; RV64-NEXT:    vsetivli zero, 11, e16, m2, tu, ma
+; RV64-NEXT:    vmv.s.x v10, a2
+; RV64-NEXT:    vslideup.vi v8, v10, 10
+; RV64-NEXT:    slli a2, a1, 52
+; RV64-NEXT:    bgez a2, .LBB16_12
+; RV64-NEXT:  .LBB16_30: # %cond.load31
+; RV64-NEXT:    lh a2, 22(a0)
+; RV64-NEXT:    vsetivli zero, 12, e16, m2, tu, ma
+; RV64-NEXT:    vmv.s.x v10, a2
+; RV64-NEXT:    vslideup.vi v8, v10, 11
+; RV64-NEXT:    slli a2, a1, 51
+; RV64-NEXT:    bgez a2, .LBB16_13
+; RV64-NEXT:  .LBB16_31: # %cond.load34
+; RV64-NEXT:    lh a2, 24(a0)
+; RV64-NEXT:    vsetivli zero, 13, e16, m2, tu, ma
+; RV64-NEXT:    vmv.s.x v10, a2
+; RV64-NEXT:    vslideup.vi v8, v10, 12
+; RV64-NEXT:    slli a2, a1, 50
+; RV64-NEXT:    bgez a2, .LBB16_14
+; RV64-NEXT:  .LBB16_32: # %cond.load37
+; RV64-NEXT:    lh a2, 26(a0)
+; RV64-NEXT:    vsetivli zero, 14, e16, m2, tu, ma
+; RV64-NEXT:    vmv.s.x v10, a2
+; RV64-NEXT:    vslideup.vi v8, v10, 13
+; RV64-NEXT:    slli a2, a1, 49
+; RV64-NEXT:    bltz a2, .LBB16_15
+; RV64-NEXT:    j .LBB16_16
+  %load = call <16 x bfloat> @llvm.masked.load.v16bf16(ptr %a, i32 8, <16 x i1> %mask, <16 x bfloat> undef)
+  ret <16 x bfloat> %load
 }
-declare <8 x double> @llvm.masked.load.v8f64(ptr, i32, <8 x i1>, <8 x double>)
 
-define void @masked_load_v16f16(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
-; CHECK-LABEL: masked_load_v16f16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vle16.v v8, (a1)
-; CHECK-NEXT:    fmv.h.x fa5, zero
-; CHECK-NEXT:    vmfeq.vf v0, v8, fa5
-; CHECK-NEXT:    vle16.v v8, (a0), v0.t
-; CHECK-NEXT:    vse16.v v8, (a2)
-; CHECK-NEXT:    ret
-  %m = load <16 x half>, ptr %m_ptr
-  %mask = fcmp oeq <16 x half> %m, zeroinitializer
+define <16 x half> @masked_load_v16f16(ptr %a, <16 x i1> %mask) {
+; ZVFH-LABEL: masked_load_v16f16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; ZVFH-NEXT:    vle16.v v8, (a0), v0.t
+; ZVFH-NEXT:    ret
+;
+; RV32-ZVFHMIN-LABEL: masked_load_v16f16:
+; RV32-ZVFHMIN:       # %bb.0:
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a1, v0
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 1
+; RV32-ZVFHMIN-NEXT:    # implicit-def: $v8m2
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB17_19
+; RV32-ZVFHMIN-NEXT:  # %bb.1: # %else
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 2
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB17_20
+; RV32-ZVFHMIN-NEXT:  .LBB17_2: # %else2
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 4
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB17_21
+; RV32-ZVFHMIN-NEXT:  .LBB17_3: # %else5
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 8
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB17_22
+; RV32-ZVFHMIN-NEXT:  .LBB17_4: # %else8
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 16
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB17_23
+; RV32-ZVFHMIN-NEXT:  .LBB17_5: # %else11
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 32
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB17_24
+; RV32-ZVFHMIN-NEXT:  .LBB17_6: # %else14
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 64
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB17_25
+; RV32-ZVFHMIN-NEXT:  .LBB17_7: # %else17
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 128
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB17_26
+; RV32-ZVFHMIN-NEXT:  .LBB17_8: # %else20
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 256
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB17_27
+; RV32-ZVFHMIN-NEXT:  .LBB17_9: # %else23
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 512
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB17_28
+; RV32-ZVFHMIN-NEXT:  .LBB17_10: # %else26
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 1024
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB17_29
+; RV32-ZVFHMIN-NEXT:  .LBB17_11: # %else29
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 20
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB17_30
+; RV32-ZVFHMIN-NEXT:  .LBB17_12: # %else32
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 19
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB17_31
+; RV32-ZVFHMIN-NEXT:  .LBB17_13: # %else35
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 18
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB17_32
+; RV32-ZVFHMIN-NEXT:  .LBB17_14: # %else38
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 17
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB17_16
+; RV32-ZVFHMIN-NEXT:  .LBB17_15: # %cond.load40
+; RV32-ZVFHMIN-NEXT:    lh a2, 28(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 15, e16, m2, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v10, a2
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v10, 14
+; RV32-ZVFHMIN-NEXT:  .LBB17_16: # %else41
+; RV32-ZVFHMIN-NEXT:    lui a2, 1048568
+; RV32-ZVFHMIN-NEXT:    and a1, a1, a2
+; RV32-ZVFHMIN-NEXT:    beqz a1, .LBB17_18
+; RV32-ZVFHMIN-NEXT:  # %bb.17: # %cond.load43
+; RV32-ZVFHMIN-NEXT:    lh a0, 30(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v10, a0
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v10, 15
+; RV32-ZVFHMIN-NEXT:  .LBB17_18: # %else44
+; RV32-ZVFHMIN-NEXT:    ret
+; RV32-ZVFHMIN-NEXT:  .LBB17_19: # %cond.load
+; RV32-ZVFHMIN-NEXT:    lh a2, 0(a0)
+; RV32-ZVFHMIN-NEXT:    fmv.x.h a3, fa5
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.v.x v8, a3
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v8, a2
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 2
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB17_2
+; RV32-ZVFHMIN-NEXT:  .LBB17_20: # %cond.load1
+; RV32-ZVFHMIN-NEXT:    lh a2, 2(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v10, a2
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v10, 1
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 4
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB17_3
+; RV32-ZVFHMIN-NEXT:  .LBB17_21: # %cond.load4
+; RV32-ZVFHMIN-NEXT:    lh a2, 4(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v10, a2
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v10, 2
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 8
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB17_4
+; RV32-ZVFHMIN-NEXT:  .LBB17_22: # %cond.load7
+; RV32-ZVFHMIN-NEXT:    lh a2, 6(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v10, a2
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v10, 3
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 16
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB17_5
+; RV32-ZVFHMIN-NEXT:  .LBB17_23: # %cond.load10
+; RV32-ZVFHMIN-NEXT:    lh a2, 8(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v10, a2
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v10, 4
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 32
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB17_6
+; RV32-ZVFHMIN-NEXT:  .LBB17_24: # %cond.load13
+; RV32-ZVFHMIN-NEXT:    lh a2, 10(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v10, a2
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v10, 5
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 64
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB17_7
+; RV32-ZVFHMIN-NEXT:  .LBB17_25: # %cond.load16
+; RV32-ZVFHMIN-NEXT:    lh a2, 12(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v10, a2
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v10, 6
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 128
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB17_8
+; RV32-ZVFHMIN-NEXT:  .LBB17_26: # %cond.load19
+; RV32-ZVFHMIN-NEXT:    lh a2, 14(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v10, a2
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v10, 7
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 256
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB17_9
+; RV32-ZVFHMIN-NEXT:  .LBB17_27: # %cond.load22
+; RV32-ZVFHMIN-NEXT:    lh a2, 16(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 9, e16, m2, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v10, a2
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v10, 8
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 512
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB17_10
+; RV32-ZVFHMIN-NEXT:  .LBB17_28: # %cond.load25
+; RV32-ZVFHMIN-NEXT:    lh a2, 18(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 10, e16, m2, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v10, a2
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v10, 9
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 1024
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB17_11
+; RV32-ZVFHMIN-NEXT:  .LBB17_29: # %cond.load28
+; RV32-ZVFHMIN-NEXT:    lh a2, 20(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 11, e16, m2, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v10, a2
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v10, 10
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 20
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB17_12
+; RV32-ZVFHMIN-NEXT:  .LBB17_30: # %cond.load31
+; RV32-ZVFHMIN-NEXT:    lh a2, 22(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 12, e16, m2, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v10, a2
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v10, 11
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 19
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB17_13
+; RV32-ZVFHMIN-NEXT:  .LBB17_31: # %cond.load34
+; RV32-ZVFHMIN-NEXT:    lh a2, 24(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 13, e16, m2, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v10, a2
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v10, 12
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 18
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB17_14
+; RV32-ZVFHMIN-NEXT:  .LBB17_32: # %cond.load37
+; RV32-ZVFHMIN-NEXT:    lh a2, 26(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 14, e16, m2, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v10, a2
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v10, 13
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 17
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB17_15
+; RV32-ZVFHMIN-NEXT:    j .LBB17_16
+;
+; RV64-ZVFHMIN-LABEL: masked_load_v16f16:
+; RV64-ZVFHMIN:       # %bb.0:
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a1, v0
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 1
+; RV64-ZVFHMIN-NEXT:    # implicit-def: $v8m2
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB17_19
+; RV64-ZVFHMIN-NEXT:  # %bb.1: # %else
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 2
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB17_20
+; RV64-ZVFHMIN-NEXT:  .LBB17_2: # %else2
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 4
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB17_21
+; RV64-ZVFHMIN-NEXT:  .LBB17_3: # %else5
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 8
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB17_22
+; RV64-ZVFHMIN-NEXT:  .LBB17_4: # %else8
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 16
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB17_23
+; RV64-ZVFHMIN-NEXT:  .LBB17_5: # %else11
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 32
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB17_24
+; RV64-ZVFHMIN-NEXT:  .LBB17_6: # %else14
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 64
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB17_25
+; RV64-ZVFHMIN-NEXT:  .LBB17_7: # %else17
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 128
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB17_26
+; RV64-ZVFHMIN-NEXT:  .LBB17_8: # %else20
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 256
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB17_27
+; RV64-ZVFHMIN-NEXT:  .LBB17_9: # %else23
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 512
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB17_28
+; RV64-ZVFHMIN-NEXT:  .LBB17_10: # %else26
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 1024
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB17_29
+; RV64-ZVFHMIN-NEXT:  .LBB17_11: # %else29
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 52
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB17_30
+; RV64-ZVFHMIN-NEXT:  .LBB17_12: # %else32
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 51
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB17_31
+; RV64-ZVFHMIN-NEXT:  .LBB17_13: # %else35
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 50
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB17_32
+; RV64-ZVFHMIN-NEXT:  .LBB17_14: # %else38
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 49
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB17_16
+; RV64-ZVFHMIN-NEXT:  .LBB17_15: # %cond.load40
+; RV64-ZVFHMIN-NEXT:    lh a2, 28(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 15, e16, m2, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v10, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v10, 14
+; RV64-ZVFHMIN-NEXT:  .LBB17_16: # %else41
+; RV64-ZVFHMIN-NEXT:    lui a2, 1048568
+; RV64-ZVFHMIN-NEXT:    and a1, a1, a2
+; RV64-ZVFHMIN-NEXT:    beqz a1, .LBB17_18
+; RV64-ZVFHMIN-NEXT:  # %bb.17: # %cond.load43
+; RV64-ZVFHMIN-NEXT:    lh a0, 30(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v10, a0
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v10, 15
+; RV64-ZVFHMIN-NEXT:  .LBB17_18: # %else44
+; RV64-ZVFHMIN-NEXT:    ret
+; RV64-ZVFHMIN-NEXT:  .LBB17_19: # %cond.load
+; RV64-ZVFHMIN-NEXT:    lh a2, 0(a0)
+; RV64-ZVFHMIN-NEXT:    fmv.x.h a3, fa5
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.v.x v8, a3
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v8, a2
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 2
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB17_2
+; RV64-ZVFHMIN-NEXT:  .LBB17_20: # %cond.load1
+; RV64-ZVFHMIN-NEXT:    lh a2, 2(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v10, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v10, 1
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 4
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB17_3
+; RV64-ZVFHMIN-NEXT:  .LBB17_21: # %cond.load4
+; RV64-ZVFHMIN-NEXT:    lh a2, 4(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v10, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v10, 2
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 8
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB17_4
+; RV64-ZVFHMIN-NEXT:  .LBB17_22: # %cond.load7
+; RV64-ZVFHMIN-NEXT:    lh a2, 6(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v10, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v10, 3
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 16
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB17_5
+; RV64-ZVFHMIN-NEXT:  .LBB17_23: # %cond.load10
+; RV64-ZVFHMIN-NEXT:    lh a2, 8(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v10, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v10, 4
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 32
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB17_6
+; RV64-ZVFHMIN-NEXT:  .LBB17_24: # %cond.load13
+; RV64-ZVFHMIN-NEXT:    lh a2, 10(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v10, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v10, 5
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 64
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB17_7
+; RV64-ZVFHMIN-NEXT:  .LBB17_25: # %cond.load16
+; RV64-ZVFHMIN-NEXT:    lh a2, 12(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v10, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v10, 6
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 128
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB17_8
+; RV64-ZVFHMIN-NEXT:  .LBB17_26: # %cond.load19
+; RV64-ZVFHMIN-NEXT:    lh a2, 14(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v10, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v10, 7
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 256
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB17_9
+; RV64-ZVFHMIN-NEXT:  .LBB17_27: # %cond.load22
+; RV64-ZVFHMIN-NEXT:    lh a2, 16(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 9, e16, m2, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v10, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v10, 8
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 512
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB17_10
+; RV64-ZVFHMIN-NEXT:  .LBB17_28: # %cond.load25
+; RV64-ZVFHMIN-NEXT:    lh a2, 18(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 10, e16, m2, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v10, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v10, 9
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 1024
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB17_11
+; RV64-ZVFHMIN-NEXT:  .LBB17_29: # %cond.load28
+; RV64-ZVFHMIN-NEXT:    lh a2, 20(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 11, e16, m2, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v10, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v10, 10
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 52
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB17_12
+; RV64-ZVFHMIN-NEXT:  .LBB17_30: # %cond.load31
+; RV64-ZVFHMIN-NEXT:    lh a2, 22(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 12, e16, m2, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v10, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v10, 11
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 51
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB17_13
+; RV64-ZVFHMIN-NEXT:  .LBB17_31: # %cond.load34
+; RV64-ZVFHMIN-NEXT:    lh a2, 24(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 13, e16, m2, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v10, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v10, 12
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 50
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB17_14
+; RV64-ZVFHMIN-NEXT:  .LBB17_32: # %cond.load37
+; RV64-ZVFHMIN-NEXT:    lh a2, 26(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 14, e16, m2, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v10, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v10, 13
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 49
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB17_15
+; RV64-ZVFHMIN-NEXT:    j .LBB17_16
   %load = call <16 x half> @llvm.masked.load.v16f16(ptr %a, i32 8, <16 x i1> %mask, <16 x half> undef)
-  store <16 x half> %load, ptr %res_ptr
-  ret void
+  ret <16 x half> %load
 }
-declare <16 x half> @llvm.masked.load.v16f16(ptr, i32, <16 x i1>, <16 x half>)
 
-define void @masked_load_v16f32(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
+define <16 x float> @masked_load_v16f32(ptr %a, <16 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a1)
-; CHECK-NEXT:    fmv.w.x fa5, zero
-; CHECK-NEXT:    vmfeq.vf v0, v8, fa5
 ; CHECK-NEXT:    vle32.v v8, (a0), v0.t
-; CHECK-NEXT:    vse32.v v8, (a2)
 ; CHECK-NEXT:    ret
-  %m = load <16 x float>, ptr %m_ptr
-  %mask = fcmp oeq <16 x float> %m, zeroinitializer
   %load = call <16 x float> @llvm.masked.load.v16f32(ptr %a, i32 8, <16 x i1> %mask, <16 x float> undef)
-  store <16 x float> %load, ptr %res_ptr
-  ret void
+  ret <16 x float> %load
 }
-declare <16 x float> @llvm.masked.load.v16f32(ptr, i32, <16 x i1>, <16 x float>)
 
-define void @masked_load_v16f64(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
-; RV32-LABEL: masked_load_v16f64:
+define <16 x double> @masked_load_v16f64(ptr %a, <16 x i1> %mask) {
+; CHECK-LABEL: masked_load_v16f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+  %load = call <16 x double> @llvm.masked.load.v16f64(ptr %a, i32 8, <16 x i1> %mask, <16 x double> undef)
+  ret <16 x double> %load
+}
+
+define <32 x bfloat> @masked_load_v32bf16(ptr %a, <32 x i1> %mask) {
+; RV32-LABEL: masked_load_v32bf16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vle64.v v8, (a1)
-; RV32-NEXT:    fcvt.d.w fa5, zero
-; RV32-NEXT:    vmfeq.vf v0, v8, fa5
-; RV32-NEXT:    vle64.v v8, (a0), v0.t
-; RV32-NEXT:    vse64.v v8, (a2)
+; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT:    vmv.x.s a1, v0
+; RV32-NEXT:    andi a2, a1, 1
+; RV32-NEXT:    # implicit-def: $v8m4
+; RV32-NEXT:    bnez a2, .LBB20_33
+; RV32-NEXT:  # %bb.1: # %else
+; RV32-NEXT:    andi a2, a1, 2
+; RV32-NEXT:    bnez a2, .LBB20_34
+; RV32-NEXT:  .LBB20_2: # %else2
+; RV32-NEXT:    andi a2, a1, 4
+; RV32-NEXT:    bnez a2, .LBB20_35
+; RV32-NEXT:  .LBB20_3: # %else5
+; RV32-NEXT:    andi a2, a1, 8
+; RV32-NEXT:    bnez a2, .LBB20_36
+; RV32-NEXT:  .LBB20_4: # %else8
+; RV32-NEXT:    andi a2, a1, 16
+; RV32-NEXT:    bnez a2, .LBB20_37
+; RV32-NEXT:  .LBB20_5: # %else11
+; RV32-NEXT:    andi a2, a1, 32
+; RV32-NEXT:    bnez a2, .LBB20_38
+; RV32-NEXT:  .LBB20_6: # %else14
+; RV32-NEXT:    andi a2, a1, 64
+; RV32-NEXT:    bnez a2, .LBB20_39
+; RV32-NEXT:  .LBB20_7: # %else17
+; RV32-NEXT:    andi a2, a1, 128
+; RV32-NEXT:    bnez a2, .LBB20_40
+; RV32-NEXT:  .LBB20_8: # %else20
+; RV32-NEXT:    andi a2, a1, 256
+; RV32-NEXT:    bnez a2, .LBB20_41
+; RV32-NEXT:  .LBB20_9: # %else23
+; RV32-NEXT:    andi a2, a1, 512
+; RV32-NEXT:    bnez a2, .LBB20_42
+; RV32-NEXT:  .LBB20_10: # %else26
+; RV32-NEXT:    andi a2, a1, 1024
+; RV32-NEXT:    bnez a2, .LBB20_43
+; RV32-NEXT:  .LBB20_11: # %else29
+; RV32-NEXT:    slli a2, a1, 20
+; RV32-NEXT:    bltz a2, .LBB20_44
+; RV32-NEXT:  .LBB20_12: # %else32
+; RV32-NEXT:    slli a2, a1, 19
+; RV32-NEXT:    bltz a2, .LBB20_45
+; RV32-NEXT:  .LBB20_13: # %else35
+; RV32-NEXT:    slli a2, a1, 18
+; RV32-NEXT:    bltz a2, .LBB20_46
+; RV32-NEXT:  .LBB20_14: # %else38
+; RV32-NEXT:    slli a2, a1, 17
+; RV32-NEXT:    bltz a2, .LBB20_47
+; RV32-NEXT:  .LBB20_15: # %else41
+; RV32-NEXT:    slli a2, a1, 16
+; RV32-NEXT:    bltz a2, .LBB20_48
+; RV32-NEXT:  .LBB20_16: # %else44
+; RV32-NEXT:    slli a2, a1, 15
+; RV32-NEXT:    bltz a2, .LBB20_49
+; RV32-NEXT:  .LBB20_17: # %else47
+; RV32-NEXT:    slli a2, a1, 14
+; RV32-NEXT:    bltz a2, .LBB20_50
+; RV32-NEXT:  .LBB20_18: # %else50
+; RV32-NEXT:    slli a2, a1, 13
+; RV32-NEXT:    bltz a2, .LBB20_51
+; RV32-NEXT:  .LBB20_19: # %else53
+; RV32-NEXT:    slli a2, a1, 12
+; RV32-NEXT:    bltz a2, .LBB20_52
+; RV32-NEXT:  .LBB20_20: # %else56
+; RV32-NEXT:    slli a2, a1, 11
+; RV32-NEXT:    bltz a2, .LBB20_53
+; RV32-NEXT:  .LBB20_21: # %else59
+; RV32-NEXT:    slli a2, a1, 10
+; RV32-NEXT:    bltz a2, .LBB20_54
+; RV32-NEXT:  .LBB20_22: # %else62
+; RV32-NEXT:    slli a2, a1, 9
+; RV32-NEXT:    bltz a2, .LBB20_55
+; RV32-NEXT:  .LBB20_23: # %else65
+; RV32-NEXT:    slli a2, a1, 8
+; RV32-NEXT:    bltz a2, .LBB20_56
+; RV32-NEXT:  .LBB20_24: # %else68
+; RV32-NEXT:    slli a2, a1, 7
+; RV32-NEXT:    bltz a2, .LBB20_57
+; RV32-NEXT:  .LBB20_25: # %else71
+; RV32-NEXT:    slli a2, a1, 6
+; RV32-NEXT:    bltz a2, .LBB20_58
+; RV32-NEXT:  .LBB20_26: # %else74
+; RV32-NEXT:    slli a2, a1, 5
+; RV32-NEXT:    bltz a2, .LBB20_59
+; RV32-NEXT:  .LBB20_27: # %else77
+; RV32-NEXT:    slli a2, a1, 4
+; RV32-NEXT:    bltz a2, .LBB20_60
+; RV32-NEXT:  .LBB20_28: # %else80
+; RV32-NEXT:    slli a2, a1, 3
+; RV32-NEXT:    bltz a2, .LBB20_61
+; RV32-NEXT:  .LBB20_29: # %else83
+; RV32-NEXT:    slli a2, a1, 2
+; RV32-NEXT:    bltz a2, .LBB20_62
+; RV32-NEXT:  .LBB20_30: # %else86
+; RV32-NEXT:    slli a2, a1, 1
+; RV32-NEXT:    bltz a2, .LBB20_63
+; RV32-NEXT:  .LBB20_31: # %else89
+; RV32-NEXT:    bltz a1, .LBB20_64
+; RV32-NEXT:  .LBB20_32: # %else92
+; RV32-NEXT:    ret
+; RV32-NEXT:  .LBB20_33: # %cond.load
+; RV32-NEXT:    lh a2, 0(a0)
+; RV32-NEXT:    fmv.x.h a3, fa5
+; RV32-NEXT:    li a4, 32
+; RV32-NEXT:    vsetvli zero, a4, e16, m4, ta, ma
+; RV32-NEXT:    vmv.v.x v8, a3
+; RV32-NEXT:    vsetvli zero, zero, e16, m4, tu, ma
+; RV32-NEXT:    vmv.s.x v8, a2
+; RV32-NEXT:    andi a2, a1, 2
+; RV32-NEXT:    beqz a2, .LBB20_2
+; RV32-NEXT:  .LBB20_34: # %cond.load1
+; RV32-NEXT:    lh a2, 2(a0)
+; RV32-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
+; RV32-NEXT:    vmv.s.x v12, a2
+; RV32-NEXT:    vslideup.vi v8, v12, 1
+; RV32-NEXT:    andi a2, a1, 4
+; RV32-NEXT:    beqz a2, .LBB20_3
+; RV32-NEXT:  .LBB20_35: # %cond.load4
+; RV32-NEXT:    lh a2, 4(a0)
+; RV32-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
+; RV32-NEXT:    vmv.s.x v12, a2
+; RV32-NEXT:    vslideup.vi v8, v12, 2
+; RV32-NEXT:    andi a2, a1, 8
+; RV32-NEXT:    beqz a2, .LBB20_4
+; RV32-NEXT:  .LBB20_36: # %cond.load7
+; RV32-NEXT:    lh a2, 6(a0)
+; RV32-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
+; RV32-NEXT:    vmv.s.x v12, a2
+; RV32-NEXT:    vslideup.vi v8, v12, 3
+; RV32-NEXT:    andi a2, a1, 16
+; RV32-NEXT:    beqz a2, .LBB20_5
+; RV32-NEXT:  .LBB20_37: # %cond.load10
+; RV32-NEXT:    lh a2, 8(a0)
+; RV32-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
+; RV32-NEXT:    vmv.s.x v12, a2
+; RV32-NEXT:    vslideup.vi v8, v12, 4
+; RV32-NEXT:    andi a2, a1, 32
+; RV32-NEXT:    beqz a2, .LBB20_6
+; RV32-NEXT:  .LBB20_38: # %cond.load13
+; RV32-NEXT:    lh a2, 10(a0)
+; RV32-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
+; RV32-NEXT:    vmv.s.x v12, a2
+; RV32-NEXT:    vslideup.vi v8, v12, 5
+; RV32-NEXT:    andi a2, a1, 64
+; RV32-NEXT:    beqz a2, .LBB20_7
+; RV32-NEXT:  .LBB20_39: # %cond.load16
+; RV32-NEXT:    lh a2, 12(a0)
+; RV32-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
+; RV32-NEXT:    vmv.s.x v12, a2
+; RV32-NEXT:    vslideup.vi v8, v12, 6
+; RV32-NEXT:    andi a2, a1, 128
+; RV32-NEXT:    beqz a2, .LBB20_8
+; RV32-NEXT:  .LBB20_40: # %cond.load19
+; RV32-NEXT:    lh a2, 14(a0)
+; RV32-NEXT:    vsetivli zero, 8, e16, m1, tu, ma
+; RV32-NEXT:    vmv.s.x v12, a2
+; RV32-NEXT:    vslideup.vi v8, v12, 7
+; RV32-NEXT:    andi a2, a1, 256
+; RV32-NEXT:    beqz a2, .LBB20_9
+; RV32-NEXT:  .LBB20_41: # %cond.load22
+; RV32-NEXT:    lh a2, 16(a0)
+; RV32-NEXT:    vsetivli zero, 9, e16, m2, tu, ma
+; RV32-NEXT:    vmv.s.x v12, a2
+; RV32-NEXT:    vslideup.vi v8, v12, 8
+; RV32-NEXT:    andi a2, a1, 512
+; RV32-NEXT:    beqz a2, .LBB20_10
+; RV32-NEXT:  .LBB20_42: # %cond.load25
+; RV32-NEXT:    lh a2, 18(a0)
+; RV32-NEXT:    vsetivli zero, 10, e16, m2, tu, ma
+; RV32-NEXT:    vmv.s.x v12, a2
+; RV32-NEXT:    vslideup.vi v8, v12, 9
+; RV32-NEXT:    andi a2, a1, 1024
+; RV32-NEXT:    beqz a2, .LBB20_11
+; RV32-NEXT:  .LBB20_43: # %cond.load28
+; RV32-NEXT:    lh a2, 20(a0)
+; RV32-NEXT:    vsetivli zero, 11, e16, m2, tu, ma
+; RV32-NEXT:    vmv.s.x v12, a2
+; RV32-NEXT:    vslideup.vi v8, v12, 10
+; RV32-NEXT:    slli a2, a1, 20
+; RV32-NEXT:    bgez a2, .LBB20_12
+; RV32-NEXT:  .LBB20_44: # %cond.load31
+; RV32-NEXT:    lh a2, 22(a0)
+; RV32-NEXT:    vsetivli zero, 12, e16, m2, tu, ma
+; RV32-NEXT:    vmv.s.x v12, a2
+; RV32-NEXT:    vslideup.vi v8, v12, 11
+; RV32-NEXT:    slli a2, a1, 19
+; RV32-NEXT:    bgez a2, .LBB20_13
+; RV32-NEXT:  .LBB20_45: # %cond.load34
+; RV32-NEXT:    lh a2, 24(a0)
+; RV32-NEXT:    vsetivli zero, 13, e16, m2, tu, ma
+; RV32-NEXT:    vmv.s.x v12, a2
+; RV32-NEXT:    vslideup.vi v8, v12, 12
+; RV32-NEXT:    slli a2, a1, 18
+; RV32-NEXT:    bgez a2, .LBB20_14
+; RV32-NEXT:  .LBB20_46: # %cond.load37
+; RV32-NEXT:    lh a2, 26(a0)
+; RV32-NEXT:    vsetivli zero, 14, e16, m2, tu, ma
+; RV32-NEXT:    vmv.s.x v12, a2
+; RV32-NEXT:    vslideup.vi v8, v12, 13
+; RV32-NEXT:    slli a2, a1, 17
+; RV32-NEXT:    bgez a2, .LBB20_15
+; RV32-NEXT:  .LBB20_47: # %cond.load40
+; RV32-NEXT:    lh a2, 28(a0)
+; RV32-NEXT:    vsetivli zero, 15, e16, m2, tu, ma
+; RV32-NEXT:    vmv.s.x v12, a2
+; RV32-NEXT:    vslideup.vi v8, v12, 14
+; RV32-NEXT:    slli a2, a1, 16
+; RV32-NEXT:    bgez a2, .LBB20_16
+; RV32-NEXT:  .LBB20_48: # %cond.load43
+; RV32-NEXT:    lh a2, 30(a0)
+; RV32-NEXT:    vsetivli zero, 16, e16, m2, tu, ma
+; RV32-NEXT:    vmv.s.x v12, a2
+; RV32-NEXT:    vslideup.vi v8, v12, 15
+; RV32-NEXT:    slli a2, a1, 15
+; RV32-NEXT:    bgez a2, .LBB20_17
+; RV32-NEXT:  .LBB20_49: # %cond.load46
+; RV32-NEXT:    lh a2, 32(a0)
+; RV32-NEXT:    vsetivli zero, 17, e16, m4, tu, ma
+; RV32-NEXT:    vmv.s.x v12, a2
+; RV32-NEXT:    vslideup.vi v8, v12, 16
+; RV32-NEXT:    slli a2, a1, 14
+; RV32-NEXT:    bgez a2, .LBB20_18
+; RV32-NEXT:  .LBB20_50: # %cond.load49
+; RV32-NEXT:    lh a2, 34(a0)
+; RV32-NEXT:    vsetivli zero, 18, e16, m4, tu, ma
+; RV32-NEXT:    vmv.s.x v12, a2
+; RV32-NEXT:    vslideup.vi v8, v12, 17
+; RV32-NEXT:    slli a2, a1, 13
+; RV32-NEXT:    bgez a2, .LBB20_19
+; RV32-NEXT:  .LBB20_51: # %cond.load52
+; RV32-NEXT:    lh a2, 36(a0)
+; RV32-NEXT:    vsetivli zero, 19, e16, m4, tu, ma
+; RV32-NEXT:    vmv.s.x v12, a2
+; RV32-NEXT:    vslideup.vi v8, v12, 18
+; RV32-NEXT:    slli a2, a1, 12
+; RV32-NEXT:    bgez a2, .LBB20_20
+; RV32-NEXT:  .LBB20_52: # %cond.load55
+; RV32-NEXT:    lh a2, 38(a0)
+; RV32-NEXT:    vsetivli zero, 20, e16, m4, tu, ma
+; RV32-NEXT:    vmv.s.x v12, a2
+; RV32-NEXT:    vslideup.vi v8, v12, 19
+; RV32-NEXT:    slli a2, a1, 11
+; RV32-NEXT:    bgez a2, .LBB20_21
+; RV32-NEXT:  .LBB20_53: # %cond.load58
+; RV32-NEXT:    lh a2, 40(a0)
+; RV32-NEXT:    vsetivli zero, 21, e16, m4, tu, ma
+; RV32-NEXT:    vmv.s.x v12, a2
+; RV32-NEXT:    vslideup.vi v8, v12, 20
+; RV32-NEXT:    slli a2, a1, 10
+; RV32-NEXT:    bgez a2, .LBB20_22
+; RV32-NEXT:  .LBB20_54: # %cond.load61
+; RV32-NEXT:    lh a2, 42(a0)
+; RV32-NEXT:    vsetivli zero, 22, e16, m4, tu, ma
+; RV32-NEXT:    vmv.s.x v12, a2
+; RV32-NEXT:    vslideup.vi v8, v12, 21
+; RV32-NEXT:    slli a2, a1, 9
+; RV32-NEXT:    bgez a2, .LBB20_23
+; RV32-NEXT:  .LBB20_55: # %cond.load64
+; RV32-NEXT:    lh a2, 44(a0)
+; RV32-NEXT:    vsetivli zero, 23, e16, m4, tu, ma
+; RV32-NEXT:    vmv.s.x v12, a2
+; RV32-NEXT:    vslideup.vi v8, v12, 22
+; RV32-NEXT:    slli a2, a1, 8
+; RV32-NEXT:    bgez a2, .LBB20_24
+; RV32-NEXT:  .LBB20_56: # %cond.load67
+; RV32-NEXT:    lh a2, 46(a0)
+; RV32-NEXT:    vsetivli zero, 24, e16, m4, tu, ma
+; RV32-NEXT:    vmv.s.x v12, a2
+; RV32-NEXT:    vslideup.vi v8, v12, 23
+; RV32-NEXT:    slli a2, a1, 7
+; RV32-NEXT:    bgez a2, .LBB20_25
+; RV32-NEXT:  .LBB20_57: # %cond.load70
+; RV32-NEXT:    lh a2, 48(a0)
+; RV32-NEXT:    vsetivli zero, 25, e16, m4, tu, ma
+; RV32-NEXT:    vmv.s.x v12, a2
+; RV32-NEXT:    vslideup.vi v8, v12, 24
+; RV32-NEXT:    slli a2, a1, 6
+; RV32-NEXT:    bgez a2, .LBB20_26
+; RV32-NEXT:  .LBB20_58: # %cond.load73
+; RV32-NEXT:    lh a2, 50(a0)
+; RV32-NEXT:    vsetivli zero, 26, e16, m4, tu, ma
+; RV32-NEXT:    vmv.s.x v12, a2
+; RV32-NEXT:    vslideup.vi v8, v12, 25
+; RV32-NEXT:    slli a2, a1, 5
+; RV32-NEXT:    bgez a2, .LBB20_27
+; RV32-NEXT:  .LBB20_59: # %cond.load76
+; RV32-NEXT:    lh a2, 52(a0)
+; RV32-NEXT:    vsetivli zero, 27, e16, m4, tu, ma
+; RV32-NEXT:    vmv.s.x v12, a2
+; RV32-NEXT:    vslideup.vi v8, v12, 26
+; RV32-NEXT:    slli a2, a1, 4
+; RV32-NEXT:    bgez a2, .LBB20_28
+; RV32-NEXT:  .LBB20_60: # %cond.load79
+; RV32-NEXT:    lh a2, 54(a0)
+; RV32-NEXT:    vsetivli zero, 28, e16, m4, tu, ma
+; RV32-NEXT:    vmv.s.x v12, a2
+; RV32-NEXT:    vslideup.vi v8, v12, 27
+; RV32-NEXT:    slli a2, a1, 3
+; RV32-NEXT:    bgez a2, .LBB20_29
+; RV32-NEXT:  .LBB20_61: # %cond.load82
+; RV32-NEXT:    lh a2, 56(a0)
+; RV32-NEXT:    vsetivli zero, 29, e16, m4, tu, ma
+; RV32-NEXT:    vmv.s.x v12, a2
+; RV32-NEXT:    vslideup.vi v8, v12, 28
+; RV32-NEXT:    slli a2, a1, 2
+; RV32-NEXT:    bgez a2, .LBB20_30
+; RV32-NEXT:  .LBB20_62: # %cond.load85
+; RV32-NEXT:    lh a2, 58(a0)
+; RV32-NEXT:    vsetivli zero, 30, e16, m4, tu, ma
+; RV32-NEXT:    vmv.s.x v12, a2
+; RV32-NEXT:    vslideup.vi v8, v12, 29
+; RV32-NEXT:    slli a2, a1, 1
+; RV32-NEXT:    bgez a2, .LBB20_31
+; RV32-NEXT:  .LBB20_63: # %cond.load88
+; RV32-NEXT:    lh a2, 60(a0)
+; RV32-NEXT:    vsetivli zero, 31, e16, m4, tu, ma
+; RV32-NEXT:    vmv.s.x v12, a2
+; RV32-NEXT:    vslideup.vi v8, v12, 30
+; RV32-NEXT:    bgez a1, .LBB20_32
+; RV32-NEXT:  .LBB20_64: # %cond.load91
+; RV32-NEXT:    lh a0, 62(a0)
+; RV32-NEXT:    li a1, 32
+; RV32-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
+; RV32-NEXT:    vmv.s.x v12, a0
+; RV32-NEXT:    vslideup.vi v8, v12, 31
 ; RV32-NEXT:    ret
 ;
-; RV64-LABEL: masked_load_v16f64:
+; RV64-LABEL: masked_load_v32bf16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT:    vle64.v v8, (a1)
-; RV64-NEXT:    fmv.d.x fa5, zero
-; RV64-NEXT:    vmfeq.vf v0, v8, fa5
-; RV64-NEXT:    vle64.v v8, (a0), v0.t
-; RV64-NEXT:    vse64.v v8, (a2)
+; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT:    vmv.x.s a1, v0
+; RV64-NEXT:    andi a2, a1, 1
+; RV64-NEXT:    # implicit-def: $v8m4
+; RV64-NEXT:    bnez a2, .LBB20_35
+; RV64-NEXT:  # %bb.1: # %else
+; RV64-NEXT:    andi a2, a1, 2
+; RV64-NEXT:    bnez a2, .LBB20_36
+; RV64-NEXT:  .LBB20_2: # %else2
+; RV64-NEXT:    andi a2, a1, 4
+; RV64-NEXT:    bnez a2, .LBB20_37
+; RV64-NEXT:  .LBB20_3: # %else5
+; RV64-NEXT:    andi a2, a1, 8
+; RV64-NEXT:    bnez a2, .LBB20_38
+; RV64-NEXT:  .LBB20_4: # %else8
+; RV64-NEXT:    andi a2, a1, 16
+; RV64-NEXT:    bnez a2, .LBB20_39
+; RV64-NEXT:  .LBB20_5: # %else11
+; RV64-NEXT:    andi a2, a1, 32
+; RV64-NEXT:    bnez a2, .LBB20_40
+; RV64-NEXT:  .LBB20_6: # %else14
+; RV64-NEXT:    andi a2, a1, 64
+; RV64-NEXT:    bnez a2, .LBB20_41
+; RV64-NEXT:  .LBB20_7: # %else17
+; RV64-NEXT:    andi a2, a1, 128
+; RV64-NEXT:    bnez a2, .LBB20_42
+; RV64-NEXT:  .LBB20_8: # %else20
+; RV64-NEXT:    andi a2, a1, 256
+; RV64-NEXT:    bnez a2, .LBB20_43
+; RV64-NEXT:  .LBB20_9: # %else23
+; RV64-NEXT:    andi a2, a1, 512
+; RV64-NEXT:    bnez a2, .LBB20_44
+; RV64-NEXT:  .LBB20_10: # %else26
+; RV64-NEXT:    andi a2, a1, 1024
+; RV64-NEXT:    bnez a2, .LBB20_45
+; RV64-NEXT:  .LBB20_11: # %else29
+; RV64-NEXT:    slli a2, a1, 52
+; RV64-NEXT:    bltz a2, .LBB20_46
+; RV64-NEXT:  .LBB20_12: # %else32
+; RV64-NEXT:    slli a2, a1, 51
+; RV64-NEXT:    bltz a2, .LBB20_47
+; RV64-NEXT:  .LBB20_13: # %else35
+; RV64-NEXT:    slli a2, a1, 50
+; RV64-NEXT:    bltz a2, .LBB20_48
+; RV64-NEXT:  .LBB20_14: # %else38
+; RV64-NEXT:    slli a2, a1, 49
+; RV64-NEXT:    bltz a2, .LBB20_49
+; RV64-NEXT:  .LBB20_15: # %else41
+; RV64-NEXT:    slli a2, a1, 48
+; RV64-NEXT:    bltz a2, .LBB20_50
+; RV64-NEXT:  .LBB20_16: # %else44
+; RV64-NEXT:    slli a2, a1, 47
+; RV64-NEXT:    bltz a2, .LBB20_51
+; RV64-NEXT:  .LBB20_17: # %else47
+; RV64-NEXT:    slli a2, a1, 46
+; RV64-NEXT:    bltz a2, .LBB20_52
+; RV64-NEXT:  .LBB20_18: # %else50
+; RV64-NEXT:    slli a2, a1, 45
+; RV64-NEXT:    bltz a2, .LBB20_53
+; RV64-NEXT:  .LBB20_19: # %else53
+; RV64-NEXT:    slli a2, a1, 44
+; RV64-NEXT:    bltz a2, .LBB20_54
+; RV64-NEXT:  .LBB20_20: # %else56
+; RV64-NEXT:    slli a2, a1, 43
+; RV64-NEXT:    bltz a2, .LBB20_55
+; RV64-NEXT:  .LBB20_21: # %else59
+; RV64-NEXT:    slli a2, a1, 42
+; RV64-NEXT:    bltz a2, .LBB20_56
+; RV64-NEXT:  .LBB20_22: # %else62
+; RV64-NEXT:    slli a2, a1, 41
+; RV64-NEXT:    bltz a2, .LBB20_57
+; RV64-NEXT:  .LBB20_23: # %else65
+; RV64-NEXT:    slli a2, a1, 40
+; RV64-NEXT:    bltz a2, .LBB20_58
+; RV64-NEXT:  .LBB20_24: # %else68
+; RV64-NEXT:    slli a2, a1, 39
+; RV64-NEXT:    bltz a2, .LBB20_59
+; RV64-NEXT:  .LBB20_25: # %else71
+; RV64-NEXT:    slli a2, a1, 38
+; RV64-NEXT:    bltz a2, .LBB20_60
+; RV64-NEXT:  .LBB20_26: # %else74
+; RV64-NEXT:    slli a2, a1, 37
+; RV64-NEXT:    bltz a2, .LBB20_61
+; RV64-NEXT:  .LBB20_27: # %else77
+; RV64-NEXT:    slli a2, a1, 36
+; RV64-NEXT:    bltz a2, .LBB20_62
+; RV64-NEXT:  .LBB20_28: # %else80
+; RV64-NEXT:    slli a2, a1, 35
+; RV64-NEXT:    bltz a2, .LBB20_63
+; RV64-NEXT:  .LBB20_29: # %else83
+; RV64-NEXT:    slli a2, a1, 34
+; RV64-NEXT:    bltz a2, .LBB20_64
+; RV64-NEXT:  .LBB20_30: # %else86
+; RV64-NEXT:    slli a2, a1, 33
+; RV64-NEXT:    bgez a2, .LBB20_32
+; RV64-NEXT:  .LBB20_31: # %cond.load88
+; RV64-NEXT:    lh a2, 60(a0)
+; RV64-NEXT:    vsetivli zero, 31, e16, m4, tu, ma
+; RV64-NEXT:    vmv.s.x v12, a2
+; RV64-NEXT:    vslideup.vi v8, v12, 30
+; RV64-NEXT:  .LBB20_32: # %else89
+; RV64-NEXT:    lui a2, 524288
+; RV64-NEXT:    and a1, a1, a2
+; RV64-NEXT:    beqz a1, .LBB20_34
+; RV64-NEXT:  # %bb.33: # %cond.load91
+; RV64-NEXT:    lh a0, 62(a0)
+; RV64-NEXT:    li a1, 32
+; RV64-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
+; RV64-NEXT:    vmv.s.x v12, a0
+; RV64-NEXT:    vslideup.vi v8, v12, 31
+; RV64-NEXT:  .LBB20_34: # %else92
 ; RV64-NEXT:    ret
-  %m = load <16 x double>, ptr %m_ptr
-  %mask = fcmp oeq <16 x double> %m, zeroinitializer
-  %load = call <16 x double> @llvm.masked.load.v16f64(ptr %a, i32 8, <16 x i1> %mask, <16 x double> undef)
-  store <16 x double> %load, ptr %res_ptr
-  ret void
+; RV64-NEXT:  .LBB20_35: # %cond.load
+; RV64-NEXT:    lh a2, 0(a0)
+; RV64-NEXT:    fmv.x.h a3, fa5
+; RV64-NEXT:    li a4, 32
+; RV64-NEXT:    vsetvli zero, a4, e16, m4, ta, ma
+; RV64-NEXT:    vmv.v.x v8, a3
+; RV64-NEXT:    vsetvli zero, zero, e16, m4, tu, ma
+; RV64-NEXT:    vmv.s.x v8, a2
+; RV64-NEXT:    andi a2, a1, 2
+; RV64-NEXT:    beqz a2, .LBB20_2
+; RV64-NEXT:  .LBB20_36: # %cond.load1
+; RV64-NEXT:    lh a2, 2(a0)
+; RV64-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
+; RV64-NEXT:    vmv.s.x v12, a2
+; RV64-NEXT:    vslideup.vi v8, v12, 1
+; RV64-NEXT:    andi a2, a1, 4
+; RV64-NEXT:    beqz a2, .LBB20_3
+; RV64-NEXT:  .LBB20_37: # %cond.load4
+; RV64-NEXT:    lh a2, 4(a0)
+; RV64-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
+; RV64-NEXT:    vmv.s.x v12, a2
+; RV64-NEXT:    vslideup.vi v8, v12, 2
+; RV64-NEXT:    andi a2, a1, 8
+; RV64-NEXT:    beqz a2, .LBB20_4
+; RV64-NEXT:  .LBB20_38: # %cond.load7
+; RV64-NEXT:    lh a2, 6(a0)
+; RV64-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
+; RV64-NEXT:    vmv.s.x v12, a2
+; RV64-NEXT:    vslideup.vi v8, v12, 3
+; RV64-NEXT:    andi a2, a1, 16
+; RV64-NEXT:    beqz a2, .LBB20_5
+; RV64-NEXT:  .LBB20_39: # %cond.load10
+; RV64-NEXT:    lh a2, 8(a0)
+; RV64-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
+; RV64-NEXT:    vmv.s.x v12, a2
+; RV64-NEXT:    vslideup.vi v8, v12, 4
+; RV64-NEXT:    andi a2, a1, 32
+; RV64-NEXT:    beqz a2, .LBB20_6
+; RV64-NEXT:  .LBB20_40: # %cond.load13
+; RV64-NEXT:    lh a2, 10(a0)
+; RV64-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
+; RV64-NEXT:    vmv.s.x v12, a2
+; RV64-NEXT:    vslideup.vi v8, v12, 5
+; RV64-NEXT:    andi a2, a1, 64
+; RV64-NEXT:    beqz a2, .LBB20_7
+; RV64-NEXT:  .LBB20_41: # %cond.load16
+; RV64-NEXT:    lh a2, 12(a0)
+; RV64-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
+; RV64-NEXT:    vmv.s.x v12, a2
+; RV64-NEXT:    vslideup.vi v8, v12, 6
+; RV64-NEXT:    andi a2, a1, 128
+; RV64-NEXT:    beqz a2, .LBB20_8
+; RV64-NEXT:  .LBB20_42: # %cond.load19
+; RV64-NEXT:    lh a2, 14(a0)
+; RV64-NEXT:    vsetivli zero, 8, e16, m1, tu, ma
+; RV64-NEXT:    vmv.s.x v12, a2
+; RV64-NEXT:    vslideup.vi v8, v12, 7
+; RV64-NEXT:    andi a2, a1, 256
+; RV64-NEXT:    beqz a2, .LBB20_9
+; RV64-NEXT:  .LBB20_43: # %cond.load22
+; RV64-NEXT:    lh a2, 16(a0)
+; RV64-NEXT:    vsetivli zero, 9, e16, m2, tu, ma
+; RV64-NEXT:    vmv.s.x v12, a2
+; RV64-NEXT:    vslideup.vi v8, v12, 8
+; RV64-NEXT:    andi a2, a1, 512
+; RV64-NEXT:    beqz a2, .LBB20_10
+; RV64-NEXT:  .LBB20_44: # %cond.load25
+; RV64-NEXT:    lh a2, 18(a0)
+; RV64-NEXT:    vsetivli zero, 10, e16, m2, tu, ma
+; RV64-NEXT:    vmv.s.x v12, a2
+; RV64-NEXT:    vslideup.vi v8, v12, 9
+; RV64-NEXT:    andi a2, a1, 1024
+; RV64-NEXT:    beqz a2, .LBB20_11
+; RV64-NEXT:  .LBB20_45: # %cond.load28
+; RV64-NEXT:    lh a2, 20(a0)
+; RV64-NEXT:    vsetivli zero, 11, e16, m2, tu, ma
+; RV64-NEXT:    vmv.s.x v12, a2
+; RV64-NEXT:    vslideup.vi v8, v12, 10
+; RV64-NEXT:    slli a2, a1, 52
+; RV64-NEXT:    bgez a2, .LBB20_12
+; RV64-NEXT:  .LBB20_46: # %cond.load31
+; RV64-NEXT:    lh a2, 22(a0)
+; RV64-NEXT:    vsetivli zero, 12, e16, m2, tu, ma
+; RV64-NEXT:    vmv.s.x v12, a2
+; RV64-NEXT:    vslideup.vi v8, v12, 11
+; RV64-NEXT:    slli a2, a1, 51
+; RV64-NEXT:    bgez a2, .LBB20_13
+; RV64-NEXT:  .LBB20_47: # %cond.load34
+; RV64-NEXT:    lh a2, 24(a0)
+; RV64-NEXT:    vsetivli zero, 13, e16, m2, tu, ma
+; RV64-NEXT:    vmv.s.x v12, a2
+; RV64-NEXT:    vslideup.vi v8, v12, 12
+; RV64-NEXT:    slli a2, a1, 50
+; RV64-NEXT:    bgez a2, .LBB20_14
+; RV64-NEXT:  .LBB20_48: # %cond.load37
+; RV64-NEXT:    lh a2, 26(a0)
+; RV64-NEXT:    vsetivli zero, 14, e16, m2, tu, ma
+; RV64-NEXT:    vmv.s.x v12, a2
+; RV64-NEXT:    vslideup.vi v8, v12, 13
+; RV64-NEXT:    slli a2, a1, 49
+; RV64-NEXT:    bgez a2, .LBB20_15
+; RV64-NEXT:  .LBB20_49: # %cond.load40
+; RV64-NEXT:    lh a2, 28(a0)
+; RV64-NEXT:    vsetivli zero, 15, e16, m2, tu, ma
+; RV64-NEXT:    vmv.s.x v12, a2
+; RV64-NEXT:    vslideup.vi v8, v12, 14
+; RV64-NEXT:    slli a2, a1, 48
+; RV64-NEXT:    bgez a2, .LBB20_16
+; RV64-NEXT:  .LBB20_50: # %cond.load43
+; RV64-NEXT:    lh a2, 30(a0)
+; RV64-NEXT:    vsetivli zero, 16, e16, m2, tu, ma
+; RV64-NEXT:    vmv.s.x v12, a2
+; RV64-NEXT:    vslideup.vi v8, v12, 15
+; RV64-NEXT:    slli a2, a1, 47
+; RV64-NEXT:    bgez a2, .LBB20_17
+; RV64-NEXT:  .LBB20_51: # %cond.load46
+; RV64-NEXT:    lh a2, 32(a0)
+; RV64-NEXT:    vsetivli zero, 17, e16, m4, tu, ma
+; RV64-NEXT:    vmv.s.x v12, a2
+; RV64-NEXT:    vslideup.vi v8, v12, 16
+; RV64-NEXT:    slli a2, a1, 46
+; RV64-NEXT:    bgez a2, .LBB20_18
+; RV64-NEXT:  .LBB20_52: # %cond.load49
+; RV64-NEXT:    lh a2, 34(a0)
+; RV64-NEXT:    vsetivli zero, 18, e16, m4, tu, ma
+; RV64-NEXT:    vmv.s.x v12, a2
+; RV64-NEXT:    vslideup.vi v8, v12, 17
+; RV64-NEXT:    slli a2, a1, 45
+; RV64-NEXT:    bgez a2, .LBB20_19
+; RV64-NEXT:  .LBB20_53: # %cond.load52
+; RV64-NEXT:    lh a2, 36(a0)
+; RV64-NEXT:    vsetivli zero, 19, e16, m4, tu, ma
+; RV64-NEXT:    vmv.s.x v12, a2
+; RV64-NEXT:    vslideup.vi v8, v12, 18
+; RV64-NEXT:    slli a2, a1, 44
+; RV64-NEXT:    bgez a2, .LBB20_20
+; RV64-NEXT:  .LBB20_54: # %cond.load55
+; RV64-NEXT:    lh a2, 38(a0)
+; RV64-NEXT:    vsetivli zero, 20, e16, m4, tu, ma
+; RV64-NEXT:    vmv.s.x v12, a2
+; RV64-NEXT:    vslideup.vi v8, v12, 19
+; RV64-NEXT:    slli a2, a1, 43
+; RV64-NEXT:    bgez a2, .LBB20_21
+; RV64-NEXT:  .LBB20_55: # %cond.load58
+; RV64-NEXT:    lh a2, 40(a0)
+; RV64-NEXT:    vsetivli zero, 21, e16, m4, tu, ma
+; RV64-NEXT:    vmv.s.x v12, a2
+; RV64-NEXT:    vslideup.vi v8, v12, 20
+; RV64-NEXT:    slli a2, a1, 42
+; RV64-NEXT:    bgez a2, .LBB20_22
+; RV64-NEXT:  .LBB20_56: # %cond.load61
+; RV64-NEXT:    lh a2, 42(a0)
+; RV64-NEXT:    vsetivli zero, 22, e16, m4, tu, ma
+; RV64-NEXT:    vmv.s.x v12, a2
+; RV64-NEXT:    vslideup.vi v8, v12, 21
+; RV64-NEXT:    slli a2, a1, 41
+; RV64-NEXT:    bgez a2, .LBB20_23
+; RV64-NEXT:  .LBB20_57: # %cond.load64
+; RV64-NEXT:    lh a2, 44(a0)
+; RV64-NEXT:    vsetivli zero, 23, e16, m4, tu, ma
+; RV64-NEXT:    vmv.s.x v12, a2
+; RV64-NEXT:    vslideup.vi v8, v12, 22
+; RV64-NEXT:    slli a2, a1, 40
+; RV64-NEXT:    bgez a2, .LBB20_24
+; RV64-NEXT:  .LBB20_58: # %cond.load67
+; RV64-NEXT:    lh a2, 46(a0)
+; RV64-NEXT:    vsetivli zero, 24, e16, m4, tu, ma
+; RV64-NEXT:    vmv.s.x v12, a2
+; RV64-NEXT:    vslideup.vi v8, v12, 23
+; RV64-NEXT:    slli a2, a1, 39
+; RV64-NEXT:    bgez a2, .LBB20_25
+; RV64-NEXT:  .LBB20_59: # %cond.load70
+; RV64-NEXT:    lh a2, 48(a0)
+; RV64-NEXT:    vsetivli zero, 25, e16, m4, tu, ma
+; RV64-NEXT:    vmv.s.x v12, a2
+; RV64-NEXT:    vslideup.vi v8, v12, 24
+; RV64-NEXT:    slli a2, a1, 38
+; RV64-NEXT:    bgez a2, .LBB20_26
+; RV64-NEXT:  .LBB20_60: # %cond.load73
+; RV64-NEXT:    lh a2, 50(a0)
+; RV64-NEXT:    vsetivli zero, 26, e16, m4, tu, ma
+; RV64-NEXT:    vmv.s.x v12, a2
+; RV64-NEXT:    vslideup.vi v8, v12, 25
+; RV64-NEXT:    slli a2, a1, 37
+; RV64-NEXT:    bgez a2, .LBB20_27
+; RV64-NEXT:  .LBB20_61: # %cond.load76
+; RV64-NEXT:    lh a2, 52(a0)
+; RV64-NEXT:    vsetivli zero, 27, e16, m4, tu, ma
+; RV64-NEXT:    vmv.s.x v12, a2
+; RV64-NEXT:    vslideup.vi v8, v12, 26
+; RV64-NEXT:    slli a2, a1, 36
+; RV64-NEXT:    bgez a2, .LBB20_28
+; RV64-NEXT:  .LBB20_62: # %cond.load79
+; RV64-NEXT:    lh a2, 54(a0)
+; RV64-NEXT:    vsetivli zero, 28, e16, m4, tu, ma
+; RV64-NEXT:    vmv.s.x v12, a2
+; RV64-NEXT:    vslideup.vi v8, v12, 27
+; RV64-NEXT:    slli a2, a1, 35
+; RV64-NEXT:    bgez a2, .LBB20_29
+; RV64-NEXT:  .LBB20_63: # %cond.load82
+; RV64-NEXT:    lh a2, 56(a0)
+; RV64-NEXT:    vsetivli zero, 29, e16, m4, tu, ma
+; RV64-NEXT:    vmv.s.x v12, a2
+; RV64-NEXT:    vslideup.vi v8, v12, 28
+; RV64-NEXT:    slli a2, a1, 34
+; RV64-NEXT:    bgez a2, .LBB20_30
+; RV64-NEXT:  .LBB20_64: # %cond.load85
+; RV64-NEXT:    lh a2, 58(a0)
+; RV64-NEXT:    vsetivli zero, 30, e16, m4, tu, ma
+; RV64-NEXT:    vmv.s.x v12, a2
+; RV64-NEXT:    vslideup.vi v8, v12, 29
+; RV64-NEXT:    slli a2, a1, 33
+; RV64-NEXT:    bltz a2, .LBB20_31
+; RV64-NEXT:    j .LBB20_32
+  %load = call <32 x bfloat> @llvm.masked.load.v32bf16(ptr %a, i32 8, <32 x i1> %mask, <32 x bfloat> undef)
+  ret <32 x bfloat> %load
 }
-declare <16 x double> @llvm.masked.load.v16f64(ptr, i32, <16 x i1>, <16 x double>)
 
-define void @masked_load_v32f16(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
-; CHECK-LABEL: masked_load_v32f16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a3, 32
-; CHECK-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
-; CHECK-NEXT:    vle16.v v8, (a1)
-; CHECK-NEXT:    fmv.h.x fa5, zero
-; CHECK-NEXT:    vmfeq.vf v0, v8, fa5
-; CHECK-NEXT:    vle16.v v8, (a0), v0.t
-; CHECK-NEXT:    vse16.v v8, (a2)
-; CHECK-NEXT:    ret
-  %m = load <32 x half>, ptr %m_ptr
-  %mask = fcmp oeq <32 x half> %m, zeroinitializer
+define <32 x half> @masked_load_v32f16(ptr %a, <32 x i1> %mask) {
+; ZVFH-LABEL: masked_load_v32f16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    li a1, 32
+; ZVFH-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
+; ZVFH-NEXT:    vle16.v v8, (a0), v0.t
+; ZVFH-NEXT:    ret
+;
+; RV32-ZVFHMIN-LABEL: masked_load_v32f16:
+; RV32-ZVFHMIN:       # %bb.0:
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a1, v0
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 1
+; RV32-ZVFHMIN-NEXT:    # implicit-def: $v8m4
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB21_33
+; RV32-ZVFHMIN-NEXT:  # %bb.1: # %else
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 2
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB21_34
+; RV32-ZVFHMIN-NEXT:  .LBB21_2: # %else2
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 4
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB21_35
+; RV32-ZVFHMIN-NEXT:  .LBB21_3: # %else5
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 8
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB21_36
+; RV32-ZVFHMIN-NEXT:  .LBB21_4: # %else8
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 16
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB21_37
+; RV32-ZVFHMIN-NEXT:  .LBB21_5: # %else11
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 32
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB21_38
+; RV32-ZVFHMIN-NEXT:  .LBB21_6: # %else14
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 64
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB21_39
+; RV32-ZVFHMIN-NEXT:  .LBB21_7: # %else17
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 128
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB21_40
+; RV32-ZVFHMIN-NEXT:  .LBB21_8: # %else20
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 256
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB21_41
+; RV32-ZVFHMIN-NEXT:  .LBB21_9: # %else23
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 512
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB21_42
+; RV32-ZVFHMIN-NEXT:  .LBB21_10: # %else26
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 1024
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB21_43
+; RV32-ZVFHMIN-NEXT:  .LBB21_11: # %else29
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 20
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB21_44
+; RV32-ZVFHMIN-NEXT:  .LBB21_12: # %else32
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 19
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB21_45
+; RV32-ZVFHMIN-NEXT:  .LBB21_13: # %else35
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 18
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB21_46
+; RV32-ZVFHMIN-NEXT:  .LBB21_14: # %else38
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 17
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB21_47
+; RV32-ZVFHMIN-NEXT:  .LBB21_15: # %else41
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 16
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB21_48
+; RV32-ZVFHMIN-NEXT:  .LBB21_16: # %else44
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 15
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB21_49
+; RV32-ZVFHMIN-NEXT:  .LBB21_17: # %else47
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 14
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB21_50
+; RV32-ZVFHMIN-NEXT:  .LBB21_18: # %else50
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 13
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB21_51
+; RV32-ZVFHMIN-NEXT:  .LBB21_19: # %else53
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 12
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB21_52
+; RV32-ZVFHMIN-NEXT:  .LBB21_20: # %else56
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 11
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB21_53
+; RV32-ZVFHMIN-NEXT:  .LBB21_21: # %else59
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 10
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB21_54
+; RV32-ZVFHMIN-NEXT:  .LBB21_22: # %else62
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 9
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB21_55
+; RV32-ZVFHMIN-NEXT:  .LBB21_23: # %else65
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 8
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB21_56
+; RV32-ZVFHMIN-NEXT:  .LBB21_24: # %else68
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 7
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB21_57
+; RV32-ZVFHMIN-NEXT:  .LBB21_25: # %else71
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 6
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB21_58
+; RV32-ZVFHMIN-NEXT:  .LBB21_26: # %else74
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 5
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB21_59
+; RV32-ZVFHMIN-NEXT:  .LBB21_27: # %else77
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 4
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB21_60
+; RV32-ZVFHMIN-NEXT:  .LBB21_28: # %else80
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 3
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB21_61
+; RV32-ZVFHMIN-NEXT:  .LBB21_29: # %else83
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 2
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB21_62
+; RV32-ZVFHMIN-NEXT:  .LBB21_30: # %else86
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 1
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB21_63
+; RV32-ZVFHMIN-NEXT:  .LBB21_31: # %else89
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB21_64
+; RV32-ZVFHMIN-NEXT:  .LBB21_32: # %else92
+; RV32-ZVFHMIN-NEXT:    ret
+; RV32-ZVFHMIN-NEXT:  .LBB21_33: # %cond.load
+; RV32-ZVFHMIN-NEXT:    lh a2, 0(a0)
+; RV32-ZVFHMIN-NEXT:    fmv.x.h a3, fa5
+; RV32-ZVFHMIN-NEXT:    li a4, 32
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a4, e16, m4, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.v.x v8, a3
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v8, a2
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 2
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB21_2
+; RV32-ZVFHMIN-NEXT:  .LBB21_34: # %cond.load1
+; RV32-ZVFHMIN-NEXT:    lh a2, 2(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v12, a2
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v12, 1
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 4
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB21_3
+; RV32-ZVFHMIN-NEXT:  .LBB21_35: # %cond.load4
+; RV32-ZVFHMIN-NEXT:    lh a2, 4(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v12, a2
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v12, 2
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 8
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB21_4
+; RV32-ZVFHMIN-NEXT:  .LBB21_36: # %cond.load7
+; RV32-ZVFHMIN-NEXT:    lh a2, 6(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v12, a2
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v12, 3
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 16
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB21_5
+; RV32-ZVFHMIN-NEXT:  .LBB21_37: # %cond.load10
+; RV32-ZVFHMIN-NEXT:    lh a2, 8(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v12, a2
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v12, 4
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 32
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB21_6
+; RV32-ZVFHMIN-NEXT:  .LBB21_38: # %cond.load13
+; RV32-ZVFHMIN-NEXT:    lh a2, 10(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v12, a2
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v12, 5
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 64
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB21_7
+; RV32-ZVFHMIN-NEXT:  .LBB21_39: # %cond.load16
+; RV32-ZVFHMIN-NEXT:    lh a2, 12(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v12, a2
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v12, 6
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 128
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB21_8
+; RV32-ZVFHMIN-NEXT:  .LBB21_40: # %cond.load19
+; RV32-ZVFHMIN-NEXT:    lh a2, 14(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v12, a2
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v12, 7
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 256
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB21_9
+; RV32-ZVFHMIN-NEXT:  .LBB21_41: # %cond.load22
+; RV32-ZVFHMIN-NEXT:    lh a2, 16(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 9, e16, m2, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v12, a2
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v12, 8
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 512
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB21_10
+; RV32-ZVFHMIN-NEXT:  .LBB21_42: # %cond.load25
+; RV32-ZVFHMIN-NEXT:    lh a2, 18(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 10, e16, m2, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v12, a2
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v12, 9
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 1024
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB21_11
+; RV32-ZVFHMIN-NEXT:  .LBB21_43: # %cond.load28
+; RV32-ZVFHMIN-NEXT:    lh a2, 20(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 11, e16, m2, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v12, a2
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v12, 10
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 20
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB21_12
+; RV32-ZVFHMIN-NEXT:  .LBB21_44: # %cond.load31
+; RV32-ZVFHMIN-NEXT:    lh a2, 22(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 12, e16, m2, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v12, a2
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v12, 11
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 19
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB21_13
+; RV32-ZVFHMIN-NEXT:  .LBB21_45: # %cond.load34
+; RV32-ZVFHMIN-NEXT:    lh a2, 24(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 13, e16, m2, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v12, a2
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v12, 12
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 18
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB21_14
+; RV32-ZVFHMIN-NEXT:  .LBB21_46: # %cond.load37
+; RV32-ZVFHMIN-NEXT:    lh a2, 26(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 14, e16, m2, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v12, a2
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v12, 13
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 17
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB21_15
+; RV32-ZVFHMIN-NEXT:  .LBB21_47: # %cond.load40
+; RV32-ZVFHMIN-NEXT:    lh a2, 28(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 15, e16, m2, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v12, a2
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v12, 14
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 16
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB21_16
+; RV32-ZVFHMIN-NEXT:  .LBB21_48: # %cond.load43
+; RV32-ZVFHMIN-NEXT:    lh a2, 30(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v12, a2
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v12, 15
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 15
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB21_17
+; RV32-ZVFHMIN-NEXT:  .LBB21_49: # %cond.load46
+; RV32-ZVFHMIN-NEXT:    lh a2, 32(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 17, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v12, a2
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v12, 16
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 14
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB21_18
+; RV32-ZVFHMIN-NEXT:  .LBB21_50: # %cond.load49
+; RV32-ZVFHMIN-NEXT:    lh a2, 34(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 18, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v12, a2
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v12, 17
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 13
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB21_19
+; RV32-ZVFHMIN-NEXT:  .LBB21_51: # %cond.load52
+; RV32-ZVFHMIN-NEXT:    lh a2, 36(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 19, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v12, a2
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v12, 18
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 12
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB21_20
+; RV32-ZVFHMIN-NEXT:  .LBB21_52: # %cond.load55
+; RV32-ZVFHMIN-NEXT:    lh a2, 38(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 20, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v12, a2
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v12, 19
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 11
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB21_21
+; RV32-ZVFHMIN-NEXT:  .LBB21_53: # %cond.load58
+; RV32-ZVFHMIN-NEXT:    lh a2, 40(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 21, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v12, a2
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v12, 20
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 10
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB21_22
+; RV32-ZVFHMIN-NEXT:  .LBB21_54: # %cond.load61
+; RV32-ZVFHMIN-NEXT:    lh a2, 42(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 22, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v12, a2
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v12, 21
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 9
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB21_23
+; RV32-ZVFHMIN-NEXT:  .LBB21_55: # %cond.load64
+; RV32-ZVFHMIN-NEXT:    lh a2, 44(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 23, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v12, a2
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v12, 22
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 8
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB21_24
+; RV32-ZVFHMIN-NEXT:  .LBB21_56: # %cond.load67
+; RV32-ZVFHMIN-NEXT:    lh a2, 46(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 24, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v12, a2
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v12, 23
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 7
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB21_25
+; RV32-ZVFHMIN-NEXT:  .LBB21_57: # %cond.load70
+; RV32-ZVFHMIN-NEXT:    lh a2, 48(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 25, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v12, a2
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v12, 24
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 6
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB21_26
+; RV32-ZVFHMIN-NEXT:  .LBB21_58: # %cond.load73
+; RV32-ZVFHMIN-NEXT:    lh a2, 50(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 26, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v12, a2
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v12, 25
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 5
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB21_27
+; RV32-ZVFHMIN-NEXT:  .LBB21_59: # %cond.load76
+; RV32-ZVFHMIN-NEXT:    lh a2, 52(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 27, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v12, a2
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v12, 26
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 4
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB21_28
+; RV32-ZVFHMIN-NEXT:  .LBB21_60: # %cond.load79
+; RV32-ZVFHMIN-NEXT:    lh a2, 54(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 28, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v12, a2
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v12, 27
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 3
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB21_29
+; RV32-ZVFHMIN-NEXT:  .LBB21_61: # %cond.load82
+; RV32-ZVFHMIN-NEXT:    lh a2, 56(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 29, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v12, a2
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v12, 28
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 2
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB21_30
+; RV32-ZVFHMIN-NEXT:  .LBB21_62: # %cond.load85
+; RV32-ZVFHMIN-NEXT:    lh a2, 58(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 30, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v12, a2
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v12, 29
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 1
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB21_31
+; RV32-ZVFHMIN-NEXT:  .LBB21_63: # %cond.load88
+; RV32-ZVFHMIN-NEXT:    lh a2, 60(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 31, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v12, a2
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v12, 30
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB21_32
+; RV32-ZVFHMIN-NEXT:  .LBB21_64: # %cond.load91
+; RV32-ZVFHMIN-NEXT:    lh a0, 62(a0)
+; RV32-ZVFHMIN-NEXT:    li a1, 32
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v12, a0
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v12, 31
+; RV32-ZVFHMIN-NEXT:    ret
+;
+; RV64-ZVFHMIN-LABEL: masked_load_v32f16:
+; RV64-ZVFHMIN:       # %bb.0:
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a1, v0
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 1
+; RV64-ZVFHMIN-NEXT:    # implicit-def: $v8m4
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB21_35
+; RV64-ZVFHMIN-NEXT:  # %bb.1: # %else
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 2
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB21_36
+; RV64-ZVFHMIN-NEXT:  .LBB21_2: # %else2
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 4
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB21_37
+; RV64-ZVFHMIN-NEXT:  .LBB21_3: # %else5
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 8
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB21_38
+; RV64-ZVFHMIN-NEXT:  .LBB21_4: # %else8
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 16
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB21_39
+; RV64-ZVFHMIN-NEXT:  .LBB21_5: # %else11
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 32
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB21_40
+; RV64-ZVFHMIN-NEXT:  .LBB21_6: # %else14
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 64
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB21_41
+; RV64-ZVFHMIN-NEXT:  .LBB21_7: # %else17
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 128
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB21_42
+; RV64-ZVFHMIN-NEXT:  .LBB21_8: # %else20
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 256
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB21_43
+; RV64-ZVFHMIN-NEXT:  .LBB21_9: # %else23
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 512
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB21_44
+; RV64-ZVFHMIN-NEXT:  .LBB21_10: # %else26
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 1024
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB21_45
+; RV64-ZVFHMIN-NEXT:  .LBB21_11: # %else29
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 52
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB21_46
+; RV64-ZVFHMIN-NEXT:  .LBB21_12: # %else32
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 51
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB21_47
+; RV64-ZVFHMIN-NEXT:  .LBB21_13: # %else35
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 50
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB21_48
+; RV64-ZVFHMIN-NEXT:  .LBB21_14: # %else38
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 49
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB21_49
+; RV64-ZVFHMIN-NEXT:  .LBB21_15: # %else41
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 48
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB21_50
+; RV64-ZVFHMIN-NEXT:  .LBB21_16: # %else44
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 47
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB21_51
+; RV64-ZVFHMIN-NEXT:  .LBB21_17: # %else47
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 46
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB21_52
+; RV64-ZVFHMIN-NEXT:  .LBB21_18: # %else50
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 45
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB21_53
+; RV64-ZVFHMIN-NEXT:  .LBB21_19: # %else53
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 44
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB21_54
+; RV64-ZVFHMIN-NEXT:  .LBB21_20: # %else56
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 43
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB21_55
+; RV64-ZVFHMIN-NEXT:  .LBB21_21: # %else59
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 42
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB21_56
+; RV64-ZVFHMIN-NEXT:  .LBB21_22: # %else62
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 41
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB21_57
+; RV64-ZVFHMIN-NEXT:  .LBB21_23: # %else65
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 40
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB21_58
+; RV64-ZVFHMIN-NEXT:  .LBB21_24: # %else68
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 39
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB21_59
+; RV64-ZVFHMIN-NEXT:  .LBB21_25: # %else71
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 38
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB21_60
+; RV64-ZVFHMIN-NEXT:  .LBB21_26: # %else74
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 37
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB21_61
+; RV64-ZVFHMIN-NEXT:  .LBB21_27: # %else77
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 36
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB21_62
+; RV64-ZVFHMIN-NEXT:  .LBB21_28: # %else80
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 35
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB21_63
+; RV64-ZVFHMIN-NEXT:  .LBB21_29: # %else83
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 34
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB21_64
+; RV64-ZVFHMIN-NEXT:  .LBB21_30: # %else86
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 33
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB21_32
+; RV64-ZVFHMIN-NEXT:  .LBB21_31: # %cond.load88
+; RV64-ZVFHMIN-NEXT:    lh a2, 60(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 31, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v12, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v12, 30
+; RV64-ZVFHMIN-NEXT:  .LBB21_32: # %else89
+; RV64-ZVFHMIN-NEXT:    lui a2, 524288
+; RV64-ZVFHMIN-NEXT:    and a1, a1, a2
+; RV64-ZVFHMIN-NEXT:    beqz a1, .LBB21_34
+; RV64-ZVFHMIN-NEXT:  # %bb.33: # %cond.load91
+; RV64-ZVFHMIN-NEXT:    lh a0, 62(a0)
+; RV64-ZVFHMIN-NEXT:    li a1, 32
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v12, a0
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v12, 31
+; RV64-ZVFHMIN-NEXT:  .LBB21_34: # %else92
+; RV64-ZVFHMIN-NEXT:    ret
+; RV64-ZVFHMIN-NEXT:  .LBB21_35: # %cond.load
+; RV64-ZVFHMIN-NEXT:    lh a2, 0(a0)
+; RV64-ZVFHMIN-NEXT:    fmv.x.h a3, fa5
+; RV64-ZVFHMIN-NEXT:    li a4, 32
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a4, e16, m4, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.v.x v8, a3
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v8, a2
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 2
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB21_2
+; RV64-ZVFHMIN-NEXT:  .LBB21_36: # %cond.load1
+; RV64-ZVFHMIN-NEXT:    lh a2, 2(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v12, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v12, 1
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 4
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB21_3
+; RV64-ZVFHMIN-NEXT:  .LBB21_37: # %cond.load4
+; RV64-ZVFHMIN-NEXT:    lh a2, 4(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v12, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v12, 2
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 8
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB21_4
+; RV64-ZVFHMIN-NEXT:  .LBB21_38: # %cond.load7
+; RV64-ZVFHMIN-NEXT:    lh a2, 6(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v12, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v12, 3
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 16
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB21_5
+; RV64-ZVFHMIN-NEXT:  .LBB21_39: # %cond.load10
+; RV64-ZVFHMIN-NEXT:    lh a2, 8(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v12, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v12, 4
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 32
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB21_6
+; RV64-ZVFHMIN-NEXT:  .LBB21_40: # %cond.load13
+; RV64-ZVFHMIN-NEXT:    lh a2, 10(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v12, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v12, 5
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 64
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB21_7
+; RV64-ZVFHMIN-NEXT:  .LBB21_41: # %cond.load16
+; RV64-ZVFHMIN-NEXT:    lh a2, 12(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v12, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v12, 6
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 128
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB21_8
+; RV64-ZVFHMIN-NEXT:  .LBB21_42: # %cond.load19
+; RV64-ZVFHMIN-NEXT:    lh a2, 14(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v12, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v12, 7
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 256
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB21_9
+; RV64-ZVFHMIN-NEXT:  .LBB21_43: # %cond.load22
+; RV64-ZVFHMIN-NEXT:    lh a2, 16(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 9, e16, m2, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v12, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v12, 8
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 512
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB21_10
+; RV64-ZVFHMIN-NEXT:  .LBB21_44: # %cond.load25
+; RV64-ZVFHMIN-NEXT:    lh a2, 18(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 10, e16, m2, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v12, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v12, 9
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 1024
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB21_11
+; RV64-ZVFHMIN-NEXT:  .LBB21_45: # %cond.load28
+; RV64-ZVFHMIN-NEXT:    lh a2, 20(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 11, e16, m2, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v12, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v12, 10
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 52
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB21_12
+; RV64-ZVFHMIN-NEXT:  .LBB21_46: # %cond.load31
+; RV64-ZVFHMIN-NEXT:    lh a2, 22(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 12, e16, m2, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v12, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v12, 11
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 51
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB21_13
+; RV64-ZVFHMIN-NEXT:  .LBB21_47: # %cond.load34
+; RV64-ZVFHMIN-NEXT:    lh a2, 24(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 13, e16, m2, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v12, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v12, 12
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 50
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB21_14
+; RV64-ZVFHMIN-NEXT:  .LBB21_48: # %cond.load37
+; RV64-ZVFHMIN-NEXT:    lh a2, 26(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 14, e16, m2, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v12, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v12, 13
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 49
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB21_15
+; RV64-ZVFHMIN-NEXT:  .LBB21_49: # %cond.load40
+; RV64-ZVFHMIN-NEXT:    lh a2, 28(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 15, e16, m2, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v12, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v12, 14
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 48
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB21_16
+; RV64-ZVFHMIN-NEXT:  .LBB21_50: # %cond.load43
+; RV64-ZVFHMIN-NEXT:    lh a2, 30(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v12, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v12, 15
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 47
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB21_17
+; RV64-ZVFHMIN-NEXT:  .LBB21_51: # %cond.load46
+; RV64-ZVFHMIN-NEXT:    lh a2, 32(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 17, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v12, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v12, 16
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 46
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB21_18
+; RV64-ZVFHMIN-NEXT:  .LBB21_52: # %cond.load49
+; RV64-ZVFHMIN-NEXT:    lh a2, 34(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 18, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v12, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v12, 17
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 45
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB21_19
+; RV64-ZVFHMIN-NEXT:  .LBB21_53: # %cond.load52
+; RV64-ZVFHMIN-NEXT:    lh a2, 36(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 19, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v12, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v12, 18
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 44
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB21_20
+; RV64-ZVFHMIN-NEXT:  .LBB21_54: # %cond.load55
+; RV64-ZVFHMIN-NEXT:    lh a2, 38(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 20, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v12, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v12, 19
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 43
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB21_21
+; RV64-ZVFHMIN-NEXT:  .LBB21_55: # %cond.load58
+; RV64-ZVFHMIN-NEXT:    lh a2, 40(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 21, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v12, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v12, 20
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 42
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB21_22
+; RV64-ZVFHMIN-NEXT:  .LBB21_56: # %cond.load61
+; RV64-ZVFHMIN-NEXT:    lh a2, 42(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 22, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v12, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v12, 21
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 41
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB21_23
+; RV64-ZVFHMIN-NEXT:  .LBB21_57: # %cond.load64
+; RV64-ZVFHMIN-NEXT:    lh a2, 44(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 23, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v12, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v12, 22
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 40
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB21_24
+; RV64-ZVFHMIN-NEXT:  .LBB21_58: # %cond.load67
+; RV64-ZVFHMIN-NEXT:    lh a2, 46(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 24, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v12, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v12, 23
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 39
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB21_25
+; RV64-ZVFHMIN-NEXT:  .LBB21_59: # %cond.load70
+; RV64-ZVFHMIN-NEXT:    lh a2, 48(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 25, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v12, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v12, 24
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 38
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB21_26
+; RV64-ZVFHMIN-NEXT:  .LBB21_60: # %cond.load73
+; RV64-ZVFHMIN-NEXT:    lh a2, 50(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 26, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v12, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v12, 25
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 37
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB21_27
+; RV64-ZVFHMIN-NEXT:  .LBB21_61: # %cond.load76
+; RV64-ZVFHMIN-NEXT:    lh a2, 52(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 27, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v12, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v12, 26
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 36
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB21_28
+; RV64-ZVFHMIN-NEXT:  .LBB21_62: # %cond.load79
+; RV64-ZVFHMIN-NEXT:    lh a2, 54(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 28, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v12, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v12, 27
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 35
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB21_29
+; RV64-ZVFHMIN-NEXT:  .LBB21_63: # %cond.load82
+; RV64-ZVFHMIN-NEXT:    lh a2, 56(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 29, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v12, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v12, 28
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 34
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB21_30
+; RV64-ZVFHMIN-NEXT:  .LBB21_64: # %cond.load85
+; RV64-ZVFHMIN-NEXT:    lh a2, 58(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 30, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v12, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v12, 29
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 33
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB21_31
+; RV64-ZVFHMIN-NEXT:    j .LBB21_32
   %load = call <32 x half> @llvm.masked.load.v32f16(ptr %a, i32 8, <32 x i1> %mask, <32 x half> undef)
-  store <32 x half> %load, ptr %res_ptr
-  ret void
+  ret <32 x half> %load
 }
-declare <32 x half> @llvm.masked.load.v32f16(ptr, i32, <32 x i1>, <32 x half>)
 
-define void @masked_load_v32f32(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
+define <32 x float> @masked_load_v32f32(ptr %a, <32 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v32f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a3, 32
-; CHECK-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a1)
-; CHECK-NEXT:    fmv.w.x fa5, zero
-; CHECK-NEXT:    vmfeq.vf v0, v8, fa5
+; CHECK-NEXT:    li a1, 32
+; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0), v0.t
-; CHECK-NEXT:    vse32.v v8, (a2)
 ; CHECK-NEXT:    ret
-  %m = load <32 x float>, ptr %m_ptr
-  %mask = fcmp oeq <32 x float> %m, zeroinitializer
   %load = call <32 x float> @llvm.masked.load.v32f32(ptr %a, i32 8, <32 x i1> %mask, <32 x float> undef)
-  store <32 x float> %load, ptr %res_ptr
-  ret void
+  ret <32 x float> %load
+}
+
+define <32 x double> @masked_load_v32f64(ptr %a, <32 x i1> %mask) {
+; CHECK-LABEL: masked_load_v32f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0), v0.t
+; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
+; CHECK-NEXT:    vslidedown.vi v0, v0, 2
+; CHECK-NEXT:    addi a0, a0, 128
+; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; CHECK-NEXT:    vle64.v v16, (a0), v0.t
+; CHECK-NEXT:    ret
+  %load = call <32 x double> @llvm.masked.load.v32f64(ptr %a, i32 8, <32 x i1> %mask, <32 x double> undef)
+  ret <32 x double> %load
 }
-declare <32 x float> @llvm.masked.load.v32f32(ptr, i32, <32 x i1>, <32 x float>)
 
-define void @masked_load_v32f64(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
-; RV32-LABEL: masked_load_v32f64:
+define <64 x bfloat> @masked_load_v64bf16(ptr %a, <64 x i1> %mask) {
+; RV32-LABEL: masked_load_v64bf16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi a3, a1, 128
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vle64.v v16, (a1)
-; RV32-NEXT:    vle64.v v24, (a3)
-; RV32-NEXT:    fcvt.d.w fa5, zero
-; RV32-NEXT:    vmfeq.vf v8, v16, fa5
-; RV32-NEXT:    vmfeq.vf v0, v24, fa5
-; RV32-NEXT:    addi a1, a0, 128
-; RV32-NEXT:    vle64.v v16, (a1), v0.t
-; RV32-NEXT:    vmv1r.v v0, v8
-; RV32-NEXT:    vle64.v v8, (a0), v0.t
-; RV32-NEXT:    vse64.v v8, (a2)
-; RV32-NEXT:    addi a0, a2, 128
-; RV32-NEXT:    vse64.v v16, (a0)
+; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT:    vmv.x.s a2, v0
+; RV32-NEXT:    andi a1, a2, 1
+; RV32-NEXT:    # implicit-def: $v8m8
+; RV32-NEXT:    bnez a1, .LBB24_68
+; RV32-NEXT:  # %bb.1: # %else
+; RV32-NEXT:    andi a1, a2, 2
+; RV32-NEXT:    bnez a1, .LBB24_69
+; RV32-NEXT:  .LBB24_2: # %else2
+; RV32-NEXT:    andi a1, a2, 4
+; RV32-NEXT:    bnez a1, .LBB24_70
+; RV32-NEXT:  .LBB24_3: # %else5
+; RV32-NEXT:    andi a1, a2, 8
+; RV32-NEXT:    bnez a1, .LBB24_71
+; RV32-NEXT:  .LBB24_4: # %else8
+; RV32-NEXT:    andi a1, a2, 16
+; RV32-NEXT:    bnez a1, .LBB24_72
+; RV32-NEXT:  .LBB24_5: # %else11
+; RV32-NEXT:    andi a1, a2, 32
+; RV32-NEXT:    bnez a1, .LBB24_73
+; RV32-NEXT:  .LBB24_6: # %else14
+; RV32-NEXT:    andi a1, a2, 64
+; RV32-NEXT:    bnez a1, .LBB24_74
+; RV32-NEXT:  .LBB24_7: # %else17
+; RV32-NEXT:    andi a1, a2, 128
+; RV32-NEXT:    bnez a1, .LBB24_75
+; RV32-NEXT:  .LBB24_8: # %else20
+; RV32-NEXT:    andi a1, a2, 256
+; RV32-NEXT:    bnez a1, .LBB24_76
+; RV32-NEXT:  .LBB24_9: # %else23
+; RV32-NEXT:    andi a1, a2, 512
+; RV32-NEXT:    bnez a1, .LBB24_77
+; RV32-NEXT:  .LBB24_10: # %else26
+; RV32-NEXT:    andi a1, a2, 1024
+; RV32-NEXT:    bnez a1, .LBB24_78
+; RV32-NEXT:  .LBB24_11: # %else29
+; RV32-NEXT:    slli a1, a2, 20
+; RV32-NEXT:    bltz a1, .LBB24_79
+; RV32-NEXT:  .LBB24_12: # %else32
+; RV32-NEXT:    slli a1, a2, 19
+; RV32-NEXT:    bltz a1, .LBB24_80
+; RV32-NEXT:  .LBB24_13: # %else35
+; RV32-NEXT:    slli a1, a2, 18
+; RV32-NEXT:    bltz a1, .LBB24_81
+; RV32-NEXT:  .LBB24_14: # %else38
+; RV32-NEXT:    slli a1, a2, 17
+; RV32-NEXT:    bltz a1, .LBB24_82
+; RV32-NEXT:  .LBB24_15: # %else41
+; RV32-NEXT:    slli a1, a2, 16
+; RV32-NEXT:    bltz a1, .LBB24_83
+; RV32-NEXT:  .LBB24_16: # %else44
+; RV32-NEXT:    slli a1, a2, 15
+; RV32-NEXT:    bltz a1, .LBB24_84
+; RV32-NEXT:  .LBB24_17: # %else47
+; RV32-NEXT:    slli a1, a2, 14
+; RV32-NEXT:    bltz a1, .LBB24_85
+; RV32-NEXT:  .LBB24_18: # %else50
+; RV32-NEXT:    slli a1, a2, 13
+; RV32-NEXT:    bltz a1, .LBB24_86
+; RV32-NEXT:  .LBB24_19: # %else53
+; RV32-NEXT:    slli a1, a2, 12
+; RV32-NEXT:    bltz a1, .LBB24_87
+; RV32-NEXT:  .LBB24_20: # %else56
+; RV32-NEXT:    slli a1, a2, 11
+; RV32-NEXT:    bltz a1, .LBB24_88
+; RV32-NEXT:  .LBB24_21: # %else59
+; RV32-NEXT:    slli a1, a2, 10
+; RV32-NEXT:    bltz a1, .LBB24_89
+; RV32-NEXT:  .LBB24_22: # %else62
+; RV32-NEXT:    slli a1, a2, 9
+; RV32-NEXT:    bltz a1, .LBB24_90
+; RV32-NEXT:  .LBB24_23: # %else65
+; RV32-NEXT:    slli a1, a2, 8
+; RV32-NEXT:    bltz a1, .LBB24_91
+; RV32-NEXT:  .LBB24_24: # %else68
+; RV32-NEXT:    slli a1, a2, 7
+; RV32-NEXT:    bltz a1, .LBB24_92
+; RV32-NEXT:  .LBB24_25: # %else71
+; RV32-NEXT:    slli a1, a2, 6
+; RV32-NEXT:    bltz a1, .LBB24_93
+; RV32-NEXT:  .LBB24_26: # %else74
+; RV32-NEXT:    slli a1, a2, 5
+; RV32-NEXT:    bltz a1, .LBB24_94
+; RV32-NEXT:  .LBB24_27: # %else77
+; RV32-NEXT:    slli a1, a2, 4
+; RV32-NEXT:    bltz a1, .LBB24_95
+; RV32-NEXT:  .LBB24_28: # %else80
+; RV32-NEXT:    slli a1, a2, 3
+; RV32-NEXT:    bgez a1, .LBB24_30
+; RV32-NEXT:  .LBB24_29: # %cond.load82
+; RV32-NEXT:    lh a1, 56(a0)
+; RV32-NEXT:    vsetivli zero, 29, e16, m4, tu, ma
+; RV32-NEXT:    vmv.s.x v16, a1
+; RV32-NEXT:    vslideup.vi v8, v16, 28
+; RV32-NEXT:  .LBB24_30: # %else83
+; RV32-NEXT:    slli a3, a2, 2
+; RV32-NEXT:    li a1, 32
+; RV32-NEXT:    bgez a3, .LBB24_32
+; RV32-NEXT:  # %bb.31: # %cond.load85
+; RV32-NEXT:    lh a3, 58(a0)
+; RV32-NEXT:    vsetivli zero, 30, e16, m4, tu, ma
+; RV32-NEXT:    vmv.s.x v16, a3
+; RV32-NEXT:    vslideup.vi v8, v16, 29
+; RV32-NEXT:  .LBB24_32: # %else86
+; RV32-NEXT:    slli a3, a2, 1
+; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT:    vsrl.vx v16, v0, a1
+; RV32-NEXT:    bgez a3, .LBB24_34
+; RV32-NEXT:  # %bb.33: # %cond.load88
+; RV32-NEXT:    lh a1, 60(a0)
+; RV32-NEXT:    vmv.s.x v20, a1
+; RV32-NEXT:    vsetivli zero, 31, e16, m4, tu, ma
+; RV32-NEXT:    vslideup.vi v8, v20, 30
+; RV32-NEXT:  .LBB24_34: # %else89
+; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT:    vmv.x.s a1, v16
+; RV32-NEXT:    bltz a2, .LBB24_96
+; RV32-NEXT:  # %bb.35: # %else92
+; RV32-NEXT:    andi a2, a1, 1
+; RV32-NEXT:    bnez a2, .LBB24_97
+; RV32-NEXT:  .LBB24_36: # %else95
+; RV32-NEXT:    andi a2, a1, 2
+; RV32-NEXT:    bnez a2, .LBB24_98
+; RV32-NEXT:  .LBB24_37: # %else98
+; RV32-NEXT:    andi a2, a1, 4
+; RV32-NEXT:    bnez a2, .LBB24_99
+; RV32-NEXT:  .LBB24_38: # %else101
+; RV32-NEXT:    andi a2, a1, 8
+; RV32-NEXT:    bnez a2, .LBB24_100
+; RV32-NEXT:  .LBB24_39: # %else104
+; RV32-NEXT:    andi a2, a1, 16
+; RV32-NEXT:    bnez a2, .LBB24_101
+; RV32-NEXT:  .LBB24_40: # %else107
+; RV32-NEXT:    andi a2, a1, 32
+; RV32-NEXT:    bnez a2, .LBB24_102
+; RV32-NEXT:  .LBB24_41: # %else110
+; RV32-NEXT:    andi a2, a1, 64
+; RV32-NEXT:    bnez a2, .LBB24_103
+; RV32-NEXT:  .LBB24_42: # %else113
+; RV32-NEXT:    andi a2, a1, 128
+; RV32-NEXT:    bnez a2, .LBB24_104
+; RV32-NEXT:  .LBB24_43: # %else116
+; RV32-NEXT:    andi a2, a1, 256
+; RV32-NEXT:    bnez a2, .LBB24_105
+; RV32-NEXT:  .LBB24_44: # %else119
+; RV32-NEXT:    andi a2, a1, 512
+; RV32-NEXT:    bnez a2, .LBB24_106
+; RV32-NEXT:  .LBB24_45: # %else122
+; RV32-NEXT:    andi a2, a1, 1024
+; RV32-NEXT:    bnez a2, .LBB24_107
+; RV32-NEXT:  .LBB24_46: # %else125
+; RV32-NEXT:    slli a2, a1, 20
+; RV32-NEXT:    bltz a2, .LBB24_108
+; RV32-NEXT:  .LBB24_47: # %else128
+; RV32-NEXT:    slli a2, a1, 19
+; RV32-NEXT:    bltz a2, .LBB24_109
+; RV32-NEXT:  .LBB24_48: # %else131
+; RV32-NEXT:    slli a2, a1, 18
+; RV32-NEXT:    bltz a2, .LBB24_110
+; RV32-NEXT:  .LBB24_49: # %else134
+; RV32-NEXT:    slli a2, a1, 17
+; RV32-NEXT:    bltz a2, .LBB24_111
+; RV32-NEXT:  .LBB24_50: # %else137
+; RV32-NEXT:    slli a2, a1, 16
+; RV32-NEXT:    bltz a2, .LBB24_112
+; RV32-NEXT:  .LBB24_51: # %else140
+; RV32-NEXT:    slli a2, a1, 15
+; RV32-NEXT:    bltz a2, .LBB24_113
+; RV32-NEXT:  .LBB24_52: # %else143
+; RV32-NEXT:    slli a2, a1, 14
+; RV32-NEXT:    bltz a2, .LBB24_114
+; RV32-NEXT:  .LBB24_53: # %else146
+; RV32-NEXT:    slli a2, a1, 13
+; RV32-NEXT:    bltz a2, .LBB24_115
+; RV32-NEXT:  .LBB24_54: # %else149
+; RV32-NEXT:    slli a2, a1, 12
+; RV32-NEXT:    bltz a2, .LBB24_116
+; RV32-NEXT:  .LBB24_55: # %else152
+; RV32-NEXT:    slli a2, a1, 11
+; RV32-NEXT:    bltz a2, .LBB24_117
+; RV32-NEXT:  .LBB24_56: # %else155
+; RV32-NEXT:    slli a2, a1, 10
+; RV32-NEXT:    bltz a2, .LBB24_118
+; RV32-NEXT:  .LBB24_57: # %else158
+; RV32-NEXT:    slli a2, a1, 9
+; RV32-NEXT:    bltz a2, .LBB24_119
+; RV32-NEXT:  .LBB24_58: # %else161
+; RV32-NEXT:    slli a2, a1, 8
+; RV32-NEXT:    bltz a2, .LBB24_120
+; RV32-NEXT:  .LBB24_59: # %else164
+; RV32-NEXT:    slli a2, a1, 7
+; RV32-NEXT:    bltz a2, .LBB24_121
+; RV32-NEXT:  .LBB24_60: # %else167
+; RV32-NEXT:    slli a2, a1, 6
+; RV32-NEXT:    bltz a2, .LBB24_122
+; RV32-NEXT:  .LBB24_61: # %else170
+; RV32-NEXT:    slli a2, a1, 5
+; RV32-NEXT:    bltz a2, .LBB24_123
+; RV32-NEXT:  .LBB24_62: # %else173
+; RV32-NEXT:    slli a2, a1, 4
+; RV32-NEXT:    bltz a2, .LBB24_124
+; RV32-NEXT:  .LBB24_63: # %else176
+; RV32-NEXT:    slli a2, a1, 3
+; RV32-NEXT:    bltz a2, .LBB24_125
+; RV32-NEXT:  .LBB24_64: # %else179
+; RV32-NEXT:    slli a2, a1, 2
+; RV32-NEXT:    bltz a2, .LBB24_126
+; RV32-NEXT:  .LBB24_65: # %else182
+; RV32-NEXT:    slli a2, a1, 1
+; RV32-NEXT:    bltz a2, .LBB24_127
+; RV32-NEXT:  .LBB24_66: # %else185
+; RV32-NEXT:    bltz a1, .LBB24_128
+; RV32-NEXT:  .LBB24_67: # %else188
+; RV32-NEXT:    ret
+; RV32-NEXT:  .LBB24_68: # %cond.load
+; RV32-NEXT:    lh a1, 0(a0)
+; RV32-NEXT:    fmv.x.h a3, fa5
+; RV32-NEXT:    li a4, 64
+; RV32-NEXT:    vsetvli zero, a4, e16, m8, ta, ma
+; RV32-NEXT:    vmv.v.x v8, a3
+; RV32-NEXT:    vsetvli zero, zero, e16, m8, tu, ma
+; RV32-NEXT:    vmv.s.x v8, a1
+; RV32-NEXT:    andi a1, a2, 2
+; RV32-NEXT:    beqz a1, .LBB24_2
+; RV32-NEXT:  .LBB24_69: # %cond.load1
+; RV32-NEXT:    lh a1, 2(a0)
+; RV32-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
+; RV32-NEXT:    vmv.s.x v16, a1
+; RV32-NEXT:    vslideup.vi v8, v16, 1
+; RV32-NEXT:    andi a1, a2, 4
+; RV32-NEXT:    beqz a1, .LBB24_3
+; RV32-NEXT:  .LBB24_70: # %cond.load4
+; RV32-NEXT:    lh a1, 4(a0)
+; RV32-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
+; RV32-NEXT:    vmv.s.x v16, a1
+; RV32-NEXT:    vslideup.vi v8, v16, 2
+; RV32-NEXT:    andi a1, a2, 8
+; RV32-NEXT:    beqz a1, .LBB24_4
+; RV32-NEXT:  .LBB24_71: # %cond.load7
+; RV32-NEXT:    lh a1, 6(a0)
+; RV32-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
+; RV32-NEXT:    vmv.s.x v16, a1
+; RV32-NEXT:    vslideup.vi v8, v16, 3
+; RV32-NEXT:    andi a1, a2, 16
+; RV32-NEXT:    beqz a1, .LBB24_5
+; RV32-NEXT:  .LBB24_72: # %cond.load10
+; RV32-NEXT:    lh a1, 8(a0)
+; RV32-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
+; RV32-NEXT:    vmv.s.x v16, a1
+; RV32-NEXT:    vslideup.vi v8, v16, 4
+; RV32-NEXT:    andi a1, a2, 32
+; RV32-NEXT:    beqz a1, .LBB24_6
+; RV32-NEXT:  .LBB24_73: # %cond.load13
+; RV32-NEXT:    lh a1, 10(a0)
+; RV32-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
+; RV32-NEXT:    vmv.s.x v16, a1
+; RV32-NEXT:    vslideup.vi v8, v16, 5
+; RV32-NEXT:    andi a1, a2, 64
+; RV32-NEXT:    beqz a1, .LBB24_7
+; RV32-NEXT:  .LBB24_74: # %cond.load16
+; RV32-NEXT:    lh a1, 12(a0)
+; RV32-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
+; RV32-NEXT:    vmv.s.x v16, a1
+; RV32-NEXT:    vslideup.vi v8, v16, 6
+; RV32-NEXT:    andi a1, a2, 128
+; RV32-NEXT:    beqz a1, .LBB24_8
+; RV32-NEXT:  .LBB24_75: # %cond.load19
+; RV32-NEXT:    lh a1, 14(a0)
+; RV32-NEXT:    vsetivli zero, 8, e16, m1, tu, ma
+; RV32-NEXT:    vmv.s.x v16, a1
+; RV32-NEXT:    vslideup.vi v8, v16, 7
+; RV32-NEXT:    andi a1, a2, 256
+; RV32-NEXT:    beqz a1, .LBB24_9
+; RV32-NEXT:  .LBB24_76: # %cond.load22
+; RV32-NEXT:    lh a1, 16(a0)
+; RV32-NEXT:    vsetivli zero, 9, e16, m2, tu, ma
+; RV32-NEXT:    vmv.s.x v16, a1
+; RV32-NEXT:    vslideup.vi v8, v16, 8
+; RV32-NEXT:    andi a1, a2, 512
+; RV32-NEXT:    beqz a1, .LBB24_10
+; RV32-NEXT:  .LBB24_77: # %cond.load25
+; RV32-NEXT:    lh a1, 18(a0)
+; RV32-NEXT:    vsetivli zero, 10, e16, m2, tu, ma
+; RV32-NEXT:    vmv.s.x v16, a1
+; RV32-NEXT:    vslideup.vi v8, v16, 9
+; RV32-NEXT:    andi a1, a2, 1024
+; RV32-NEXT:    beqz a1, .LBB24_11
+; RV32-NEXT:  .LBB24_78: # %cond.load28
+; RV32-NEXT:    lh a1, 20(a0)
+; RV32-NEXT:    vsetivli zero, 11, e16, m2, tu, ma
+; RV32-NEXT:    vmv.s.x v16, a1
+; RV32-NEXT:    vslideup.vi v8, v16, 10
+; RV32-NEXT:    slli a1, a2, 20
+; RV32-NEXT:    bgez a1, .LBB24_12
+; RV32-NEXT:  .LBB24_79: # %cond.load31
+; RV32-NEXT:    lh a1, 22(a0)
+; RV32-NEXT:    vsetivli zero, 12, e16, m2, tu, ma
+; RV32-NEXT:    vmv.s.x v16, a1
+; RV32-NEXT:    vslideup.vi v8, v16, 11
+; RV32-NEXT:    slli a1, a2, 19
+; RV32-NEXT:    bgez a1, .LBB24_13
+; RV32-NEXT:  .LBB24_80: # %cond.load34
+; RV32-NEXT:    lh a1, 24(a0)
+; RV32-NEXT:    vsetivli zero, 13, e16, m2, tu, ma
+; RV32-NEXT:    vmv.s.x v16, a1
+; RV32-NEXT:    vslideup.vi v8, v16, 12
+; RV32-NEXT:    slli a1, a2, 18
+; RV32-NEXT:    bgez a1, .LBB24_14
+; RV32-NEXT:  .LBB24_81: # %cond.load37
+; RV32-NEXT:    lh a1, 26(a0)
+; RV32-NEXT:    vsetivli zero, 14, e16, m2, tu, ma
+; RV32-NEXT:    vmv.s.x v16, a1
+; RV32-NEXT:    vslideup.vi v8, v16, 13
+; RV32-NEXT:    slli a1, a2, 17
+; RV32-NEXT:    bgez a1, .LBB24_15
+; RV32-NEXT:  .LBB24_82: # %cond.load40
+; RV32-NEXT:    lh a1, 28(a0)
+; RV32-NEXT:    vsetivli zero, 15, e16, m2, tu, ma
+; RV32-NEXT:    vmv.s.x v16, a1
+; RV32-NEXT:    vslideup.vi v8, v16, 14
+; RV32-NEXT:    slli a1, a2, 16
+; RV32-NEXT:    bgez a1, .LBB24_16
+; RV32-NEXT:  .LBB24_83: # %cond.load43
+; RV32-NEXT:    lh a1, 30(a0)
+; RV32-NEXT:    vsetivli zero, 16, e16, m2, tu, ma
+; RV32-NEXT:    vmv.s.x v16, a1
+; RV32-NEXT:    vslideup.vi v8, v16, 15
+; RV32-NEXT:    slli a1, a2, 15
+; RV32-NEXT:    bgez a1, .LBB24_17
+; RV32-NEXT:  .LBB24_84: # %cond.load46
+; RV32-NEXT:    lh a1, 32(a0)
+; RV32-NEXT:    vsetivli zero, 17, e16, m4, tu, ma
+; RV32-NEXT:    vmv.s.x v16, a1
+; RV32-NEXT:    vslideup.vi v8, v16, 16
+; RV32-NEXT:    slli a1, a2, 14
+; RV32-NEXT:    bgez a1, .LBB24_18
+; RV32-NEXT:  .LBB24_85: # %cond.load49
+; RV32-NEXT:    lh a1, 34(a0)
+; RV32-NEXT:    vsetivli zero, 18, e16, m4, tu, ma
+; RV32-NEXT:    vmv.s.x v16, a1
+; RV32-NEXT:    vslideup.vi v8, v16, 17
+; RV32-NEXT:    slli a1, a2, 13
+; RV32-NEXT:    bgez a1, .LBB24_19
+; RV32-NEXT:  .LBB24_86: # %cond.load52
+; RV32-NEXT:    lh a1, 36(a0)
+; RV32-NEXT:    vsetivli zero, 19, e16, m4, tu, ma
+; RV32-NEXT:    vmv.s.x v16, a1
+; RV32-NEXT:    vslideup.vi v8, v16, 18
+; RV32-NEXT:    slli a1, a2, 12
+; RV32-NEXT:    bgez a1, .LBB24_20
+; RV32-NEXT:  .LBB24_87: # %cond.load55
+; RV32-NEXT:    lh a1, 38(a0)
+; RV32-NEXT:    vsetivli zero, 20, e16, m4, tu, ma
+; RV32-NEXT:    vmv.s.x v16, a1
+; RV32-NEXT:    vslideup.vi v8, v16, 19
+; RV32-NEXT:    slli a1, a2, 11
+; RV32-NEXT:    bgez a1, .LBB24_21
+; RV32-NEXT:  .LBB24_88: # %cond.load58
+; RV32-NEXT:    lh a1, 40(a0)
+; RV32-NEXT:    vsetivli zero, 21, e16, m4, tu, ma
+; RV32-NEXT:    vmv.s.x v16, a1
+; RV32-NEXT:    vslideup.vi v8, v16, 20
+; RV32-NEXT:    slli a1, a2, 10
+; RV32-NEXT:    bgez a1, .LBB24_22
+; RV32-NEXT:  .LBB24_89: # %cond.load61
+; RV32-NEXT:    lh a1, 42(a0)
+; RV32-NEXT:    vsetivli zero, 22, e16, m4, tu, ma
+; RV32-NEXT:    vmv.s.x v16, a1
+; RV32-NEXT:    vslideup.vi v8, v16, 21
+; RV32-NEXT:    slli a1, a2, 9
+; RV32-NEXT:    bgez a1, .LBB24_23
+; RV32-NEXT:  .LBB24_90: # %cond.load64
+; RV32-NEXT:    lh a1, 44(a0)
+; RV32-NEXT:    vsetivli zero, 23, e16, m4, tu, ma
+; RV32-NEXT:    vmv.s.x v16, a1
+; RV32-NEXT:    vslideup.vi v8, v16, 22
+; RV32-NEXT:    slli a1, a2, 8
+; RV32-NEXT:    bgez a1, .LBB24_24
+; RV32-NEXT:  .LBB24_91: # %cond.load67
+; RV32-NEXT:    lh a1, 46(a0)
+; RV32-NEXT:    vsetivli zero, 24, e16, m4, tu, ma
+; RV32-NEXT:    vmv.s.x v16, a1
+; RV32-NEXT:    vslideup.vi v8, v16, 23
+; RV32-NEXT:    slli a1, a2, 7
+; RV32-NEXT:    bgez a1, .LBB24_25
+; RV32-NEXT:  .LBB24_92: # %cond.load70
+; RV32-NEXT:    lh a1, 48(a0)
+; RV32-NEXT:    vsetivli zero, 25, e16, m4, tu, ma
+; RV32-NEXT:    vmv.s.x v16, a1
+; RV32-NEXT:    vslideup.vi v8, v16, 24
+; RV32-NEXT:    slli a1, a2, 6
+; RV32-NEXT:    bgez a1, .LBB24_26
+; RV32-NEXT:  .LBB24_93: # %cond.load73
+; RV32-NEXT:    lh a1, 50(a0)
+; RV32-NEXT:    vsetivli zero, 26, e16, m4, tu, ma
+; RV32-NEXT:    vmv.s.x v16, a1
+; RV32-NEXT:    vslideup.vi v8, v16, 25
+; RV32-NEXT:    slli a1, a2, 5
+; RV32-NEXT:    bgez a1, .LBB24_27
+; RV32-NEXT:  .LBB24_94: # %cond.load76
+; RV32-NEXT:    lh a1, 52(a0)
+; RV32-NEXT:    vsetivli zero, 27, e16, m4, tu, ma
+; RV32-NEXT:    vmv.s.x v16, a1
+; RV32-NEXT:    vslideup.vi v8, v16, 26
+; RV32-NEXT:    slli a1, a2, 4
+; RV32-NEXT:    bgez a1, .LBB24_28
+; RV32-NEXT:  .LBB24_95: # %cond.load79
+; RV32-NEXT:    lh a1, 54(a0)
+; RV32-NEXT:    vsetivli zero, 28, e16, m4, tu, ma
+; RV32-NEXT:    vmv.s.x v16, a1
+; RV32-NEXT:    vslideup.vi v8, v16, 27
+; RV32-NEXT:    slli a1, a2, 3
+; RV32-NEXT:    bltz a1, .LBB24_29
+; RV32-NEXT:    j .LBB24_30
+; RV32-NEXT:  .LBB24_96: # %cond.load91
+; RV32-NEXT:    lh a2, 62(a0)
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 32
+; RV32-NEXT:    vsetvli zero, a2, e16, m4, tu, ma
+; RV32-NEXT:    vslideup.vi v8, v16, 31
+; RV32-NEXT:    andi a2, a1, 1
+; RV32-NEXT:    beqz a2, .LBB24_36
+; RV32-NEXT:  .LBB24_97: # %cond.load94
+; RV32-NEXT:    lh a2, 64(a0)
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 33
+; RV32-NEXT:    li a3, 32
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a3
+; RV32-NEXT:    andi a2, a1, 2
+; RV32-NEXT:    beqz a2, .LBB24_37
+; RV32-NEXT:  .LBB24_98: # %cond.load97
+; RV32-NEXT:    lh a2, 66(a0)
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 34
+; RV32-NEXT:    li a3, 33
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a3
+; RV32-NEXT:    andi a2, a1, 4
+; RV32-NEXT:    beqz a2, .LBB24_38
+; RV32-NEXT:  .LBB24_99: # %cond.load100
+; RV32-NEXT:    lh a2, 68(a0)
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 35
+; RV32-NEXT:    li a3, 34
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a3
+; RV32-NEXT:    andi a2, a1, 8
+; RV32-NEXT:    beqz a2, .LBB24_39
+; RV32-NEXT:  .LBB24_100: # %cond.load103
+; RV32-NEXT:    lh a2, 70(a0)
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 36
+; RV32-NEXT:    li a3, 35
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a3
+; RV32-NEXT:    andi a2, a1, 16
+; RV32-NEXT:    beqz a2, .LBB24_40
+; RV32-NEXT:  .LBB24_101: # %cond.load106
+; RV32-NEXT:    lh a2, 72(a0)
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 37
+; RV32-NEXT:    li a3, 36
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a3
+; RV32-NEXT:    andi a2, a1, 32
+; RV32-NEXT:    beqz a2, .LBB24_41
+; RV32-NEXT:  .LBB24_102: # %cond.load109
+; RV32-NEXT:    lh a2, 74(a0)
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 38
+; RV32-NEXT:    li a3, 37
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a3
+; RV32-NEXT:    andi a2, a1, 64
+; RV32-NEXT:    beqz a2, .LBB24_42
+; RV32-NEXT:  .LBB24_103: # %cond.load112
+; RV32-NEXT:    lh a2, 76(a0)
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 39
+; RV32-NEXT:    li a3, 38
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a3
+; RV32-NEXT:    andi a2, a1, 128
+; RV32-NEXT:    beqz a2, .LBB24_43
+; RV32-NEXT:  .LBB24_104: # %cond.load115
+; RV32-NEXT:    lh a2, 78(a0)
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 40
+; RV32-NEXT:    li a3, 39
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a3
+; RV32-NEXT:    andi a2, a1, 256
+; RV32-NEXT:    beqz a2, .LBB24_44
+; RV32-NEXT:  .LBB24_105: # %cond.load118
+; RV32-NEXT:    lh a2, 80(a0)
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 41
+; RV32-NEXT:    li a3, 40
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a3
+; RV32-NEXT:    andi a2, a1, 512
+; RV32-NEXT:    beqz a2, .LBB24_45
+; RV32-NEXT:  .LBB24_106: # %cond.load121
+; RV32-NEXT:    lh a2, 82(a0)
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 42
+; RV32-NEXT:    li a3, 41
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a3
+; RV32-NEXT:    andi a2, a1, 1024
+; RV32-NEXT:    beqz a2, .LBB24_46
+; RV32-NEXT:  .LBB24_107: # %cond.load124
+; RV32-NEXT:    lh a2, 84(a0)
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 43
+; RV32-NEXT:    li a3, 42
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a3
+; RV32-NEXT:    slli a2, a1, 20
+; RV32-NEXT:    bgez a2, .LBB24_47
+; RV32-NEXT:  .LBB24_108: # %cond.load127
+; RV32-NEXT:    lh a2, 86(a0)
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 44
+; RV32-NEXT:    li a3, 43
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a3
+; RV32-NEXT:    slli a2, a1, 19
+; RV32-NEXT:    bgez a2, .LBB24_48
+; RV32-NEXT:  .LBB24_109: # %cond.load130
+; RV32-NEXT:    lh a2, 88(a0)
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 45
+; RV32-NEXT:    li a3, 44
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a3
+; RV32-NEXT:    slli a2, a1, 18
+; RV32-NEXT:    bgez a2, .LBB24_49
+; RV32-NEXT:  .LBB24_110: # %cond.load133
+; RV32-NEXT:    lh a2, 90(a0)
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 46
+; RV32-NEXT:    li a3, 45
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a3
+; RV32-NEXT:    slli a2, a1, 17
+; RV32-NEXT:    bgez a2, .LBB24_50
+; RV32-NEXT:  .LBB24_111: # %cond.load136
+; RV32-NEXT:    lh a2, 92(a0)
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 47
+; RV32-NEXT:    li a3, 46
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a3
+; RV32-NEXT:    slli a2, a1, 16
+; RV32-NEXT:    bgez a2, .LBB24_51
+; RV32-NEXT:  .LBB24_112: # %cond.load139
+; RV32-NEXT:    lh a2, 94(a0)
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 48
+; RV32-NEXT:    li a3, 47
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a3
+; RV32-NEXT:    slli a2, a1, 15
+; RV32-NEXT:    bgez a2, .LBB24_52
+; RV32-NEXT:  .LBB24_113: # %cond.load142
+; RV32-NEXT:    lh a2, 96(a0)
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 49
+; RV32-NEXT:    li a3, 48
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a3
+; RV32-NEXT:    slli a2, a1, 14
+; RV32-NEXT:    bgez a2, .LBB24_53
+; RV32-NEXT:  .LBB24_114: # %cond.load145
+; RV32-NEXT:    lh a2, 98(a0)
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 50
+; RV32-NEXT:    li a3, 49
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a3
+; RV32-NEXT:    slli a2, a1, 13
+; RV32-NEXT:    bgez a2, .LBB24_54
+; RV32-NEXT:  .LBB24_115: # %cond.load148
+; RV32-NEXT:    lh a2, 100(a0)
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 51
+; RV32-NEXT:    li a3, 50
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a3
+; RV32-NEXT:    slli a2, a1, 12
+; RV32-NEXT:    bgez a2, .LBB24_55
+; RV32-NEXT:  .LBB24_116: # %cond.load151
+; RV32-NEXT:    lh a2, 102(a0)
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 52
+; RV32-NEXT:    li a3, 51
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a3
+; RV32-NEXT:    slli a2, a1, 11
+; RV32-NEXT:    bgez a2, .LBB24_56
+; RV32-NEXT:  .LBB24_117: # %cond.load154
+; RV32-NEXT:    lh a2, 104(a0)
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 53
+; RV32-NEXT:    li a3, 52
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a3
+; RV32-NEXT:    slli a2, a1, 10
+; RV32-NEXT:    bgez a2, .LBB24_57
+; RV32-NEXT:  .LBB24_118: # %cond.load157
+; RV32-NEXT:    lh a2, 106(a0)
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 54
+; RV32-NEXT:    li a3, 53
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a3
+; RV32-NEXT:    slli a2, a1, 9
+; RV32-NEXT:    bgez a2, .LBB24_58
+; RV32-NEXT:  .LBB24_119: # %cond.load160
+; RV32-NEXT:    lh a2, 108(a0)
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 55
+; RV32-NEXT:    li a3, 54
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a3
+; RV32-NEXT:    slli a2, a1, 8
+; RV32-NEXT:    bgez a2, .LBB24_59
+; RV32-NEXT:  .LBB24_120: # %cond.load163
+; RV32-NEXT:    lh a2, 110(a0)
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 56
+; RV32-NEXT:    li a3, 55
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a3
+; RV32-NEXT:    slli a2, a1, 7
+; RV32-NEXT:    bgez a2, .LBB24_60
+; RV32-NEXT:  .LBB24_121: # %cond.load166
+; RV32-NEXT:    lh a2, 112(a0)
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 57
+; RV32-NEXT:    li a3, 56
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a3
+; RV32-NEXT:    slli a2, a1, 6
+; RV32-NEXT:    bgez a2, .LBB24_61
+; RV32-NEXT:  .LBB24_122: # %cond.load169
+; RV32-NEXT:    lh a2, 114(a0)
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 58
+; RV32-NEXT:    li a3, 57
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a3
+; RV32-NEXT:    slli a2, a1, 5
+; RV32-NEXT:    bgez a2, .LBB24_62
+; RV32-NEXT:  .LBB24_123: # %cond.load172
+; RV32-NEXT:    lh a2, 116(a0)
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 59
+; RV32-NEXT:    li a3, 58
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a3
+; RV32-NEXT:    slli a2, a1, 4
+; RV32-NEXT:    bgez a2, .LBB24_63
+; RV32-NEXT:  .LBB24_124: # %cond.load175
+; RV32-NEXT:    lh a2, 118(a0)
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 60
+; RV32-NEXT:    li a3, 59
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a3
+; RV32-NEXT:    slli a2, a1, 3
+; RV32-NEXT:    bgez a2, .LBB24_64
+; RV32-NEXT:  .LBB24_125: # %cond.load178
+; RV32-NEXT:    lh a2, 120(a0)
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 61
+; RV32-NEXT:    li a3, 60
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a3
+; RV32-NEXT:    slli a2, a1, 2
+; RV32-NEXT:    bgez a2, .LBB24_65
+; RV32-NEXT:  .LBB24_126: # %cond.load181
+; RV32-NEXT:    lh a2, 122(a0)
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 62
+; RV32-NEXT:    li a3, 61
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a3
+; RV32-NEXT:    slli a2, a1, 1
+; RV32-NEXT:    bgez a2, .LBB24_66
+; RV32-NEXT:  .LBB24_127: # %cond.load184
+; RV32-NEXT:    lh a2, 124(a0)
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 63
+; RV32-NEXT:    li a3, 62
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a3
+; RV32-NEXT:    bgez a1, .LBB24_67
+; RV32-NEXT:  .LBB24_128: # %cond.load187
+; RV32-NEXT:    lh a0, 126(a0)
+; RV32-NEXT:    li a1, 64
+; RV32-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a0
+; RV32-NEXT:    li a0, 63
+; RV32-NEXT:    vslideup.vx v8, v16, a0
 ; RV32-NEXT:    ret
 ;
-; RV64-LABEL: masked_load_v32f64:
+; RV64-LABEL: masked_load_v64bf16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    addi a3, a1, 128
-; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT:    vle64.v v16, (a1)
-; RV64-NEXT:    vle64.v v24, (a3)
-; RV64-NEXT:    fmv.d.x fa5, zero
-; RV64-NEXT:    vmfeq.vf v8, v16, fa5
-; RV64-NEXT:    vmfeq.vf v0, v24, fa5
-; RV64-NEXT:    addi a1, a0, 128
-; RV64-NEXT:    vle64.v v16, (a1), v0.t
-; RV64-NEXT:    vmv1r.v v0, v8
-; RV64-NEXT:    vle64.v v8, (a0), v0.t
-; RV64-NEXT:    vse64.v v8, (a2)
-; RV64-NEXT:    addi a0, a2, 128
-; RV64-NEXT:    vse64.v v16, (a0)
+; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT:    vmv.x.s a1, v0
+; RV64-NEXT:    andi a2, a1, 1
+; RV64-NEXT:    # implicit-def: $v8m8
+; RV64-NEXT:    bnez a2, .LBB24_65
+; RV64-NEXT:  # %bb.1: # %else
+; RV64-NEXT:    andi a2, a1, 2
+; RV64-NEXT:    bnez a2, .LBB24_66
+; RV64-NEXT:  .LBB24_2: # %else2
+; RV64-NEXT:    andi a2, a1, 4
+; RV64-NEXT:    bnez a2, .LBB24_67
+; RV64-NEXT:  .LBB24_3: # %else5
+; RV64-NEXT:    andi a2, a1, 8
+; RV64-NEXT:    bnez a2, .LBB24_68
+; RV64-NEXT:  .LBB24_4: # %else8
+; RV64-NEXT:    andi a2, a1, 16
+; RV64-NEXT:    bnez a2, .LBB24_69
+; RV64-NEXT:  .LBB24_5: # %else11
+; RV64-NEXT:    andi a2, a1, 32
+; RV64-NEXT:    bnez a2, .LBB24_70
+; RV64-NEXT:  .LBB24_6: # %else14
+; RV64-NEXT:    andi a2, a1, 64
+; RV64-NEXT:    bnez a2, .LBB24_71
+; RV64-NEXT:  .LBB24_7: # %else17
+; RV64-NEXT:    andi a2, a1, 128
+; RV64-NEXT:    bnez a2, .LBB24_72
+; RV64-NEXT:  .LBB24_8: # %else20
+; RV64-NEXT:    andi a2, a1, 256
+; RV64-NEXT:    bnez a2, .LBB24_73
+; RV64-NEXT:  .LBB24_9: # %else23
+; RV64-NEXT:    andi a2, a1, 512
+; RV64-NEXT:    bnez a2, .LBB24_74
+; RV64-NEXT:  .LBB24_10: # %else26
+; RV64-NEXT:    andi a2, a1, 1024
+; RV64-NEXT:    bnez a2, .LBB24_75
+; RV64-NEXT:  .LBB24_11: # %else29
+; RV64-NEXT:    slli a2, a1, 52
+; RV64-NEXT:    bltz a2, .LBB24_76
+; RV64-NEXT:  .LBB24_12: # %else32
+; RV64-NEXT:    slli a2, a1, 51
+; RV64-NEXT:    bltz a2, .LBB24_77
+; RV64-NEXT:  .LBB24_13: # %else35
+; RV64-NEXT:    slli a2, a1, 50
+; RV64-NEXT:    bltz a2, .LBB24_78
+; RV64-NEXT:  .LBB24_14: # %else38
+; RV64-NEXT:    slli a2, a1, 49
+; RV64-NEXT:    bltz a2, .LBB24_79
+; RV64-NEXT:  .LBB24_15: # %else41
+; RV64-NEXT:    slli a2, a1, 48
+; RV64-NEXT:    bltz a2, .LBB24_80
+; RV64-NEXT:  .LBB24_16: # %else44
+; RV64-NEXT:    slli a2, a1, 47
+; RV64-NEXT:    bltz a2, .LBB24_81
+; RV64-NEXT:  .LBB24_17: # %else47
+; RV64-NEXT:    slli a2, a1, 46
+; RV64-NEXT:    bltz a2, .LBB24_82
+; RV64-NEXT:  .LBB24_18: # %else50
+; RV64-NEXT:    slli a2, a1, 45
+; RV64-NEXT:    bltz a2, .LBB24_83
+; RV64-NEXT:  .LBB24_19: # %else53
+; RV64-NEXT:    slli a2, a1, 44
+; RV64-NEXT:    bltz a2, .LBB24_84
+; RV64-NEXT:  .LBB24_20: # %else56
+; RV64-NEXT:    slli a2, a1, 43
+; RV64-NEXT:    bltz a2, .LBB24_85
+; RV64-NEXT:  .LBB24_21: # %else59
+; RV64-NEXT:    slli a2, a1, 42
+; RV64-NEXT:    bltz a2, .LBB24_86
+; RV64-NEXT:  .LBB24_22: # %else62
+; RV64-NEXT:    slli a2, a1, 41
+; RV64-NEXT:    bltz a2, .LBB24_87
+; RV64-NEXT:  .LBB24_23: # %else65
+; RV64-NEXT:    slli a2, a1, 40
+; RV64-NEXT:    bltz a2, .LBB24_88
+; RV64-NEXT:  .LBB24_24: # %else68
+; RV64-NEXT:    slli a2, a1, 39
+; RV64-NEXT:    bltz a2, .LBB24_89
+; RV64-NEXT:  .LBB24_25: # %else71
+; RV64-NEXT:    slli a2, a1, 38
+; RV64-NEXT:    bltz a2, .LBB24_90
+; RV64-NEXT:  .LBB24_26: # %else74
+; RV64-NEXT:    slli a2, a1, 37
+; RV64-NEXT:    bltz a2, .LBB24_91
+; RV64-NEXT:  .LBB24_27: # %else77
+; RV64-NEXT:    slli a2, a1, 36
+; RV64-NEXT:    bltz a2, .LBB24_92
+; RV64-NEXT:  .LBB24_28: # %else80
+; RV64-NEXT:    slli a2, a1, 35
+; RV64-NEXT:    bltz a2, .LBB24_93
+; RV64-NEXT:  .LBB24_29: # %else83
+; RV64-NEXT:    slli a2, a1, 34
+; RV64-NEXT:    bltz a2, .LBB24_94
+; RV64-NEXT:  .LBB24_30: # %else86
+; RV64-NEXT:    slli a2, a1, 33
+; RV64-NEXT:    bltz a2, .LBB24_95
+; RV64-NEXT:  .LBB24_31: # %else89
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    bltz a2, .LBB24_96
+; RV64-NEXT:  .LBB24_32: # %else92
+; RV64-NEXT:    slli a2, a1, 31
+; RV64-NEXT:    bltz a2, .LBB24_97
+; RV64-NEXT:  .LBB24_33: # %else95
+; RV64-NEXT:    slli a2, a1, 30
+; RV64-NEXT:    bltz a2, .LBB24_98
+; RV64-NEXT:  .LBB24_34: # %else98
+; RV64-NEXT:    slli a2, a1, 29
+; RV64-NEXT:    bltz a2, .LBB24_99
+; RV64-NEXT:  .LBB24_35: # %else101
+; RV64-NEXT:    slli a2, a1, 28
+; RV64-NEXT:    bltz a2, .LBB24_100
+; RV64-NEXT:  .LBB24_36: # %else104
+; RV64-NEXT:    slli a2, a1, 27
+; RV64-NEXT:    bltz a2, .LBB24_101
+; RV64-NEXT:  .LBB24_37: # %else107
+; RV64-NEXT:    slli a2, a1, 26
+; RV64-NEXT:    bltz a2, .LBB24_102
+; RV64-NEXT:  .LBB24_38: # %else110
+; RV64-NEXT:    slli a2, a1, 25
+; RV64-NEXT:    bltz a2, .LBB24_103
+; RV64-NEXT:  .LBB24_39: # %else113
+; RV64-NEXT:    slli a2, a1, 24
+; RV64-NEXT:    bltz a2, .LBB24_104
+; RV64-NEXT:  .LBB24_40: # %else116
+; RV64-NEXT:    slli a2, a1, 23
+; RV64-NEXT:    bltz a2, .LBB24_105
+; RV64-NEXT:  .LBB24_41: # %else119
+; RV64-NEXT:    slli a2, a1, 22
+; RV64-NEXT:    bltz a2, .LBB24_106
+; RV64-NEXT:  .LBB24_42: # %else122
+; RV64-NEXT:    slli a2, a1, 21
+; RV64-NEXT:    bltz a2, .LBB24_107
+; RV64-NEXT:  .LBB24_43: # %else125
+; RV64-NEXT:    slli a2, a1, 20
+; RV64-NEXT:    bltz a2, .LBB24_108
+; RV64-NEXT:  .LBB24_44: # %else128
+; RV64-NEXT:    slli a2, a1, 19
+; RV64-NEXT:    bltz a2, .LBB24_109
+; RV64-NEXT:  .LBB24_45: # %else131
+; RV64-NEXT:    slli a2, a1, 18
+; RV64-NEXT:    bltz a2, .LBB24_110
+; RV64-NEXT:  .LBB24_46: # %else134
+; RV64-NEXT:    slli a2, a1, 17
+; RV64-NEXT:    bltz a2, .LBB24_111
+; RV64-NEXT:  .LBB24_47: # %else137
+; RV64-NEXT:    slli a2, a1, 16
+; RV64-NEXT:    bltz a2, .LBB24_112
+; RV64-NEXT:  .LBB24_48: # %else140
+; RV64-NEXT:    slli a2, a1, 15
+; RV64-NEXT:    bltz a2, .LBB24_113
+; RV64-NEXT:  .LBB24_49: # %else143
+; RV64-NEXT:    slli a2, a1, 14
+; RV64-NEXT:    bltz a2, .LBB24_114
+; RV64-NEXT:  .LBB24_50: # %else146
+; RV64-NEXT:    slli a2, a1, 13
+; RV64-NEXT:    bltz a2, .LBB24_115
+; RV64-NEXT:  .LBB24_51: # %else149
+; RV64-NEXT:    slli a2, a1, 12
+; RV64-NEXT:    bltz a2, .LBB24_116
+; RV64-NEXT:  .LBB24_52: # %else152
+; RV64-NEXT:    slli a2, a1, 11
+; RV64-NEXT:    bltz a2, .LBB24_117
+; RV64-NEXT:  .LBB24_53: # %else155
+; RV64-NEXT:    slli a2, a1, 10
+; RV64-NEXT:    bltz a2, .LBB24_118
+; RV64-NEXT:  .LBB24_54: # %else158
+; RV64-NEXT:    slli a2, a1, 9
+; RV64-NEXT:    bltz a2, .LBB24_119
+; RV64-NEXT:  .LBB24_55: # %else161
+; RV64-NEXT:    slli a2, a1, 8
+; RV64-NEXT:    bltz a2, .LBB24_120
+; RV64-NEXT:  .LBB24_56: # %else164
+; RV64-NEXT:    slli a2, a1, 7
+; RV64-NEXT:    bltz a2, .LBB24_121
+; RV64-NEXT:  .LBB24_57: # %else167
+; RV64-NEXT:    slli a2, a1, 6
+; RV64-NEXT:    bltz a2, .LBB24_122
+; RV64-NEXT:  .LBB24_58: # %else170
+; RV64-NEXT:    slli a2, a1, 5
+; RV64-NEXT:    bltz a2, .LBB24_123
+; RV64-NEXT:  .LBB24_59: # %else173
+; RV64-NEXT:    slli a2, a1, 4
+; RV64-NEXT:    bltz a2, .LBB24_124
+; RV64-NEXT:  .LBB24_60: # %else176
+; RV64-NEXT:    slli a2, a1, 3
+; RV64-NEXT:    bltz a2, .LBB24_125
+; RV64-NEXT:  .LBB24_61: # %else179
+; RV64-NEXT:    slli a2, a1, 2
+; RV64-NEXT:    bltz a2, .LBB24_126
+; RV64-NEXT:  .LBB24_62: # %else182
+; RV64-NEXT:    slli a2, a1, 1
+; RV64-NEXT:    bltz a2, .LBB24_127
+; RV64-NEXT:  .LBB24_63: # %else185
+; RV64-NEXT:    bltz a1, .LBB24_128
+; RV64-NEXT:  .LBB24_64: # %else188
 ; RV64-NEXT:    ret
-  %m = load <32 x double>, ptr %m_ptr
-  %mask = fcmp oeq <32 x double> %m, zeroinitializer
-  %load = call <32 x double> @llvm.masked.load.v32f64(ptr %a, i32 8, <32 x i1> %mask, <32 x double> undef)
-  store <32 x double> %load, ptr %res_ptr
-  ret void
+; RV64-NEXT:  .LBB24_65: # %cond.load
+; RV64-NEXT:    lh a2, 0(a0)
+; RV64-NEXT:    fmv.x.h a3, fa5
+; RV64-NEXT:    li a4, 64
+; RV64-NEXT:    vsetvli zero, a4, e16, m8, ta, ma
+; RV64-NEXT:    vmv.v.x v8, a3
+; RV64-NEXT:    vsetvli zero, zero, e16, m8, tu, ma
+; RV64-NEXT:    vmv.s.x v8, a2
+; RV64-NEXT:    andi a2, a1, 2
+; RV64-NEXT:    beqz a2, .LBB24_2
+; RV64-NEXT:  .LBB24_66: # %cond.load1
+; RV64-NEXT:    lh a2, 2(a0)
+; RV64-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    vslideup.vi v8, v16, 1
+; RV64-NEXT:    andi a2, a1, 4
+; RV64-NEXT:    beqz a2, .LBB24_3
+; RV64-NEXT:  .LBB24_67: # %cond.load4
+; RV64-NEXT:    lh a2, 4(a0)
+; RV64-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    vslideup.vi v8, v16, 2
+; RV64-NEXT:    andi a2, a1, 8
+; RV64-NEXT:    beqz a2, .LBB24_4
+; RV64-NEXT:  .LBB24_68: # %cond.load7
+; RV64-NEXT:    lh a2, 6(a0)
+; RV64-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    vslideup.vi v8, v16, 3
+; RV64-NEXT:    andi a2, a1, 16
+; RV64-NEXT:    beqz a2, .LBB24_5
+; RV64-NEXT:  .LBB24_69: # %cond.load10
+; RV64-NEXT:    lh a2, 8(a0)
+; RV64-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    vslideup.vi v8, v16, 4
+; RV64-NEXT:    andi a2, a1, 32
+; RV64-NEXT:    beqz a2, .LBB24_6
+; RV64-NEXT:  .LBB24_70: # %cond.load13
+; RV64-NEXT:    lh a2, 10(a0)
+; RV64-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    vslideup.vi v8, v16, 5
+; RV64-NEXT:    andi a2, a1, 64
+; RV64-NEXT:    beqz a2, .LBB24_7
+; RV64-NEXT:  .LBB24_71: # %cond.load16
+; RV64-NEXT:    lh a2, 12(a0)
+; RV64-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    vslideup.vi v8, v16, 6
+; RV64-NEXT:    andi a2, a1, 128
+; RV64-NEXT:    beqz a2, .LBB24_8
+; RV64-NEXT:  .LBB24_72: # %cond.load19
+; RV64-NEXT:    lh a2, 14(a0)
+; RV64-NEXT:    vsetivli zero, 8, e16, m1, tu, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    vslideup.vi v8, v16, 7
+; RV64-NEXT:    andi a2, a1, 256
+; RV64-NEXT:    beqz a2, .LBB24_9
+; RV64-NEXT:  .LBB24_73: # %cond.load22
+; RV64-NEXT:    lh a2, 16(a0)
+; RV64-NEXT:    vsetivli zero, 9, e16, m2, tu, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    vslideup.vi v8, v16, 8
+; RV64-NEXT:    andi a2, a1, 512
+; RV64-NEXT:    beqz a2, .LBB24_10
+; RV64-NEXT:  .LBB24_74: # %cond.load25
+; RV64-NEXT:    lh a2, 18(a0)
+; RV64-NEXT:    vsetivli zero, 10, e16, m2, tu, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    vslideup.vi v8, v16, 9
+; RV64-NEXT:    andi a2, a1, 1024
+; RV64-NEXT:    beqz a2, .LBB24_11
+; RV64-NEXT:  .LBB24_75: # %cond.load28
+; RV64-NEXT:    lh a2, 20(a0)
+; RV64-NEXT:    vsetivli zero, 11, e16, m2, tu, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    vslideup.vi v8, v16, 10
+; RV64-NEXT:    slli a2, a1, 52
+; RV64-NEXT:    bgez a2, .LBB24_12
+; RV64-NEXT:  .LBB24_76: # %cond.load31
+; RV64-NEXT:    lh a2, 22(a0)
+; RV64-NEXT:    vsetivli zero, 12, e16, m2, tu, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    vslideup.vi v8, v16, 11
+; RV64-NEXT:    slli a2, a1, 51
+; RV64-NEXT:    bgez a2, .LBB24_13
+; RV64-NEXT:  .LBB24_77: # %cond.load34
+; RV64-NEXT:    lh a2, 24(a0)
+; RV64-NEXT:    vsetivli zero, 13, e16, m2, tu, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    vslideup.vi v8, v16, 12
+; RV64-NEXT:    slli a2, a1, 50
+; RV64-NEXT:    bgez a2, .LBB24_14
+; RV64-NEXT:  .LBB24_78: # %cond.load37
+; RV64-NEXT:    lh a2, 26(a0)
+; RV64-NEXT:    vsetivli zero, 14, e16, m2, tu, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    vslideup.vi v8, v16, 13
+; RV64-NEXT:    slli a2, a1, 49
+; RV64-NEXT:    bgez a2, .LBB24_15
+; RV64-NEXT:  .LBB24_79: # %cond.load40
+; RV64-NEXT:    lh a2, 28(a0)
+; RV64-NEXT:    vsetivli zero, 15, e16, m2, tu, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    vslideup.vi v8, v16, 14
+; RV64-NEXT:    slli a2, a1, 48
+; RV64-NEXT:    bgez a2, .LBB24_16
+; RV64-NEXT:  .LBB24_80: # %cond.load43
+; RV64-NEXT:    lh a2, 30(a0)
+; RV64-NEXT:    vsetivli zero, 16, e16, m2, tu, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    vslideup.vi v8, v16, 15
+; RV64-NEXT:    slli a2, a1, 47
+; RV64-NEXT:    bgez a2, .LBB24_17
+; RV64-NEXT:  .LBB24_81: # %cond.load46
+; RV64-NEXT:    lh a2, 32(a0)
+; RV64-NEXT:    vsetivli zero, 17, e16, m4, tu, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    vslideup.vi v8, v16, 16
+; RV64-NEXT:    slli a2, a1, 46
+; RV64-NEXT:    bgez a2, .LBB24_18
+; RV64-NEXT:  .LBB24_82: # %cond.load49
+; RV64-NEXT:    lh a2, 34(a0)
+; RV64-NEXT:    vsetivli zero, 18, e16, m4, tu, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    vslideup.vi v8, v16, 17
+; RV64-NEXT:    slli a2, a1, 45
+; RV64-NEXT:    bgez a2, .LBB24_19
+; RV64-NEXT:  .LBB24_83: # %cond.load52
+; RV64-NEXT:    lh a2, 36(a0)
+; RV64-NEXT:    vsetivli zero, 19, e16, m4, tu, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    vslideup.vi v8, v16, 18
+; RV64-NEXT:    slli a2, a1, 44
+; RV64-NEXT:    bgez a2, .LBB24_20
+; RV64-NEXT:  .LBB24_84: # %cond.load55
+; RV64-NEXT:    lh a2, 38(a0)
+; RV64-NEXT:    vsetivli zero, 20, e16, m4, tu, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    vslideup.vi v8, v16, 19
+; RV64-NEXT:    slli a2, a1, 43
+; RV64-NEXT:    bgez a2, .LBB24_21
+; RV64-NEXT:  .LBB24_85: # %cond.load58
+; RV64-NEXT:    lh a2, 40(a0)
+; RV64-NEXT:    vsetivli zero, 21, e16, m4, tu, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    vslideup.vi v8, v16, 20
+; RV64-NEXT:    slli a2, a1, 42
+; RV64-NEXT:    bgez a2, .LBB24_22
+; RV64-NEXT:  .LBB24_86: # %cond.load61
+; RV64-NEXT:    lh a2, 42(a0)
+; RV64-NEXT:    vsetivli zero, 22, e16, m4, tu, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    vslideup.vi v8, v16, 21
+; RV64-NEXT:    slli a2, a1, 41
+; RV64-NEXT:    bgez a2, .LBB24_23
+; RV64-NEXT:  .LBB24_87: # %cond.load64
+; RV64-NEXT:    lh a2, 44(a0)
+; RV64-NEXT:    vsetivli zero, 23, e16, m4, tu, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    vslideup.vi v8, v16, 22
+; RV64-NEXT:    slli a2, a1, 40
+; RV64-NEXT:    bgez a2, .LBB24_24
+; RV64-NEXT:  .LBB24_88: # %cond.load67
+; RV64-NEXT:    lh a2, 46(a0)
+; RV64-NEXT:    vsetivli zero, 24, e16, m4, tu, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    vslideup.vi v8, v16, 23
+; RV64-NEXT:    slli a2, a1, 39
+; RV64-NEXT:    bgez a2, .LBB24_25
+; RV64-NEXT:  .LBB24_89: # %cond.load70
+; RV64-NEXT:    lh a2, 48(a0)
+; RV64-NEXT:    vsetivli zero, 25, e16, m4, tu, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    vslideup.vi v8, v16, 24
+; RV64-NEXT:    slli a2, a1, 38
+; RV64-NEXT:    bgez a2, .LBB24_26
+; RV64-NEXT:  .LBB24_90: # %cond.load73
+; RV64-NEXT:    lh a2, 50(a0)
+; RV64-NEXT:    vsetivli zero, 26, e16, m4, tu, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    vslideup.vi v8, v16, 25
+; RV64-NEXT:    slli a2, a1, 37
+; RV64-NEXT:    bgez a2, .LBB24_27
+; RV64-NEXT:  .LBB24_91: # %cond.load76
+; RV64-NEXT:    lh a2, 52(a0)
+; RV64-NEXT:    vsetivli zero, 27, e16, m4, tu, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    vslideup.vi v8, v16, 26
+; RV64-NEXT:    slli a2, a1, 36
+; RV64-NEXT:    bgez a2, .LBB24_28
+; RV64-NEXT:  .LBB24_92: # %cond.load79
+; RV64-NEXT:    lh a2, 54(a0)
+; RV64-NEXT:    vsetivli zero, 28, e16, m4, tu, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    vslideup.vi v8, v16, 27
+; RV64-NEXT:    slli a2, a1, 35
+; RV64-NEXT:    bgez a2, .LBB24_29
+; RV64-NEXT:  .LBB24_93: # %cond.load82
+; RV64-NEXT:    lh a2, 56(a0)
+; RV64-NEXT:    vsetivli zero, 29, e16, m4, tu, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    vslideup.vi v8, v16, 28
+; RV64-NEXT:    slli a2, a1, 34
+; RV64-NEXT:    bgez a2, .LBB24_30
+; RV64-NEXT:  .LBB24_94: # %cond.load85
+; RV64-NEXT:    lh a2, 58(a0)
+; RV64-NEXT:    vsetivli zero, 30, e16, m4, tu, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    vslideup.vi v8, v16, 29
+; RV64-NEXT:    slli a2, a1, 33
+; RV64-NEXT:    bgez a2, .LBB24_31
+; RV64-NEXT:  .LBB24_95: # %cond.load88
+; RV64-NEXT:    lh a2, 60(a0)
+; RV64-NEXT:    vsetivli zero, 31, e16, m4, tu, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    vslideup.vi v8, v16, 30
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    bgez a2, .LBB24_32
+; RV64-NEXT:  .LBB24_96: # %cond.load91
+; RV64-NEXT:    lh a2, 62(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    li a2, 32
+; RV64-NEXT:    vsetvli zero, a2, e16, m4, tu, ma
+; RV64-NEXT:    vslideup.vi v8, v16, 31
+; RV64-NEXT:    slli a2, a1, 31
+; RV64-NEXT:    bgez a2, .LBB24_33
+; RV64-NEXT:  .LBB24_97: # %cond.load94
+; RV64-NEXT:    lh a2, 64(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    li a2, 33
+; RV64-NEXT:    li a3, 32
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v16, a3
+; RV64-NEXT:    slli a2, a1, 30
+; RV64-NEXT:    bgez a2, .LBB24_34
+; RV64-NEXT:  .LBB24_98: # %cond.load97
+; RV64-NEXT:    lh a2, 66(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    li a2, 34
+; RV64-NEXT:    li a3, 33
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v16, a3
+; RV64-NEXT:    slli a2, a1, 29
+; RV64-NEXT:    bgez a2, .LBB24_35
+; RV64-NEXT:  .LBB24_99: # %cond.load100
+; RV64-NEXT:    lh a2, 68(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    li a2, 35
+; RV64-NEXT:    li a3, 34
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v16, a3
+; RV64-NEXT:    slli a2, a1, 28
+; RV64-NEXT:    bgez a2, .LBB24_36
+; RV64-NEXT:  .LBB24_100: # %cond.load103
+; RV64-NEXT:    lh a2, 70(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    li a2, 36
+; RV64-NEXT:    li a3, 35
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v16, a3
+; RV64-NEXT:    slli a2, a1, 27
+; RV64-NEXT:    bgez a2, .LBB24_37
+; RV64-NEXT:  .LBB24_101: # %cond.load106
+; RV64-NEXT:    lh a2, 72(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    li a2, 37
+; RV64-NEXT:    li a3, 36
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v16, a3
+; RV64-NEXT:    slli a2, a1, 26
+; RV64-NEXT:    bgez a2, .LBB24_38
+; RV64-NEXT:  .LBB24_102: # %cond.load109
+; RV64-NEXT:    lh a2, 74(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    li a2, 38
+; RV64-NEXT:    li a3, 37
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v16, a3
+; RV64-NEXT:    slli a2, a1, 25
+; RV64-NEXT:    bgez a2, .LBB24_39
+; RV64-NEXT:  .LBB24_103: # %cond.load112
+; RV64-NEXT:    lh a2, 76(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    li a2, 39
+; RV64-NEXT:    li a3, 38
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v16, a3
+; RV64-NEXT:    slli a2, a1, 24
+; RV64-NEXT:    bgez a2, .LBB24_40
+; RV64-NEXT:  .LBB24_104: # %cond.load115
+; RV64-NEXT:    lh a2, 78(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    li a2, 40
+; RV64-NEXT:    li a3, 39
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v16, a3
+; RV64-NEXT:    slli a2, a1, 23
+; RV64-NEXT:    bgez a2, .LBB24_41
+; RV64-NEXT:  .LBB24_105: # %cond.load118
+; RV64-NEXT:    lh a2, 80(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    li a2, 41
+; RV64-NEXT:    li a3, 40
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v16, a3
+; RV64-NEXT:    slli a2, a1, 22
+; RV64-NEXT:    bgez a2, .LBB24_42
+; RV64-NEXT:  .LBB24_106: # %cond.load121
+; RV64-NEXT:    lh a2, 82(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    li a2, 42
+; RV64-NEXT:    li a3, 41
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v16, a3
+; RV64-NEXT:    slli a2, a1, 21
+; RV64-NEXT:    bgez a2, .LBB24_43
+; RV64-NEXT:  .LBB24_107: # %cond.load124
+; RV64-NEXT:    lh a2, 84(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    li a2, 43
+; RV64-NEXT:    li a3, 42
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v16, a3
+; RV64-NEXT:    slli a2, a1, 20
+; RV64-NEXT:    bgez a2, .LBB24_44
+; RV64-NEXT:  .LBB24_108: # %cond.load127
+; RV64-NEXT:    lh a2, 86(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    li a2, 44
+; RV64-NEXT:    li a3, 43
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v16, a3
+; RV64-NEXT:    slli a2, a1, 19
+; RV64-NEXT:    bgez a2, .LBB24_45
+; RV64-NEXT:  .LBB24_109: # %cond.load130
+; RV64-NEXT:    lh a2, 88(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    li a2, 45
+; RV64-NEXT:    li a3, 44
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v16, a3
+; RV64-NEXT:    slli a2, a1, 18
+; RV64-NEXT:    bgez a2, .LBB24_46
+; RV64-NEXT:  .LBB24_110: # %cond.load133
+; RV64-NEXT:    lh a2, 90(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    li a2, 46
+; RV64-NEXT:    li a3, 45
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v16, a3
+; RV64-NEXT:    slli a2, a1, 17
+; RV64-NEXT:    bgez a2, .LBB24_47
+; RV64-NEXT:  .LBB24_111: # %cond.load136
+; RV64-NEXT:    lh a2, 92(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    li a2, 47
+; RV64-NEXT:    li a3, 46
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v16, a3
+; RV64-NEXT:    slli a2, a1, 16
+; RV64-NEXT:    bgez a2, .LBB24_48
+; RV64-NEXT:  .LBB24_112: # %cond.load139
+; RV64-NEXT:    lh a2, 94(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    li a2, 48
+; RV64-NEXT:    li a3, 47
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v16, a3
+; RV64-NEXT:    slli a2, a1, 15
+; RV64-NEXT:    bgez a2, .LBB24_49
+; RV64-NEXT:  .LBB24_113: # %cond.load142
+; RV64-NEXT:    lh a2, 96(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    li a2, 49
+; RV64-NEXT:    li a3, 48
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v16, a3
+; RV64-NEXT:    slli a2, a1, 14
+; RV64-NEXT:    bgez a2, .LBB24_50
+; RV64-NEXT:  .LBB24_114: # %cond.load145
+; RV64-NEXT:    lh a2, 98(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    li a2, 50
+; RV64-NEXT:    li a3, 49
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v16, a3
+; RV64-NEXT:    slli a2, a1, 13
+; RV64-NEXT:    bgez a2, .LBB24_51
+; RV64-NEXT:  .LBB24_115: # %cond.load148
+; RV64-NEXT:    lh a2, 100(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    li a2, 51
+; RV64-NEXT:    li a3, 50
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v16, a3
+; RV64-NEXT:    slli a2, a1, 12
+; RV64-NEXT:    bgez a2, .LBB24_52
+; RV64-NEXT:  .LBB24_116: # %cond.load151
+; RV64-NEXT:    lh a2, 102(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    li a2, 52
+; RV64-NEXT:    li a3, 51
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v16, a3
+; RV64-NEXT:    slli a2, a1, 11
+; RV64-NEXT:    bgez a2, .LBB24_53
+; RV64-NEXT:  .LBB24_117: # %cond.load154
+; RV64-NEXT:    lh a2, 104(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    li a2, 53
+; RV64-NEXT:    li a3, 52
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v16, a3
+; RV64-NEXT:    slli a2, a1, 10
+; RV64-NEXT:    bgez a2, .LBB24_54
+; RV64-NEXT:  .LBB24_118: # %cond.load157
+; RV64-NEXT:    lh a2, 106(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    li a2, 54
+; RV64-NEXT:    li a3, 53
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v16, a3
+; RV64-NEXT:    slli a2, a1, 9
+; RV64-NEXT:    bgez a2, .LBB24_55
+; RV64-NEXT:  .LBB24_119: # %cond.load160
+; RV64-NEXT:    lh a2, 108(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    li a2, 55
+; RV64-NEXT:    li a3, 54
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v16, a3
+; RV64-NEXT:    slli a2, a1, 8
+; RV64-NEXT:    bgez a2, .LBB24_56
+; RV64-NEXT:  .LBB24_120: # %cond.load163
+; RV64-NEXT:    lh a2, 110(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    li a2, 56
+; RV64-NEXT:    li a3, 55
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v16, a3
+; RV64-NEXT:    slli a2, a1, 7
+; RV64-NEXT:    bgez a2, .LBB24_57
+; RV64-NEXT:  .LBB24_121: # %cond.load166
+; RV64-NEXT:    lh a2, 112(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    li a2, 57
+; RV64-NEXT:    li a3, 56
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v16, a3
+; RV64-NEXT:    slli a2, a1, 6
+; RV64-NEXT:    bgez a2, .LBB24_58
+; RV64-NEXT:  .LBB24_122: # %cond.load169
+; RV64-NEXT:    lh a2, 114(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    li a2, 58
+; RV64-NEXT:    li a3, 57
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v16, a3
+; RV64-NEXT:    slli a2, a1, 5
+; RV64-NEXT:    bgez a2, .LBB24_59
+; RV64-NEXT:  .LBB24_123: # %cond.load172
+; RV64-NEXT:    lh a2, 116(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    li a2, 59
+; RV64-NEXT:    li a3, 58
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v16, a3
+; RV64-NEXT:    slli a2, a1, 4
+; RV64-NEXT:    bgez a2, .LBB24_60
+; RV64-NEXT:  .LBB24_124: # %cond.load175
+; RV64-NEXT:    lh a2, 118(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    li a2, 60
+; RV64-NEXT:    li a3, 59
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v16, a3
+; RV64-NEXT:    slli a2, a1, 3
+; RV64-NEXT:    bgez a2, .LBB24_61
+; RV64-NEXT:  .LBB24_125: # %cond.load178
+; RV64-NEXT:    lh a2, 120(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    li a2, 61
+; RV64-NEXT:    li a3, 60
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v16, a3
+; RV64-NEXT:    slli a2, a1, 2
+; RV64-NEXT:    bgez a2, .LBB24_62
+; RV64-NEXT:  .LBB24_126: # %cond.load181
+; RV64-NEXT:    lh a2, 122(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    li a2, 62
+; RV64-NEXT:    li a3, 61
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v16, a3
+; RV64-NEXT:    slli a2, a1, 1
+; RV64-NEXT:    bgez a2, .LBB24_63
+; RV64-NEXT:  .LBB24_127: # %cond.load184
+; RV64-NEXT:    lh a2, 124(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    li a2, 63
+; RV64-NEXT:    li a3, 62
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v16, a3
+; RV64-NEXT:    bgez a1, .LBB24_64
+; RV64-NEXT:  .LBB24_128: # %cond.load187
+; RV64-NEXT:    lh a0, 126(a0)
+; RV64-NEXT:    li a1, 64
+; RV64-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a0
+; RV64-NEXT:    li a0, 63
+; RV64-NEXT:    vslideup.vx v8, v16, a0
+; RV64-NEXT:    ret
+  %load = call <64 x bfloat> @llvm.masked.load.v64bf16(ptr %a, i32 8, <64 x i1> %mask, <64 x bfloat> undef)
+  ret <64 x bfloat> %load
 }
-declare <32 x double> @llvm.masked.load.v32f64(ptr, i32, <32 x i1>, <32 x double>)
 
-define void @masked_load_v64f16(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
-; CHECK-LABEL: masked_load_v64f16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a3, 64
-; CHECK-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
-; CHECK-NEXT:    vle16.v v8, (a1)
-; CHECK-NEXT:    fmv.h.x fa5, zero
-; CHECK-NEXT:    vmfeq.vf v0, v8, fa5
-; CHECK-NEXT:    vle16.v v8, (a0), v0.t
-; CHECK-NEXT:    vse16.v v8, (a2)
-; CHECK-NEXT:    ret
-  %m = load <64 x half>, ptr %m_ptr
-  %mask = fcmp oeq <64 x half> %m, zeroinitializer
+define <64 x half> @masked_load_v64f16(ptr %a, <64 x i1> %mask) {
+; ZVFH-LABEL: masked_load_v64f16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    li a1, 64
+; ZVFH-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; ZVFH-NEXT:    vle16.v v8, (a0), v0.t
+; ZVFH-NEXT:    ret
+;
+; RV32-ZVFHMIN-LABEL: masked_load_v64f16:
+; RV32-ZVFHMIN:       # %bb.0:
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a2, v0
+; RV32-ZVFHMIN-NEXT:    andi a1, a2, 1
+; RV32-ZVFHMIN-NEXT:    # implicit-def: $v8m8
+; RV32-ZVFHMIN-NEXT:    bnez a1, .LBB25_68
+; RV32-ZVFHMIN-NEXT:  # %bb.1: # %else
+; RV32-ZVFHMIN-NEXT:    andi a1, a2, 2
+; RV32-ZVFHMIN-NEXT:    bnez a1, .LBB25_69
+; RV32-ZVFHMIN-NEXT:  .LBB25_2: # %else2
+; RV32-ZVFHMIN-NEXT:    andi a1, a2, 4
+; RV32-ZVFHMIN-NEXT:    bnez a1, .LBB25_70
+; RV32-ZVFHMIN-NEXT:  .LBB25_3: # %else5
+; RV32-ZVFHMIN-NEXT:    andi a1, a2, 8
+; RV32-ZVFHMIN-NEXT:    bnez a1, .LBB25_71
+; RV32-ZVFHMIN-NEXT:  .LBB25_4: # %else8
+; RV32-ZVFHMIN-NEXT:    andi a1, a2, 16
+; RV32-ZVFHMIN-NEXT:    bnez a1, .LBB25_72
+; RV32-ZVFHMIN-NEXT:  .LBB25_5: # %else11
+; RV32-ZVFHMIN-NEXT:    andi a1, a2, 32
+; RV32-ZVFHMIN-NEXT:    bnez a1, .LBB25_73
+; RV32-ZVFHMIN-NEXT:  .LBB25_6: # %else14
+; RV32-ZVFHMIN-NEXT:    andi a1, a2, 64
+; RV32-ZVFHMIN-NEXT:    bnez a1, .LBB25_74
+; RV32-ZVFHMIN-NEXT:  .LBB25_7: # %else17
+; RV32-ZVFHMIN-NEXT:    andi a1, a2, 128
+; RV32-ZVFHMIN-NEXT:    bnez a1, .LBB25_75
+; RV32-ZVFHMIN-NEXT:  .LBB25_8: # %else20
+; RV32-ZVFHMIN-NEXT:    andi a1, a2, 256
+; RV32-ZVFHMIN-NEXT:    bnez a1, .LBB25_76
+; RV32-ZVFHMIN-NEXT:  .LBB25_9: # %else23
+; RV32-ZVFHMIN-NEXT:    andi a1, a2, 512
+; RV32-ZVFHMIN-NEXT:    bnez a1, .LBB25_77
+; RV32-ZVFHMIN-NEXT:  .LBB25_10: # %else26
+; RV32-ZVFHMIN-NEXT:    andi a1, a2, 1024
+; RV32-ZVFHMIN-NEXT:    bnez a1, .LBB25_78
+; RV32-ZVFHMIN-NEXT:  .LBB25_11: # %else29
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 20
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB25_79
+; RV32-ZVFHMIN-NEXT:  .LBB25_12: # %else32
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 19
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB25_80
+; RV32-ZVFHMIN-NEXT:  .LBB25_13: # %else35
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 18
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB25_81
+; RV32-ZVFHMIN-NEXT:  .LBB25_14: # %else38
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 17
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB25_82
+; RV32-ZVFHMIN-NEXT:  .LBB25_15: # %else41
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 16
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB25_83
+; RV32-ZVFHMIN-NEXT:  .LBB25_16: # %else44
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 15
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB25_84
+; RV32-ZVFHMIN-NEXT:  .LBB25_17: # %else47
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 14
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB25_85
+; RV32-ZVFHMIN-NEXT:  .LBB25_18: # %else50
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 13
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB25_86
+; RV32-ZVFHMIN-NEXT:  .LBB25_19: # %else53
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 12
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB25_87
+; RV32-ZVFHMIN-NEXT:  .LBB25_20: # %else56
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 11
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB25_88
+; RV32-ZVFHMIN-NEXT:  .LBB25_21: # %else59
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 10
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB25_89
+; RV32-ZVFHMIN-NEXT:  .LBB25_22: # %else62
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 9
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB25_90
+; RV32-ZVFHMIN-NEXT:  .LBB25_23: # %else65
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 8
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB25_91
+; RV32-ZVFHMIN-NEXT:  .LBB25_24: # %else68
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 7
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB25_92
+; RV32-ZVFHMIN-NEXT:  .LBB25_25: # %else71
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 6
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB25_93
+; RV32-ZVFHMIN-NEXT:  .LBB25_26: # %else74
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 5
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB25_94
+; RV32-ZVFHMIN-NEXT:  .LBB25_27: # %else77
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 4
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB25_95
+; RV32-ZVFHMIN-NEXT:  .LBB25_28: # %else80
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 3
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB25_30
+; RV32-ZVFHMIN-NEXT:  .LBB25_29: # %cond.load82
+; RV32-ZVFHMIN-NEXT:    lh a1, 56(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 29, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 28
+; RV32-ZVFHMIN-NEXT:  .LBB25_30: # %else83
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 2
+; RV32-ZVFHMIN-NEXT:    li a1, 32
+; RV32-ZVFHMIN-NEXT:    bgez a3, .LBB25_32
+; RV32-ZVFHMIN-NEXT:  # %bb.31: # %cond.load85
+; RV32-ZVFHMIN-NEXT:    lh a3, 58(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 30, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a3
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 29
+; RV32-ZVFHMIN-NEXT:  .LBB25_32: # %else86
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 1
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vsrl.vx v16, v0, a1
+; RV32-ZVFHMIN-NEXT:    bgez a3, .LBB25_34
+; RV32-ZVFHMIN-NEXT:  # %bb.33: # %cond.load88
+; RV32-ZVFHMIN-NEXT:    lh a1, 60(a0)
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v20, a1
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 31, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v20, 30
+; RV32-ZVFHMIN-NEXT:  .LBB25_34: # %else89
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a1, v16
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB25_96
+; RV32-ZVFHMIN-NEXT:  # %bb.35: # %else92
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 1
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB25_97
+; RV32-ZVFHMIN-NEXT:  .LBB25_36: # %else95
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 2
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB25_98
+; RV32-ZVFHMIN-NEXT:  .LBB25_37: # %else98
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 4
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB25_99
+; RV32-ZVFHMIN-NEXT:  .LBB25_38: # %else101
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 8
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB25_100
+; RV32-ZVFHMIN-NEXT:  .LBB25_39: # %else104
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 16
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB25_101
+; RV32-ZVFHMIN-NEXT:  .LBB25_40: # %else107
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 32
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB25_102
+; RV32-ZVFHMIN-NEXT:  .LBB25_41: # %else110
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 64
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB25_103
+; RV32-ZVFHMIN-NEXT:  .LBB25_42: # %else113
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 128
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB25_104
+; RV32-ZVFHMIN-NEXT:  .LBB25_43: # %else116
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 256
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB25_105
+; RV32-ZVFHMIN-NEXT:  .LBB25_44: # %else119
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 512
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB25_106
+; RV32-ZVFHMIN-NEXT:  .LBB25_45: # %else122
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 1024
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB25_107
+; RV32-ZVFHMIN-NEXT:  .LBB25_46: # %else125
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 20
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB25_108
+; RV32-ZVFHMIN-NEXT:  .LBB25_47: # %else128
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 19
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB25_109
+; RV32-ZVFHMIN-NEXT:  .LBB25_48: # %else131
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 18
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB25_110
+; RV32-ZVFHMIN-NEXT:  .LBB25_49: # %else134
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 17
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB25_111
+; RV32-ZVFHMIN-NEXT:  .LBB25_50: # %else137
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 16
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB25_112
+; RV32-ZVFHMIN-NEXT:  .LBB25_51: # %else140
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 15
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB25_113
+; RV32-ZVFHMIN-NEXT:  .LBB25_52: # %else143
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 14
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB25_114
+; RV32-ZVFHMIN-NEXT:  .LBB25_53: # %else146
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 13
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB25_115
+; RV32-ZVFHMIN-NEXT:  .LBB25_54: # %else149
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 12
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB25_116
+; RV32-ZVFHMIN-NEXT:  .LBB25_55: # %else152
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 11
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB25_117
+; RV32-ZVFHMIN-NEXT:  .LBB25_56: # %else155
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 10
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB25_118
+; RV32-ZVFHMIN-NEXT:  .LBB25_57: # %else158
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 9
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB25_119
+; RV32-ZVFHMIN-NEXT:  .LBB25_58: # %else161
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 8
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB25_120
+; RV32-ZVFHMIN-NEXT:  .LBB25_59: # %else164
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 7
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB25_121
+; RV32-ZVFHMIN-NEXT:  .LBB25_60: # %else167
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 6
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB25_122
+; RV32-ZVFHMIN-NEXT:  .LBB25_61: # %else170
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 5
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB25_123
+; RV32-ZVFHMIN-NEXT:  .LBB25_62: # %else173
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 4
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB25_124
+; RV32-ZVFHMIN-NEXT:  .LBB25_63: # %else176
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 3
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB25_125
+; RV32-ZVFHMIN-NEXT:  .LBB25_64: # %else179
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 2
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB25_126
+; RV32-ZVFHMIN-NEXT:  .LBB25_65: # %else182
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 1
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB25_127
+; RV32-ZVFHMIN-NEXT:  .LBB25_66: # %else185
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB25_128
+; RV32-ZVFHMIN-NEXT:  .LBB25_67: # %else188
+; RV32-ZVFHMIN-NEXT:    ret
+; RV32-ZVFHMIN-NEXT:  .LBB25_68: # %cond.load
+; RV32-ZVFHMIN-NEXT:    lh a1, 0(a0)
+; RV32-ZVFHMIN-NEXT:    fmv.x.h a3, fa5
+; RV32-ZVFHMIN-NEXT:    li a4, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a4, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.v.x v8, a3
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v8, a1
+; RV32-ZVFHMIN-NEXT:    andi a1, a2, 2
+; RV32-ZVFHMIN-NEXT:    beqz a1, .LBB25_2
+; RV32-ZVFHMIN-NEXT:  .LBB25_69: # %cond.load1
+; RV32-ZVFHMIN-NEXT:    lh a1, 2(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 1
+; RV32-ZVFHMIN-NEXT:    andi a1, a2, 4
+; RV32-ZVFHMIN-NEXT:    beqz a1, .LBB25_3
+; RV32-ZVFHMIN-NEXT:  .LBB25_70: # %cond.load4
+; RV32-ZVFHMIN-NEXT:    lh a1, 4(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 2
+; RV32-ZVFHMIN-NEXT:    andi a1, a2, 8
+; RV32-ZVFHMIN-NEXT:    beqz a1, .LBB25_4
+; RV32-ZVFHMIN-NEXT:  .LBB25_71: # %cond.load7
+; RV32-ZVFHMIN-NEXT:    lh a1, 6(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 3
+; RV32-ZVFHMIN-NEXT:    andi a1, a2, 16
+; RV32-ZVFHMIN-NEXT:    beqz a1, .LBB25_5
+; RV32-ZVFHMIN-NEXT:  .LBB25_72: # %cond.load10
+; RV32-ZVFHMIN-NEXT:    lh a1, 8(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 4
+; RV32-ZVFHMIN-NEXT:    andi a1, a2, 32
+; RV32-ZVFHMIN-NEXT:    beqz a1, .LBB25_6
+; RV32-ZVFHMIN-NEXT:  .LBB25_73: # %cond.load13
+; RV32-ZVFHMIN-NEXT:    lh a1, 10(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 5
+; RV32-ZVFHMIN-NEXT:    andi a1, a2, 64
+; RV32-ZVFHMIN-NEXT:    beqz a1, .LBB25_7
+; RV32-ZVFHMIN-NEXT:  .LBB25_74: # %cond.load16
+; RV32-ZVFHMIN-NEXT:    lh a1, 12(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 6
+; RV32-ZVFHMIN-NEXT:    andi a1, a2, 128
+; RV32-ZVFHMIN-NEXT:    beqz a1, .LBB25_8
+; RV32-ZVFHMIN-NEXT:  .LBB25_75: # %cond.load19
+; RV32-ZVFHMIN-NEXT:    lh a1, 14(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 7
+; RV32-ZVFHMIN-NEXT:    andi a1, a2, 256
+; RV32-ZVFHMIN-NEXT:    beqz a1, .LBB25_9
+; RV32-ZVFHMIN-NEXT:  .LBB25_76: # %cond.load22
+; RV32-ZVFHMIN-NEXT:    lh a1, 16(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 9, e16, m2, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 8
+; RV32-ZVFHMIN-NEXT:    andi a1, a2, 512
+; RV32-ZVFHMIN-NEXT:    beqz a1, .LBB25_10
+; RV32-ZVFHMIN-NEXT:  .LBB25_77: # %cond.load25
+; RV32-ZVFHMIN-NEXT:    lh a1, 18(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 10, e16, m2, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 9
+; RV32-ZVFHMIN-NEXT:    andi a1, a2, 1024
+; RV32-ZVFHMIN-NEXT:    beqz a1, .LBB25_11
+; RV32-ZVFHMIN-NEXT:  .LBB25_78: # %cond.load28
+; RV32-ZVFHMIN-NEXT:    lh a1, 20(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 11, e16, m2, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 10
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 20
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB25_12
+; RV32-ZVFHMIN-NEXT:  .LBB25_79: # %cond.load31
+; RV32-ZVFHMIN-NEXT:    lh a1, 22(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 12, e16, m2, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 11
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 19
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB25_13
+; RV32-ZVFHMIN-NEXT:  .LBB25_80: # %cond.load34
+; RV32-ZVFHMIN-NEXT:    lh a1, 24(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 13, e16, m2, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 12
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 18
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB25_14
+; RV32-ZVFHMIN-NEXT:  .LBB25_81: # %cond.load37
+; RV32-ZVFHMIN-NEXT:    lh a1, 26(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 14, e16, m2, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 13
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 17
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB25_15
+; RV32-ZVFHMIN-NEXT:  .LBB25_82: # %cond.load40
+; RV32-ZVFHMIN-NEXT:    lh a1, 28(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 15, e16, m2, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 14
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 16
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB25_16
+; RV32-ZVFHMIN-NEXT:  .LBB25_83: # %cond.load43
+; RV32-ZVFHMIN-NEXT:    lh a1, 30(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 15
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 15
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB25_17
+; RV32-ZVFHMIN-NEXT:  .LBB25_84: # %cond.load46
+; RV32-ZVFHMIN-NEXT:    lh a1, 32(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 17, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 16
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 14
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB25_18
+; RV32-ZVFHMIN-NEXT:  .LBB25_85: # %cond.load49
+; RV32-ZVFHMIN-NEXT:    lh a1, 34(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 18, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 17
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 13
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB25_19
+; RV32-ZVFHMIN-NEXT:  .LBB25_86: # %cond.load52
+; RV32-ZVFHMIN-NEXT:    lh a1, 36(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 19, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 18
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 12
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB25_20
+; RV32-ZVFHMIN-NEXT:  .LBB25_87: # %cond.load55
+; RV32-ZVFHMIN-NEXT:    lh a1, 38(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 20, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 19
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 11
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB25_21
+; RV32-ZVFHMIN-NEXT:  .LBB25_88: # %cond.load58
+; RV32-ZVFHMIN-NEXT:    lh a1, 40(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 21, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 20
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 10
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB25_22
+; RV32-ZVFHMIN-NEXT:  .LBB25_89: # %cond.load61
+; RV32-ZVFHMIN-NEXT:    lh a1, 42(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 22, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 21
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 9
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB25_23
+; RV32-ZVFHMIN-NEXT:  .LBB25_90: # %cond.load64
+; RV32-ZVFHMIN-NEXT:    lh a1, 44(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 23, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 22
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 8
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB25_24
+; RV32-ZVFHMIN-NEXT:  .LBB25_91: # %cond.load67
+; RV32-ZVFHMIN-NEXT:    lh a1, 46(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 24, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 23
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 7
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB25_25
+; RV32-ZVFHMIN-NEXT:  .LBB25_92: # %cond.load70
+; RV32-ZVFHMIN-NEXT:    lh a1, 48(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 25, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 24
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 6
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB25_26
+; RV32-ZVFHMIN-NEXT:  .LBB25_93: # %cond.load73
+; RV32-ZVFHMIN-NEXT:    lh a1, 50(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 26, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 25
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 5
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB25_27
+; RV32-ZVFHMIN-NEXT:  .LBB25_94: # %cond.load76
+; RV32-ZVFHMIN-NEXT:    lh a1, 52(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 27, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 26
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 4
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB25_28
+; RV32-ZVFHMIN-NEXT:  .LBB25_95: # %cond.load79
+; RV32-ZVFHMIN-NEXT:    lh a1, 54(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 28, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 27
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 3
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB25_29
+; RV32-ZVFHMIN-NEXT:    j .LBB25_30
+; RV32-ZVFHMIN-NEXT:  .LBB25_96: # %cond.load91
+; RV32-ZVFHMIN-NEXT:    lh a2, 62(a0)
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 32
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 31
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 1
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB25_36
+; RV32-ZVFHMIN-NEXT:  .LBB25_97: # %cond.load94
+; RV32-ZVFHMIN-NEXT:    lh a2, 64(a0)
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 33
+; RV32-ZVFHMIN-NEXT:    li a3, 32
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 2
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB25_37
+; RV32-ZVFHMIN-NEXT:  .LBB25_98: # %cond.load97
+; RV32-ZVFHMIN-NEXT:    lh a2, 66(a0)
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 34
+; RV32-ZVFHMIN-NEXT:    li a3, 33
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 4
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB25_38
+; RV32-ZVFHMIN-NEXT:  .LBB25_99: # %cond.load100
+; RV32-ZVFHMIN-NEXT:    lh a2, 68(a0)
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 35
+; RV32-ZVFHMIN-NEXT:    li a3, 34
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 8
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB25_39
+; RV32-ZVFHMIN-NEXT:  .LBB25_100: # %cond.load103
+; RV32-ZVFHMIN-NEXT:    lh a2, 70(a0)
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 36
+; RV32-ZVFHMIN-NEXT:    li a3, 35
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 16
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB25_40
+; RV32-ZVFHMIN-NEXT:  .LBB25_101: # %cond.load106
+; RV32-ZVFHMIN-NEXT:    lh a2, 72(a0)
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 37
+; RV32-ZVFHMIN-NEXT:    li a3, 36
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 32
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB25_41
+; RV32-ZVFHMIN-NEXT:  .LBB25_102: # %cond.load109
+; RV32-ZVFHMIN-NEXT:    lh a2, 74(a0)
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 38
+; RV32-ZVFHMIN-NEXT:    li a3, 37
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 64
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB25_42
+; RV32-ZVFHMIN-NEXT:  .LBB25_103: # %cond.load112
+; RV32-ZVFHMIN-NEXT:    lh a2, 76(a0)
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 39
+; RV32-ZVFHMIN-NEXT:    li a3, 38
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 128
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB25_43
+; RV32-ZVFHMIN-NEXT:  .LBB25_104: # %cond.load115
+; RV32-ZVFHMIN-NEXT:    lh a2, 78(a0)
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 40
+; RV32-ZVFHMIN-NEXT:    li a3, 39
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 256
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB25_44
+; RV32-ZVFHMIN-NEXT:  .LBB25_105: # %cond.load118
+; RV32-ZVFHMIN-NEXT:    lh a2, 80(a0)
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 41
+; RV32-ZVFHMIN-NEXT:    li a3, 40
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 512
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB25_45
+; RV32-ZVFHMIN-NEXT:  .LBB25_106: # %cond.load121
+; RV32-ZVFHMIN-NEXT:    lh a2, 82(a0)
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 42
+; RV32-ZVFHMIN-NEXT:    li a3, 41
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 1024
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB25_46
+; RV32-ZVFHMIN-NEXT:  .LBB25_107: # %cond.load124
+; RV32-ZVFHMIN-NEXT:    lh a2, 84(a0)
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 43
+; RV32-ZVFHMIN-NEXT:    li a3, 42
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 20
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB25_47
+; RV32-ZVFHMIN-NEXT:  .LBB25_108: # %cond.load127
+; RV32-ZVFHMIN-NEXT:    lh a2, 86(a0)
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 44
+; RV32-ZVFHMIN-NEXT:    li a3, 43
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 19
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB25_48
+; RV32-ZVFHMIN-NEXT:  .LBB25_109: # %cond.load130
+; RV32-ZVFHMIN-NEXT:    lh a2, 88(a0)
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 45
+; RV32-ZVFHMIN-NEXT:    li a3, 44
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 18
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB25_49
+; RV32-ZVFHMIN-NEXT:  .LBB25_110: # %cond.load133
+; RV32-ZVFHMIN-NEXT:    lh a2, 90(a0)
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 46
+; RV32-ZVFHMIN-NEXT:    li a3, 45
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 17
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB25_50
+; RV32-ZVFHMIN-NEXT:  .LBB25_111: # %cond.load136
+; RV32-ZVFHMIN-NEXT:    lh a2, 92(a0)
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 47
+; RV32-ZVFHMIN-NEXT:    li a3, 46
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 16
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB25_51
+; RV32-ZVFHMIN-NEXT:  .LBB25_112: # %cond.load139
+; RV32-ZVFHMIN-NEXT:    lh a2, 94(a0)
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 48
+; RV32-ZVFHMIN-NEXT:    li a3, 47
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 15
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB25_52
+; RV32-ZVFHMIN-NEXT:  .LBB25_113: # %cond.load142
+; RV32-ZVFHMIN-NEXT:    lh a2, 96(a0)
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 49
+; RV32-ZVFHMIN-NEXT:    li a3, 48
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 14
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB25_53
+; RV32-ZVFHMIN-NEXT:  .LBB25_114: # %cond.load145
+; RV32-ZVFHMIN-NEXT:    lh a2, 98(a0)
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 50
+; RV32-ZVFHMIN-NEXT:    li a3, 49
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 13
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB25_54
+; RV32-ZVFHMIN-NEXT:  .LBB25_115: # %cond.load148
+; RV32-ZVFHMIN-NEXT:    lh a2, 100(a0)
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 51
+; RV32-ZVFHMIN-NEXT:    li a3, 50
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 12
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB25_55
+; RV32-ZVFHMIN-NEXT:  .LBB25_116: # %cond.load151
+; RV32-ZVFHMIN-NEXT:    lh a2, 102(a0)
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 52
+; RV32-ZVFHMIN-NEXT:    li a3, 51
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 11
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB25_56
+; RV32-ZVFHMIN-NEXT:  .LBB25_117: # %cond.load154
+; RV32-ZVFHMIN-NEXT:    lh a2, 104(a0)
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 53
+; RV32-ZVFHMIN-NEXT:    li a3, 52
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 10
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB25_57
+; RV32-ZVFHMIN-NEXT:  .LBB25_118: # %cond.load157
+; RV32-ZVFHMIN-NEXT:    lh a2, 106(a0)
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 54
+; RV32-ZVFHMIN-NEXT:    li a3, 53
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 9
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB25_58
+; RV32-ZVFHMIN-NEXT:  .LBB25_119: # %cond.load160
+; RV32-ZVFHMIN-NEXT:    lh a2, 108(a0)
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 55
+; RV32-ZVFHMIN-NEXT:    li a3, 54
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 8
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB25_59
+; RV32-ZVFHMIN-NEXT:  .LBB25_120: # %cond.load163
+; RV32-ZVFHMIN-NEXT:    lh a2, 110(a0)
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 56
+; RV32-ZVFHMIN-NEXT:    li a3, 55
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 7
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB25_60
+; RV32-ZVFHMIN-NEXT:  .LBB25_121: # %cond.load166
+; RV32-ZVFHMIN-NEXT:    lh a2, 112(a0)
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 57
+; RV32-ZVFHMIN-NEXT:    li a3, 56
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 6
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB25_61
+; RV32-ZVFHMIN-NEXT:  .LBB25_122: # %cond.load169
+; RV32-ZVFHMIN-NEXT:    lh a2, 114(a0)
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 58
+; RV32-ZVFHMIN-NEXT:    li a3, 57
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 5
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB25_62
+; RV32-ZVFHMIN-NEXT:  .LBB25_123: # %cond.load172
+; RV32-ZVFHMIN-NEXT:    lh a2, 116(a0)
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 59
+; RV32-ZVFHMIN-NEXT:    li a3, 58
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 4
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB25_63
+; RV32-ZVFHMIN-NEXT:  .LBB25_124: # %cond.load175
+; RV32-ZVFHMIN-NEXT:    lh a2, 118(a0)
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 60
+; RV32-ZVFHMIN-NEXT:    li a3, 59
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 3
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB25_64
+; RV32-ZVFHMIN-NEXT:  .LBB25_125: # %cond.load178
+; RV32-ZVFHMIN-NEXT:    lh a2, 120(a0)
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 61
+; RV32-ZVFHMIN-NEXT:    li a3, 60
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 2
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB25_65
+; RV32-ZVFHMIN-NEXT:  .LBB25_126: # %cond.load181
+; RV32-ZVFHMIN-NEXT:    lh a2, 122(a0)
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 62
+; RV32-ZVFHMIN-NEXT:    li a3, 61
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 1
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB25_66
+; RV32-ZVFHMIN-NEXT:  .LBB25_127: # %cond.load184
+; RV32-ZVFHMIN-NEXT:    lh a2, 124(a0)
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 63
+; RV32-ZVFHMIN-NEXT:    li a3, 62
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB25_67
+; RV32-ZVFHMIN-NEXT:  .LBB25_128: # %cond.load187
+; RV32-ZVFHMIN-NEXT:    lh a0, 126(a0)
+; RV32-ZVFHMIN-NEXT:    li a1, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a0
+; RV32-ZVFHMIN-NEXT:    li a0, 63
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a0
+; RV32-ZVFHMIN-NEXT:    ret
+;
+; RV64-ZVFHMIN-LABEL: masked_load_v64f16:
+; RV64-ZVFHMIN:       # %bb.0:
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a1, v0
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 1
+; RV64-ZVFHMIN-NEXT:    # implicit-def: $v8m8
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB25_65
+; RV64-ZVFHMIN-NEXT:  # %bb.1: # %else
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 2
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB25_66
+; RV64-ZVFHMIN-NEXT:  .LBB25_2: # %else2
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 4
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB25_67
+; RV64-ZVFHMIN-NEXT:  .LBB25_3: # %else5
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 8
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB25_68
+; RV64-ZVFHMIN-NEXT:  .LBB25_4: # %else8
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 16
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB25_69
+; RV64-ZVFHMIN-NEXT:  .LBB25_5: # %else11
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 32
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB25_70
+; RV64-ZVFHMIN-NEXT:  .LBB25_6: # %else14
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 64
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB25_71
+; RV64-ZVFHMIN-NEXT:  .LBB25_7: # %else17
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 128
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB25_72
+; RV64-ZVFHMIN-NEXT:  .LBB25_8: # %else20
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 256
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB25_73
+; RV64-ZVFHMIN-NEXT:  .LBB25_9: # %else23
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 512
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB25_74
+; RV64-ZVFHMIN-NEXT:  .LBB25_10: # %else26
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 1024
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB25_75
+; RV64-ZVFHMIN-NEXT:  .LBB25_11: # %else29
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 52
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_76
+; RV64-ZVFHMIN-NEXT:  .LBB25_12: # %else32
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 51
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_77
+; RV64-ZVFHMIN-NEXT:  .LBB25_13: # %else35
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 50
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_78
+; RV64-ZVFHMIN-NEXT:  .LBB25_14: # %else38
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 49
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_79
+; RV64-ZVFHMIN-NEXT:  .LBB25_15: # %else41
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 48
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_80
+; RV64-ZVFHMIN-NEXT:  .LBB25_16: # %else44
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 47
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_81
+; RV64-ZVFHMIN-NEXT:  .LBB25_17: # %else47
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 46
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_82
+; RV64-ZVFHMIN-NEXT:  .LBB25_18: # %else50
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 45
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_83
+; RV64-ZVFHMIN-NEXT:  .LBB25_19: # %else53
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 44
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_84
+; RV64-ZVFHMIN-NEXT:  .LBB25_20: # %else56
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 43
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_85
+; RV64-ZVFHMIN-NEXT:  .LBB25_21: # %else59
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 42
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_86
+; RV64-ZVFHMIN-NEXT:  .LBB25_22: # %else62
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 41
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_87
+; RV64-ZVFHMIN-NEXT:  .LBB25_23: # %else65
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 40
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_88
+; RV64-ZVFHMIN-NEXT:  .LBB25_24: # %else68
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 39
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_89
+; RV64-ZVFHMIN-NEXT:  .LBB25_25: # %else71
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 38
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_90
+; RV64-ZVFHMIN-NEXT:  .LBB25_26: # %else74
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 37
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_91
+; RV64-ZVFHMIN-NEXT:  .LBB25_27: # %else77
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 36
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_92
+; RV64-ZVFHMIN-NEXT:  .LBB25_28: # %else80
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 35
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_93
+; RV64-ZVFHMIN-NEXT:  .LBB25_29: # %else83
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 34
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_94
+; RV64-ZVFHMIN-NEXT:  .LBB25_30: # %else86
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 33
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_95
+; RV64-ZVFHMIN-NEXT:  .LBB25_31: # %else89
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 32
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_96
+; RV64-ZVFHMIN-NEXT:  .LBB25_32: # %else92
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 31
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_97
+; RV64-ZVFHMIN-NEXT:  .LBB25_33: # %else95
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 30
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_98
+; RV64-ZVFHMIN-NEXT:  .LBB25_34: # %else98
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 29
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_99
+; RV64-ZVFHMIN-NEXT:  .LBB25_35: # %else101
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 28
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_100
+; RV64-ZVFHMIN-NEXT:  .LBB25_36: # %else104
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 27
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_101
+; RV64-ZVFHMIN-NEXT:  .LBB25_37: # %else107
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 26
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_102
+; RV64-ZVFHMIN-NEXT:  .LBB25_38: # %else110
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 25
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_103
+; RV64-ZVFHMIN-NEXT:  .LBB25_39: # %else113
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 24
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_104
+; RV64-ZVFHMIN-NEXT:  .LBB25_40: # %else116
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 23
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_105
+; RV64-ZVFHMIN-NEXT:  .LBB25_41: # %else119
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 22
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_106
+; RV64-ZVFHMIN-NEXT:  .LBB25_42: # %else122
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 21
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_107
+; RV64-ZVFHMIN-NEXT:  .LBB25_43: # %else125
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 20
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_108
+; RV64-ZVFHMIN-NEXT:  .LBB25_44: # %else128
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 19
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_109
+; RV64-ZVFHMIN-NEXT:  .LBB25_45: # %else131
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 18
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_110
+; RV64-ZVFHMIN-NEXT:  .LBB25_46: # %else134
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 17
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_111
+; RV64-ZVFHMIN-NEXT:  .LBB25_47: # %else137
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 16
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_112
+; RV64-ZVFHMIN-NEXT:  .LBB25_48: # %else140
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 15
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_113
+; RV64-ZVFHMIN-NEXT:  .LBB25_49: # %else143
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 14
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_114
+; RV64-ZVFHMIN-NEXT:  .LBB25_50: # %else146
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 13
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_115
+; RV64-ZVFHMIN-NEXT:  .LBB25_51: # %else149
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 12
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_116
+; RV64-ZVFHMIN-NEXT:  .LBB25_52: # %else152
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 11
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_117
+; RV64-ZVFHMIN-NEXT:  .LBB25_53: # %else155
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 10
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_118
+; RV64-ZVFHMIN-NEXT:  .LBB25_54: # %else158
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 9
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_119
+; RV64-ZVFHMIN-NEXT:  .LBB25_55: # %else161
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 8
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_120
+; RV64-ZVFHMIN-NEXT:  .LBB25_56: # %else164
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 7
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_121
+; RV64-ZVFHMIN-NEXT:  .LBB25_57: # %else167
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 6
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_122
+; RV64-ZVFHMIN-NEXT:  .LBB25_58: # %else170
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 5
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_123
+; RV64-ZVFHMIN-NEXT:  .LBB25_59: # %else173
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 4
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_124
+; RV64-ZVFHMIN-NEXT:  .LBB25_60: # %else176
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 3
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_125
+; RV64-ZVFHMIN-NEXT:  .LBB25_61: # %else179
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 2
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_126
+; RV64-ZVFHMIN-NEXT:  .LBB25_62: # %else182
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 1
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_127
+; RV64-ZVFHMIN-NEXT:  .LBB25_63: # %else185
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB25_128
+; RV64-ZVFHMIN-NEXT:  .LBB25_64: # %else188
+; RV64-ZVFHMIN-NEXT:    ret
+; RV64-ZVFHMIN-NEXT:  .LBB25_65: # %cond.load
+; RV64-ZVFHMIN-NEXT:    lh a2, 0(a0)
+; RV64-ZVFHMIN-NEXT:    fmv.x.h a3, fa5
+; RV64-ZVFHMIN-NEXT:    li a4, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a4, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.v.x v8, a3
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v8, a2
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 2
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB25_2
+; RV64-ZVFHMIN-NEXT:  .LBB25_66: # %cond.load1
+; RV64-ZVFHMIN-NEXT:    lh a2, 2(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 1
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 4
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB25_3
+; RV64-ZVFHMIN-NEXT:  .LBB25_67: # %cond.load4
+; RV64-ZVFHMIN-NEXT:    lh a2, 4(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 2
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 8
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB25_4
+; RV64-ZVFHMIN-NEXT:  .LBB25_68: # %cond.load7
+; RV64-ZVFHMIN-NEXT:    lh a2, 6(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 3
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 16
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB25_5
+; RV64-ZVFHMIN-NEXT:  .LBB25_69: # %cond.load10
+; RV64-ZVFHMIN-NEXT:    lh a2, 8(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 4
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 32
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB25_6
+; RV64-ZVFHMIN-NEXT:  .LBB25_70: # %cond.load13
+; RV64-ZVFHMIN-NEXT:    lh a2, 10(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 5
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 64
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB25_7
+; RV64-ZVFHMIN-NEXT:  .LBB25_71: # %cond.load16
+; RV64-ZVFHMIN-NEXT:    lh a2, 12(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 6
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 128
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB25_8
+; RV64-ZVFHMIN-NEXT:  .LBB25_72: # %cond.load19
+; RV64-ZVFHMIN-NEXT:    lh a2, 14(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 7
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 256
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB25_9
+; RV64-ZVFHMIN-NEXT:  .LBB25_73: # %cond.load22
+; RV64-ZVFHMIN-NEXT:    lh a2, 16(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 9, e16, m2, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 8
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 512
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB25_10
+; RV64-ZVFHMIN-NEXT:  .LBB25_74: # %cond.load25
+; RV64-ZVFHMIN-NEXT:    lh a2, 18(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 10, e16, m2, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 9
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 1024
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB25_11
+; RV64-ZVFHMIN-NEXT:  .LBB25_75: # %cond.load28
+; RV64-ZVFHMIN-NEXT:    lh a2, 20(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 11, e16, m2, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 10
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 52
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_12
+; RV64-ZVFHMIN-NEXT:  .LBB25_76: # %cond.load31
+; RV64-ZVFHMIN-NEXT:    lh a2, 22(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 12, e16, m2, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 11
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 51
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_13
+; RV64-ZVFHMIN-NEXT:  .LBB25_77: # %cond.load34
+; RV64-ZVFHMIN-NEXT:    lh a2, 24(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 13, e16, m2, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 12
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 50
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_14
+; RV64-ZVFHMIN-NEXT:  .LBB25_78: # %cond.load37
+; RV64-ZVFHMIN-NEXT:    lh a2, 26(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 14, e16, m2, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 13
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 49
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_15
+; RV64-ZVFHMIN-NEXT:  .LBB25_79: # %cond.load40
+; RV64-ZVFHMIN-NEXT:    lh a2, 28(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 15, e16, m2, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 14
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 48
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_16
+; RV64-ZVFHMIN-NEXT:  .LBB25_80: # %cond.load43
+; RV64-ZVFHMIN-NEXT:    lh a2, 30(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 15
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 47
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_17
+; RV64-ZVFHMIN-NEXT:  .LBB25_81: # %cond.load46
+; RV64-ZVFHMIN-NEXT:    lh a2, 32(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 17, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 16
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 46
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_18
+; RV64-ZVFHMIN-NEXT:  .LBB25_82: # %cond.load49
+; RV64-ZVFHMIN-NEXT:    lh a2, 34(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 18, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 17
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 45
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_19
+; RV64-ZVFHMIN-NEXT:  .LBB25_83: # %cond.load52
+; RV64-ZVFHMIN-NEXT:    lh a2, 36(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 19, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 18
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 44
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_20
+; RV64-ZVFHMIN-NEXT:  .LBB25_84: # %cond.load55
+; RV64-ZVFHMIN-NEXT:    lh a2, 38(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 20, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 19
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 43
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_21
+; RV64-ZVFHMIN-NEXT:  .LBB25_85: # %cond.load58
+; RV64-ZVFHMIN-NEXT:    lh a2, 40(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 21, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 20
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 42
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_22
+; RV64-ZVFHMIN-NEXT:  .LBB25_86: # %cond.load61
+; RV64-ZVFHMIN-NEXT:    lh a2, 42(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 22, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 21
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 41
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_23
+; RV64-ZVFHMIN-NEXT:  .LBB25_87: # %cond.load64
+; RV64-ZVFHMIN-NEXT:    lh a2, 44(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 23, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 22
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 40
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_24
+; RV64-ZVFHMIN-NEXT:  .LBB25_88: # %cond.load67
+; RV64-ZVFHMIN-NEXT:    lh a2, 46(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 24, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 23
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 39
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_25
+; RV64-ZVFHMIN-NEXT:  .LBB25_89: # %cond.load70
+; RV64-ZVFHMIN-NEXT:    lh a2, 48(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 25, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 24
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 38
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_26
+; RV64-ZVFHMIN-NEXT:  .LBB25_90: # %cond.load73
+; RV64-ZVFHMIN-NEXT:    lh a2, 50(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 26, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 25
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 37
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_27
+; RV64-ZVFHMIN-NEXT:  .LBB25_91: # %cond.load76
+; RV64-ZVFHMIN-NEXT:    lh a2, 52(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 27, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 26
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 36
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_28
+; RV64-ZVFHMIN-NEXT:  .LBB25_92: # %cond.load79
+; RV64-ZVFHMIN-NEXT:    lh a2, 54(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 28, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 27
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 35
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_29
+; RV64-ZVFHMIN-NEXT:  .LBB25_93: # %cond.load82
+; RV64-ZVFHMIN-NEXT:    lh a2, 56(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 29, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 28
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 34
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_30
+; RV64-ZVFHMIN-NEXT:  .LBB25_94: # %cond.load85
+; RV64-ZVFHMIN-NEXT:    lh a2, 58(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 30, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 29
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 33
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_31
+; RV64-ZVFHMIN-NEXT:  .LBB25_95: # %cond.load88
+; RV64-ZVFHMIN-NEXT:    lh a2, 60(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 31, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 30
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 32
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_32
+; RV64-ZVFHMIN-NEXT:  .LBB25_96: # %cond.load91
+; RV64-ZVFHMIN-NEXT:    lh a2, 62(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 32
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 31
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 31
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_33
+; RV64-ZVFHMIN-NEXT:  .LBB25_97: # %cond.load94
+; RV64-ZVFHMIN-NEXT:    lh a2, 64(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 33
+; RV64-ZVFHMIN-NEXT:    li a3, 32
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 30
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_34
+; RV64-ZVFHMIN-NEXT:  .LBB25_98: # %cond.load97
+; RV64-ZVFHMIN-NEXT:    lh a2, 66(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 34
+; RV64-ZVFHMIN-NEXT:    li a3, 33
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 29
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_35
+; RV64-ZVFHMIN-NEXT:  .LBB25_99: # %cond.load100
+; RV64-ZVFHMIN-NEXT:    lh a2, 68(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 35
+; RV64-ZVFHMIN-NEXT:    li a3, 34
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 28
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_36
+; RV64-ZVFHMIN-NEXT:  .LBB25_100: # %cond.load103
+; RV64-ZVFHMIN-NEXT:    lh a2, 70(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 36
+; RV64-ZVFHMIN-NEXT:    li a3, 35
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 27
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_37
+; RV64-ZVFHMIN-NEXT:  .LBB25_101: # %cond.load106
+; RV64-ZVFHMIN-NEXT:    lh a2, 72(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 37
+; RV64-ZVFHMIN-NEXT:    li a3, 36
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 26
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_38
+; RV64-ZVFHMIN-NEXT:  .LBB25_102: # %cond.load109
+; RV64-ZVFHMIN-NEXT:    lh a2, 74(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 38
+; RV64-ZVFHMIN-NEXT:    li a3, 37
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 25
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_39
+; RV64-ZVFHMIN-NEXT:  .LBB25_103: # %cond.load112
+; RV64-ZVFHMIN-NEXT:    lh a2, 76(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 39
+; RV64-ZVFHMIN-NEXT:    li a3, 38
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 24
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_40
+; RV64-ZVFHMIN-NEXT:  .LBB25_104: # %cond.load115
+; RV64-ZVFHMIN-NEXT:    lh a2, 78(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 40
+; RV64-ZVFHMIN-NEXT:    li a3, 39
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 23
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_41
+; RV64-ZVFHMIN-NEXT:  .LBB25_105: # %cond.load118
+; RV64-ZVFHMIN-NEXT:    lh a2, 80(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 41
+; RV64-ZVFHMIN-NEXT:    li a3, 40
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 22
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_42
+; RV64-ZVFHMIN-NEXT:  .LBB25_106: # %cond.load121
+; RV64-ZVFHMIN-NEXT:    lh a2, 82(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 42
+; RV64-ZVFHMIN-NEXT:    li a3, 41
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 21
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_43
+; RV64-ZVFHMIN-NEXT:  .LBB25_107: # %cond.load124
+; RV64-ZVFHMIN-NEXT:    lh a2, 84(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 43
+; RV64-ZVFHMIN-NEXT:    li a3, 42
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 20
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_44
+; RV64-ZVFHMIN-NEXT:  .LBB25_108: # %cond.load127
+; RV64-ZVFHMIN-NEXT:    lh a2, 86(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 44
+; RV64-ZVFHMIN-NEXT:    li a3, 43
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 19
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_45
+; RV64-ZVFHMIN-NEXT:  .LBB25_109: # %cond.load130
+; RV64-ZVFHMIN-NEXT:    lh a2, 88(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 45
+; RV64-ZVFHMIN-NEXT:    li a3, 44
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 18
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_46
+; RV64-ZVFHMIN-NEXT:  .LBB25_110: # %cond.load133
+; RV64-ZVFHMIN-NEXT:    lh a2, 90(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 46
+; RV64-ZVFHMIN-NEXT:    li a3, 45
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 17
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_47
+; RV64-ZVFHMIN-NEXT:  .LBB25_111: # %cond.load136
+; RV64-ZVFHMIN-NEXT:    lh a2, 92(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 47
+; RV64-ZVFHMIN-NEXT:    li a3, 46
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 16
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_48
+; RV64-ZVFHMIN-NEXT:  .LBB25_112: # %cond.load139
+; RV64-ZVFHMIN-NEXT:    lh a2, 94(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 48
+; RV64-ZVFHMIN-NEXT:    li a3, 47
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 15
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_49
+; RV64-ZVFHMIN-NEXT:  .LBB25_113: # %cond.load142
+; RV64-ZVFHMIN-NEXT:    lh a2, 96(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 49
+; RV64-ZVFHMIN-NEXT:    li a3, 48
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 14
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_50
+; RV64-ZVFHMIN-NEXT:  .LBB25_114: # %cond.load145
+; RV64-ZVFHMIN-NEXT:    lh a2, 98(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 50
+; RV64-ZVFHMIN-NEXT:    li a3, 49
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 13
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_51
+; RV64-ZVFHMIN-NEXT:  .LBB25_115: # %cond.load148
+; RV64-ZVFHMIN-NEXT:    lh a2, 100(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 51
+; RV64-ZVFHMIN-NEXT:    li a3, 50
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 12
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_52
+; RV64-ZVFHMIN-NEXT:  .LBB25_116: # %cond.load151
+; RV64-ZVFHMIN-NEXT:    lh a2, 102(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 52
+; RV64-ZVFHMIN-NEXT:    li a3, 51
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 11
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_53
+; RV64-ZVFHMIN-NEXT:  .LBB25_117: # %cond.load154
+; RV64-ZVFHMIN-NEXT:    lh a2, 104(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 53
+; RV64-ZVFHMIN-NEXT:    li a3, 52
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 10
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_54
+; RV64-ZVFHMIN-NEXT:  .LBB25_118: # %cond.load157
+; RV64-ZVFHMIN-NEXT:    lh a2, 106(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 54
+; RV64-ZVFHMIN-NEXT:    li a3, 53
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 9
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_55
+; RV64-ZVFHMIN-NEXT:  .LBB25_119: # %cond.load160
+; RV64-ZVFHMIN-NEXT:    lh a2, 108(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 55
+; RV64-ZVFHMIN-NEXT:    li a3, 54
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 8
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_56
+; RV64-ZVFHMIN-NEXT:  .LBB25_120: # %cond.load163
+; RV64-ZVFHMIN-NEXT:    lh a2, 110(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 56
+; RV64-ZVFHMIN-NEXT:    li a3, 55
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 7
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_57
+; RV64-ZVFHMIN-NEXT:  .LBB25_121: # %cond.load166
+; RV64-ZVFHMIN-NEXT:    lh a2, 112(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 57
+; RV64-ZVFHMIN-NEXT:    li a3, 56
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 6
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_58
+; RV64-ZVFHMIN-NEXT:  .LBB25_122: # %cond.load169
+; RV64-ZVFHMIN-NEXT:    lh a2, 114(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 58
+; RV64-ZVFHMIN-NEXT:    li a3, 57
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 5
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_59
+; RV64-ZVFHMIN-NEXT:  .LBB25_123: # %cond.load172
+; RV64-ZVFHMIN-NEXT:    lh a2, 116(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 59
+; RV64-ZVFHMIN-NEXT:    li a3, 58
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 4
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_60
+; RV64-ZVFHMIN-NEXT:  .LBB25_124: # %cond.load175
+; RV64-ZVFHMIN-NEXT:    lh a2, 118(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 60
+; RV64-ZVFHMIN-NEXT:    li a3, 59
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 3
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_61
+; RV64-ZVFHMIN-NEXT:  .LBB25_125: # %cond.load178
+; RV64-ZVFHMIN-NEXT:    lh a2, 120(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 61
+; RV64-ZVFHMIN-NEXT:    li a3, 60
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 2
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_62
+; RV64-ZVFHMIN-NEXT:  .LBB25_126: # %cond.load181
+; RV64-ZVFHMIN-NEXT:    lh a2, 122(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 62
+; RV64-ZVFHMIN-NEXT:    li a3, 61
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 1
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_63
+; RV64-ZVFHMIN-NEXT:  .LBB25_127: # %cond.load184
+; RV64-ZVFHMIN-NEXT:    lh a2, 124(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 63
+; RV64-ZVFHMIN-NEXT:    li a3, 62
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB25_64
+; RV64-ZVFHMIN-NEXT:  .LBB25_128: # %cond.load187
+; RV64-ZVFHMIN-NEXT:    lh a0, 126(a0)
+; RV64-ZVFHMIN-NEXT:    li a1, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a0
+; RV64-ZVFHMIN-NEXT:    li a0, 63
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a0
+; RV64-ZVFHMIN-NEXT:    ret
   %load = call <64 x half> @llvm.masked.load.v64f16(ptr %a, i32 8, <64 x i1> %mask, <64 x half> undef)
-  store <64 x half> %load, ptr %res_ptr
-  ret void
+  ret <64 x half> %load
 }
-declare <64 x half> @llvm.masked.load.v64f16(ptr, i32, <64 x i1>, <64 x half>)
 
-define void @masked_load_v64f32(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
+define <64 x float> @masked_load_v64f32(ptr %a, <64 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v64f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a3, a1, 128
-; CHECK-NEXT:    li a4, 32
-; CHECK-NEXT:    vsetvli zero, a4, e32, m8, ta, ma
-; CHECK-NEXT:    vle32.v v16, (a1)
-; CHECK-NEXT:    vle32.v v24, (a3)
-; CHECK-NEXT:    fmv.w.x fa5, zero
-; CHECK-NEXT:    vmfeq.vf v8, v16, fa5
-; CHECK-NEXT:    vmfeq.vf v0, v24, fa5
-; CHECK-NEXT:    addi a1, a0, 128
-; CHECK-NEXT:    vle32.v v16, (a1), v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    li a1, 32
+; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0), v0.t
-; CHECK-NEXT:    vse32.v v8, (a2)
-; CHECK-NEXT:    addi a0, a2, 128
-; CHECK-NEXT:    vse32.v v16, (a0)
+; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
+; CHECK-NEXT:    vslidedown.vi v0, v0, 4
+; CHECK-NEXT:    addi a0, a0, 128
+; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT:    vle32.v v16, (a0), v0.t
 ; CHECK-NEXT:    ret
-  %m = load <64 x float>, ptr %m_ptr
-  %mask = fcmp oeq <64 x float> %m, zeroinitializer
   %load = call <64 x float> @llvm.masked.load.v64f32(ptr %a, i32 8, <64 x i1> %mask, <64 x float> undef)
-  store <64 x float> %load, ptr %res_ptr
-  ret void
+  ret <64 x float> %load
 }
-declare <64 x float> @llvm.masked.load.v64f32(ptr, i32, <64 x i1>, <64 x float>)
 
-define void @masked_load_v128f16(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
-; CHECK-LABEL: masked_load_v128f16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a3, a1, 128
-; CHECK-NEXT:    li a4, 64
-; CHECK-NEXT:    vsetvli zero, a4, e16, m8, ta, ma
-; CHECK-NEXT:    vle16.v v16, (a1)
-; CHECK-NEXT:    vle16.v v24, (a3)
-; CHECK-NEXT:    fmv.h.x fa5, zero
-; CHECK-NEXT:    vmfeq.vf v8, v16, fa5
-; CHECK-NEXT:    vmfeq.vf v0, v24, fa5
-; CHECK-NEXT:    addi a1, a0, 128
-; CHECK-NEXT:    vle16.v v16, (a1), v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vle16.v v8, (a0), v0.t
-; CHECK-NEXT:    vse16.v v8, (a2)
-; CHECK-NEXT:    addi a0, a2, 128
-; CHECK-NEXT:    vse16.v v16, (a0)
-; CHECK-NEXT:    ret
-  %m = load <128 x half>, ptr %m_ptr
-  %mask = fcmp oeq <128 x half> %m, zeroinitializer
+define <128 x bfloat> @masked_load_v128bf16(ptr %a, <128 x i1> %mask) {
+; RV32-LABEL: masked_load_v128bf16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT:    vmv.x.s a2, v0
+; RV32-NEXT:    andi a1, a2, 1
+; RV32-NEXT:    beqz a1, .LBB27_2
+; RV32-NEXT:  # %bb.1: # %cond.load
+; RV32-NEXT:    lh a1, 0(a0)
+; RV32-NEXT:    fmv.x.h a3, fa5
+; RV32-NEXT:    li a4, 64
+; RV32-NEXT:    vsetvli zero, a4, e16, m8, ta, ma
+; RV32-NEXT:    vmv.v.x v8, a3
+; RV32-NEXT:    vsetvli zero, zero, e16, m8, tu, ma
+; RV32-NEXT:    vmv.s.x v8, a1
+; RV32-NEXT:    andi a1, a2, 2
+; RV32-NEXT:    bnez a1, .LBB27_3
+; RV32-NEXT:    j .LBB27_4
+; RV32-NEXT:  .LBB27_2:
+; RV32-NEXT:    # implicit-def: $v8m8
+; RV32-NEXT:    andi a1, a2, 2
+; RV32-NEXT:    beqz a1, .LBB27_4
+; RV32-NEXT:  .LBB27_3: # %cond.load1
+; RV32-NEXT:    lh a1, 2(a0)
+; RV32-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
+; RV32-NEXT:    vmv.s.x v16, a1
+; RV32-NEXT:    vslideup.vi v8, v16, 1
+; RV32-NEXT:  .LBB27_4: # %else2
+; RV32-NEXT:    andi a1, a2, 4
+; RV32-NEXT:    bnez a1, .LBB27_74
+; RV32-NEXT:  # %bb.5: # %else5
+; RV32-NEXT:    andi a1, a2, 8
+; RV32-NEXT:    bnez a1, .LBB27_75
+; RV32-NEXT:  .LBB27_6: # %else8
+; RV32-NEXT:    andi a1, a2, 16
+; RV32-NEXT:    bnez a1, .LBB27_76
+; RV32-NEXT:  .LBB27_7: # %else11
+; RV32-NEXT:    andi a1, a2, 32
+; RV32-NEXT:    bnez a1, .LBB27_77
+; RV32-NEXT:  .LBB27_8: # %else14
+; RV32-NEXT:    andi a1, a2, 64
+; RV32-NEXT:    bnez a1, .LBB27_78
+; RV32-NEXT:  .LBB27_9: # %else17
+; RV32-NEXT:    andi a1, a2, 128
+; RV32-NEXT:    bnez a1, .LBB27_79
+; RV32-NEXT:  .LBB27_10: # %else20
+; RV32-NEXT:    andi a1, a2, 256
+; RV32-NEXT:    bnez a1, .LBB27_80
+; RV32-NEXT:  .LBB27_11: # %else23
+; RV32-NEXT:    andi a1, a2, 512
+; RV32-NEXT:    bnez a1, .LBB27_81
+; RV32-NEXT:  .LBB27_12: # %else26
+; RV32-NEXT:    andi a1, a2, 1024
+; RV32-NEXT:    bnez a1, .LBB27_82
+; RV32-NEXT:  .LBB27_13: # %else29
+; RV32-NEXT:    slli a1, a2, 20
+; RV32-NEXT:    bltz a1, .LBB27_83
+; RV32-NEXT:  .LBB27_14: # %else32
+; RV32-NEXT:    slli a1, a2, 19
+; RV32-NEXT:    bltz a1, .LBB27_84
+; RV32-NEXT:  .LBB27_15: # %else35
+; RV32-NEXT:    slli a1, a2, 18
+; RV32-NEXT:    bltz a1, .LBB27_85
+; RV32-NEXT:  .LBB27_16: # %else38
+; RV32-NEXT:    slli a1, a2, 17
+; RV32-NEXT:    bltz a1, .LBB27_86
+; RV32-NEXT:  .LBB27_17: # %else41
+; RV32-NEXT:    slli a1, a2, 16
+; RV32-NEXT:    bltz a1, .LBB27_87
+; RV32-NEXT:  .LBB27_18: # %else44
+; RV32-NEXT:    slli a1, a2, 15
+; RV32-NEXT:    bltz a1, .LBB27_88
+; RV32-NEXT:  .LBB27_19: # %else47
+; RV32-NEXT:    slli a1, a2, 14
+; RV32-NEXT:    bltz a1, .LBB27_89
+; RV32-NEXT:  .LBB27_20: # %else50
+; RV32-NEXT:    slli a1, a2, 13
+; RV32-NEXT:    bltz a1, .LBB27_90
+; RV32-NEXT:  .LBB27_21: # %else53
+; RV32-NEXT:    slli a1, a2, 12
+; RV32-NEXT:    bltz a1, .LBB27_91
+; RV32-NEXT:  .LBB27_22: # %else56
+; RV32-NEXT:    slli a1, a2, 11
+; RV32-NEXT:    bltz a1, .LBB27_92
+; RV32-NEXT:  .LBB27_23: # %else59
+; RV32-NEXT:    slli a1, a2, 10
+; RV32-NEXT:    bltz a1, .LBB27_93
+; RV32-NEXT:  .LBB27_24: # %else62
+; RV32-NEXT:    slli a1, a2, 9
+; RV32-NEXT:    bltz a1, .LBB27_94
+; RV32-NEXT:  .LBB27_25: # %else65
+; RV32-NEXT:    slli a1, a2, 8
+; RV32-NEXT:    bltz a1, .LBB27_95
+; RV32-NEXT:  .LBB27_26: # %else68
+; RV32-NEXT:    slli a1, a2, 7
+; RV32-NEXT:    bltz a1, .LBB27_96
+; RV32-NEXT:  .LBB27_27: # %else71
+; RV32-NEXT:    slli a1, a2, 6
+; RV32-NEXT:    bltz a1, .LBB27_97
+; RV32-NEXT:  .LBB27_28: # %else74
+; RV32-NEXT:    slli a1, a2, 5
+; RV32-NEXT:    bltz a1, .LBB27_98
+; RV32-NEXT:  .LBB27_29: # %else77
+; RV32-NEXT:    slli a1, a2, 4
+; RV32-NEXT:    bltz a1, .LBB27_99
+; RV32-NEXT:  .LBB27_30: # %else80
+; RV32-NEXT:    slli a1, a2, 3
+; RV32-NEXT:    bgez a1, .LBB27_32
+; RV32-NEXT:  .LBB27_31: # %cond.load82
+; RV32-NEXT:    lh a1, 56(a0)
+; RV32-NEXT:    vsetivli zero, 29, e16, m4, tu, ma
+; RV32-NEXT:    vmv.s.x v16, a1
+; RV32-NEXT:    vmv8r.v v24, v8
+; RV32-NEXT:    vslideup.vi v8, v16, 28
+; RV32-NEXT:    vmv4r.v v24, v8
+; RV32-NEXT:    vmv8r.v v8, v24
+; RV32-NEXT:  .LBB27_32: # %else83
+; RV32-NEXT:    slli a3, a2, 2
+; RV32-NEXT:    li a1, 32
+; RV32-NEXT:    bgez a3, .LBB27_34
+; RV32-NEXT:  # %bb.33: # %cond.load85
+; RV32-NEXT:    lh a3, 58(a0)
+; RV32-NEXT:    vsetivli zero, 30, e16, m4, tu, ma
+; RV32-NEXT:    vmv.s.x v16, a3
+; RV32-NEXT:    vmv8r.v v24, v8
+; RV32-NEXT:    vslideup.vi v8, v16, 29
+; RV32-NEXT:    vmv4r.v v24, v8
+; RV32-NEXT:    vmv8r.v v8, v24
+; RV32-NEXT:  .LBB27_34: # %else86
+; RV32-NEXT:    slli a3, a2, 1
+; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT:    vsrl.vx v16, v0, a1
+; RV32-NEXT:    bgez a3, .LBB27_36
+; RV32-NEXT:  # %bb.35: # %cond.load88
+; RV32-NEXT:    lh a3, 60(a0)
+; RV32-NEXT:    vmv.s.x v20, a3
+; RV32-NEXT:    vmv8r.v v24, v8
+; RV32-NEXT:    vsetivli zero, 31, e16, m4, tu, ma
+; RV32-NEXT:    vslideup.vi v8, v20, 30
+; RV32-NEXT:    vmv4r.v v24, v8
+; RV32-NEXT:    vmv8r.v v8, v24
+; RV32-NEXT:  .LBB27_36: # %else89
+; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT:    vmv.x.s a3, v16
+; RV32-NEXT:    bltz a2, .LBB27_100
+; RV32-NEXT:  # %bb.37: # %else92
+; RV32-NEXT:    andi a2, a3, 1
+; RV32-NEXT:    bnez a2, .LBB27_101
+; RV32-NEXT:  .LBB27_38: # %else95
+; RV32-NEXT:    andi a2, a3, 2
+; RV32-NEXT:    bnez a2, .LBB27_102
+; RV32-NEXT:  .LBB27_39: # %else98
+; RV32-NEXT:    andi a2, a3, 4
+; RV32-NEXT:    bnez a2, .LBB27_103
+; RV32-NEXT:  .LBB27_40: # %else101
+; RV32-NEXT:    andi a2, a3, 8
+; RV32-NEXT:    bnez a2, .LBB27_104
+; RV32-NEXT:  .LBB27_41: # %else104
+; RV32-NEXT:    andi a2, a3, 16
+; RV32-NEXT:    bnez a2, .LBB27_105
+; RV32-NEXT:  .LBB27_42: # %else107
+; RV32-NEXT:    andi a2, a3, 32
+; RV32-NEXT:    bnez a2, .LBB27_106
+; RV32-NEXT:  .LBB27_43: # %else110
+; RV32-NEXT:    andi a2, a3, 64
+; RV32-NEXT:    bnez a2, .LBB27_107
+; RV32-NEXT:  .LBB27_44: # %else113
+; RV32-NEXT:    andi a2, a3, 128
+; RV32-NEXT:    bnez a2, .LBB27_108
+; RV32-NEXT:  .LBB27_45: # %else116
+; RV32-NEXT:    andi a2, a3, 256
+; RV32-NEXT:    bnez a2, .LBB27_109
+; RV32-NEXT:  .LBB27_46: # %else119
+; RV32-NEXT:    andi a2, a3, 512
+; RV32-NEXT:    bnez a2, .LBB27_110
+; RV32-NEXT:  .LBB27_47: # %else122
+; RV32-NEXT:    andi a2, a3, 1024
+; RV32-NEXT:    bnez a2, .LBB27_111
+; RV32-NEXT:  .LBB27_48: # %else125
+; RV32-NEXT:    slli a2, a3, 20
+; RV32-NEXT:    bltz a2, .LBB27_112
+; RV32-NEXT:  .LBB27_49: # %else128
+; RV32-NEXT:    slli a2, a3, 19
+; RV32-NEXT:    bltz a2, .LBB27_113
+; RV32-NEXT:  .LBB27_50: # %else131
+; RV32-NEXT:    slli a2, a3, 18
+; RV32-NEXT:    bltz a2, .LBB27_114
+; RV32-NEXT:  .LBB27_51: # %else134
+; RV32-NEXT:    slli a2, a3, 17
+; RV32-NEXT:    bltz a2, .LBB27_115
+; RV32-NEXT:  .LBB27_52: # %else137
+; RV32-NEXT:    slli a2, a3, 16
+; RV32-NEXT:    bltz a2, .LBB27_116
+; RV32-NEXT:  .LBB27_53: # %else140
+; RV32-NEXT:    slli a2, a3, 15
+; RV32-NEXT:    bltz a2, .LBB27_117
+; RV32-NEXT:  .LBB27_54: # %else143
+; RV32-NEXT:    slli a2, a3, 14
+; RV32-NEXT:    bltz a2, .LBB27_118
+; RV32-NEXT:  .LBB27_55: # %else146
+; RV32-NEXT:    slli a2, a3, 13
+; RV32-NEXT:    bltz a2, .LBB27_119
+; RV32-NEXT:  .LBB27_56: # %else149
+; RV32-NEXT:    slli a2, a3, 12
+; RV32-NEXT:    bltz a2, .LBB27_120
+; RV32-NEXT:  .LBB27_57: # %else152
+; RV32-NEXT:    slli a2, a3, 11
+; RV32-NEXT:    bltz a2, .LBB27_121
+; RV32-NEXT:  .LBB27_58: # %else155
+; RV32-NEXT:    slli a2, a3, 10
+; RV32-NEXT:    bltz a2, .LBB27_122
+; RV32-NEXT:  .LBB27_59: # %else158
+; RV32-NEXT:    slli a2, a3, 9
+; RV32-NEXT:    bltz a2, .LBB27_123
+; RV32-NEXT:  .LBB27_60: # %else161
+; RV32-NEXT:    slli a2, a3, 8
+; RV32-NEXT:    bltz a2, .LBB27_124
+; RV32-NEXT:  .LBB27_61: # %else164
+; RV32-NEXT:    slli a2, a3, 7
+; RV32-NEXT:    bltz a2, .LBB27_125
+; RV32-NEXT:  .LBB27_62: # %else167
+; RV32-NEXT:    slli a2, a3, 6
+; RV32-NEXT:    bltz a2, .LBB27_126
+; RV32-NEXT:  .LBB27_63: # %else170
+; RV32-NEXT:    slli a2, a3, 5
+; RV32-NEXT:    bltz a2, .LBB27_127
+; RV32-NEXT:  .LBB27_64: # %else173
+; RV32-NEXT:    slli a2, a3, 4
+; RV32-NEXT:    bltz a2, .LBB27_128
+; RV32-NEXT:  .LBB27_65: # %else176
+; RV32-NEXT:    slli a2, a3, 3
+; RV32-NEXT:    bltz a2, .LBB27_129
+; RV32-NEXT:  .LBB27_66: # %else179
+; RV32-NEXT:    slli a2, a3, 2
+; RV32-NEXT:    bgez a2, .LBB27_68
+; RV32-NEXT:  .LBB27_67: # %cond.load181
+; RV32-NEXT:    lh a2, 122(a0)
+; RV32-NEXT:    li a4, 64
+; RV32-NEXT:    vsetvli zero, a4, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 62
+; RV32-NEXT:    li a4, 61
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a4
+; RV32-NEXT:  .LBB27_68: # %else182
+; RV32-NEXT:    slli a2, a3, 1
+; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT:    vslidedown.vi v24, v0, 1
+; RV32-NEXT:    bgez a2, .LBB27_70
+; RV32-NEXT:  # %bb.69: # %cond.load184
+; RV32-NEXT:    lh a2, 124(a0)
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 63
+; RV32-NEXT:    li a4, 62
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a4
+; RV32-NEXT:  .LBB27_70: # %else185
+; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT:    vmv.x.s a2, v24
+; RV32-NEXT:    bgez a3, .LBB27_72
+; RV32-NEXT:  # %bb.71: # %cond.load187
+; RV32-NEXT:    lh a3, 126(a0)
+; RV32-NEXT:    li a4, 64
+; RV32-NEXT:    vmv.s.x v16, a3
+; RV32-NEXT:    li a3, 63
+; RV32-NEXT:    vsetvli zero, a4, e16, m8, ta, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a3
+; RV32-NEXT:  .LBB27_72: # %else188
+; RV32-NEXT:    andi a3, a2, 1
+; RV32-NEXT:    beqz a3, .LBB27_130
+; RV32-NEXT:  # %bb.73: # %cond.load190
+; RV32-NEXT:    lh a3, 128(a0)
+; RV32-NEXT:    li a4, 64
+; RV32-NEXT:    vsetvli zero, a4, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a3
+; RV32-NEXT:    andi a3, a2, 2
+; RV32-NEXT:    bnez a3, .LBB27_131
+; RV32-NEXT:    j .LBB27_132
+; RV32-NEXT:  .LBB27_74: # %cond.load4
+; RV32-NEXT:    lh a1, 4(a0)
+; RV32-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
+; RV32-NEXT:    vmv.s.x v16, a1
+; RV32-NEXT:    vslideup.vi v8, v16, 2
+; RV32-NEXT:    andi a1, a2, 8
+; RV32-NEXT:    beqz a1, .LBB27_6
+; RV32-NEXT:  .LBB27_75: # %cond.load7
+; RV32-NEXT:    lh a1, 6(a0)
+; RV32-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
+; RV32-NEXT:    vmv.s.x v16, a1
+; RV32-NEXT:    vslideup.vi v8, v16, 3
+; RV32-NEXT:    andi a1, a2, 16
+; RV32-NEXT:    beqz a1, .LBB27_7
+; RV32-NEXT:  .LBB27_76: # %cond.load10
+; RV32-NEXT:    lh a1, 8(a0)
+; RV32-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
+; RV32-NEXT:    vmv.s.x v16, a1
+; RV32-NEXT:    vslideup.vi v8, v16, 4
+; RV32-NEXT:    andi a1, a2, 32
+; RV32-NEXT:    beqz a1, .LBB27_8
+; RV32-NEXT:  .LBB27_77: # %cond.load13
+; RV32-NEXT:    lh a1, 10(a0)
+; RV32-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
+; RV32-NEXT:    vmv.s.x v16, a1
+; RV32-NEXT:    vslideup.vi v8, v16, 5
+; RV32-NEXT:    andi a1, a2, 64
+; RV32-NEXT:    beqz a1, .LBB27_9
+; RV32-NEXT:  .LBB27_78: # %cond.load16
+; RV32-NEXT:    lh a1, 12(a0)
+; RV32-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
+; RV32-NEXT:    vmv.s.x v16, a1
+; RV32-NEXT:    vslideup.vi v8, v16, 6
+; RV32-NEXT:    andi a1, a2, 128
+; RV32-NEXT:    beqz a1, .LBB27_10
+; RV32-NEXT:  .LBB27_79: # %cond.load19
+; RV32-NEXT:    lh a1, 14(a0)
+; RV32-NEXT:    vsetivli zero, 8, e16, m1, tu, ma
+; RV32-NEXT:    vmv.s.x v16, a1
+; RV32-NEXT:    vslideup.vi v8, v16, 7
+; RV32-NEXT:    andi a1, a2, 256
+; RV32-NEXT:    beqz a1, .LBB27_11
+; RV32-NEXT:  .LBB27_80: # %cond.load22
+; RV32-NEXT:    lh a1, 16(a0)
+; RV32-NEXT:    vsetivli zero, 9, e16, m2, tu, ma
+; RV32-NEXT:    vmv.s.x v16, a1
+; RV32-NEXT:    vslideup.vi v8, v16, 8
+; RV32-NEXT:    andi a1, a2, 512
+; RV32-NEXT:    beqz a1, .LBB27_12
+; RV32-NEXT:  .LBB27_81: # %cond.load25
+; RV32-NEXT:    lh a1, 18(a0)
+; RV32-NEXT:    vsetivli zero, 10, e16, m2, tu, ma
+; RV32-NEXT:    vmv.s.x v16, a1
+; RV32-NEXT:    vslideup.vi v8, v16, 9
+; RV32-NEXT:    andi a1, a2, 1024
+; RV32-NEXT:    beqz a1, .LBB27_13
+; RV32-NEXT:  .LBB27_82: # %cond.load28
+; RV32-NEXT:    lh a1, 20(a0)
+; RV32-NEXT:    vsetivli zero, 11, e16, m2, tu, ma
+; RV32-NEXT:    vmv.s.x v16, a1
+; RV32-NEXT:    vslideup.vi v8, v16, 10
+; RV32-NEXT:    slli a1, a2, 20
+; RV32-NEXT:    bgez a1, .LBB27_14
+; RV32-NEXT:  .LBB27_83: # %cond.load31
+; RV32-NEXT:    lh a1, 22(a0)
+; RV32-NEXT:    vsetivli zero, 12, e16, m2, tu, ma
+; RV32-NEXT:    vmv.s.x v16, a1
+; RV32-NEXT:    vslideup.vi v8, v16, 11
+; RV32-NEXT:    slli a1, a2, 19
+; RV32-NEXT:    bgez a1, .LBB27_15
+; RV32-NEXT:  .LBB27_84: # %cond.load34
+; RV32-NEXT:    lh a1, 24(a0)
+; RV32-NEXT:    vsetivli zero, 13, e16, m2, tu, ma
+; RV32-NEXT:    vmv.s.x v16, a1
+; RV32-NEXT:    vslideup.vi v8, v16, 12
+; RV32-NEXT:    slli a1, a2, 18
+; RV32-NEXT:    bgez a1, .LBB27_16
+; RV32-NEXT:  .LBB27_85: # %cond.load37
+; RV32-NEXT:    lh a1, 26(a0)
+; RV32-NEXT:    vsetivli zero, 14, e16, m2, tu, ma
+; RV32-NEXT:    vmv.s.x v16, a1
+; RV32-NEXT:    vslideup.vi v8, v16, 13
+; RV32-NEXT:    slli a1, a2, 17
+; RV32-NEXT:    bgez a1, .LBB27_17
+; RV32-NEXT:  .LBB27_86: # %cond.load40
+; RV32-NEXT:    lh a1, 28(a0)
+; RV32-NEXT:    vsetivli zero, 15, e16, m2, tu, ma
+; RV32-NEXT:    vmv.s.x v16, a1
+; RV32-NEXT:    vslideup.vi v8, v16, 14
+; RV32-NEXT:    slli a1, a2, 16
+; RV32-NEXT:    bgez a1, .LBB27_18
+; RV32-NEXT:  .LBB27_87: # %cond.load43
+; RV32-NEXT:    lh a1, 30(a0)
+; RV32-NEXT:    vsetivli zero, 16, e16, m2, tu, ma
+; RV32-NEXT:    vmv.s.x v16, a1
+; RV32-NEXT:    vslideup.vi v8, v16, 15
+; RV32-NEXT:    slli a1, a2, 15
+; RV32-NEXT:    bgez a1, .LBB27_19
+; RV32-NEXT:  .LBB27_88: # %cond.load46
+; RV32-NEXT:    lh a1, 32(a0)
+; RV32-NEXT:    vsetivli zero, 17, e16, m4, tu, ma
+; RV32-NEXT:    vmv.s.x v16, a1
+; RV32-NEXT:    vslideup.vi v8, v16, 16
+; RV32-NEXT:    slli a1, a2, 14
+; RV32-NEXT:    bgez a1, .LBB27_20
+; RV32-NEXT:  .LBB27_89: # %cond.load49
+; RV32-NEXT:    lh a1, 34(a0)
+; RV32-NEXT:    vsetivli zero, 18, e16, m4, tu, ma
+; RV32-NEXT:    vmv.s.x v16, a1
+; RV32-NEXT:    vslideup.vi v8, v16, 17
+; RV32-NEXT:    slli a1, a2, 13
+; RV32-NEXT:    bgez a1, .LBB27_21
+; RV32-NEXT:  .LBB27_90: # %cond.load52
+; RV32-NEXT:    lh a1, 36(a0)
+; RV32-NEXT:    vsetivli zero, 19, e16, m4, tu, ma
+; RV32-NEXT:    vmv.s.x v16, a1
+; RV32-NEXT:    vslideup.vi v8, v16, 18
+; RV32-NEXT:    slli a1, a2, 12
+; RV32-NEXT:    bgez a1, .LBB27_22
+; RV32-NEXT:  .LBB27_91: # %cond.load55
+; RV32-NEXT:    lh a1, 38(a0)
+; RV32-NEXT:    vsetivli zero, 20, e16, m4, tu, ma
+; RV32-NEXT:    vmv.s.x v16, a1
+; RV32-NEXT:    vslideup.vi v8, v16, 19
+; RV32-NEXT:    slli a1, a2, 11
+; RV32-NEXT:    bgez a1, .LBB27_23
+; RV32-NEXT:  .LBB27_92: # %cond.load58
+; RV32-NEXT:    lh a1, 40(a0)
+; RV32-NEXT:    vsetivli zero, 21, e16, m4, tu, ma
+; RV32-NEXT:    vmv.s.x v16, a1
+; RV32-NEXT:    vslideup.vi v8, v16, 20
+; RV32-NEXT:    slli a1, a2, 10
+; RV32-NEXT:    bgez a1, .LBB27_24
+; RV32-NEXT:  .LBB27_93: # %cond.load61
+; RV32-NEXT:    lh a1, 42(a0)
+; RV32-NEXT:    vsetivli zero, 22, e16, m4, tu, ma
+; RV32-NEXT:    vmv.s.x v16, a1
+; RV32-NEXT:    vslideup.vi v8, v16, 21
+; RV32-NEXT:    slli a1, a2, 9
+; RV32-NEXT:    bgez a1, .LBB27_25
+; RV32-NEXT:  .LBB27_94: # %cond.load64
+; RV32-NEXT:    lh a1, 44(a0)
+; RV32-NEXT:    vsetivli zero, 23, e16, m4, tu, ma
+; RV32-NEXT:    vmv.s.x v16, a1
+; RV32-NEXT:    vslideup.vi v8, v16, 22
+; RV32-NEXT:    slli a1, a2, 8
+; RV32-NEXT:    bgez a1, .LBB27_26
+; RV32-NEXT:  .LBB27_95: # %cond.load67
+; RV32-NEXT:    lh a1, 46(a0)
+; RV32-NEXT:    vsetivli zero, 24, e16, m4, tu, ma
+; RV32-NEXT:    vmv.s.x v16, a1
+; RV32-NEXT:    vslideup.vi v8, v16, 23
+; RV32-NEXT:    slli a1, a2, 7
+; RV32-NEXT:    bgez a1, .LBB27_27
+; RV32-NEXT:  .LBB27_96: # %cond.load70
+; RV32-NEXT:    lh a1, 48(a0)
+; RV32-NEXT:    vsetivli zero, 25, e16, m4, tu, ma
+; RV32-NEXT:    vmv.s.x v16, a1
+; RV32-NEXT:    vslideup.vi v8, v16, 24
+; RV32-NEXT:    slli a1, a2, 6
+; RV32-NEXT:    bgez a1, .LBB27_28
+; RV32-NEXT:  .LBB27_97: # %cond.load73
+; RV32-NEXT:    lh a1, 50(a0)
+; RV32-NEXT:    vsetivli zero, 26, e16, m4, tu, ma
+; RV32-NEXT:    vmv.s.x v16, a1
+; RV32-NEXT:    vslideup.vi v8, v16, 25
+; RV32-NEXT:    slli a1, a2, 5
+; RV32-NEXT:    bgez a1, .LBB27_29
+; RV32-NEXT:  .LBB27_98: # %cond.load76
+; RV32-NEXT:    lh a1, 52(a0)
+; RV32-NEXT:    vsetivli zero, 27, e16, m4, tu, ma
+; RV32-NEXT:    vmv.s.x v16, a1
+; RV32-NEXT:    vmv8r.v v24, v8
+; RV32-NEXT:    vslideup.vi v8, v16, 26
+; RV32-NEXT:    vmv4r.v v24, v8
+; RV32-NEXT:    vmv8r.v v8, v24
+; RV32-NEXT:    slli a1, a2, 4
+; RV32-NEXT:    bgez a1, .LBB27_30
+; RV32-NEXT:  .LBB27_99: # %cond.load79
+; RV32-NEXT:    lh a1, 54(a0)
+; RV32-NEXT:    vsetivli zero, 28, e16, m4, tu, ma
+; RV32-NEXT:    vmv.s.x v16, a1
+; RV32-NEXT:    vmv8r.v v24, v8
+; RV32-NEXT:    vslideup.vi v8, v16, 27
+; RV32-NEXT:    vmv4r.v v24, v8
+; RV32-NEXT:    vmv8r.v v8, v24
+; RV32-NEXT:    slli a1, a2, 3
+; RV32-NEXT:    bltz a1, .LBB27_31
+; RV32-NEXT:    j .LBB27_32
+; RV32-NEXT:  .LBB27_100: # %cond.load91
+; RV32-NEXT:    lh a2, 62(a0)
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    vmv8r.v v24, v8
+; RV32-NEXT:    li a2, 32
+; RV32-NEXT:    vsetvli zero, a2, e16, m4, tu, ma
+; RV32-NEXT:    vslideup.vi v8, v16, 31
+; RV32-NEXT:    vmv4r.v v24, v8
+; RV32-NEXT:    vmv8r.v v8, v24
+; RV32-NEXT:    andi a2, a3, 1
+; RV32-NEXT:    beqz a2, .LBB27_38
+; RV32-NEXT:  .LBB27_101: # %cond.load94
+; RV32-NEXT:    lh a2, 64(a0)
+; RV32-NEXT:    li a4, 64
+; RV32-NEXT:    vsetvli zero, a4, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 33
+; RV32-NEXT:    li a4, 32
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a4
+; RV32-NEXT:    andi a2, a3, 2
+; RV32-NEXT:    beqz a2, .LBB27_39
+; RV32-NEXT:  .LBB27_102: # %cond.load97
+; RV32-NEXT:    lh a2, 66(a0)
+; RV32-NEXT:    li a4, 64
+; RV32-NEXT:    vsetvli zero, a4, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 34
+; RV32-NEXT:    li a4, 33
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a4
+; RV32-NEXT:    andi a2, a3, 4
+; RV32-NEXT:    beqz a2, .LBB27_40
+; RV32-NEXT:  .LBB27_103: # %cond.load100
+; RV32-NEXT:    lh a2, 68(a0)
+; RV32-NEXT:    li a4, 64
+; RV32-NEXT:    vsetvli zero, a4, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 35
+; RV32-NEXT:    li a4, 34
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a4
+; RV32-NEXT:    andi a2, a3, 8
+; RV32-NEXT:    beqz a2, .LBB27_41
+; RV32-NEXT:  .LBB27_104: # %cond.load103
+; RV32-NEXT:    lh a2, 70(a0)
+; RV32-NEXT:    li a4, 64
+; RV32-NEXT:    vsetvli zero, a4, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 36
+; RV32-NEXT:    li a4, 35
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a4
+; RV32-NEXT:    andi a2, a3, 16
+; RV32-NEXT:    beqz a2, .LBB27_42
+; RV32-NEXT:  .LBB27_105: # %cond.load106
+; RV32-NEXT:    lh a2, 72(a0)
+; RV32-NEXT:    li a4, 64
+; RV32-NEXT:    vsetvli zero, a4, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 37
+; RV32-NEXT:    li a4, 36
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a4
+; RV32-NEXT:    andi a2, a3, 32
+; RV32-NEXT:    beqz a2, .LBB27_43
+; RV32-NEXT:  .LBB27_106: # %cond.load109
+; RV32-NEXT:    lh a2, 74(a0)
+; RV32-NEXT:    li a4, 64
+; RV32-NEXT:    vsetvli zero, a4, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 38
+; RV32-NEXT:    li a4, 37
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a4
+; RV32-NEXT:    andi a2, a3, 64
+; RV32-NEXT:    beqz a2, .LBB27_44
+; RV32-NEXT:  .LBB27_107: # %cond.load112
+; RV32-NEXT:    lh a2, 76(a0)
+; RV32-NEXT:    li a4, 64
+; RV32-NEXT:    vsetvli zero, a4, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 39
+; RV32-NEXT:    li a4, 38
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a4
+; RV32-NEXT:    andi a2, a3, 128
+; RV32-NEXT:    beqz a2, .LBB27_45
+; RV32-NEXT:  .LBB27_108: # %cond.load115
+; RV32-NEXT:    lh a2, 78(a0)
+; RV32-NEXT:    li a4, 64
+; RV32-NEXT:    vsetvli zero, a4, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 40
+; RV32-NEXT:    li a4, 39
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a4
+; RV32-NEXT:    andi a2, a3, 256
+; RV32-NEXT:    beqz a2, .LBB27_46
+; RV32-NEXT:  .LBB27_109: # %cond.load118
+; RV32-NEXT:    lh a2, 80(a0)
+; RV32-NEXT:    li a4, 64
+; RV32-NEXT:    vsetvli zero, a4, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 41
+; RV32-NEXT:    li a4, 40
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a4
+; RV32-NEXT:    andi a2, a3, 512
+; RV32-NEXT:    beqz a2, .LBB27_47
+; RV32-NEXT:  .LBB27_110: # %cond.load121
+; RV32-NEXT:    lh a2, 82(a0)
+; RV32-NEXT:    li a4, 64
+; RV32-NEXT:    vsetvli zero, a4, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 42
+; RV32-NEXT:    li a4, 41
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a4
+; RV32-NEXT:    andi a2, a3, 1024
+; RV32-NEXT:    beqz a2, .LBB27_48
+; RV32-NEXT:  .LBB27_111: # %cond.load124
+; RV32-NEXT:    lh a2, 84(a0)
+; RV32-NEXT:    li a4, 64
+; RV32-NEXT:    vsetvli zero, a4, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 43
+; RV32-NEXT:    li a4, 42
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a4
+; RV32-NEXT:    slli a2, a3, 20
+; RV32-NEXT:    bgez a2, .LBB27_49
+; RV32-NEXT:  .LBB27_112: # %cond.load127
+; RV32-NEXT:    lh a2, 86(a0)
+; RV32-NEXT:    li a4, 64
+; RV32-NEXT:    vsetvli zero, a4, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 44
+; RV32-NEXT:    li a4, 43
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a4
+; RV32-NEXT:    slli a2, a3, 19
+; RV32-NEXT:    bgez a2, .LBB27_50
+; RV32-NEXT:  .LBB27_113: # %cond.load130
+; RV32-NEXT:    lh a2, 88(a0)
+; RV32-NEXT:    li a4, 64
+; RV32-NEXT:    vsetvli zero, a4, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 45
+; RV32-NEXT:    li a4, 44
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a4
+; RV32-NEXT:    slli a2, a3, 18
+; RV32-NEXT:    bgez a2, .LBB27_51
+; RV32-NEXT:  .LBB27_114: # %cond.load133
+; RV32-NEXT:    lh a2, 90(a0)
+; RV32-NEXT:    li a4, 64
+; RV32-NEXT:    vsetvli zero, a4, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 46
+; RV32-NEXT:    li a4, 45
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a4
+; RV32-NEXT:    slli a2, a3, 17
+; RV32-NEXT:    bgez a2, .LBB27_52
+; RV32-NEXT:  .LBB27_115: # %cond.load136
+; RV32-NEXT:    lh a2, 92(a0)
+; RV32-NEXT:    li a4, 64
+; RV32-NEXT:    vsetvli zero, a4, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 47
+; RV32-NEXT:    li a4, 46
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a4
+; RV32-NEXT:    slli a2, a3, 16
+; RV32-NEXT:    bgez a2, .LBB27_53
+; RV32-NEXT:  .LBB27_116: # %cond.load139
+; RV32-NEXT:    lh a2, 94(a0)
+; RV32-NEXT:    li a4, 64
+; RV32-NEXT:    vsetvli zero, a4, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 48
+; RV32-NEXT:    li a4, 47
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a4
+; RV32-NEXT:    slli a2, a3, 15
+; RV32-NEXT:    bgez a2, .LBB27_54
+; RV32-NEXT:  .LBB27_117: # %cond.load142
+; RV32-NEXT:    lh a2, 96(a0)
+; RV32-NEXT:    li a4, 64
+; RV32-NEXT:    vsetvli zero, a4, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 49
+; RV32-NEXT:    li a4, 48
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a4
+; RV32-NEXT:    slli a2, a3, 14
+; RV32-NEXT:    bgez a2, .LBB27_55
+; RV32-NEXT:  .LBB27_118: # %cond.load145
+; RV32-NEXT:    lh a2, 98(a0)
+; RV32-NEXT:    li a4, 64
+; RV32-NEXT:    vsetvli zero, a4, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 50
+; RV32-NEXT:    li a4, 49
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a4
+; RV32-NEXT:    slli a2, a3, 13
+; RV32-NEXT:    bgez a2, .LBB27_56
+; RV32-NEXT:  .LBB27_119: # %cond.load148
+; RV32-NEXT:    lh a2, 100(a0)
+; RV32-NEXT:    li a4, 64
+; RV32-NEXT:    vsetvli zero, a4, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 51
+; RV32-NEXT:    li a4, 50
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a4
+; RV32-NEXT:    slli a2, a3, 12
+; RV32-NEXT:    bgez a2, .LBB27_57
+; RV32-NEXT:  .LBB27_120: # %cond.load151
+; RV32-NEXT:    lh a2, 102(a0)
+; RV32-NEXT:    li a4, 64
+; RV32-NEXT:    vsetvli zero, a4, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 52
+; RV32-NEXT:    li a4, 51
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a4
+; RV32-NEXT:    slli a2, a3, 11
+; RV32-NEXT:    bgez a2, .LBB27_58
+; RV32-NEXT:  .LBB27_121: # %cond.load154
+; RV32-NEXT:    lh a2, 104(a0)
+; RV32-NEXT:    li a4, 64
+; RV32-NEXT:    vsetvli zero, a4, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 53
+; RV32-NEXT:    li a4, 52
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a4
+; RV32-NEXT:    slli a2, a3, 10
+; RV32-NEXT:    bgez a2, .LBB27_59
+; RV32-NEXT:  .LBB27_122: # %cond.load157
+; RV32-NEXT:    lh a2, 106(a0)
+; RV32-NEXT:    li a4, 64
+; RV32-NEXT:    vsetvli zero, a4, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 54
+; RV32-NEXT:    li a4, 53
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a4
+; RV32-NEXT:    slli a2, a3, 9
+; RV32-NEXT:    bgez a2, .LBB27_60
+; RV32-NEXT:  .LBB27_123: # %cond.load160
+; RV32-NEXT:    lh a2, 108(a0)
+; RV32-NEXT:    li a4, 64
+; RV32-NEXT:    vsetvli zero, a4, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 55
+; RV32-NEXT:    li a4, 54
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a4
+; RV32-NEXT:    slli a2, a3, 8
+; RV32-NEXT:    bgez a2, .LBB27_61
+; RV32-NEXT:  .LBB27_124: # %cond.load163
+; RV32-NEXT:    lh a2, 110(a0)
+; RV32-NEXT:    li a4, 64
+; RV32-NEXT:    vsetvli zero, a4, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 56
+; RV32-NEXT:    li a4, 55
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a4
+; RV32-NEXT:    slli a2, a3, 7
+; RV32-NEXT:    bgez a2, .LBB27_62
+; RV32-NEXT:  .LBB27_125: # %cond.load166
+; RV32-NEXT:    lh a2, 112(a0)
+; RV32-NEXT:    li a4, 64
+; RV32-NEXT:    vsetvli zero, a4, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 57
+; RV32-NEXT:    li a4, 56
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a4
+; RV32-NEXT:    slli a2, a3, 6
+; RV32-NEXT:    bgez a2, .LBB27_63
+; RV32-NEXT:  .LBB27_126: # %cond.load169
+; RV32-NEXT:    lh a2, 114(a0)
+; RV32-NEXT:    li a4, 64
+; RV32-NEXT:    vsetvli zero, a4, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 58
+; RV32-NEXT:    li a4, 57
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a4
+; RV32-NEXT:    slli a2, a3, 5
+; RV32-NEXT:    bgez a2, .LBB27_64
+; RV32-NEXT:  .LBB27_127: # %cond.load172
+; RV32-NEXT:    lh a2, 116(a0)
+; RV32-NEXT:    li a4, 64
+; RV32-NEXT:    vsetvli zero, a4, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 59
+; RV32-NEXT:    li a4, 58
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a4
+; RV32-NEXT:    slli a2, a3, 4
+; RV32-NEXT:    bgez a2, .LBB27_65
+; RV32-NEXT:  .LBB27_128: # %cond.load175
+; RV32-NEXT:    lh a2, 118(a0)
+; RV32-NEXT:    li a4, 64
+; RV32-NEXT:    vsetvli zero, a4, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 60
+; RV32-NEXT:    li a4, 59
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a4
+; RV32-NEXT:    slli a2, a3, 3
+; RV32-NEXT:    bgez a2, .LBB27_66
+; RV32-NEXT:  .LBB27_129: # %cond.load178
+; RV32-NEXT:    lh a2, 120(a0)
+; RV32-NEXT:    li a4, 64
+; RV32-NEXT:    vsetvli zero, a4, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v16, a2
+; RV32-NEXT:    li a2, 61
+; RV32-NEXT:    li a4, 60
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v8, v16, a4
+; RV32-NEXT:    slli a2, a3, 2
+; RV32-NEXT:    bltz a2, .LBB27_67
+; RV32-NEXT:    j .LBB27_68
+; RV32-NEXT:  .LBB27_130:
+; RV32-NEXT:    # implicit-def: $v16m8
+; RV32-NEXT:    andi a3, a2, 2
+; RV32-NEXT:    beqz a3, .LBB27_132
+; RV32-NEXT:  .LBB27_131: # %cond.load193
+; RV32-NEXT:    lh a3, 130(a0)
+; RV32-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
+; RV32-NEXT:    vmv.s.x v25, a3
+; RV32-NEXT:    vslideup.vi v16, v25, 1
+; RV32-NEXT:  .LBB27_132: # %else194
+; RV32-NEXT:    andi a3, a2, 4
+; RV32-NEXT:    bnez a3, .LBB27_197
+; RV32-NEXT:  # %bb.133: # %else197
+; RV32-NEXT:    andi a3, a2, 8
+; RV32-NEXT:    bnez a3, .LBB27_198
+; RV32-NEXT:  .LBB27_134: # %else200
+; RV32-NEXT:    andi a3, a2, 16
+; RV32-NEXT:    bnez a3, .LBB27_199
+; RV32-NEXT:  .LBB27_135: # %else203
+; RV32-NEXT:    andi a3, a2, 32
+; RV32-NEXT:    bnez a3, .LBB27_200
+; RV32-NEXT:  .LBB27_136: # %else206
+; RV32-NEXT:    andi a3, a2, 64
+; RV32-NEXT:    bnez a3, .LBB27_201
+; RV32-NEXT:  .LBB27_137: # %else209
+; RV32-NEXT:    andi a3, a2, 128
+; RV32-NEXT:    bnez a3, .LBB27_202
+; RV32-NEXT:  .LBB27_138: # %else212
+; RV32-NEXT:    andi a3, a2, 256
+; RV32-NEXT:    bnez a3, .LBB27_203
+; RV32-NEXT:  .LBB27_139: # %else215
+; RV32-NEXT:    andi a3, a2, 512
+; RV32-NEXT:    bnez a3, .LBB27_204
+; RV32-NEXT:  .LBB27_140: # %else218
+; RV32-NEXT:    andi a3, a2, 1024
+; RV32-NEXT:    bnez a3, .LBB27_205
+; RV32-NEXT:  .LBB27_141: # %else221
+; RV32-NEXT:    slli a3, a2, 20
+; RV32-NEXT:    bltz a3, .LBB27_206
+; RV32-NEXT:  .LBB27_142: # %else224
+; RV32-NEXT:    slli a3, a2, 19
+; RV32-NEXT:    bltz a3, .LBB27_207
+; RV32-NEXT:  .LBB27_143: # %else227
+; RV32-NEXT:    slli a3, a2, 18
+; RV32-NEXT:    bltz a3, .LBB27_208
+; RV32-NEXT:  .LBB27_144: # %else230
+; RV32-NEXT:    slli a3, a2, 17
+; RV32-NEXT:    bltz a3, .LBB27_209
+; RV32-NEXT:  .LBB27_145: # %else233
+; RV32-NEXT:    slli a3, a2, 16
+; RV32-NEXT:    bltz a3, .LBB27_210
+; RV32-NEXT:  .LBB27_146: # %else236
+; RV32-NEXT:    slli a3, a2, 15
+; RV32-NEXT:    bltz a3, .LBB27_211
+; RV32-NEXT:  .LBB27_147: # %else239
+; RV32-NEXT:    slli a3, a2, 14
+; RV32-NEXT:    bltz a3, .LBB27_212
+; RV32-NEXT:  .LBB27_148: # %else242
+; RV32-NEXT:    slli a3, a2, 13
+; RV32-NEXT:    bltz a3, .LBB27_213
+; RV32-NEXT:  .LBB27_149: # %else245
+; RV32-NEXT:    slli a3, a2, 12
+; RV32-NEXT:    bltz a3, .LBB27_214
+; RV32-NEXT:  .LBB27_150: # %else248
+; RV32-NEXT:    slli a3, a2, 11
+; RV32-NEXT:    bltz a3, .LBB27_215
+; RV32-NEXT:  .LBB27_151: # %else251
+; RV32-NEXT:    slli a3, a2, 10
+; RV32-NEXT:    bltz a3, .LBB27_216
+; RV32-NEXT:  .LBB27_152: # %else254
+; RV32-NEXT:    slli a3, a2, 9
+; RV32-NEXT:    bltz a3, .LBB27_217
+; RV32-NEXT:  .LBB27_153: # %else257
+; RV32-NEXT:    slli a3, a2, 8
+; RV32-NEXT:    bltz a3, .LBB27_218
+; RV32-NEXT:  .LBB27_154: # %else260
+; RV32-NEXT:    slli a3, a2, 7
+; RV32-NEXT:    bltz a3, .LBB27_219
+; RV32-NEXT:  .LBB27_155: # %else263
+; RV32-NEXT:    slli a3, a2, 6
+; RV32-NEXT:    bltz a3, .LBB27_220
+; RV32-NEXT:  .LBB27_156: # %else266
+; RV32-NEXT:    slli a3, a2, 5
+; RV32-NEXT:    bltz a3, .LBB27_221
+; RV32-NEXT:  .LBB27_157: # %else269
+; RV32-NEXT:    slli a3, a2, 4
+; RV32-NEXT:    bltz a3, .LBB27_222
+; RV32-NEXT:  .LBB27_158: # %else272
+; RV32-NEXT:    slli a3, a2, 3
+; RV32-NEXT:    bltz a3, .LBB27_223
+; RV32-NEXT:  .LBB27_159: # %else275
+; RV32-NEXT:    slli a3, a2, 2
+; RV32-NEXT:    bgez a3, .LBB27_161
+; RV32-NEXT:  .LBB27_160: # %cond.load277
+; RV32-NEXT:    lh a3, 186(a0)
+; RV32-NEXT:    vsetivli zero, 30, e16, m4, tu, ma
+; RV32-NEXT:    vmv.s.x v28, a3
+; RV32-NEXT:    vslideup.vi v16, v28, 29
+; RV32-NEXT:  .LBB27_161: # %else278
+; RV32-NEXT:    slli a3, a2, 1
+; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT:    vsrl.vx v24, v24, a1
+; RV32-NEXT:    bgez a3, .LBB27_163
+; RV32-NEXT:  # %bb.162: # %cond.load280
+; RV32-NEXT:    lh a1, 188(a0)
+; RV32-NEXT:    vmv.s.x v28, a1
+; RV32-NEXT:    vsetivli zero, 31, e16, m4, tu, ma
+; RV32-NEXT:    vslideup.vi v16, v28, 30
+; RV32-NEXT:  .LBB27_163: # %else281
+; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT:    vmv.x.s a1, v24
+; RV32-NEXT:    bltz a2, .LBB27_224
+; RV32-NEXT:  # %bb.164: # %else284
+; RV32-NEXT:    andi a2, a1, 1
+; RV32-NEXT:    bnez a2, .LBB27_225
+; RV32-NEXT:  .LBB27_165: # %else287
+; RV32-NEXT:    andi a2, a1, 2
+; RV32-NEXT:    bnez a2, .LBB27_226
+; RV32-NEXT:  .LBB27_166: # %else290
+; RV32-NEXT:    andi a2, a1, 4
+; RV32-NEXT:    bnez a2, .LBB27_227
+; RV32-NEXT:  .LBB27_167: # %else293
+; RV32-NEXT:    andi a2, a1, 8
+; RV32-NEXT:    bnez a2, .LBB27_228
+; RV32-NEXT:  .LBB27_168: # %else296
+; RV32-NEXT:    andi a2, a1, 16
+; RV32-NEXT:    bnez a2, .LBB27_229
+; RV32-NEXT:  .LBB27_169: # %else299
+; RV32-NEXT:    andi a2, a1, 32
+; RV32-NEXT:    bnez a2, .LBB27_230
+; RV32-NEXT:  .LBB27_170: # %else302
+; RV32-NEXT:    andi a2, a1, 64
+; RV32-NEXT:    bnez a2, .LBB27_231
+; RV32-NEXT:  .LBB27_171: # %else305
+; RV32-NEXT:    andi a2, a1, 128
+; RV32-NEXT:    bnez a2, .LBB27_232
+; RV32-NEXT:  .LBB27_172: # %else308
+; RV32-NEXT:    andi a2, a1, 256
+; RV32-NEXT:    bnez a2, .LBB27_233
+; RV32-NEXT:  .LBB27_173: # %else311
+; RV32-NEXT:    andi a2, a1, 512
+; RV32-NEXT:    bnez a2, .LBB27_234
+; RV32-NEXT:  .LBB27_174: # %else314
+; RV32-NEXT:    andi a2, a1, 1024
+; RV32-NEXT:    bnez a2, .LBB27_235
+; RV32-NEXT:  .LBB27_175: # %else317
+; RV32-NEXT:    slli a2, a1, 20
+; RV32-NEXT:    bltz a2, .LBB27_236
+; RV32-NEXT:  .LBB27_176: # %else320
+; RV32-NEXT:    slli a2, a1, 19
+; RV32-NEXT:    bltz a2, .LBB27_237
+; RV32-NEXT:  .LBB27_177: # %else323
+; RV32-NEXT:    slli a2, a1, 18
+; RV32-NEXT:    bltz a2, .LBB27_238
+; RV32-NEXT:  .LBB27_178: # %else326
+; RV32-NEXT:    slli a2, a1, 17
+; RV32-NEXT:    bltz a2, .LBB27_239
+; RV32-NEXT:  .LBB27_179: # %else329
+; RV32-NEXT:    slli a2, a1, 16
+; RV32-NEXT:    bltz a2, .LBB27_240
+; RV32-NEXT:  .LBB27_180: # %else332
+; RV32-NEXT:    slli a2, a1, 15
+; RV32-NEXT:    bltz a2, .LBB27_241
+; RV32-NEXT:  .LBB27_181: # %else335
+; RV32-NEXT:    slli a2, a1, 14
+; RV32-NEXT:    bltz a2, .LBB27_242
+; RV32-NEXT:  .LBB27_182: # %else338
+; RV32-NEXT:    slli a2, a1, 13
+; RV32-NEXT:    bltz a2, .LBB27_243
+; RV32-NEXT:  .LBB27_183: # %else341
+; RV32-NEXT:    slli a2, a1, 12
+; RV32-NEXT:    bltz a2, .LBB27_244
+; RV32-NEXT:  .LBB27_184: # %else344
+; RV32-NEXT:    slli a2, a1, 11
+; RV32-NEXT:    bltz a2, .LBB27_245
+; RV32-NEXT:  .LBB27_185: # %else347
+; RV32-NEXT:    slli a2, a1, 10
+; RV32-NEXT:    bltz a2, .LBB27_246
+; RV32-NEXT:  .LBB27_186: # %else350
+; RV32-NEXT:    slli a2, a1, 9
+; RV32-NEXT:    bltz a2, .LBB27_247
+; RV32-NEXT:  .LBB27_187: # %else353
+; RV32-NEXT:    slli a2, a1, 8
+; RV32-NEXT:    bltz a2, .LBB27_248
+; RV32-NEXT:  .LBB27_188: # %else356
+; RV32-NEXT:    slli a2, a1, 7
+; RV32-NEXT:    bltz a2, .LBB27_249
+; RV32-NEXT:  .LBB27_189: # %else359
+; RV32-NEXT:    slli a2, a1, 6
+; RV32-NEXT:    bltz a2, .LBB27_250
+; RV32-NEXT:  .LBB27_190: # %else362
+; RV32-NEXT:    slli a2, a1, 5
+; RV32-NEXT:    bltz a2, .LBB27_251
+; RV32-NEXT:  .LBB27_191: # %else365
+; RV32-NEXT:    slli a2, a1, 4
+; RV32-NEXT:    bltz a2, .LBB27_252
+; RV32-NEXT:  .LBB27_192: # %else368
+; RV32-NEXT:    slli a2, a1, 3
+; RV32-NEXT:    bltz a2, .LBB27_253
+; RV32-NEXT:  .LBB27_193: # %else371
+; RV32-NEXT:    slli a2, a1, 2
+; RV32-NEXT:    bltz a2, .LBB27_254
+; RV32-NEXT:  .LBB27_194: # %else374
+; RV32-NEXT:    slli a2, a1, 1
+; RV32-NEXT:    bltz a2, .LBB27_255
+; RV32-NEXT:  .LBB27_195: # %else377
+; RV32-NEXT:    bltz a1, .LBB27_256
+; RV32-NEXT:  .LBB27_196: # %else380
+; RV32-NEXT:    ret
+; RV32-NEXT:  .LBB27_197: # %cond.load196
+; RV32-NEXT:    lh a3, 132(a0)
+; RV32-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
+; RV32-NEXT:    vmv.s.x v25, a3
+; RV32-NEXT:    vslideup.vi v16, v25, 2
+; RV32-NEXT:    andi a3, a2, 8
+; RV32-NEXT:    beqz a3, .LBB27_134
+; RV32-NEXT:  .LBB27_198: # %cond.load199
+; RV32-NEXT:    lh a3, 134(a0)
+; RV32-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
+; RV32-NEXT:    vmv.s.x v25, a3
+; RV32-NEXT:    vslideup.vi v16, v25, 3
+; RV32-NEXT:    andi a3, a2, 16
+; RV32-NEXT:    beqz a3, .LBB27_135
+; RV32-NEXT:  .LBB27_199: # %cond.load202
+; RV32-NEXT:    lh a3, 136(a0)
+; RV32-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
+; RV32-NEXT:    vmv.s.x v25, a3
+; RV32-NEXT:    vslideup.vi v16, v25, 4
+; RV32-NEXT:    andi a3, a2, 32
+; RV32-NEXT:    beqz a3, .LBB27_136
+; RV32-NEXT:  .LBB27_200: # %cond.load205
+; RV32-NEXT:    lh a3, 138(a0)
+; RV32-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
+; RV32-NEXT:    vmv.s.x v25, a3
+; RV32-NEXT:    vslideup.vi v16, v25, 5
+; RV32-NEXT:    andi a3, a2, 64
+; RV32-NEXT:    beqz a3, .LBB27_137
+; RV32-NEXT:  .LBB27_201: # %cond.load208
+; RV32-NEXT:    lh a3, 140(a0)
+; RV32-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
+; RV32-NEXT:    vmv.s.x v25, a3
+; RV32-NEXT:    vslideup.vi v16, v25, 6
+; RV32-NEXT:    andi a3, a2, 128
+; RV32-NEXT:    beqz a3, .LBB27_138
+; RV32-NEXT:  .LBB27_202: # %cond.load211
+; RV32-NEXT:    lh a3, 142(a0)
+; RV32-NEXT:    vsetivli zero, 8, e16, m1, tu, ma
+; RV32-NEXT:    vmv.s.x v25, a3
+; RV32-NEXT:    vslideup.vi v16, v25, 7
+; RV32-NEXT:    andi a3, a2, 256
+; RV32-NEXT:    beqz a3, .LBB27_139
+; RV32-NEXT:  .LBB27_203: # %cond.load214
+; RV32-NEXT:    lh a3, 144(a0)
+; RV32-NEXT:    vsetivli zero, 9, e16, m2, tu, ma
+; RV32-NEXT:    vmv.s.x v26, a3
+; RV32-NEXT:    vslideup.vi v16, v26, 8
+; RV32-NEXT:    andi a3, a2, 512
+; RV32-NEXT:    beqz a3, .LBB27_140
+; RV32-NEXT:  .LBB27_204: # %cond.load217
+; RV32-NEXT:    lh a3, 146(a0)
+; RV32-NEXT:    vsetivli zero, 10, e16, m2, tu, ma
+; RV32-NEXT:    vmv.s.x v26, a3
+; RV32-NEXT:    vslideup.vi v16, v26, 9
+; RV32-NEXT:    andi a3, a2, 1024
+; RV32-NEXT:    beqz a3, .LBB27_141
+; RV32-NEXT:  .LBB27_205: # %cond.load220
+; RV32-NEXT:    lh a3, 148(a0)
+; RV32-NEXT:    vsetivli zero, 11, e16, m2, tu, ma
+; RV32-NEXT:    vmv.s.x v26, a3
+; RV32-NEXT:    vslideup.vi v16, v26, 10
+; RV32-NEXT:    slli a3, a2, 20
+; RV32-NEXT:    bgez a3, .LBB27_142
+; RV32-NEXT:  .LBB27_206: # %cond.load223
+; RV32-NEXT:    lh a3, 150(a0)
+; RV32-NEXT:    vsetivli zero, 12, e16, m2, tu, ma
+; RV32-NEXT:    vmv.s.x v26, a3
+; RV32-NEXT:    vslideup.vi v16, v26, 11
+; RV32-NEXT:    slli a3, a2, 19
+; RV32-NEXT:    bgez a3, .LBB27_143
+; RV32-NEXT:  .LBB27_207: # %cond.load226
+; RV32-NEXT:    lh a3, 152(a0)
+; RV32-NEXT:    vsetivli zero, 13, e16, m2, tu, ma
+; RV32-NEXT:    vmv.s.x v26, a3
+; RV32-NEXT:    vslideup.vi v16, v26, 12
+; RV32-NEXT:    slli a3, a2, 18
+; RV32-NEXT:    bgez a3, .LBB27_144
+; RV32-NEXT:  .LBB27_208: # %cond.load229
+; RV32-NEXT:    lh a3, 154(a0)
+; RV32-NEXT:    vsetivli zero, 14, e16, m2, tu, ma
+; RV32-NEXT:    vmv.s.x v26, a3
+; RV32-NEXT:    vslideup.vi v16, v26, 13
+; RV32-NEXT:    slli a3, a2, 17
+; RV32-NEXT:    bgez a3, .LBB27_145
+; RV32-NEXT:  .LBB27_209: # %cond.load232
+; RV32-NEXT:    lh a3, 156(a0)
+; RV32-NEXT:    vsetivli zero, 15, e16, m2, tu, ma
+; RV32-NEXT:    vmv.s.x v26, a3
+; RV32-NEXT:    vslideup.vi v16, v26, 14
+; RV32-NEXT:    slli a3, a2, 16
+; RV32-NEXT:    bgez a3, .LBB27_146
+; RV32-NEXT:  .LBB27_210: # %cond.load235
+; RV32-NEXT:    lh a3, 158(a0)
+; RV32-NEXT:    vsetivli zero, 16, e16, m2, tu, ma
+; RV32-NEXT:    vmv.s.x v26, a3
+; RV32-NEXT:    vslideup.vi v16, v26, 15
+; RV32-NEXT:    slli a3, a2, 15
+; RV32-NEXT:    bgez a3, .LBB27_147
+; RV32-NEXT:  .LBB27_211: # %cond.load238
+; RV32-NEXT:    lh a3, 160(a0)
+; RV32-NEXT:    vsetivli zero, 17, e16, m4, tu, ma
+; RV32-NEXT:    vmv.s.x v28, a3
+; RV32-NEXT:    vslideup.vi v16, v28, 16
+; RV32-NEXT:    slli a3, a2, 14
+; RV32-NEXT:    bgez a3, .LBB27_148
+; RV32-NEXT:  .LBB27_212: # %cond.load241
+; RV32-NEXT:    lh a3, 162(a0)
+; RV32-NEXT:    vsetivli zero, 18, e16, m4, tu, ma
+; RV32-NEXT:    vmv.s.x v28, a3
+; RV32-NEXT:    vslideup.vi v16, v28, 17
+; RV32-NEXT:    slli a3, a2, 13
+; RV32-NEXT:    bgez a3, .LBB27_149
+; RV32-NEXT:  .LBB27_213: # %cond.load244
+; RV32-NEXT:    lh a3, 164(a0)
+; RV32-NEXT:    vsetivli zero, 19, e16, m4, tu, ma
+; RV32-NEXT:    vmv.s.x v28, a3
+; RV32-NEXT:    vslideup.vi v16, v28, 18
+; RV32-NEXT:    slli a3, a2, 12
+; RV32-NEXT:    bgez a3, .LBB27_150
+; RV32-NEXT:  .LBB27_214: # %cond.load247
+; RV32-NEXT:    lh a3, 166(a0)
+; RV32-NEXT:    vsetivli zero, 20, e16, m4, tu, ma
+; RV32-NEXT:    vmv.s.x v28, a3
+; RV32-NEXT:    vslideup.vi v16, v28, 19
+; RV32-NEXT:    slli a3, a2, 11
+; RV32-NEXT:    bgez a3, .LBB27_151
+; RV32-NEXT:  .LBB27_215: # %cond.load250
+; RV32-NEXT:    lh a3, 168(a0)
+; RV32-NEXT:    vsetivli zero, 21, e16, m4, tu, ma
+; RV32-NEXT:    vmv.s.x v28, a3
+; RV32-NEXT:    vslideup.vi v16, v28, 20
+; RV32-NEXT:    slli a3, a2, 10
+; RV32-NEXT:    bgez a3, .LBB27_152
+; RV32-NEXT:  .LBB27_216: # %cond.load253
+; RV32-NEXT:    lh a3, 170(a0)
+; RV32-NEXT:    vsetivli zero, 22, e16, m4, tu, ma
+; RV32-NEXT:    vmv.s.x v28, a3
+; RV32-NEXT:    vslideup.vi v16, v28, 21
+; RV32-NEXT:    slli a3, a2, 9
+; RV32-NEXT:    bgez a3, .LBB27_153
+; RV32-NEXT:  .LBB27_217: # %cond.load256
+; RV32-NEXT:    lh a3, 172(a0)
+; RV32-NEXT:    vsetivli zero, 23, e16, m4, tu, ma
+; RV32-NEXT:    vmv.s.x v28, a3
+; RV32-NEXT:    vslideup.vi v16, v28, 22
+; RV32-NEXT:    slli a3, a2, 8
+; RV32-NEXT:    bgez a3, .LBB27_154
+; RV32-NEXT:  .LBB27_218: # %cond.load259
+; RV32-NEXT:    lh a3, 174(a0)
+; RV32-NEXT:    vsetivli zero, 24, e16, m4, tu, ma
+; RV32-NEXT:    vmv.s.x v28, a3
+; RV32-NEXT:    vslideup.vi v16, v28, 23
+; RV32-NEXT:    slli a3, a2, 7
+; RV32-NEXT:    bgez a3, .LBB27_155
+; RV32-NEXT:  .LBB27_219: # %cond.load262
+; RV32-NEXT:    lh a3, 176(a0)
+; RV32-NEXT:    vsetivli zero, 25, e16, m4, tu, ma
+; RV32-NEXT:    vmv.s.x v28, a3
+; RV32-NEXT:    vslideup.vi v16, v28, 24
+; RV32-NEXT:    slli a3, a2, 6
+; RV32-NEXT:    bgez a3, .LBB27_156
+; RV32-NEXT:  .LBB27_220: # %cond.load265
+; RV32-NEXT:    lh a3, 178(a0)
+; RV32-NEXT:    vsetivli zero, 26, e16, m4, tu, ma
+; RV32-NEXT:    vmv.s.x v28, a3
+; RV32-NEXT:    vslideup.vi v16, v28, 25
+; RV32-NEXT:    slli a3, a2, 5
+; RV32-NEXT:    bgez a3, .LBB27_157
+; RV32-NEXT:  .LBB27_221: # %cond.load268
+; RV32-NEXT:    lh a3, 180(a0)
+; RV32-NEXT:    vsetivli zero, 27, e16, m4, tu, ma
+; RV32-NEXT:    vmv.s.x v28, a3
+; RV32-NEXT:    vslideup.vi v16, v28, 26
+; RV32-NEXT:    slli a3, a2, 4
+; RV32-NEXT:    bgez a3, .LBB27_158
+; RV32-NEXT:  .LBB27_222: # %cond.load271
+; RV32-NEXT:    lh a3, 182(a0)
+; RV32-NEXT:    vsetivli zero, 28, e16, m4, tu, ma
+; RV32-NEXT:    vmv.s.x v28, a3
+; RV32-NEXT:    vslideup.vi v16, v28, 27
+; RV32-NEXT:    slli a3, a2, 3
+; RV32-NEXT:    bgez a3, .LBB27_159
+; RV32-NEXT:  .LBB27_223: # %cond.load274
+; RV32-NEXT:    lh a3, 184(a0)
+; RV32-NEXT:    vsetivli zero, 29, e16, m4, tu, ma
+; RV32-NEXT:    vmv.s.x v28, a3
+; RV32-NEXT:    vslideup.vi v16, v28, 28
+; RV32-NEXT:    slli a3, a2, 2
+; RV32-NEXT:    bltz a3, .LBB27_160
+; RV32-NEXT:    j .LBB27_161
+; RV32-NEXT:  .LBB27_224: # %cond.load283
+; RV32-NEXT:    lh a2, 190(a0)
+; RV32-NEXT:    vmv.s.x v24, a2
+; RV32-NEXT:    li a2, 32
+; RV32-NEXT:    vsetvli zero, a2, e16, m4, tu, ma
+; RV32-NEXT:    vslideup.vi v16, v24, 31
+; RV32-NEXT:    andi a2, a1, 1
+; RV32-NEXT:    beqz a2, .LBB27_165
+; RV32-NEXT:  .LBB27_225: # %cond.load286
+; RV32-NEXT:    lh a2, 192(a0)
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v24, a2
+; RV32-NEXT:    li a2, 33
+; RV32-NEXT:    li a3, 32
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v16, v24, a3
+; RV32-NEXT:    andi a2, a1, 2
+; RV32-NEXT:    beqz a2, .LBB27_166
+; RV32-NEXT:  .LBB27_226: # %cond.load289
+; RV32-NEXT:    lh a2, 194(a0)
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v24, a2
+; RV32-NEXT:    li a2, 34
+; RV32-NEXT:    li a3, 33
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v16, v24, a3
+; RV32-NEXT:    andi a2, a1, 4
+; RV32-NEXT:    beqz a2, .LBB27_167
+; RV32-NEXT:  .LBB27_227: # %cond.load292
+; RV32-NEXT:    lh a2, 196(a0)
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v24, a2
+; RV32-NEXT:    li a2, 35
+; RV32-NEXT:    li a3, 34
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v16, v24, a3
+; RV32-NEXT:    andi a2, a1, 8
+; RV32-NEXT:    beqz a2, .LBB27_168
+; RV32-NEXT:  .LBB27_228: # %cond.load295
+; RV32-NEXT:    lh a2, 198(a0)
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v24, a2
+; RV32-NEXT:    li a2, 36
+; RV32-NEXT:    li a3, 35
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v16, v24, a3
+; RV32-NEXT:    andi a2, a1, 16
+; RV32-NEXT:    beqz a2, .LBB27_169
+; RV32-NEXT:  .LBB27_229: # %cond.load298
+; RV32-NEXT:    lh a2, 200(a0)
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v24, a2
+; RV32-NEXT:    li a2, 37
+; RV32-NEXT:    li a3, 36
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v16, v24, a3
+; RV32-NEXT:    andi a2, a1, 32
+; RV32-NEXT:    beqz a2, .LBB27_170
+; RV32-NEXT:  .LBB27_230: # %cond.load301
+; RV32-NEXT:    lh a2, 202(a0)
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v24, a2
+; RV32-NEXT:    li a2, 38
+; RV32-NEXT:    li a3, 37
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v16, v24, a3
+; RV32-NEXT:    andi a2, a1, 64
+; RV32-NEXT:    beqz a2, .LBB27_171
+; RV32-NEXT:  .LBB27_231: # %cond.load304
+; RV32-NEXT:    lh a2, 204(a0)
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v24, a2
+; RV32-NEXT:    li a2, 39
+; RV32-NEXT:    li a3, 38
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v16, v24, a3
+; RV32-NEXT:    andi a2, a1, 128
+; RV32-NEXT:    beqz a2, .LBB27_172
+; RV32-NEXT:  .LBB27_232: # %cond.load307
+; RV32-NEXT:    lh a2, 206(a0)
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v24, a2
+; RV32-NEXT:    li a2, 40
+; RV32-NEXT:    li a3, 39
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v16, v24, a3
+; RV32-NEXT:    andi a2, a1, 256
+; RV32-NEXT:    beqz a2, .LBB27_173
+; RV32-NEXT:  .LBB27_233: # %cond.load310
+; RV32-NEXT:    lh a2, 208(a0)
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v24, a2
+; RV32-NEXT:    li a2, 41
+; RV32-NEXT:    li a3, 40
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v16, v24, a3
+; RV32-NEXT:    andi a2, a1, 512
+; RV32-NEXT:    beqz a2, .LBB27_174
+; RV32-NEXT:  .LBB27_234: # %cond.load313
+; RV32-NEXT:    lh a2, 210(a0)
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v24, a2
+; RV32-NEXT:    li a2, 42
+; RV32-NEXT:    li a3, 41
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v16, v24, a3
+; RV32-NEXT:    andi a2, a1, 1024
+; RV32-NEXT:    beqz a2, .LBB27_175
+; RV32-NEXT:  .LBB27_235: # %cond.load316
+; RV32-NEXT:    lh a2, 212(a0)
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v24, a2
+; RV32-NEXT:    li a2, 43
+; RV32-NEXT:    li a3, 42
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v16, v24, a3
+; RV32-NEXT:    slli a2, a1, 20
+; RV32-NEXT:    bgez a2, .LBB27_176
+; RV32-NEXT:  .LBB27_236: # %cond.load319
+; RV32-NEXT:    lh a2, 214(a0)
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v24, a2
+; RV32-NEXT:    li a2, 44
+; RV32-NEXT:    li a3, 43
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v16, v24, a3
+; RV32-NEXT:    slli a2, a1, 19
+; RV32-NEXT:    bgez a2, .LBB27_177
+; RV32-NEXT:  .LBB27_237: # %cond.load322
+; RV32-NEXT:    lh a2, 216(a0)
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v24, a2
+; RV32-NEXT:    li a2, 45
+; RV32-NEXT:    li a3, 44
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v16, v24, a3
+; RV32-NEXT:    slli a2, a1, 18
+; RV32-NEXT:    bgez a2, .LBB27_178
+; RV32-NEXT:  .LBB27_238: # %cond.load325
+; RV32-NEXT:    lh a2, 218(a0)
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v24, a2
+; RV32-NEXT:    li a2, 46
+; RV32-NEXT:    li a3, 45
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v16, v24, a3
+; RV32-NEXT:    slli a2, a1, 17
+; RV32-NEXT:    bgez a2, .LBB27_179
+; RV32-NEXT:  .LBB27_239: # %cond.load328
+; RV32-NEXT:    lh a2, 220(a0)
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v24, a2
+; RV32-NEXT:    li a2, 47
+; RV32-NEXT:    li a3, 46
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v16, v24, a3
+; RV32-NEXT:    slli a2, a1, 16
+; RV32-NEXT:    bgez a2, .LBB27_180
+; RV32-NEXT:  .LBB27_240: # %cond.load331
+; RV32-NEXT:    lh a2, 222(a0)
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v24, a2
+; RV32-NEXT:    li a2, 48
+; RV32-NEXT:    li a3, 47
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v16, v24, a3
+; RV32-NEXT:    slli a2, a1, 15
+; RV32-NEXT:    bgez a2, .LBB27_181
+; RV32-NEXT:  .LBB27_241: # %cond.load334
+; RV32-NEXT:    lh a2, 224(a0)
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v24, a2
+; RV32-NEXT:    li a2, 49
+; RV32-NEXT:    li a3, 48
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v16, v24, a3
+; RV32-NEXT:    slli a2, a1, 14
+; RV32-NEXT:    bgez a2, .LBB27_182
+; RV32-NEXT:  .LBB27_242: # %cond.load337
+; RV32-NEXT:    lh a2, 226(a0)
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v24, a2
+; RV32-NEXT:    li a2, 50
+; RV32-NEXT:    li a3, 49
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v16, v24, a3
+; RV32-NEXT:    slli a2, a1, 13
+; RV32-NEXT:    bgez a2, .LBB27_183
+; RV32-NEXT:  .LBB27_243: # %cond.load340
+; RV32-NEXT:    lh a2, 228(a0)
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v24, a2
+; RV32-NEXT:    li a2, 51
+; RV32-NEXT:    li a3, 50
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v16, v24, a3
+; RV32-NEXT:    slli a2, a1, 12
+; RV32-NEXT:    bgez a2, .LBB27_184
+; RV32-NEXT:  .LBB27_244: # %cond.load343
+; RV32-NEXT:    lh a2, 230(a0)
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v24, a2
+; RV32-NEXT:    li a2, 52
+; RV32-NEXT:    li a3, 51
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v16, v24, a3
+; RV32-NEXT:    slli a2, a1, 11
+; RV32-NEXT:    bgez a2, .LBB27_185
+; RV32-NEXT:  .LBB27_245: # %cond.load346
+; RV32-NEXT:    lh a2, 232(a0)
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v24, a2
+; RV32-NEXT:    li a2, 53
+; RV32-NEXT:    li a3, 52
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v16, v24, a3
+; RV32-NEXT:    slli a2, a1, 10
+; RV32-NEXT:    bgez a2, .LBB27_186
+; RV32-NEXT:  .LBB27_246: # %cond.load349
+; RV32-NEXT:    lh a2, 234(a0)
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v24, a2
+; RV32-NEXT:    li a2, 54
+; RV32-NEXT:    li a3, 53
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v16, v24, a3
+; RV32-NEXT:    slli a2, a1, 9
+; RV32-NEXT:    bgez a2, .LBB27_187
+; RV32-NEXT:  .LBB27_247: # %cond.load352
+; RV32-NEXT:    lh a2, 236(a0)
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v24, a2
+; RV32-NEXT:    li a2, 55
+; RV32-NEXT:    li a3, 54
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v16, v24, a3
+; RV32-NEXT:    slli a2, a1, 8
+; RV32-NEXT:    bgez a2, .LBB27_188
+; RV32-NEXT:  .LBB27_248: # %cond.load355
+; RV32-NEXT:    lh a2, 238(a0)
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v24, a2
+; RV32-NEXT:    li a2, 56
+; RV32-NEXT:    li a3, 55
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v16, v24, a3
+; RV32-NEXT:    slli a2, a1, 7
+; RV32-NEXT:    bgez a2, .LBB27_189
+; RV32-NEXT:  .LBB27_249: # %cond.load358
+; RV32-NEXT:    lh a2, 240(a0)
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v24, a2
+; RV32-NEXT:    li a2, 57
+; RV32-NEXT:    li a3, 56
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v16, v24, a3
+; RV32-NEXT:    slli a2, a1, 6
+; RV32-NEXT:    bgez a2, .LBB27_190
+; RV32-NEXT:  .LBB27_250: # %cond.load361
+; RV32-NEXT:    lh a2, 242(a0)
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v24, a2
+; RV32-NEXT:    li a2, 58
+; RV32-NEXT:    li a3, 57
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v16, v24, a3
+; RV32-NEXT:    slli a2, a1, 5
+; RV32-NEXT:    bgez a2, .LBB27_191
+; RV32-NEXT:  .LBB27_251: # %cond.load364
+; RV32-NEXT:    lh a2, 244(a0)
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v24, a2
+; RV32-NEXT:    li a2, 59
+; RV32-NEXT:    li a3, 58
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v16, v24, a3
+; RV32-NEXT:    slli a2, a1, 4
+; RV32-NEXT:    bgez a2, .LBB27_192
+; RV32-NEXT:  .LBB27_252: # %cond.load367
+; RV32-NEXT:    lh a2, 246(a0)
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v24, a2
+; RV32-NEXT:    li a2, 60
+; RV32-NEXT:    li a3, 59
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v16, v24, a3
+; RV32-NEXT:    slli a2, a1, 3
+; RV32-NEXT:    bgez a2, .LBB27_193
+; RV32-NEXT:  .LBB27_253: # %cond.load370
+; RV32-NEXT:    lh a2, 248(a0)
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v24, a2
+; RV32-NEXT:    li a2, 61
+; RV32-NEXT:    li a3, 60
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v16, v24, a3
+; RV32-NEXT:    slli a2, a1, 2
+; RV32-NEXT:    bgez a2, .LBB27_194
+; RV32-NEXT:  .LBB27_254: # %cond.load373
+; RV32-NEXT:    lh a2, 250(a0)
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v24, a2
+; RV32-NEXT:    li a2, 62
+; RV32-NEXT:    li a3, 61
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v16, v24, a3
+; RV32-NEXT:    slli a2, a1, 1
+; RV32-NEXT:    bgez a2, .LBB27_195
+; RV32-NEXT:  .LBB27_255: # %cond.load376
+; RV32-NEXT:    lh a2, 252(a0)
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v24, a2
+; RV32-NEXT:    li a2, 63
+; RV32-NEXT:    li a3, 62
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-NEXT:    vslideup.vx v16, v24, a3
+; RV32-NEXT:    bgez a1, .LBB27_196
+; RV32-NEXT:  .LBB27_256: # %cond.load379
+; RV32-NEXT:    lh a0, 254(a0)
+; RV32-NEXT:    li a1, 64
+; RV32-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-NEXT:    vmv.s.x v24, a0
+; RV32-NEXT:    li a0, 63
+; RV32-NEXT:    vslideup.vx v16, v24, a0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: masked_load_v128bf16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT:    vmv.x.s a2, v0
+; RV64-NEXT:    andi a1, a2, 1
+; RV64-NEXT:    beqz a1, .LBB27_2
+; RV64-NEXT:  # %bb.1: # %cond.load
+; RV64-NEXT:    lh a1, 0(a0)
+; RV64-NEXT:    fmv.x.h a3, fa5
+; RV64-NEXT:    li a4, 64
+; RV64-NEXT:    vsetvli zero, a4, e16, m8, ta, ma
+; RV64-NEXT:    vmv.v.x v8, a3
+; RV64-NEXT:    vsetvli zero, zero, e16, m8, tu, ma
+; RV64-NEXT:    vmv.s.x v8, a1
+; RV64-NEXT:    andi a1, a2, 2
+; RV64-NEXT:    bnez a1, .LBB27_3
+; RV64-NEXT:    j .LBB27_4
+; RV64-NEXT:  .LBB27_2:
+; RV64-NEXT:    # implicit-def: $v8m8
+; RV64-NEXT:    andi a1, a2, 2
+; RV64-NEXT:    beqz a1, .LBB27_4
+; RV64-NEXT:  .LBB27_3: # %cond.load1
+; RV64-NEXT:    lh a1, 2(a0)
+; RV64-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
+; RV64-NEXT:    vmv.s.x v16, a1
+; RV64-NEXT:    vslideup.vi v8, v16, 1
+; RV64-NEXT:  .LBB27_4: # %else2
+; RV64-NEXT:    andi a1, a2, 4
+; RV64-NEXT:    bnez a1, .LBB27_71
+; RV64-NEXT:  # %bb.5: # %else5
+; RV64-NEXT:    andi a1, a2, 8
+; RV64-NEXT:    bnez a1, .LBB27_72
+; RV64-NEXT:  .LBB27_6: # %else8
+; RV64-NEXT:    andi a1, a2, 16
+; RV64-NEXT:    bnez a1, .LBB27_73
+; RV64-NEXT:  .LBB27_7: # %else11
+; RV64-NEXT:    andi a1, a2, 32
+; RV64-NEXT:    bnez a1, .LBB27_74
+; RV64-NEXT:  .LBB27_8: # %else14
+; RV64-NEXT:    andi a1, a2, 64
+; RV64-NEXT:    bnez a1, .LBB27_75
+; RV64-NEXT:  .LBB27_9: # %else17
+; RV64-NEXT:    andi a1, a2, 128
+; RV64-NEXT:    bnez a1, .LBB27_76
+; RV64-NEXT:  .LBB27_10: # %else20
+; RV64-NEXT:    andi a1, a2, 256
+; RV64-NEXT:    bnez a1, .LBB27_77
+; RV64-NEXT:  .LBB27_11: # %else23
+; RV64-NEXT:    andi a1, a2, 512
+; RV64-NEXT:    bnez a1, .LBB27_78
+; RV64-NEXT:  .LBB27_12: # %else26
+; RV64-NEXT:    andi a1, a2, 1024
+; RV64-NEXT:    bnez a1, .LBB27_79
+; RV64-NEXT:  .LBB27_13: # %else29
+; RV64-NEXT:    slli a1, a2, 52
+; RV64-NEXT:    bltz a1, .LBB27_80
+; RV64-NEXT:  .LBB27_14: # %else32
+; RV64-NEXT:    slli a1, a2, 51
+; RV64-NEXT:    bltz a1, .LBB27_81
+; RV64-NEXT:  .LBB27_15: # %else35
+; RV64-NEXT:    slli a1, a2, 50
+; RV64-NEXT:    bltz a1, .LBB27_82
+; RV64-NEXT:  .LBB27_16: # %else38
+; RV64-NEXT:    slli a1, a2, 49
+; RV64-NEXT:    bltz a1, .LBB27_83
+; RV64-NEXT:  .LBB27_17: # %else41
+; RV64-NEXT:    slli a1, a2, 48
+; RV64-NEXT:    bltz a1, .LBB27_84
+; RV64-NEXT:  .LBB27_18: # %else44
+; RV64-NEXT:    slli a1, a2, 47
+; RV64-NEXT:    bltz a1, .LBB27_85
+; RV64-NEXT:  .LBB27_19: # %else47
+; RV64-NEXT:    slli a1, a2, 46
+; RV64-NEXT:    bltz a1, .LBB27_86
+; RV64-NEXT:  .LBB27_20: # %else50
+; RV64-NEXT:    slli a1, a2, 45
+; RV64-NEXT:    bltz a1, .LBB27_87
+; RV64-NEXT:  .LBB27_21: # %else53
+; RV64-NEXT:    slli a1, a2, 44
+; RV64-NEXT:    bltz a1, .LBB27_88
+; RV64-NEXT:  .LBB27_22: # %else56
+; RV64-NEXT:    slli a1, a2, 43
+; RV64-NEXT:    bltz a1, .LBB27_89
+; RV64-NEXT:  .LBB27_23: # %else59
+; RV64-NEXT:    slli a1, a2, 42
+; RV64-NEXT:    bltz a1, .LBB27_90
+; RV64-NEXT:  .LBB27_24: # %else62
+; RV64-NEXT:    slli a1, a2, 41
+; RV64-NEXT:    bltz a1, .LBB27_91
+; RV64-NEXT:  .LBB27_25: # %else65
+; RV64-NEXT:    slli a1, a2, 40
+; RV64-NEXT:    bltz a1, .LBB27_92
+; RV64-NEXT:  .LBB27_26: # %else68
+; RV64-NEXT:    slli a1, a2, 39
+; RV64-NEXT:    bltz a1, .LBB27_93
+; RV64-NEXT:  .LBB27_27: # %else71
+; RV64-NEXT:    slli a1, a2, 38
+; RV64-NEXT:    bltz a1, .LBB27_94
+; RV64-NEXT:  .LBB27_28: # %else74
+; RV64-NEXT:    slli a1, a2, 37
+; RV64-NEXT:    bltz a1, .LBB27_95
+; RV64-NEXT:  .LBB27_29: # %else77
+; RV64-NEXT:    slli a1, a2, 36
+; RV64-NEXT:    bltz a1, .LBB27_96
+; RV64-NEXT:  .LBB27_30: # %else80
+; RV64-NEXT:    slli a1, a2, 35
+; RV64-NEXT:    bltz a1, .LBB27_97
+; RV64-NEXT:  .LBB27_31: # %else83
+; RV64-NEXT:    slli a1, a2, 34
+; RV64-NEXT:    bltz a1, .LBB27_98
+; RV64-NEXT:  .LBB27_32: # %else86
+; RV64-NEXT:    slli a1, a2, 33
+; RV64-NEXT:    bltz a1, .LBB27_99
+; RV64-NEXT:  .LBB27_33: # %else89
+; RV64-NEXT:    slli a1, a2, 32
+; RV64-NEXT:    bltz a1, .LBB27_100
+; RV64-NEXT:  .LBB27_34: # %else92
+; RV64-NEXT:    slli a1, a2, 31
+; RV64-NEXT:    bltz a1, .LBB27_101
+; RV64-NEXT:  .LBB27_35: # %else95
+; RV64-NEXT:    slli a1, a2, 30
+; RV64-NEXT:    bltz a1, .LBB27_102
+; RV64-NEXT:  .LBB27_36: # %else98
+; RV64-NEXT:    slli a1, a2, 29
+; RV64-NEXT:    bltz a1, .LBB27_103
+; RV64-NEXT:  .LBB27_37: # %else101
+; RV64-NEXT:    slli a1, a2, 28
+; RV64-NEXT:    bltz a1, .LBB27_104
+; RV64-NEXT:  .LBB27_38: # %else104
+; RV64-NEXT:    slli a1, a2, 27
+; RV64-NEXT:    bltz a1, .LBB27_105
+; RV64-NEXT:  .LBB27_39: # %else107
+; RV64-NEXT:    slli a1, a2, 26
+; RV64-NEXT:    bltz a1, .LBB27_106
+; RV64-NEXT:  .LBB27_40: # %else110
+; RV64-NEXT:    slli a1, a2, 25
+; RV64-NEXT:    bltz a1, .LBB27_107
+; RV64-NEXT:  .LBB27_41: # %else113
+; RV64-NEXT:    slli a1, a2, 24
+; RV64-NEXT:    bltz a1, .LBB27_108
+; RV64-NEXT:  .LBB27_42: # %else116
+; RV64-NEXT:    slli a1, a2, 23
+; RV64-NEXT:    bltz a1, .LBB27_109
+; RV64-NEXT:  .LBB27_43: # %else119
+; RV64-NEXT:    slli a1, a2, 22
+; RV64-NEXT:    bltz a1, .LBB27_110
+; RV64-NEXT:  .LBB27_44: # %else122
+; RV64-NEXT:    slli a1, a2, 21
+; RV64-NEXT:    bltz a1, .LBB27_111
+; RV64-NEXT:  .LBB27_45: # %else125
+; RV64-NEXT:    slli a1, a2, 20
+; RV64-NEXT:    bltz a1, .LBB27_112
+; RV64-NEXT:  .LBB27_46: # %else128
+; RV64-NEXT:    slli a1, a2, 19
+; RV64-NEXT:    bltz a1, .LBB27_113
+; RV64-NEXT:  .LBB27_47: # %else131
+; RV64-NEXT:    slli a1, a2, 18
+; RV64-NEXT:    bltz a1, .LBB27_114
+; RV64-NEXT:  .LBB27_48: # %else134
+; RV64-NEXT:    slli a1, a2, 17
+; RV64-NEXT:    bltz a1, .LBB27_115
+; RV64-NEXT:  .LBB27_49: # %else137
+; RV64-NEXT:    slli a1, a2, 16
+; RV64-NEXT:    bltz a1, .LBB27_116
+; RV64-NEXT:  .LBB27_50: # %else140
+; RV64-NEXT:    slli a1, a2, 15
+; RV64-NEXT:    bltz a1, .LBB27_117
+; RV64-NEXT:  .LBB27_51: # %else143
+; RV64-NEXT:    slli a1, a2, 14
+; RV64-NEXT:    bltz a1, .LBB27_118
+; RV64-NEXT:  .LBB27_52: # %else146
+; RV64-NEXT:    slli a1, a2, 13
+; RV64-NEXT:    bltz a1, .LBB27_119
+; RV64-NEXT:  .LBB27_53: # %else149
+; RV64-NEXT:    slli a1, a2, 12
+; RV64-NEXT:    bltz a1, .LBB27_120
+; RV64-NEXT:  .LBB27_54: # %else152
+; RV64-NEXT:    slli a1, a2, 11
+; RV64-NEXT:    bltz a1, .LBB27_121
+; RV64-NEXT:  .LBB27_55: # %else155
+; RV64-NEXT:    slli a1, a2, 10
+; RV64-NEXT:    bltz a1, .LBB27_122
+; RV64-NEXT:  .LBB27_56: # %else158
+; RV64-NEXT:    slli a1, a2, 9
+; RV64-NEXT:    bltz a1, .LBB27_123
+; RV64-NEXT:  .LBB27_57: # %else161
+; RV64-NEXT:    slli a1, a2, 8
+; RV64-NEXT:    bltz a1, .LBB27_124
+; RV64-NEXT:  .LBB27_58: # %else164
+; RV64-NEXT:    slli a1, a2, 7
+; RV64-NEXT:    bltz a1, .LBB27_125
+; RV64-NEXT:  .LBB27_59: # %else167
+; RV64-NEXT:    slli a1, a2, 6
+; RV64-NEXT:    bltz a1, .LBB27_126
+; RV64-NEXT:  .LBB27_60: # %else170
+; RV64-NEXT:    slli a1, a2, 5
+; RV64-NEXT:    bltz a1, .LBB27_127
+; RV64-NEXT:  .LBB27_61: # %else173
+; RV64-NEXT:    slli a1, a2, 4
+; RV64-NEXT:    bltz a1, .LBB27_128
+; RV64-NEXT:  .LBB27_62: # %else176
+; RV64-NEXT:    slli a1, a2, 3
+; RV64-NEXT:    bltz a1, .LBB27_129
+; RV64-NEXT:  .LBB27_63: # %else179
+; RV64-NEXT:    slli a1, a2, 2
+; RV64-NEXT:    bgez a1, .LBB27_65
+; RV64-NEXT:  .LBB27_64: # %cond.load181
+; RV64-NEXT:    lh a1, 122(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a1
+; RV64-NEXT:    li a1, 62
+; RV64-NEXT:    li a3, 61
+; RV64-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v16, a3
+; RV64-NEXT:  .LBB27_65: # %else182
+; RV64-NEXT:    slli a1, a2, 1
+; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT:    vslidedown.vi v16, v0, 1
+; RV64-NEXT:    bgez a1, .LBB27_67
+; RV64-NEXT:  # %bb.66: # %cond.load184
+; RV64-NEXT:    lh a1, 124(a0)
+; RV64-NEXT:    vmv.s.x v24, a1
+; RV64-NEXT:    li a1, 63
+; RV64-NEXT:    li a3, 62
+; RV64-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v24, a3
+; RV64-NEXT:  .LBB27_67: # %else185
+; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT:    vmv.x.s a1, v16
+; RV64-NEXT:    bgez a2, .LBB27_69
+; RV64-NEXT:  # %bb.68: # %cond.load187
+; RV64-NEXT:    lh a2, 126(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    li a2, 63
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vslideup.vx v8, v16, a2
+; RV64-NEXT:  .LBB27_69: # %else188
+; RV64-NEXT:    andi a2, a1, 1
+; RV64-NEXT:    beqz a2, .LBB27_130
+; RV64-NEXT:  # %bb.70: # %cond.load190
+; RV64-NEXT:    lh a2, 128(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a2
+; RV64-NEXT:    andi a2, a1, 2
+; RV64-NEXT:    bnez a2, .LBB27_131
+; RV64-NEXT:    j .LBB27_132
+; RV64-NEXT:  .LBB27_71: # %cond.load4
+; RV64-NEXT:    lh a1, 4(a0)
+; RV64-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
+; RV64-NEXT:    vmv.s.x v16, a1
+; RV64-NEXT:    vslideup.vi v8, v16, 2
+; RV64-NEXT:    andi a1, a2, 8
+; RV64-NEXT:    beqz a1, .LBB27_6
+; RV64-NEXT:  .LBB27_72: # %cond.load7
+; RV64-NEXT:    lh a1, 6(a0)
+; RV64-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
+; RV64-NEXT:    vmv.s.x v16, a1
+; RV64-NEXT:    vslideup.vi v8, v16, 3
+; RV64-NEXT:    andi a1, a2, 16
+; RV64-NEXT:    beqz a1, .LBB27_7
+; RV64-NEXT:  .LBB27_73: # %cond.load10
+; RV64-NEXT:    lh a1, 8(a0)
+; RV64-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
+; RV64-NEXT:    vmv.s.x v16, a1
+; RV64-NEXT:    vslideup.vi v8, v16, 4
+; RV64-NEXT:    andi a1, a2, 32
+; RV64-NEXT:    beqz a1, .LBB27_8
+; RV64-NEXT:  .LBB27_74: # %cond.load13
+; RV64-NEXT:    lh a1, 10(a0)
+; RV64-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
+; RV64-NEXT:    vmv.s.x v16, a1
+; RV64-NEXT:    vslideup.vi v8, v16, 5
+; RV64-NEXT:    andi a1, a2, 64
+; RV64-NEXT:    beqz a1, .LBB27_9
+; RV64-NEXT:  .LBB27_75: # %cond.load16
+; RV64-NEXT:    lh a1, 12(a0)
+; RV64-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
+; RV64-NEXT:    vmv.s.x v16, a1
+; RV64-NEXT:    vslideup.vi v8, v16, 6
+; RV64-NEXT:    andi a1, a2, 128
+; RV64-NEXT:    beqz a1, .LBB27_10
+; RV64-NEXT:  .LBB27_76: # %cond.load19
+; RV64-NEXT:    lh a1, 14(a0)
+; RV64-NEXT:    vsetivli zero, 8, e16, m1, tu, ma
+; RV64-NEXT:    vmv.s.x v16, a1
+; RV64-NEXT:    vslideup.vi v8, v16, 7
+; RV64-NEXT:    andi a1, a2, 256
+; RV64-NEXT:    beqz a1, .LBB27_11
+; RV64-NEXT:  .LBB27_77: # %cond.load22
+; RV64-NEXT:    lh a1, 16(a0)
+; RV64-NEXT:    vsetivli zero, 9, e16, m2, tu, ma
+; RV64-NEXT:    vmv.s.x v16, a1
+; RV64-NEXT:    vslideup.vi v8, v16, 8
+; RV64-NEXT:    andi a1, a2, 512
+; RV64-NEXT:    beqz a1, .LBB27_12
+; RV64-NEXT:  .LBB27_78: # %cond.load25
+; RV64-NEXT:    lh a1, 18(a0)
+; RV64-NEXT:    vsetivli zero, 10, e16, m2, tu, ma
+; RV64-NEXT:    vmv.s.x v16, a1
+; RV64-NEXT:    vslideup.vi v8, v16, 9
+; RV64-NEXT:    andi a1, a2, 1024
+; RV64-NEXT:    beqz a1, .LBB27_13
+; RV64-NEXT:  .LBB27_79: # %cond.load28
+; RV64-NEXT:    lh a1, 20(a0)
+; RV64-NEXT:    vsetivli zero, 11, e16, m2, tu, ma
+; RV64-NEXT:    vmv.s.x v16, a1
+; RV64-NEXT:    vslideup.vi v8, v16, 10
+; RV64-NEXT:    slli a1, a2, 52
+; RV64-NEXT:    bgez a1, .LBB27_14
+; RV64-NEXT:  .LBB27_80: # %cond.load31
+; RV64-NEXT:    lh a1, 22(a0)
+; RV64-NEXT:    vsetivli zero, 12, e16, m2, tu, ma
+; RV64-NEXT:    vmv.s.x v16, a1
+; RV64-NEXT:    vslideup.vi v8, v16, 11
+; RV64-NEXT:    slli a1, a2, 51
+; RV64-NEXT:    bgez a1, .LBB27_15
+; RV64-NEXT:  .LBB27_81: # %cond.load34
+; RV64-NEXT:    lh a1, 24(a0)
+; RV64-NEXT:    vsetivli zero, 13, e16, m2, tu, ma
+; RV64-NEXT:    vmv.s.x v16, a1
+; RV64-NEXT:    vslideup.vi v8, v16, 12
+; RV64-NEXT:    slli a1, a2, 50
+; RV64-NEXT:    bgez a1, .LBB27_16
+; RV64-NEXT:  .LBB27_82: # %cond.load37
+; RV64-NEXT:    lh a1, 26(a0)
+; RV64-NEXT:    vsetivli zero, 14, e16, m2, tu, ma
+; RV64-NEXT:    vmv.s.x v16, a1
+; RV64-NEXT:    vslideup.vi v8, v16, 13
+; RV64-NEXT:    slli a1, a2, 49
+; RV64-NEXT:    bgez a1, .LBB27_17
+; RV64-NEXT:  .LBB27_83: # %cond.load40
+; RV64-NEXT:    lh a1, 28(a0)
+; RV64-NEXT:    vsetivli zero, 15, e16, m2, tu, ma
+; RV64-NEXT:    vmv.s.x v16, a1
+; RV64-NEXT:    vslideup.vi v8, v16, 14
+; RV64-NEXT:    slli a1, a2, 48
+; RV64-NEXT:    bgez a1, .LBB27_18
+; RV64-NEXT:  .LBB27_84: # %cond.load43
+; RV64-NEXT:    lh a1, 30(a0)
+; RV64-NEXT:    vsetivli zero, 16, e16, m2, tu, ma
+; RV64-NEXT:    vmv.s.x v16, a1
+; RV64-NEXT:    vslideup.vi v8, v16, 15
+; RV64-NEXT:    slli a1, a2, 47
+; RV64-NEXT:    bgez a1, .LBB27_19
+; RV64-NEXT:  .LBB27_85: # %cond.load46
+; RV64-NEXT:    lh a1, 32(a0)
+; RV64-NEXT:    vsetivli zero, 17, e16, m4, tu, ma
+; RV64-NEXT:    vmv.s.x v16, a1
+; RV64-NEXT:    vslideup.vi v8, v16, 16
+; RV64-NEXT:    slli a1, a2, 46
+; RV64-NEXT:    bgez a1, .LBB27_20
+; RV64-NEXT:  .LBB27_86: # %cond.load49
+; RV64-NEXT:    lh a1, 34(a0)
+; RV64-NEXT:    vsetivli zero, 18, e16, m4, tu, ma
+; RV64-NEXT:    vmv.s.x v16, a1
+; RV64-NEXT:    vslideup.vi v8, v16, 17
+; RV64-NEXT:    slli a1, a2, 45
+; RV64-NEXT:    bgez a1, .LBB27_21
+; RV64-NEXT:  .LBB27_87: # %cond.load52
+; RV64-NEXT:    lh a1, 36(a0)
+; RV64-NEXT:    vsetivli zero, 19, e16, m4, tu, ma
+; RV64-NEXT:    vmv.s.x v16, a1
+; RV64-NEXT:    vslideup.vi v8, v16, 18
+; RV64-NEXT:    slli a1, a2, 44
+; RV64-NEXT:    bgez a1, .LBB27_22
+; RV64-NEXT:  .LBB27_88: # %cond.load55
+; RV64-NEXT:    lh a1, 38(a0)
+; RV64-NEXT:    vsetivli zero, 20, e16, m4, tu, ma
+; RV64-NEXT:    vmv.s.x v16, a1
+; RV64-NEXT:    vslideup.vi v8, v16, 19
+; RV64-NEXT:    slli a1, a2, 43
+; RV64-NEXT:    bgez a1, .LBB27_23
+; RV64-NEXT:  .LBB27_89: # %cond.load58
+; RV64-NEXT:    lh a1, 40(a0)
+; RV64-NEXT:    vsetivli zero, 21, e16, m4, tu, ma
+; RV64-NEXT:    vmv.s.x v16, a1
+; RV64-NEXT:    vslideup.vi v8, v16, 20
+; RV64-NEXT:    slli a1, a2, 42
+; RV64-NEXT:    bgez a1, .LBB27_24
+; RV64-NEXT:  .LBB27_90: # %cond.load61
+; RV64-NEXT:    lh a1, 42(a0)
+; RV64-NEXT:    vsetivli zero, 22, e16, m4, tu, ma
+; RV64-NEXT:    vmv.s.x v16, a1
+; RV64-NEXT:    vslideup.vi v8, v16, 21
+; RV64-NEXT:    slli a1, a2, 41
+; RV64-NEXT:    bgez a1, .LBB27_25
+; RV64-NEXT:  .LBB27_91: # %cond.load64
+; RV64-NEXT:    lh a1, 44(a0)
+; RV64-NEXT:    vsetivli zero, 23, e16, m4, tu, ma
+; RV64-NEXT:    vmv.s.x v16, a1
+; RV64-NEXT:    vslideup.vi v8, v16, 22
+; RV64-NEXT:    slli a1, a2, 40
+; RV64-NEXT:    bgez a1, .LBB27_26
+; RV64-NEXT:  .LBB27_92: # %cond.load67
+; RV64-NEXT:    lh a1, 46(a0)
+; RV64-NEXT:    vsetivli zero, 24, e16, m4, tu, ma
+; RV64-NEXT:    vmv.s.x v16, a1
+; RV64-NEXT:    vslideup.vi v8, v16, 23
+; RV64-NEXT:    slli a1, a2, 39
+; RV64-NEXT:    bgez a1, .LBB27_27
+; RV64-NEXT:  .LBB27_93: # %cond.load70
+; RV64-NEXT:    lh a1, 48(a0)
+; RV64-NEXT:    vsetivli zero, 25, e16, m4, tu, ma
+; RV64-NEXT:    vmv.s.x v16, a1
+; RV64-NEXT:    vslideup.vi v8, v16, 24
+; RV64-NEXT:    slli a1, a2, 38
+; RV64-NEXT:    bgez a1, .LBB27_28
+; RV64-NEXT:  .LBB27_94: # %cond.load73
+; RV64-NEXT:    lh a1, 50(a0)
+; RV64-NEXT:    vsetivli zero, 26, e16, m4, tu, ma
+; RV64-NEXT:    vmv.s.x v16, a1
+; RV64-NEXT:    vslideup.vi v8, v16, 25
+; RV64-NEXT:    slli a1, a2, 37
+; RV64-NEXT:    bgez a1, .LBB27_29
+; RV64-NEXT:  .LBB27_95: # %cond.load76
+; RV64-NEXT:    lh a1, 52(a0)
+; RV64-NEXT:    vsetivli zero, 27, e16, m4, tu, ma
+; RV64-NEXT:    vmv.s.x v16, a1
+; RV64-NEXT:    vmv8r.v v24, v8
+; RV64-NEXT:    vslideup.vi v8, v16, 26
+; RV64-NEXT:    vmv4r.v v24, v8
+; RV64-NEXT:    vmv8r.v v8, v24
+; RV64-NEXT:    slli a1, a2, 36
+; RV64-NEXT:    bgez a1, .LBB27_30
+; RV64-NEXT:  .LBB27_96: # %cond.load79
+; RV64-NEXT:    lh a1, 54(a0)
+; RV64-NEXT:    vsetivli zero, 28, e16, m4, tu, ma
+; RV64-NEXT:    vmv.s.x v16, a1
+; RV64-NEXT:    vmv8r.v v24, v8
+; RV64-NEXT:    vslideup.vi v8, v16, 27
+; RV64-NEXT:    vmv4r.v v24, v8
+; RV64-NEXT:    vmv8r.v v8, v24
+; RV64-NEXT:    slli a1, a2, 35
+; RV64-NEXT:    bgez a1, .LBB27_31
+; RV64-NEXT:  .LBB27_97: # %cond.load82
+; RV64-NEXT:    lh a1, 56(a0)
+; RV64-NEXT:    vsetivli zero, 29, e16, m4, tu, ma
+; RV64-NEXT:    vmv.s.x v16, a1
+; RV64-NEXT:    vmv8r.v v24, v8
+; RV64-NEXT:    vslideup.vi v8, v16, 28
+; RV64-NEXT:    vmv4r.v v24, v8
+; RV64-NEXT:    vmv8r.v v8, v24
+; RV64-NEXT:    slli a1, a2, 34
+; RV64-NEXT:    bgez a1, .LBB27_32
+; RV64-NEXT:  .LBB27_98: # %cond.load85
+; RV64-NEXT:    lh a1, 58(a0)
+; RV64-NEXT:    vsetivli zero, 30, e16, m4, tu, ma
+; RV64-NEXT:    vmv.s.x v16, a1
+; RV64-NEXT:    vmv8r.v v24, v8
+; RV64-NEXT:    vslideup.vi v8, v16, 29
+; RV64-NEXT:    vmv4r.v v24, v8
+; RV64-NEXT:    vmv8r.v v8, v24
+; RV64-NEXT:    slli a1, a2, 33
+; RV64-NEXT:    bgez a1, .LBB27_33
+; RV64-NEXT:  .LBB27_99: # %cond.load88
+; RV64-NEXT:    lh a1, 60(a0)
+; RV64-NEXT:    vsetivli zero, 31, e16, m4, tu, ma
+; RV64-NEXT:    vmv.s.x v16, a1
+; RV64-NEXT:    vmv8r.v v24, v8
+; RV64-NEXT:    vslideup.vi v8, v16, 30
+; RV64-NEXT:    vmv4r.v v24, v8
+; RV64-NEXT:    vmv8r.v v8, v24
+; RV64-NEXT:    slli a1, a2, 32
+; RV64-NEXT:    bgez a1, .LBB27_34
+; RV64-NEXT:  .LBB27_100: # %cond.load91
+; RV64-NEXT:    lh a1, 62(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a1
+; RV64-NEXT:    vmv8r.v v24, v8
+; RV64-NEXT:    li a1, 32
+; RV64-NEXT:    vsetvli zero, a1, e16, m4, tu, ma
+; RV64-NEXT:    vslideup.vi v8, v16, 31
+; RV64-NEXT:    vmv4r.v v24, v8
+; RV64-NEXT:    vmv8r.v v8, v24
+; RV64-NEXT:    slli a1, a2, 31
+; RV64-NEXT:    bgez a1, .LBB27_35
+; RV64-NEXT:  .LBB27_101: # %cond.load94
+; RV64-NEXT:    lh a1, 64(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a1
+; RV64-NEXT:    li a1, 33
+; RV64-NEXT:    li a3, 32
+; RV64-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v16, a3
+; RV64-NEXT:    slli a1, a2, 30
+; RV64-NEXT:    bgez a1, .LBB27_36
+; RV64-NEXT:  .LBB27_102: # %cond.load97
+; RV64-NEXT:    lh a1, 66(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a1
+; RV64-NEXT:    li a1, 34
+; RV64-NEXT:    li a3, 33
+; RV64-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v16, a3
+; RV64-NEXT:    slli a1, a2, 29
+; RV64-NEXT:    bgez a1, .LBB27_37
+; RV64-NEXT:  .LBB27_103: # %cond.load100
+; RV64-NEXT:    lh a1, 68(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a1
+; RV64-NEXT:    li a1, 35
+; RV64-NEXT:    li a3, 34
+; RV64-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v16, a3
+; RV64-NEXT:    slli a1, a2, 28
+; RV64-NEXT:    bgez a1, .LBB27_38
+; RV64-NEXT:  .LBB27_104: # %cond.load103
+; RV64-NEXT:    lh a1, 70(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a1
+; RV64-NEXT:    li a1, 36
+; RV64-NEXT:    li a3, 35
+; RV64-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v16, a3
+; RV64-NEXT:    slli a1, a2, 27
+; RV64-NEXT:    bgez a1, .LBB27_39
+; RV64-NEXT:  .LBB27_105: # %cond.load106
+; RV64-NEXT:    lh a1, 72(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a1
+; RV64-NEXT:    li a1, 37
+; RV64-NEXT:    li a3, 36
+; RV64-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v16, a3
+; RV64-NEXT:    slli a1, a2, 26
+; RV64-NEXT:    bgez a1, .LBB27_40
+; RV64-NEXT:  .LBB27_106: # %cond.load109
+; RV64-NEXT:    lh a1, 74(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a1
+; RV64-NEXT:    li a1, 38
+; RV64-NEXT:    li a3, 37
+; RV64-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v16, a3
+; RV64-NEXT:    slli a1, a2, 25
+; RV64-NEXT:    bgez a1, .LBB27_41
+; RV64-NEXT:  .LBB27_107: # %cond.load112
+; RV64-NEXT:    lh a1, 76(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a1
+; RV64-NEXT:    li a1, 39
+; RV64-NEXT:    li a3, 38
+; RV64-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v16, a3
+; RV64-NEXT:    slli a1, a2, 24
+; RV64-NEXT:    bgez a1, .LBB27_42
+; RV64-NEXT:  .LBB27_108: # %cond.load115
+; RV64-NEXT:    lh a1, 78(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a1
+; RV64-NEXT:    li a1, 40
+; RV64-NEXT:    li a3, 39
+; RV64-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v16, a3
+; RV64-NEXT:    slli a1, a2, 23
+; RV64-NEXT:    bgez a1, .LBB27_43
+; RV64-NEXT:  .LBB27_109: # %cond.load118
+; RV64-NEXT:    lh a1, 80(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a1
+; RV64-NEXT:    li a1, 41
+; RV64-NEXT:    li a3, 40
+; RV64-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v16, a3
+; RV64-NEXT:    slli a1, a2, 22
+; RV64-NEXT:    bgez a1, .LBB27_44
+; RV64-NEXT:  .LBB27_110: # %cond.load121
+; RV64-NEXT:    lh a1, 82(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a1
+; RV64-NEXT:    li a1, 42
+; RV64-NEXT:    li a3, 41
+; RV64-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v16, a3
+; RV64-NEXT:    slli a1, a2, 21
+; RV64-NEXT:    bgez a1, .LBB27_45
+; RV64-NEXT:  .LBB27_111: # %cond.load124
+; RV64-NEXT:    lh a1, 84(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a1
+; RV64-NEXT:    li a1, 43
+; RV64-NEXT:    li a3, 42
+; RV64-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v16, a3
+; RV64-NEXT:    slli a1, a2, 20
+; RV64-NEXT:    bgez a1, .LBB27_46
+; RV64-NEXT:  .LBB27_112: # %cond.load127
+; RV64-NEXT:    lh a1, 86(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a1
+; RV64-NEXT:    li a1, 44
+; RV64-NEXT:    li a3, 43
+; RV64-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v16, a3
+; RV64-NEXT:    slli a1, a2, 19
+; RV64-NEXT:    bgez a1, .LBB27_47
+; RV64-NEXT:  .LBB27_113: # %cond.load130
+; RV64-NEXT:    lh a1, 88(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a1
+; RV64-NEXT:    li a1, 45
+; RV64-NEXT:    li a3, 44
+; RV64-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v16, a3
+; RV64-NEXT:    slli a1, a2, 18
+; RV64-NEXT:    bgez a1, .LBB27_48
+; RV64-NEXT:  .LBB27_114: # %cond.load133
+; RV64-NEXT:    lh a1, 90(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a1
+; RV64-NEXT:    li a1, 46
+; RV64-NEXT:    li a3, 45
+; RV64-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v16, a3
+; RV64-NEXT:    slli a1, a2, 17
+; RV64-NEXT:    bgez a1, .LBB27_49
+; RV64-NEXT:  .LBB27_115: # %cond.load136
+; RV64-NEXT:    lh a1, 92(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a1
+; RV64-NEXT:    li a1, 47
+; RV64-NEXT:    li a3, 46
+; RV64-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v16, a3
+; RV64-NEXT:    slli a1, a2, 16
+; RV64-NEXT:    bgez a1, .LBB27_50
+; RV64-NEXT:  .LBB27_116: # %cond.load139
+; RV64-NEXT:    lh a1, 94(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a1
+; RV64-NEXT:    li a1, 48
+; RV64-NEXT:    li a3, 47
+; RV64-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v16, a3
+; RV64-NEXT:    slli a1, a2, 15
+; RV64-NEXT:    bgez a1, .LBB27_51
+; RV64-NEXT:  .LBB27_117: # %cond.load142
+; RV64-NEXT:    lh a1, 96(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a1
+; RV64-NEXT:    li a1, 49
+; RV64-NEXT:    li a3, 48
+; RV64-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v16, a3
+; RV64-NEXT:    slli a1, a2, 14
+; RV64-NEXT:    bgez a1, .LBB27_52
+; RV64-NEXT:  .LBB27_118: # %cond.load145
+; RV64-NEXT:    lh a1, 98(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a1
+; RV64-NEXT:    li a1, 50
+; RV64-NEXT:    li a3, 49
+; RV64-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v16, a3
+; RV64-NEXT:    slli a1, a2, 13
+; RV64-NEXT:    bgez a1, .LBB27_53
+; RV64-NEXT:  .LBB27_119: # %cond.load148
+; RV64-NEXT:    lh a1, 100(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a1
+; RV64-NEXT:    li a1, 51
+; RV64-NEXT:    li a3, 50
+; RV64-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v16, a3
+; RV64-NEXT:    slli a1, a2, 12
+; RV64-NEXT:    bgez a1, .LBB27_54
+; RV64-NEXT:  .LBB27_120: # %cond.load151
+; RV64-NEXT:    lh a1, 102(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a1
+; RV64-NEXT:    li a1, 52
+; RV64-NEXT:    li a3, 51
+; RV64-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v16, a3
+; RV64-NEXT:    slli a1, a2, 11
+; RV64-NEXT:    bgez a1, .LBB27_55
+; RV64-NEXT:  .LBB27_121: # %cond.load154
+; RV64-NEXT:    lh a1, 104(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a1
+; RV64-NEXT:    li a1, 53
+; RV64-NEXT:    li a3, 52
+; RV64-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v16, a3
+; RV64-NEXT:    slli a1, a2, 10
+; RV64-NEXT:    bgez a1, .LBB27_56
+; RV64-NEXT:  .LBB27_122: # %cond.load157
+; RV64-NEXT:    lh a1, 106(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a1
+; RV64-NEXT:    li a1, 54
+; RV64-NEXT:    li a3, 53
+; RV64-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v16, a3
+; RV64-NEXT:    slli a1, a2, 9
+; RV64-NEXT:    bgez a1, .LBB27_57
+; RV64-NEXT:  .LBB27_123: # %cond.load160
+; RV64-NEXT:    lh a1, 108(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a1
+; RV64-NEXT:    li a1, 55
+; RV64-NEXT:    li a3, 54
+; RV64-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v16, a3
+; RV64-NEXT:    slli a1, a2, 8
+; RV64-NEXT:    bgez a1, .LBB27_58
+; RV64-NEXT:  .LBB27_124: # %cond.load163
+; RV64-NEXT:    lh a1, 110(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a1
+; RV64-NEXT:    li a1, 56
+; RV64-NEXT:    li a3, 55
+; RV64-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v16, a3
+; RV64-NEXT:    slli a1, a2, 7
+; RV64-NEXT:    bgez a1, .LBB27_59
+; RV64-NEXT:  .LBB27_125: # %cond.load166
+; RV64-NEXT:    lh a1, 112(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a1
+; RV64-NEXT:    li a1, 57
+; RV64-NEXT:    li a3, 56
+; RV64-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v16, a3
+; RV64-NEXT:    slli a1, a2, 6
+; RV64-NEXT:    bgez a1, .LBB27_60
+; RV64-NEXT:  .LBB27_126: # %cond.load169
+; RV64-NEXT:    lh a1, 114(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a1
+; RV64-NEXT:    li a1, 58
+; RV64-NEXT:    li a3, 57
+; RV64-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v16, a3
+; RV64-NEXT:    slli a1, a2, 5
+; RV64-NEXT:    bgez a1, .LBB27_61
+; RV64-NEXT:  .LBB27_127: # %cond.load172
+; RV64-NEXT:    lh a1, 116(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a1
+; RV64-NEXT:    li a1, 59
+; RV64-NEXT:    li a3, 58
+; RV64-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v16, a3
+; RV64-NEXT:    slli a1, a2, 4
+; RV64-NEXT:    bgez a1, .LBB27_62
+; RV64-NEXT:  .LBB27_128: # %cond.load175
+; RV64-NEXT:    lh a1, 118(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a1
+; RV64-NEXT:    li a1, 60
+; RV64-NEXT:    li a3, 59
+; RV64-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v16, a3
+; RV64-NEXT:    slli a1, a2, 3
+; RV64-NEXT:    bgez a1, .LBB27_63
+; RV64-NEXT:  .LBB27_129: # %cond.load178
+; RV64-NEXT:    lh a1, 120(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v16, a1
+; RV64-NEXT:    li a1, 61
+; RV64-NEXT:    li a3, 60
+; RV64-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v8, v16, a3
+; RV64-NEXT:    slli a1, a2, 2
+; RV64-NEXT:    bltz a1, .LBB27_64
+; RV64-NEXT:    j .LBB27_65
+; RV64-NEXT:  .LBB27_130:
+; RV64-NEXT:    # implicit-def: $v16m8
+; RV64-NEXT:    andi a2, a1, 2
+; RV64-NEXT:    beqz a2, .LBB27_132
+; RV64-NEXT:  .LBB27_131: # %cond.load193
+; RV64-NEXT:    lh a2, 130(a0)
+; RV64-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
+; RV64-NEXT:    vmv.s.x v24, a2
+; RV64-NEXT:    vslideup.vi v16, v24, 1
+; RV64-NEXT:  .LBB27_132: # %else194
+; RV64-NEXT:    andi a2, a1, 4
+; RV64-NEXT:    bnez a2, .LBB27_195
+; RV64-NEXT:  # %bb.133: # %else197
+; RV64-NEXT:    andi a2, a1, 8
+; RV64-NEXT:    bnez a2, .LBB27_196
+; RV64-NEXT:  .LBB27_134: # %else200
+; RV64-NEXT:    andi a2, a1, 16
+; RV64-NEXT:    bnez a2, .LBB27_197
+; RV64-NEXT:  .LBB27_135: # %else203
+; RV64-NEXT:    andi a2, a1, 32
+; RV64-NEXT:    bnez a2, .LBB27_198
+; RV64-NEXT:  .LBB27_136: # %else206
+; RV64-NEXT:    andi a2, a1, 64
+; RV64-NEXT:    bnez a2, .LBB27_199
+; RV64-NEXT:  .LBB27_137: # %else209
+; RV64-NEXT:    andi a2, a1, 128
+; RV64-NEXT:    bnez a2, .LBB27_200
+; RV64-NEXT:  .LBB27_138: # %else212
+; RV64-NEXT:    andi a2, a1, 256
+; RV64-NEXT:    bnez a2, .LBB27_201
+; RV64-NEXT:  .LBB27_139: # %else215
+; RV64-NEXT:    andi a2, a1, 512
+; RV64-NEXT:    bnez a2, .LBB27_202
+; RV64-NEXT:  .LBB27_140: # %else218
+; RV64-NEXT:    andi a2, a1, 1024
+; RV64-NEXT:    bnez a2, .LBB27_203
+; RV64-NEXT:  .LBB27_141: # %else221
+; RV64-NEXT:    slli a2, a1, 52
+; RV64-NEXT:    bltz a2, .LBB27_204
+; RV64-NEXT:  .LBB27_142: # %else224
+; RV64-NEXT:    slli a2, a1, 51
+; RV64-NEXT:    bltz a2, .LBB27_205
+; RV64-NEXT:  .LBB27_143: # %else227
+; RV64-NEXT:    slli a2, a1, 50
+; RV64-NEXT:    bltz a2, .LBB27_206
+; RV64-NEXT:  .LBB27_144: # %else230
+; RV64-NEXT:    slli a2, a1, 49
+; RV64-NEXT:    bltz a2, .LBB27_207
+; RV64-NEXT:  .LBB27_145: # %else233
+; RV64-NEXT:    slli a2, a1, 48
+; RV64-NEXT:    bltz a2, .LBB27_208
+; RV64-NEXT:  .LBB27_146: # %else236
+; RV64-NEXT:    slli a2, a1, 47
+; RV64-NEXT:    bltz a2, .LBB27_209
+; RV64-NEXT:  .LBB27_147: # %else239
+; RV64-NEXT:    slli a2, a1, 46
+; RV64-NEXT:    bltz a2, .LBB27_210
+; RV64-NEXT:  .LBB27_148: # %else242
+; RV64-NEXT:    slli a2, a1, 45
+; RV64-NEXT:    bltz a2, .LBB27_211
+; RV64-NEXT:  .LBB27_149: # %else245
+; RV64-NEXT:    slli a2, a1, 44
+; RV64-NEXT:    bltz a2, .LBB27_212
+; RV64-NEXT:  .LBB27_150: # %else248
+; RV64-NEXT:    slli a2, a1, 43
+; RV64-NEXT:    bltz a2, .LBB27_213
+; RV64-NEXT:  .LBB27_151: # %else251
+; RV64-NEXT:    slli a2, a1, 42
+; RV64-NEXT:    bltz a2, .LBB27_214
+; RV64-NEXT:  .LBB27_152: # %else254
+; RV64-NEXT:    slli a2, a1, 41
+; RV64-NEXT:    bltz a2, .LBB27_215
+; RV64-NEXT:  .LBB27_153: # %else257
+; RV64-NEXT:    slli a2, a1, 40
+; RV64-NEXT:    bltz a2, .LBB27_216
+; RV64-NEXT:  .LBB27_154: # %else260
+; RV64-NEXT:    slli a2, a1, 39
+; RV64-NEXT:    bltz a2, .LBB27_217
+; RV64-NEXT:  .LBB27_155: # %else263
+; RV64-NEXT:    slli a2, a1, 38
+; RV64-NEXT:    bltz a2, .LBB27_218
+; RV64-NEXT:  .LBB27_156: # %else266
+; RV64-NEXT:    slli a2, a1, 37
+; RV64-NEXT:    bltz a2, .LBB27_219
+; RV64-NEXT:  .LBB27_157: # %else269
+; RV64-NEXT:    slli a2, a1, 36
+; RV64-NEXT:    bltz a2, .LBB27_220
+; RV64-NEXT:  .LBB27_158: # %else272
+; RV64-NEXT:    slli a2, a1, 35
+; RV64-NEXT:    bltz a2, .LBB27_221
+; RV64-NEXT:  .LBB27_159: # %else275
+; RV64-NEXT:    slli a2, a1, 34
+; RV64-NEXT:    bltz a2, .LBB27_222
+; RV64-NEXT:  .LBB27_160: # %else278
+; RV64-NEXT:    slli a2, a1, 33
+; RV64-NEXT:    bltz a2, .LBB27_223
+; RV64-NEXT:  .LBB27_161: # %else281
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    bltz a2, .LBB27_224
+; RV64-NEXT:  .LBB27_162: # %else284
+; RV64-NEXT:    slli a2, a1, 31
+; RV64-NEXT:    bltz a2, .LBB27_225
+; RV64-NEXT:  .LBB27_163: # %else287
+; RV64-NEXT:    slli a2, a1, 30
+; RV64-NEXT:    bltz a2, .LBB27_226
+; RV64-NEXT:  .LBB27_164: # %else290
+; RV64-NEXT:    slli a2, a1, 29
+; RV64-NEXT:    bltz a2, .LBB27_227
+; RV64-NEXT:  .LBB27_165: # %else293
+; RV64-NEXT:    slli a2, a1, 28
+; RV64-NEXT:    bltz a2, .LBB27_228
+; RV64-NEXT:  .LBB27_166: # %else296
+; RV64-NEXT:    slli a2, a1, 27
+; RV64-NEXT:    bltz a2, .LBB27_229
+; RV64-NEXT:  .LBB27_167: # %else299
+; RV64-NEXT:    slli a2, a1, 26
+; RV64-NEXT:    bltz a2, .LBB27_230
+; RV64-NEXT:  .LBB27_168: # %else302
+; RV64-NEXT:    slli a2, a1, 25
+; RV64-NEXT:    bltz a2, .LBB27_231
+; RV64-NEXT:  .LBB27_169: # %else305
+; RV64-NEXT:    slli a2, a1, 24
+; RV64-NEXT:    bltz a2, .LBB27_232
+; RV64-NEXT:  .LBB27_170: # %else308
+; RV64-NEXT:    slli a2, a1, 23
+; RV64-NEXT:    bltz a2, .LBB27_233
+; RV64-NEXT:  .LBB27_171: # %else311
+; RV64-NEXT:    slli a2, a1, 22
+; RV64-NEXT:    bltz a2, .LBB27_234
+; RV64-NEXT:  .LBB27_172: # %else314
+; RV64-NEXT:    slli a2, a1, 21
+; RV64-NEXT:    bltz a2, .LBB27_235
+; RV64-NEXT:  .LBB27_173: # %else317
+; RV64-NEXT:    slli a2, a1, 20
+; RV64-NEXT:    bltz a2, .LBB27_236
+; RV64-NEXT:  .LBB27_174: # %else320
+; RV64-NEXT:    slli a2, a1, 19
+; RV64-NEXT:    bltz a2, .LBB27_237
+; RV64-NEXT:  .LBB27_175: # %else323
+; RV64-NEXT:    slli a2, a1, 18
+; RV64-NEXT:    bltz a2, .LBB27_238
+; RV64-NEXT:  .LBB27_176: # %else326
+; RV64-NEXT:    slli a2, a1, 17
+; RV64-NEXT:    bltz a2, .LBB27_239
+; RV64-NEXT:  .LBB27_177: # %else329
+; RV64-NEXT:    slli a2, a1, 16
+; RV64-NEXT:    bltz a2, .LBB27_240
+; RV64-NEXT:  .LBB27_178: # %else332
+; RV64-NEXT:    slli a2, a1, 15
+; RV64-NEXT:    bltz a2, .LBB27_241
+; RV64-NEXT:  .LBB27_179: # %else335
+; RV64-NEXT:    slli a2, a1, 14
+; RV64-NEXT:    bltz a2, .LBB27_242
+; RV64-NEXT:  .LBB27_180: # %else338
+; RV64-NEXT:    slli a2, a1, 13
+; RV64-NEXT:    bltz a2, .LBB27_243
+; RV64-NEXT:  .LBB27_181: # %else341
+; RV64-NEXT:    slli a2, a1, 12
+; RV64-NEXT:    bltz a2, .LBB27_244
+; RV64-NEXT:  .LBB27_182: # %else344
+; RV64-NEXT:    slli a2, a1, 11
+; RV64-NEXT:    bltz a2, .LBB27_245
+; RV64-NEXT:  .LBB27_183: # %else347
+; RV64-NEXT:    slli a2, a1, 10
+; RV64-NEXT:    bltz a2, .LBB27_246
+; RV64-NEXT:  .LBB27_184: # %else350
+; RV64-NEXT:    slli a2, a1, 9
+; RV64-NEXT:    bltz a2, .LBB27_247
+; RV64-NEXT:  .LBB27_185: # %else353
+; RV64-NEXT:    slli a2, a1, 8
+; RV64-NEXT:    bltz a2, .LBB27_248
+; RV64-NEXT:  .LBB27_186: # %else356
+; RV64-NEXT:    slli a2, a1, 7
+; RV64-NEXT:    bltz a2, .LBB27_249
+; RV64-NEXT:  .LBB27_187: # %else359
+; RV64-NEXT:    slli a2, a1, 6
+; RV64-NEXT:    bltz a2, .LBB27_250
+; RV64-NEXT:  .LBB27_188: # %else362
+; RV64-NEXT:    slli a2, a1, 5
+; RV64-NEXT:    bltz a2, .LBB27_251
+; RV64-NEXT:  .LBB27_189: # %else365
+; RV64-NEXT:    slli a2, a1, 4
+; RV64-NEXT:    bltz a2, .LBB27_252
+; RV64-NEXT:  .LBB27_190: # %else368
+; RV64-NEXT:    slli a2, a1, 3
+; RV64-NEXT:    bltz a2, .LBB27_253
+; RV64-NEXT:  .LBB27_191: # %else371
+; RV64-NEXT:    slli a2, a1, 2
+; RV64-NEXT:    bltz a2, .LBB27_254
+; RV64-NEXT:  .LBB27_192: # %else374
+; RV64-NEXT:    slli a2, a1, 1
+; RV64-NEXT:    bltz a2, .LBB27_255
+; RV64-NEXT:  .LBB27_193: # %else377
+; RV64-NEXT:    bltz a1, .LBB27_256
+; RV64-NEXT:  .LBB27_194: # %else380
+; RV64-NEXT:    ret
+; RV64-NEXT:  .LBB27_195: # %cond.load196
+; RV64-NEXT:    lh a2, 132(a0)
+; RV64-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
+; RV64-NEXT:    vmv.s.x v24, a2
+; RV64-NEXT:    vslideup.vi v16, v24, 2
+; RV64-NEXT:    andi a2, a1, 8
+; RV64-NEXT:    beqz a2, .LBB27_134
+; RV64-NEXT:  .LBB27_196: # %cond.load199
+; RV64-NEXT:    lh a2, 134(a0)
+; RV64-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
+; RV64-NEXT:    vmv.s.x v24, a2
+; RV64-NEXT:    vslideup.vi v16, v24, 3
+; RV64-NEXT:    andi a2, a1, 16
+; RV64-NEXT:    beqz a2, .LBB27_135
+; RV64-NEXT:  .LBB27_197: # %cond.load202
+; RV64-NEXT:    lh a2, 136(a0)
+; RV64-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
+; RV64-NEXT:    vmv.s.x v24, a2
+; RV64-NEXT:    vslideup.vi v16, v24, 4
+; RV64-NEXT:    andi a2, a1, 32
+; RV64-NEXT:    beqz a2, .LBB27_136
+; RV64-NEXT:  .LBB27_198: # %cond.load205
+; RV64-NEXT:    lh a2, 138(a0)
+; RV64-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
+; RV64-NEXT:    vmv.s.x v24, a2
+; RV64-NEXT:    vslideup.vi v16, v24, 5
+; RV64-NEXT:    andi a2, a1, 64
+; RV64-NEXT:    beqz a2, .LBB27_137
+; RV64-NEXT:  .LBB27_199: # %cond.load208
+; RV64-NEXT:    lh a2, 140(a0)
+; RV64-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
+; RV64-NEXT:    vmv.s.x v24, a2
+; RV64-NEXT:    vslideup.vi v16, v24, 6
+; RV64-NEXT:    andi a2, a1, 128
+; RV64-NEXT:    beqz a2, .LBB27_138
+; RV64-NEXT:  .LBB27_200: # %cond.load211
+; RV64-NEXT:    lh a2, 142(a0)
+; RV64-NEXT:    vsetivli zero, 8, e16, m1, tu, ma
+; RV64-NEXT:    vmv.s.x v24, a2
+; RV64-NEXT:    vslideup.vi v16, v24, 7
+; RV64-NEXT:    andi a2, a1, 256
+; RV64-NEXT:    beqz a2, .LBB27_139
+; RV64-NEXT:  .LBB27_201: # %cond.load214
+; RV64-NEXT:    lh a2, 144(a0)
+; RV64-NEXT:    vsetivli zero, 9, e16, m2, tu, ma
+; RV64-NEXT:    vmv.s.x v24, a2
+; RV64-NEXT:    vslideup.vi v16, v24, 8
+; RV64-NEXT:    andi a2, a1, 512
+; RV64-NEXT:    beqz a2, .LBB27_140
+; RV64-NEXT:  .LBB27_202: # %cond.load217
+; RV64-NEXT:    lh a2, 146(a0)
+; RV64-NEXT:    vsetivli zero, 10, e16, m2, tu, ma
+; RV64-NEXT:    vmv.s.x v24, a2
+; RV64-NEXT:    vslideup.vi v16, v24, 9
+; RV64-NEXT:    andi a2, a1, 1024
+; RV64-NEXT:    beqz a2, .LBB27_141
+; RV64-NEXT:  .LBB27_203: # %cond.load220
+; RV64-NEXT:    lh a2, 148(a0)
+; RV64-NEXT:    vsetivli zero, 11, e16, m2, tu, ma
+; RV64-NEXT:    vmv.s.x v24, a2
+; RV64-NEXT:    vslideup.vi v16, v24, 10
+; RV64-NEXT:    slli a2, a1, 52
+; RV64-NEXT:    bgez a2, .LBB27_142
+; RV64-NEXT:  .LBB27_204: # %cond.load223
+; RV64-NEXT:    lh a2, 150(a0)
+; RV64-NEXT:    vsetivli zero, 12, e16, m2, tu, ma
+; RV64-NEXT:    vmv.s.x v24, a2
+; RV64-NEXT:    vslideup.vi v16, v24, 11
+; RV64-NEXT:    slli a2, a1, 51
+; RV64-NEXT:    bgez a2, .LBB27_143
+; RV64-NEXT:  .LBB27_205: # %cond.load226
+; RV64-NEXT:    lh a2, 152(a0)
+; RV64-NEXT:    vsetivli zero, 13, e16, m2, tu, ma
+; RV64-NEXT:    vmv.s.x v24, a2
+; RV64-NEXT:    vslideup.vi v16, v24, 12
+; RV64-NEXT:    slli a2, a1, 50
+; RV64-NEXT:    bgez a2, .LBB27_144
+; RV64-NEXT:  .LBB27_206: # %cond.load229
+; RV64-NEXT:    lh a2, 154(a0)
+; RV64-NEXT:    vsetivli zero, 14, e16, m2, tu, ma
+; RV64-NEXT:    vmv.s.x v24, a2
+; RV64-NEXT:    vslideup.vi v16, v24, 13
+; RV64-NEXT:    slli a2, a1, 49
+; RV64-NEXT:    bgez a2, .LBB27_145
+; RV64-NEXT:  .LBB27_207: # %cond.load232
+; RV64-NEXT:    lh a2, 156(a0)
+; RV64-NEXT:    vsetivli zero, 15, e16, m2, tu, ma
+; RV64-NEXT:    vmv.s.x v24, a2
+; RV64-NEXT:    vslideup.vi v16, v24, 14
+; RV64-NEXT:    slli a2, a1, 48
+; RV64-NEXT:    bgez a2, .LBB27_146
+; RV64-NEXT:  .LBB27_208: # %cond.load235
+; RV64-NEXT:    lh a2, 158(a0)
+; RV64-NEXT:    vsetivli zero, 16, e16, m2, tu, ma
+; RV64-NEXT:    vmv.s.x v24, a2
+; RV64-NEXT:    vslideup.vi v16, v24, 15
+; RV64-NEXT:    slli a2, a1, 47
+; RV64-NEXT:    bgez a2, .LBB27_147
+; RV64-NEXT:  .LBB27_209: # %cond.load238
+; RV64-NEXT:    lh a2, 160(a0)
+; RV64-NEXT:    vsetivli zero, 17, e16, m4, tu, ma
+; RV64-NEXT:    vmv.s.x v24, a2
+; RV64-NEXT:    vslideup.vi v16, v24, 16
+; RV64-NEXT:    slli a2, a1, 46
+; RV64-NEXT:    bgez a2, .LBB27_148
+; RV64-NEXT:  .LBB27_210: # %cond.load241
+; RV64-NEXT:    lh a2, 162(a0)
+; RV64-NEXT:    vsetivli zero, 18, e16, m4, tu, ma
+; RV64-NEXT:    vmv.s.x v24, a2
+; RV64-NEXT:    vslideup.vi v16, v24, 17
+; RV64-NEXT:    slli a2, a1, 45
+; RV64-NEXT:    bgez a2, .LBB27_149
+; RV64-NEXT:  .LBB27_211: # %cond.load244
+; RV64-NEXT:    lh a2, 164(a0)
+; RV64-NEXT:    vsetivli zero, 19, e16, m4, tu, ma
+; RV64-NEXT:    vmv.s.x v24, a2
+; RV64-NEXT:    vslideup.vi v16, v24, 18
+; RV64-NEXT:    slli a2, a1, 44
+; RV64-NEXT:    bgez a2, .LBB27_150
+; RV64-NEXT:  .LBB27_212: # %cond.load247
+; RV64-NEXT:    lh a2, 166(a0)
+; RV64-NEXT:    vsetivli zero, 20, e16, m4, tu, ma
+; RV64-NEXT:    vmv.s.x v24, a2
+; RV64-NEXT:    vslideup.vi v16, v24, 19
+; RV64-NEXT:    slli a2, a1, 43
+; RV64-NEXT:    bgez a2, .LBB27_151
+; RV64-NEXT:  .LBB27_213: # %cond.load250
+; RV64-NEXT:    lh a2, 168(a0)
+; RV64-NEXT:    vsetivli zero, 21, e16, m4, tu, ma
+; RV64-NEXT:    vmv.s.x v24, a2
+; RV64-NEXT:    vslideup.vi v16, v24, 20
+; RV64-NEXT:    slli a2, a1, 42
+; RV64-NEXT:    bgez a2, .LBB27_152
+; RV64-NEXT:  .LBB27_214: # %cond.load253
+; RV64-NEXT:    lh a2, 170(a0)
+; RV64-NEXT:    vsetivli zero, 22, e16, m4, tu, ma
+; RV64-NEXT:    vmv.s.x v24, a2
+; RV64-NEXT:    vslideup.vi v16, v24, 21
+; RV64-NEXT:    slli a2, a1, 41
+; RV64-NEXT:    bgez a2, .LBB27_153
+; RV64-NEXT:  .LBB27_215: # %cond.load256
+; RV64-NEXT:    lh a2, 172(a0)
+; RV64-NEXT:    vsetivli zero, 23, e16, m4, tu, ma
+; RV64-NEXT:    vmv.s.x v24, a2
+; RV64-NEXT:    vslideup.vi v16, v24, 22
+; RV64-NEXT:    slli a2, a1, 40
+; RV64-NEXT:    bgez a2, .LBB27_154
+; RV64-NEXT:  .LBB27_216: # %cond.load259
+; RV64-NEXT:    lh a2, 174(a0)
+; RV64-NEXT:    vsetivli zero, 24, e16, m4, tu, ma
+; RV64-NEXT:    vmv.s.x v24, a2
+; RV64-NEXT:    vslideup.vi v16, v24, 23
+; RV64-NEXT:    slli a2, a1, 39
+; RV64-NEXT:    bgez a2, .LBB27_155
+; RV64-NEXT:  .LBB27_217: # %cond.load262
+; RV64-NEXT:    lh a2, 176(a0)
+; RV64-NEXT:    vsetivli zero, 25, e16, m4, tu, ma
+; RV64-NEXT:    vmv.s.x v24, a2
+; RV64-NEXT:    vslideup.vi v16, v24, 24
+; RV64-NEXT:    slli a2, a1, 38
+; RV64-NEXT:    bgez a2, .LBB27_156
+; RV64-NEXT:  .LBB27_218: # %cond.load265
+; RV64-NEXT:    lh a2, 178(a0)
+; RV64-NEXT:    vsetivli zero, 26, e16, m4, tu, ma
+; RV64-NEXT:    vmv.s.x v24, a2
+; RV64-NEXT:    vslideup.vi v16, v24, 25
+; RV64-NEXT:    slli a2, a1, 37
+; RV64-NEXT:    bgez a2, .LBB27_157
+; RV64-NEXT:  .LBB27_219: # %cond.load268
+; RV64-NEXT:    lh a2, 180(a0)
+; RV64-NEXT:    vsetivli zero, 27, e16, m4, tu, ma
+; RV64-NEXT:    vmv.s.x v24, a2
+; RV64-NEXT:    vslideup.vi v16, v24, 26
+; RV64-NEXT:    slli a2, a1, 36
+; RV64-NEXT:    bgez a2, .LBB27_158
+; RV64-NEXT:  .LBB27_220: # %cond.load271
+; RV64-NEXT:    lh a2, 182(a0)
+; RV64-NEXT:    vsetivli zero, 28, e16, m4, tu, ma
+; RV64-NEXT:    vmv.s.x v24, a2
+; RV64-NEXT:    vslideup.vi v16, v24, 27
+; RV64-NEXT:    slli a2, a1, 35
+; RV64-NEXT:    bgez a2, .LBB27_159
+; RV64-NEXT:  .LBB27_221: # %cond.load274
+; RV64-NEXT:    lh a2, 184(a0)
+; RV64-NEXT:    vsetivli zero, 29, e16, m4, tu, ma
+; RV64-NEXT:    vmv.s.x v24, a2
+; RV64-NEXT:    vslideup.vi v16, v24, 28
+; RV64-NEXT:    slli a2, a1, 34
+; RV64-NEXT:    bgez a2, .LBB27_160
+; RV64-NEXT:  .LBB27_222: # %cond.load277
+; RV64-NEXT:    lh a2, 186(a0)
+; RV64-NEXT:    vsetivli zero, 30, e16, m4, tu, ma
+; RV64-NEXT:    vmv.s.x v24, a2
+; RV64-NEXT:    vslideup.vi v16, v24, 29
+; RV64-NEXT:    slli a2, a1, 33
+; RV64-NEXT:    bgez a2, .LBB27_161
+; RV64-NEXT:  .LBB27_223: # %cond.load280
+; RV64-NEXT:    lh a2, 188(a0)
+; RV64-NEXT:    vsetivli zero, 31, e16, m4, tu, ma
+; RV64-NEXT:    vmv.s.x v24, a2
+; RV64-NEXT:    vslideup.vi v16, v24, 30
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    bgez a2, .LBB27_162
+; RV64-NEXT:  .LBB27_224: # %cond.load283
+; RV64-NEXT:    lh a2, 190(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v24, a2
+; RV64-NEXT:    li a2, 32
+; RV64-NEXT:    vsetvli zero, a2, e16, m4, tu, ma
+; RV64-NEXT:    vslideup.vi v16, v24, 31
+; RV64-NEXT:    slli a2, a1, 31
+; RV64-NEXT:    bgez a2, .LBB27_163
+; RV64-NEXT:  .LBB27_225: # %cond.load286
+; RV64-NEXT:    lh a2, 192(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v24, a2
+; RV64-NEXT:    li a2, 33
+; RV64-NEXT:    li a3, 32
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v16, v24, a3
+; RV64-NEXT:    slli a2, a1, 30
+; RV64-NEXT:    bgez a2, .LBB27_164
+; RV64-NEXT:  .LBB27_226: # %cond.load289
+; RV64-NEXT:    lh a2, 194(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v24, a2
+; RV64-NEXT:    li a2, 34
+; RV64-NEXT:    li a3, 33
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v16, v24, a3
+; RV64-NEXT:    slli a2, a1, 29
+; RV64-NEXT:    bgez a2, .LBB27_165
+; RV64-NEXT:  .LBB27_227: # %cond.load292
+; RV64-NEXT:    lh a2, 196(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v24, a2
+; RV64-NEXT:    li a2, 35
+; RV64-NEXT:    li a3, 34
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v16, v24, a3
+; RV64-NEXT:    slli a2, a1, 28
+; RV64-NEXT:    bgez a2, .LBB27_166
+; RV64-NEXT:  .LBB27_228: # %cond.load295
+; RV64-NEXT:    lh a2, 198(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v24, a2
+; RV64-NEXT:    li a2, 36
+; RV64-NEXT:    li a3, 35
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v16, v24, a3
+; RV64-NEXT:    slli a2, a1, 27
+; RV64-NEXT:    bgez a2, .LBB27_167
+; RV64-NEXT:  .LBB27_229: # %cond.load298
+; RV64-NEXT:    lh a2, 200(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v24, a2
+; RV64-NEXT:    li a2, 37
+; RV64-NEXT:    li a3, 36
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v16, v24, a3
+; RV64-NEXT:    slli a2, a1, 26
+; RV64-NEXT:    bgez a2, .LBB27_168
+; RV64-NEXT:  .LBB27_230: # %cond.load301
+; RV64-NEXT:    lh a2, 202(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v24, a2
+; RV64-NEXT:    li a2, 38
+; RV64-NEXT:    li a3, 37
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v16, v24, a3
+; RV64-NEXT:    slli a2, a1, 25
+; RV64-NEXT:    bgez a2, .LBB27_169
+; RV64-NEXT:  .LBB27_231: # %cond.load304
+; RV64-NEXT:    lh a2, 204(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v24, a2
+; RV64-NEXT:    li a2, 39
+; RV64-NEXT:    li a3, 38
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v16, v24, a3
+; RV64-NEXT:    slli a2, a1, 24
+; RV64-NEXT:    bgez a2, .LBB27_170
+; RV64-NEXT:  .LBB27_232: # %cond.load307
+; RV64-NEXT:    lh a2, 206(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v24, a2
+; RV64-NEXT:    li a2, 40
+; RV64-NEXT:    li a3, 39
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v16, v24, a3
+; RV64-NEXT:    slli a2, a1, 23
+; RV64-NEXT:    bgez a2, .LBB27_171
+; RV64-NEXT:  .LBB27_233: # %cond.load310
+; RV64-NEXT:    lh a2, 208(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v24, a2
+; RV64-NEXT:    li a2, 41
+; RV64-NEXT:    li a3, 40
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v16, v24, a3
+; RV64-NEXT:    slli a2, a1, 22
+; RV64-NEXT:    bgez a2, .LBB27_172
+; RV64-NEXT:  .LBB27_234: # %cond.load313
+; RV64-NEXT:    lh a2, 210(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v24, a2
+; RV64-NEXT:    li a2, 42
+; RV64-NEXT:    li a3, 41
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v16, v24, a3
+; RV64-NEXT:    slli a2, a1, 21
+; RV64-NEXT:    bgez a2, .LBB27_173
+; RV64-NEXT:  .LBB27_235: # %cond.load316
+; RV64-NEXT:    lh a2, 212(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v24, a2
+; RV64-NEXT:    li a2, 43
+; RV64-NEXT:    li a3, 42
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v16, v24, a3
+; RV64-NEXT:    slli a2, a1, 20
+; RV64-NEXT:    bgez a2, .LBB27_174
+; RV64-NEXT:  .LBB27_236: # %cond.load319
+; RV64-NEXT:    lh a2, 214(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v24, a2
+; RV64-NEXT:    li a2, 44
+; RV64-NEXT:    li a3, 43
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v16, v24, a3
+; RV64-NEXT:    slli a2, a1, 19
+; RV64-NEXT:    bgez a2, .LBB27_175
+; RV64-NEXT:  .LBB27_237: # %cond.load322
+; RV64-NEXT:    lh a2, 216(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v24, a2
+; RV64-NEXT:    li a2, 45
+; RV64-NEXT:    li a3, 44
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v16, v24, a3
+; RV64-NEXT:    slli a2, a1, 18
+; RV64-NEXT:    bgez a2, .LBB27_176
+; RV64-NEXT:  .LBB27_238: # %cond.load325
+; RV64-NEXT:    lh a2, 218(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v24, a2
+; RV64-NEXT:    li a2, 46
+; RV64-NEXT:    li a3, 45
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v16, v24, a3
+; RV64-NEXT:    slli a2, a1, 17
+; RV64-NEXT:    bgez a2, .LBB27_177
+; RV64-NEXT:  .LBB27_239: # %cond.load328
+; RV64-NEXT:    lh a2, 220(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v24, a2
+; RV64-NEXT:    li a2, 47
+; RV64-NEXT:    li a3, 46
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v16, v24, a3
+; RV64-NEXT:    slli a2, a1, 16
+; RV64-NEXT:    bgez a2, .LBB27_178
+; RV64-NEXT:  .LBB27_240: # %cond.load331
+; RV64-NEXT:    lh a2, 222(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v24, a2
+; RV64-NEXT:    li a2, 48
+; RV64-NEXT:    li a3, 47
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v16, v24, a3
+; RV64-NEXT:    slli a2, a1, 15
+; RV64-NEXT:    bgez a2, .LBB27_179
+; RV64-NEXT:  .LBB27_241: # %cond.load334
+; RV64-NEXT:    lh a2, 224(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v24, a2
+; RV64-NEXT:    li a2, 49
+; RV64-NEXT:    li a3, 48
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v16, v24, a3
+; RV64-NEXT:    slli a2, a1, 14
+; RV64-NEXT:    bgez a2, .LBB27_180
+; RV64-NEXT:  .LBB27_242: # %cond.load337
+; RV64-NEXT:    lh a2, 226(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v24, a2
+; RV64-NEXT:    li a2, 50
+; RV64-NEXT:    li a3, 49
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v16, v24, a3
+; RV64-NEXT:    slli a2, a1, 13
+; RV64-NEXT:    bgez a2, .LBB27_181
+; RV64-NEXT:  .LBB27_243: # %cond.load340
+; RV64-NEXT:    lh a2, 228(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v24, a2
+; RV64-NEXT:    li a2, 51
+; RV64-NEXT:    li a3, 50
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v16, v24, a3
+; RV64-NEXT:    slli a2, a1, 12
+; RV64-NEXT:    bgez a2, .LBB27_182
+; RV64-NEXT:  .LBB27_244: # %cond.load343
+; RV64-NEXT:    lh a2, 230(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v24, a2
+; RV64-NEXT:    li a2, 52
+; RV64-NEXT:    li a3, 51
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v16, v24, a3
+; RV64-NEXT:    slli a2, a1, 11
+; RV64-NEXT:    bgez a2, .LBB27_183
+; RV64-NEXT:  .LBB27_245: # %cond.load346
+; RV64-NEXT:    lh a2, 232(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v24, a2
+; RV64-NEXT:    li a2, 53
+; RV64-NEXT:    li a3, 52
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v16, v24, a3
+; RV64-NEXT:    slli a2, a1, 10
+; RV64-NEXT:    bgez a2, .LBB27_184
+; RV64-NEXT:  .LBB27_246: # %cond.load349
+; RV64-NEXT:    lh a2, 234(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v24, a2
+; RV64-NEXT:    li a2, 54
+; RV64-NEXT:    li a3, 53
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v16, v24, a3
+; RV64-NEXT:    slli a2, a1, 9
+; RV64-NEXT:    bgez a2, .LBB27_185
+; RV64-NEXT:  .LBB27_247: # %cond.load352
+; RV64-NEXT:    lh a2, 236(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v24, a2
+; RV64-NEXT:    li a2, 55
+; RV64-NEXT:    li a3, 54
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v16, v24, a3
+; RV64-NEXT:    slli a2, a1, 8
+; RV64-NEXT:    bgez a2, .LBB27_186
+; RV64-NEXT:  .LBB27_248: # %cond.load355
+; RV64-NEXT:    lh a2, 238(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v24, a2
+; RV64-NEXT:    li a2, 56
+; RV64-NEXT:    li a3, 55
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v16, v24, a3
+; RV64-NEXT:    slli a2, a1, 7
+; RV64-NEXT:    bgez a2, .LBB27_187
+; RV64-NEXT:  .LBB27_249: # %cond.load358
+; RV64-NEXT:    lh a2, 240(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v24, a2
+; RV64-NEXT:    li a2, 57
+; RV64-NEXT:    li a3, 56
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v16, v24, a3
+; RV64-NEXT:    slli a2, a1, 6
+; RV64-NEXT:    bgez a2, .LBB27_188
+; RV64-NEXT:  .LBB27_250: # %cond.load361
+; RV64-NEXT:    lh a2, 242(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v24, a2
+; RV64-NEXT:    li a2, 58
+; RV64-NEXT:    li a3, 57
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v16, v24, a3
+; RV64-NEXT:    slli a2, a1, 5
+; RV64-NEXT:    bgez a2, .LBB27_189
+; RV64-NEXT:  .LBB27_251: # %cond.load364
+; RV64-NEXT:    lh a2, 244(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v24, a2
+; RV64-NEXT:    li a2, 59
+; RV64-NEXT:    li a3, 58
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v16, v24, a3
+; RV64-NEXT:    slli a2, a1, 4
+; RV64-NEXT:    bgez a2, .LBB27_190
+; RV64-NEXT:  .LBB27_252: # %cond.load367
+; RV64-NEXT:    lh a2, 246(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v24, a2
+; RV64-NEXT:    li a2, 60
+; RV64-NEXT:    li a3, 59
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v16, v24, a3
+; RV64-NEXT:    slli a2, a1, 3
+; RV64-NEXT:    bgez a2, .LBB27_191
+; RV64-NEXT:  .LBB27_253: # %cond.load370
+; RV64-NEXT:    lh a2, 248(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v24, a2
+; RV64-NEXT:    li a2, 61
+; RV64-NEXT:    li a3, 60
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v16, v24, a3
+; RV64-NEXT:    slli a2, a1, 2
+; RV64-NEXT:    bgez a2, .LBB27_192
+; RV64-NEXT:  .LBB27_254: # %cond.load373
+; RV64-NEXT:    lh a2, 250(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v24, a2
+; RV64-NEXT:    li a2, 62
+; RV64-NEXT:    li a3, 61
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v16, v24, a3
+; RV64-NEXT:    slli a2, a1, 1
+; RV64-NEXT:    bgez a2, .LBB27_193
+; RV64-NEXT:  .LBB27_255: # %cond.load376
+; RV64-NEXT:    lh a2, 252(a0)
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v24, a2
+; RV64-NEXT:    li a2, 63
+; RV64-NEXT:    li a3, 62
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-NEXT:    vslideup.vx v16, v24, a3
+; RV64-NEXT:    bgez a1, .LBB27_194
+; RV64-NEXT:  .LBB27_256: # %cond.load379
+; RV64-NEXT:    lh a0, 254(a0)
+; RV64-NEXT:    li a1, 64
+; RV64-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV64-NEXT:    vmv.s.x v24, a0
+; RV64-NEXT:    li a0, 63
+; RV64-NEXT:    vslideup.vx v16, v24, a0
+; RV64-NEXT:    ret
+  %load = call <128 x bfloat> @llvm.masked.load.v128bf16(ptr %a, i32 8, <128 x i1> %mask, <128 x bfloat> undef)
+  ret <128 x bfloat> %load
+}
+
+define <128 x half> @masked_load_v128f16(ptr %a, <128 x i1> %mask) {
+; ZVFH-LABEL: masked_load_v128f16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    li a1, 64
+; ZVFH-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; ZVFH-NEXT:    vle16.v v8, (a0), v0.t
+; ZVFH-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
+; ZVFH-NEXT:    vslidedown.vi v0, v0, 8
+; ZVFH-NEXT:    addi a0, a0, 128
+; ZVFH-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; ZVFH-NEXT:    vle16.v v16, (a0), v0.t
+; ZVFH-NEXT:    ret
+;
+; RV32-ZVFHMIN-LABEL: masked_load_v128f16:
+; RV32-ZVFHMIN:       # %bb.0:
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a2, v0
+; RV32-ZVFHMIN-NEXT:    andi a1, a2, 1
+; RV32-ZVFHMIN-NEXT:    beqz a1, .LBB28_2
+; RV32-ZVFHMIN-NEXT:  # %bb.1: # %cond.load
+; RV32-ZVFHMIN-NEXT:    lh a1, 0(a0)
+; RV32-ZVFHMIN-NEXT:    fmv.x.h a3, fa5
+; RV32-ZVFHMIN-NEXT:    li a4, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a4, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.v.x v8, a3
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v8, a1
+; RV32-ZVFHMIN-NEXT:    andi a1, a2, 2
+; RV32-ZVFHMIN-NEXT:    bnez a1, .LBB28_3
+; RV32-ZVFHMIN-NEXT:    j .LBB28_4
+; RV32-ZVFHMIN-NEXT:  .LBB28_2:
+; RV32-ZVFHMIN-NEXT:    # implicit-def: $v8m8
+; RV32-ZVFHMIN-NEXT:    andi a1, a2, 2
+; RV32-ZVFHMIN-NEXT:    beqz a1, .LBB28_4
+; RV32-ZVFHMIN-NEXT:  .LBB28_3: # %cond.load1
+; RV32-ZVFHMIN-NEXT:    lh a1, 2(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 1
+; RV32-ZVFHMIN-NEXT:  .LBB28_4: # %else2
+; RV32-ZVFHMIN-NEXT:    andi a1, a2, 4
+; RV32-ZVFHMIN-NEXT:    bnez a1, .LBB28_74
+; RV32-ZVFHMIN-NEXT:  # %bb.5: # %else5
+; RV32-ZVFHMIN-NEXT:    andi a1, a2, 8
+; RV32-ZVFHMIN-NEXT:    bnez a1, .LBB28_75
+; RV32-ZVFHMIN-NEXT:  .LBB28_6: # %else8
+; RV32-ZVFHMIN-NEXT:    andi a1, a2, 16
+; RV32-ZVFHMIN-NEXT:    bnez a1, .LBB28_76
+; RV32-ZVFHMIN-NEXT:  .LBB28_7: # %else11
+; RV32-ZVFHMIN-NEXT:    andi a1, a2, 32
+; RV32-ZVFHMIN-NEXT:    bnez a1, .LBB28_77
+; RV32-ZVFHMIN-NEXT:  .LBB28_8: # %else14
+; RV32-ZVFHMIN-NEXT:    andi a1, a2, 64
+; RV32-ZVFHMIN-NEXT:    bnez a1, .LBB28_78
+; RV32-ZVFHMIN-NEXT:  .LBB28_9: # %else17
+; RV32-ZVFHMIN-NEXT:    andi a1, a2, 128
+; RV32-ZVFHMIN-NEXT:    bnez a1, .LBB28_79
+; RV32-ZVFHMIN-NEXT:  .LBB28_10: # %else20
+; RV32-ZVFHMIN-NEXT:    andi a1, a2, 256
+; RV32-ZVFHMIN-NEXT:    bnez a1, .LBB28_80
+; RV32-ZVFHMIN-NEXT:  .LBB28_11: # %else23
+; RV32-ZVFHMIN-NEXT:    andi a1, a2, 512
+; RV32-ZVFHMIN-NEXT:    bnez a1, .LBB28_81
+; RV32-ZVFHMIN-NEXT:  .LBB28_12: # %else26
+; RV32-ZVFHMIN-NEXT:    andi a1, a2, 1024
+; RV32-ZVFHMIN-NEXT:    bnez a1, .LBB28_82
+; RV32-ZVFHMIN-NEXT:  .LBB28_13: # %else29
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 20
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB28_83
+; RV32-ZVFHMIN-NEXT:  .LBB28_14: # %else32
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 19
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB28_84
+; RV32-ZVFHMIN-NEXT:  .LBB28_15: # %else35
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 18
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB28_85
+; RV32-ZVFHMIN-NEXT:  .LBB28_16: # %else38
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 17
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB28_86
+; RV32-ZVFHMIN-NEXT:  .LBB28_17: # %else41
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 16
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB28_87
+; RV32-ZVFHMIN-NEXT:  .LBB28_18: # %else44
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 15
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB28_88
+; RV32-ZVFHMIN-NEXT:  .LBB28_19: # %else47
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 14
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB28_89
+; RV32-ZVFHMIN-NEXT:  .LBB28_20: # %else50
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 13
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB28_90
+; RV32-ZVFHMIN-NEXT:  .LBB28_21: # %else53
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 12
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB28_91
+; RV32-ZVFHMIN-NEXT:  .LBB28_22: # %else56
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 11
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB28_92
+; RV32-ZVFHMIN-NEXT:  .LBB28_23: # %else59
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 10
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB28_93
+; RV32-ZVFHMIN-NEXT:  .LBB28_24: # %else62
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 9
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB28_94
+; RV32-ZVFHMIN-NEXT:  .LBB28_25: # %else65
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 8
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB28_95
+; RV32-ZVFHMIN-NEXT:  .LBB28_26: # %else68
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 7
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB28_96
+; RV32-ZVFHMIN-NEXT:  .LBB28_27: # %else71
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 6
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB28_97
+; RV32-ZVFHMIN-NEXT:  .LBB28_28: # %else74
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 5
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB28_98
+; RV32-ZVFHMIN-NEXT:  .LBB28_29: # %else77
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 4
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB28_99
+; RV32-ZVFHMIN-NEXT:  .LBB28_30: # %else80
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 3
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB28_32
+; RV32-ZVFHMIN-NEXT:  .LBB28_31: # %cond.load82
+; RV32-ZVFHMIN-NEXT:    lh a1, 56(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 29, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV32-ZVFHMIN-NEXT:    vmv8r.v v24, v8
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 28
+; RV32-ZVFHMIN-NEXT:    vmv4r.v v24, v8
+; RV32-ZVFHMIN-NEXT:    vmv8r.v v8, v24
+; RV32-ZVFHMIN-NEXT:  .LBB28_32: # %else83
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 2
+; RV32-ZVFHMIN-NEXT:    li a1, 32
+; RV32-ZVFHMIN-NEXT:    bgez a3, .LBB28_34
+; RV32-ZVFHMIN-NEXT:  # %bb.33: # %cond.load85
+; RV32-ZVFHMIN-NEXT:    lh a3, 58(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 30, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a3
+; RV32-ZVFHMIN-NEXT:    vmv8r.v v24, v8
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 29
+; RV32-ZVFHMIN-NEXT:    vmv4r.v v24, v8
+; RV32-ZVFHMIN-NEXT:    vmv8r.v v8, v24
+; RV32-ZVFHMIN-NEXT:  .LBB28_34: # %else86
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 1
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vsrl.vx v16, v0, a1
+; RV32-ZVFHMIN-NEXT:    bgez a3, .LBB28_36
+; RV32-ZVFHMIN-NEXT:  # %bb.35: # %cond.load88
+; RV32-ZVFHMIN-NEXT:    lh a3, 60(a0)
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v20, a3
+; RV32-ZVFHMIN-NEXT:    vmv8r.v v24, v8
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 31, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v20, 30
+; RV32-ZVFHMIN-NEXT:    vmv4r.v v24, v8
+; RV32-ZVFHMIN-NEXT:    vmv8r.v v8, v24
+; RV32-ZVFHMIN-NEXT:  .LBB28_36: # %else89
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a3, v16
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB28_100
+; RV32-ZVFHMIN-NEXT:  # %bb.37: # %else92
+; RV32-ZVFHMIN-NEXT:    andi a2, a3, 1
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB28_101
+; RV32-ZVFHMIN-NEXT:  .LBB28_38: # %else95
+; RV32-ZVFHMIN-NEXT:    andi a2, a3, 2
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB28_102
+; RV32-ZVFHMIN-NEXT:  .LBB28_39: # %else98
+; RV32-ZVFHMIN-NEXT:    andi a2, a3, 4
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB28_103
+; RV32-ZVFHMIN-NEXT:  .LBB28_40: # %else101
+; RV32-ZVFHMIN-NEXT:    andi a2, a3, 8
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB28_104
+; RV32-ZVFHMIN-NEXT:  .LBB28_41: # %else104
+; RV32-ZVFHMIN-NEXT:    andi a2, a3, 16
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB28_105
+; RV32-ZVFHMIN-NEXT:  .LBB28_42: # %else107
+; RV32-ZVFHMIN-NEXT:    andi a2, a3, 32
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB28_106
+; RV32-ZVFHMIN-NEXT:  .LBB28_43: # %else110
+; RV32-ZVFHMIN-NEXT:    andi a2, a3, 64
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB28_107
+; RV32-ZVFHMIN-NEXT:  .LBB28_44: # %else113
+; RV32-ZVFHMIN-NEXT:    andi a2, a3, 128
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB28_108
+; RV32-ZVFHMIN-NEXT:  .LBB28_45: # %else116
+; RV32-ZVFHMIN-NEXT:    andi a2, a3, 256
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB28_109
+; RV32-ZVFHMIN-NEXT:  .LBB28_46: # %else119
+; RV32-ZVFHMIN-NEXT:    andi a2, a3, 512
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB28_110
+; RV32-ZVFHMIN-NEXT:  .LBB28_47: # %else122
+; RV32-ZVFHMIN-NEXT:    andi a2, a3, 1024
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB28_111
+; RV32-ZVFHMIN-NEXT:  .LBB28_48: # %else125
+; RV32-ZVFHMIN-NEXT:    slli a2, a3, 20
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB28_112
+; RV32-ZVFHMIN-NEXT:  .LBB28_49: # %else128
+; RV32-ZVFHMIN-NEXT:    slli a2, a3, 19
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB28_113
+; RV32-ZVFHMIN-NEXT:  .LBB28_50: # %else131
+; RV32-ZVFHMIN-NEXT:    slli a2, a3, 18
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB28_114
+; RV32-ZVFHMIN-NEXT:  .LBB28_51: # %else134
+; RV32-ZVFHMIN-NEXT:    slli a2, a3, 17
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB28_115
+; RV32-ZVFHMIN-NEXT:  .LBB28_52: # %else137
+; RV32-ZVFHMIN-NEXT:    slli a2, a3, 16
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB28_116
+; RV32-ZVFHMIN-NEXT:  .LBB28_53: # %else140
+; RV32-ZVFHMIN-NEXT:    slli a2, a3, 15
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB28_117
+; RV32-ZVFHMIN-NEXT:  .LBB28_54: # %else143
+; RV32-ZVFHMIN-NEXT:    slli a2, a3, 14
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB28_118
+; RV32-ZVFHMIN-NEXT:  .LBB28_55: # %else146
+; RV32-ZVFHMIN-NEXT:    slli a2, a3, 13
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB28_119
+; RV32-ZVFHMIN-NEXT:  .LBB28_56: # %else149
+; RV32-ZVFHMIN-NEXT:    slli a2, a3, 12
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB28_120
+; RV32-ZVFHMIN-NEXT:  .LBB28_57: # %else152
+; RV32-ZVFHMIN-NEXT:    slli a2, a3, 11
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB28_121
+; RV32-ZVFHMIN-NEXT:  .LBB28_58: # %else155
+; RV32-ZVFHMIN-NEXT:    slli a2, a3, 10
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB28_122
+; RV32-ZVFHMIN-NEXT:  .LBB28_59: # %else158
+; RV32-ZVFHMIN-NEXT:    slli a2, a3, 9
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB28_123
+; RV32-ZVFHMIN-NEXT:  .LBB28_60: # %else161
+; RV32-ZVFHMIN-NEXT:    slli a2, a3, 8
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB28_124
+; RV32-ZVFHMIN-NEXT:  .LBB28_61: # %else164
+; RV32-ZVFHMIN-NEXT:    slli a2, a3, 7
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB28_125
+; RV32-ZVFHMIN-NEXT:  .LBB28_62: # %else167
+; RV32-ZVFHMIN-NEXT:    slli a2, a3, 6
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB28_126
+; RV32-ZVFHMIN-NEXT:  .LBB28_63: # %else170
+; RV32-ZVFHMIN-NEXT:    slli a2, a3, 5
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB28_127
+; RV32-ZVFHMIN-NEXT:  .LBB28_64: # %else173
+; RV32-ZVFHMIN-NEXT:    slli a2, a3, 4
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB28_128
+; RV32-ZVFHMIN-NEXT:  .LBB28_65: # %else176
+; RV32-ZVFHMIN-NEXT:    slli a2, a3, 3
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB28_129
+; RV32-ZVFHMIN-NEXT:  .LBB28_66: # %else179
+; RV32-ZVFHMIN-NEXT:    slli a2, a3, 2
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB28_68
+; RV32-ZVFHMIN-NEXT:  .LBB28_67: # %cond.load181
+; RV32-ZVFHMIN-NEXT:    lh a2, 122(a0)
+; RV32-ZVFHMIN-NEXT:    li a4, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a4, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 62
+; RV32-ZVFHMIN-NEXT:    li a4, 61
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a4
+; RV32-ZVFHMIN-NEXT:  .LBB28_68: # %else182
+; RV32-ZVFHMIN-NEXT:    slli a2, a3, 1
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v24, v0, 1
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB28_70
+; RV32-ZVFHMIN-NEXT:  # %bb.69: # %cond.load184
+; RV32-ZVFHMIN-NEXT:    lh a2, 124(a0)
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 63
+; RV32-ZVFHMIN-NEXT:    li a4, 62
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a4
+; RV32-ZVFHMIN-NEXT:  .LBB28_70: # %else185
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a2, v24
+; RV32-ZVFHMIN-NEXT:    bgez a3, .LBB28_72
+; RV32-ZVFHMIN-NEXT:  # %bb.71: # %cond.load187
+; RV32-ZVFHMIN-NEXT:    lh a3, 126(a0)
+; RV32-ZVFHMIN-NEXT:    li a4, 64
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a3
+; RV32-ZVFHMIN-NEXT:    li a3, 63
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a4, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV32-ZVFHMIN-NEXT:  .LBB28_72: # %else188
+; RV32-ZVFHMIN-NEXT:    andi a3, a2, 1
+; RV32-ZVFHMIN-NEXT:    beqz a3, .LBB28_130
+; RV32-ZVFHMIN-NEXT:  # %bb.73: # %cond.load190
+; RV32-ZVFHMIN-NEXT:    lh a3, 128(a0)
+; RV32-ZVFHMIN-NEXT:    li a4, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a4, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a3
+; RV32-ZVFHMIN-NEXT:    andi a3, a2, 2
+; RV32-ZVFHMIN-NEXT:    bnez a3, .LBB28_131
+; RV32-ZVFHMIN-NEXT:    j .LBB28_132
+; RV32-ZVFHMIN-NEXT:  .LBB28_74: # %cond.load4
+; RV32-ZVFHMIN-NEXT:    lh a1, 4(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 2
+; RV32-ZVFHMIN-NEXT:    andi a1, a2, 8
+; RV32-ZVFHMIN-NEXT:    beqz a1, .LBB28_6
+; RV32-ZVFHMIN-NEXT:  .LBB28_75: # %cond.load7
+; RV32-ZVFHMIN-NEXT:    lh a1, 6(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 3
+; RV32-ZVFHMIN-NEXT:    andi a1, a2, 16
+; RV32-ZVFHMIN-NEXT:    beqz a1, .LBB28_7
+; RV32-ZVFHMIN-NEXT:  .LBB28_76: # %cond.load10
+; RV32-ZVFHMIN-NEXT:    lh a1, 8(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 4
+; RV32-ZVFHMIN-NEXT:    andi a1, a2, 32
+; RV32-ZVFHMIN-NEXT:    beqz a1, .LBB28_8
+; RV32-ZVFHMIN-NEXT:  .LBB28_77: # %cond.load13
+; RV32-ZVFHMIN-NEXT:    lh a1, 10(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 5
+; RV32-ZVFHMIN-NEXT:    andi a1, a2, 64
+; RV32-ZVFHMIN-NEXT:    beqz a1, .LBB28_9
+; RV32-ZVFHMIN-NEXT:  .LBB28_78: # %cond.load16
+; RV32-ZVFHMIN-NEXT:    lh a1, 12(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 6
+; RV32-ZVFHMIN-NEXT:    andi a1, a2, 128
+; RV32-ZVFHMIN-NEXT:    beqz a1, .LBB28_10
+; RV32-ZVFHMIN-NEXT:  .LBB28_79: # %cond.load19
+; RV32-ZVFHMIN-NEXT:    lh a1, 14(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 7
+; RV32-ZVFHMIN-NEXT:    andi a1, a2, 256
+; RV32-ZVFHMIN-NEXT:    beqz a1, .LBB28_11
+; RV32-ZVFHMIN-NEXT:  .LBB28_80: # %cond.load22
+; RV32-ZVFHMIN-NEXT:    lh a1, 16(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 9, e16, m2, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 8
+; RV32-ZVFHMIN-NEXT:    andi a1, a2, 512
+; RV32-ZVFHMIN-NEXT:    beqz a1, .LBB28_12
+; RV32-ZVFHMIN-NEXT:  .LBB28_81: # %cond.load25
+; RV32-ZVFHMIN-NEXT:    lh a1, 18(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 10, e16, m2, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 9
+; RV32-ZVFHMIN-NEXT:    andi a1, a2, 1024
+; RV32-ZVFHMIN-NEXT:    beqz a1, .LBB28_13
+; RV32-ZVFHMIN-NEXT:  .LBB28_82: # %cond.load28
+; RV32-ZVFHMIN-NEXT:    lh a1, 20(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 11, e16, m2, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 10
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 20
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB28_14
+; RV32-ZVFHMIN-NEXT:  .LBB28_83: # %cond.load31
+; RV32-ZVFHMIN-NEXT:    lh a1, 22(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 12, e16, m2, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 11
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 19
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB28_15
+; RV32-ZVFHMIN-NEXT:  .LBB28_84: # %cond.load34
+; RV32-ZVFHMIN-NEXT:    lh a1, 24(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 13, e16, m2, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 12
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 18
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB28_16
+; RV32-ZVFHMIN-NEXT:  .LBB28_85: # %cond.load37
+; RV32-ZVFHMIN-NEXT:    lh a1, 26(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 14, e16, m2, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 13
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 17
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB28_17
+; RV32-ZVFHMIN-NEXT:  .LBB28_86: # %cond.load40
+; RV32-ZVFHMIN-NEXT:    lh a1, 28(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 15, e16, m2, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 14
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 16
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB28_18
+; RV32-ZVFHMIN-NEXT:  .LBB28_87: # %cond.load43
+; RV32-ZVFHMIN-NEXT:    lh a1, 30(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 15
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 15
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB28_19
+; RV32-ZVFHMIN-NEXT:  .LBB28_88: # %cond.load46
+; RV32-ZVFHMIN-NEXT:    lh a1, 32(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 17, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 16
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 14
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB28_20
+; RV32-ZVFHMIN-NEXT:  .LBB28_89: # %cond.load49
+; RV32-ZVFHMIN-NEXT:    lh a1, 34(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 18, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 17
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 13
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB28_21
+; RV32-ZVFHMIN-NEXT:  .LBB28_90: # %cond.load52
+; RV32-ZVFHMIN-NEXT:    lh a1, 36(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 19, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 18
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 12
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB28_22
+; RV32-ZVFHMIN-NEXT:  .LBB28_91: # %cond.load55
+; RV32-ZVFHMIN-NEXT:    lh a1, 38(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 20, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 19
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 11
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB28_23
+; RV32-ZVFHMIN-NEXT:  .LBB28_92: # %cond.load58
+; RV32-ZVFHMIN-NEXT:    lh a1, 40(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 21, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 20
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 10
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB28_24
+; RV32-ZVFHMIN-NEXT:  .LBB28_93: # %cond.load61
+; RV32-ZVFHMIN-NEXT:    lh a1, 42(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 22, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 21
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 9
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB28_25
+; RV32-ZVFHMIN-NEXT:  .LBB28_94: # %cond.load64
+; RV32-ZVFHMIN-NEXT:    lh a1, 44(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 23, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 22
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 8
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB28_26
+; RV32-ZVFHMIN-NEXT:  .LBB28_95: # %cond.load67
+; RV32-ZVFHMIN-NEXT:    lh a1, 46(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 24, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 23
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 7
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB28_27
+; RV32-ZVFHMIN-NEXT:  .LBB28_96: # %cond.load70
+; RV32-ZVFHMIN-NEXT:    lh a1, 48(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 25, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 24
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 6
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB28_28
+; RV32-ZVFHMIN-NEXT:  .LBB28_97: # %cond.load73
+; RV32-ZVFHMIN-NEXT:    lh a1, 50(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 26, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 25
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 5
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB28_29
+; RV32-ZVFHMIN-NEXT:  .LBB28_98: # %cond.load76
+; RV32-ZVFHMIN-NEXT:    lh a1, 52(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 27, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV32-ZVFHMIN-NEXT:    vmv8r.v v24, v8
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 26
+; RV32-ZVFHMIN-NEXT:    vmv4r.v v24, v8
+; RV32-ZVFHMIN-NEXT:    vmv8r.v v8, v24
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 4
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB28_30
+; RV32-ZVFHMIN-NEXT:  .LBB28_99: # %cond.load79
+; RV32-ZVFHMIN-NEXT:    lh a1, 54(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 28, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV32-ZVFHMIN-NEXT:    vmv8r.v v24, v8
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 27
+; RV32-ZVFHMIN-NEXT:    vmv4r.v v24, v8
+; RV32-ZVFHMIN-NEXT:    vmv8r.v v8, v24
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 3
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB28_31
+; RV32-ZVFHMIN-NEXT:    j .LBB28_32
+; RV32-ZVFHMIN-NEXT:  .LBB28_100: # %cond.load91
+; RV32-ZVFHMIN-NEXT:    lh a2, 62(a0)
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    vmv8r.v v24, v8
+; RV32-ZVFHMIN-NEXT:    li a2, 32
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 31
+; RV32-ZVFHMIN-NEXT:    vmv4r.v v24, v8
+; RV32-ZVFHMIN-NEXT:    vmv8r.v v8, v24
+; RV32-ZVFHMIN-NEXT:    andi a2, a3, 1
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB28_38
+; RV32-ZVFHMIN-NEXT:  .LBB28_101: # %cond.load94
+; RV32-ZVFHMIN-NEXT:    lh a2, 64(a0)
+; RV32-ZVFHMIN-NEXT:    li a4, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a4, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 33
+; RV32-ZVFHMIN-NEXT:    li a4, 32
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a4
+; RV32-ZVFHMIN-NEXT:    andi a2, a3, 2
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB28_39
+; RV32-ZVFHMIN-NEXT:  .LBB28_102: # %cond.load97
+; RV32-ZVFHMIN-NEXT:    lh a2, 66(a0)
+; RV32-ZVFHMIN-NEXT:    li a4, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a4, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 34
+; RV32-ZVFHMIN-NEXT:    li a4, 33
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a4
+; RV32-ZVFHMIN-NEXT:    andi a2, a3, 4
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB28_40
+; RV32-ZVFHMIN-NEXT:  .LBB28_103: # %cond.load100
+; RV32-ZVFHMIN-NEXT:    lh a2, 68(a0)
+; RV32-ZVFHMIN-NEXT:    li a4, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a4, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 35
+; RV32-ZVFHMIN-NEXT:    li a4, 34
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a4
+; RV32-ZVFHMIN-NEXT:    andi a2, a3, 8
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB28_41
+; RV32-ZVFHMIN-NEXT:  .LBB28_104: # %cond.load103
+; RV32-ZVFHMIN-NEXT:    lh a2, 70(a0)
+; RV32-ZVFHMIN-NEXT:    li a4, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a4, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 36
+; RV32-ZVFHMIN-NEXT:    li a4, 35
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a4
+; RV32-ZVFHMIN-NEXT:    andi a2, a3, 16
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB28_42
+; RV32-ZVFHMIN-NEXT:  .LBB28_105: # %cond.load106
+; RV32-ZVFHMIN-NEXT:    lh a2, 72(a0)
+; RV32-ZVFHMIN-NEXT:    li a4, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a4, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 37
+; RV32-ZVFHMIN-NEXT:    li a4, 36
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a4
+; RV32-ZVFHMIN-NEXT:    andi a2, a3, 32
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB28_43
+; RV32-ZVFHMIN-NEXT:  .LBB28_106: # %cond.load109
+; RV32-ZVFHMIN-NEXT:    lh a2, 74(a0)
+; RV32-ZVFHMIN-NEXT:    li a4, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a4, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 38
+; RV32-ZVFHMIN-NEXT:    li a4, 37
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a4
+; RV32-ZVFHMIN-NEXT:    andi a2, a3, 64
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB28_44
+; RV32-ZVFHMIN-NEXT:  .LBB28_107: # %cond.load112
+; RV32-ZVFHMIN-NEXT:    lh a2, 76(a0)
+; RV32-ZVFHMIN-NEXT:    li a4, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a4, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 39
+; RV32-ZVFHMIN-NEXT:    li a4, 38
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a4
+; RV32-ZVFHMIN-NEXT:    andi a2, a3, 128
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB28_45
+; RV32-ZVFHMIN-NEXT:  .LBB28_108: # %cond.load115
+; RV32-ZVFHMIN-NEXT:    lh a2, 78(a0)
+; RV32-ZVFHMIN-NEXT:    li a4, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a4, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 40
+; RV32-ZVFHMIN-NEXT:    li a4, 39
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a4
+; RV32-ZVFHMIN-NEXT:    andi a2, a3, 256
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB28_46
+; RV32-ZVFHMIN-NEXT:  .LBB28_109: # %cond.load118
+; RV32-ZVFHMIN-NEXT:    lh a2, 80(a0)
+; RV32-ZVFHMIN-NEXT:    li a4, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a4, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 41
+; RV32-ZVFHMIN-NEXT:    li a4, 40
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a4
+; RV32-ZVFHMIN-NEXT:    andi a2, a3, 512
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB28_47
+; RV32-ZVFHMIN-NEXT:  .LBB28_110: # %cond.load121
+; RV32-ZVFHMIN-NEXT:    lh a2, 82(a0)
+; RV32-ZVFHMIN-NEXT:    li a4, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a4, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 42
+; RV32-ZVFHMIN-NEXT:    li a4, 41
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a4
+; RV32-ZVFHMIN-NEXT:    andi a2, a3, 1024
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB28_48
+; RV32-ZVFHMIN-NEXT:  .LBB28_111: # %cond.load124
+; RV32-ZVFHMIN-NEXT:    lh a2, 84(a0)
+; RV32-ZVFHMIN-NEXT:    li a4, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a4, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 43
+; RV32-ZVFHMIN-NEXT:    li a4, 42
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a4
+; RV32-ZVFHMIN-NEXT:    slli a2, a3, 20
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB28_49
+; RV32-ZVFHMIN-NEXT:  .LBB28_112: # %cond.load127
+; RV32-ZVFHMIN-NEXT:    lh a2, 86(a0)
+; RV32-ZVFHMIN-NEXT:    li a4, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a4, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 44
+; RV32-ZVFHMIN-NEXT:    li a4, 43
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a4
+; RV32-ZVFHMIN-NEXT:    slli a2, a3, 19
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB28_50
+; RV32-ZVFHMIN-NEXT:  .LBB28_113: # %cond.load130
+; RV32-ZVFHMIN-NEXT:    lh a2, 88(a0)
+; RV32-ZVFHMIN-NEXT:    li a4, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a4, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 45
+; RV32-ZVFHMIN-NEXT:    li a4, 44
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a4
+; RV32-ZVFHMIN-NEXT:    slli a2, a3, 18
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB28_51
+; RV32-ZVFHMIN-NEXT:  .LBB28_114: # %cond.load133
+; RV32-ZVFHMIN-NEXT:    lh a2, 90(a0)
+; RV32-ZVFHMIN-NEXT:    li a4, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a4, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 46
+; RV32-ZVFHMIN-NEXT:    li a4, 45
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a4
+; RV32-ZVFHMIN-NEXT:    slli a2, a3, 17
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB28_52
+; RV32-ZVFHMIN-NEXT:  .LBB28_115: # %cond.load136
+; RV32-ZVFHMIN-NEXT:    lh a2, 92(a0)
+; RV32-ZVFHMIN-NEXT:    li a4, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a4, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 47
+; RV32-ZVFHMIN-NEXT:    li a4, 46
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a4
+; RV32-ZVFHMIN-NEXT:    slli a2, a3, 16
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB28_53
+; RV32-ZVFHMIN-NEXT:  .LBB28_116: # %cond.load139
+; RV32-ZVFHMIN-NEXT:    lh a2, 94(a0)
+; RV32-ZVFHMIN-NEXT:    li a4, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a4, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 48
+; RV32-ZVFHMIN-NEXT:    li a4, 47
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a4
+; RV32-ZVFHMIN-NEXT:    slli a2, a3, 15
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB28_54
+; RV32-ZVFHMIN-NEXT:  .LBB28_117: # %cond.load142
+; RV32-ZVFHMIN-NEXT:    lh a2, 96(a0)
+; RV32-ZVFHMIN-NEXT:    li a4, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a4, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 49
+; RV32-ZVFHMIN-NEXT:    li a4, 48
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a4
+; RV32-ZVFHMIN-NEXT:    slli a2, a3, 14
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB28_55
+; RV32-ZVFHMIN-NEXT:  .LBB28_118: # %cond.load145
+; RV32-ZVFHMIN-NEXT:    lh a2, 98(a0)
+; RV32-ZVFHMIN-NEXT:    li a4, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a4, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 50
+; RV32-ZVFHMIN-NEXT:    li a4, 49
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a4
+; RV32-ZVFHMIN-NEXT:    slli a2, a3, 13
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB28_56
+; RV32-ZVFHMIN-NEXT:  .LBB28_119: # %cond.load148
+; RV32-ZVFHMIN-NEXT:    lh a2, 100(a0)
+; RV32-ZVFHMIN-NEXT:    li a4, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a4, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 51
+; RV32-ZVFHMIN-NEXT:    li a4, 50
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a4
+; RV32-ZVFHMIN-NEXT:    slli a2, a3, 12
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB28_57
+; RV32-ZVFHMIN-NEXT:  .LBB28_120: # %cond.load151
+; RV32-ZVFHMIN-NEXT:    lh a2, 102(a0)
+; RV32-ZVFHMIN-NEXT:    li a4, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a4, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 52
+; RV32-ZVFHMIN-NEXT:    li a4, 51
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a4
+; RV32-ZVFHMIN-NEXT:    slli a2, a3, 11
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB28_58
+; RV32-ZVFHMIN-NEXT:  .LBB28_121: # %cond.load154
+; RV32-ZVFHMIN-NEXT:    lh a2, 104(a0)
+; RV32-ZVFHMIN-NEXT:    li a4, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a4, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 53
+; RV32-ZVFHMIN-NEXT:    li a4, 52
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a4
+; RV32-ZVFHMIN-NEXT:    slli a2, a3, 10
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB28_59
+; RV32-ZVFHMIN-NEXT:  .LBB28_122: # %cond.load157
+; RV32-ZVFHMIN-NEXT:    lh a2, 106(a0)
+; RV32-ZVFHMIN-NEXT:    li a4, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a4, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 54
+; RV32-ZVFHMIN-NEXT:    li a4, 53
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a4
+; RV32-ZVFHMIN-NEXT:    slli a2, a3, 9
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB28_60
+; RV32-ZVFHMIN-NEXT:  .LBB28_123: # %cond.load160
+; RV32-ZVFHMIN-NEXT:    lh a2, 108(a0)
+; RV32-ZVFHMIN-NEXT:    li a4, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a4, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 55
+; RV32-ZVFHMIN-NEXT:    li a4, 54
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a4
+; RV32-ZVFHMIN-NEXT:    slli a2, a3, 8
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB28_61
+; RV32-ZVFHMIN-NEXT:  .LBB28_124: # %cond.load163
+; RV32-ZVFHMIN-NEXT:    lh a2, 110(a0)
+; RV32-ZVFHMIN-NEXT:    li a4, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a4, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 56
+; RV32-ZVFHMIN-NEXT:    li a4, 55
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a4
+; RV32-ZVFHMIN-NEXT:    slli a2, a3, 7
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB28_62
+; RV32-ZVFHMIN-NEXT:  .LBB28_125: # %cond.load166
+; RV32-ZVFHMIN-NEXT:    lh a2, 112(a0)
+; RV32-ZVFHMIN-NEXT:    li a4, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a4, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 57
+; RV32-ZVFHMIN-NEXT:    li a4, 56
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a4
+; RV32-ZVFHMIN-NEXT:    slli a2, a3, 6
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB28_63
+; RV32-ZVFHMIN-NEXT:  .LBB28_126: # %cond.load169
+; RV32-ZVFHMIN-NEXT:    lh a2, 114(a0)
+; RV32-ZVFHMIN-NEXT:    li a4, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a4, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 58
+; RV32-ZVFHMIN-NEXT:    li a4, 57
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a4
+; RV32-ZVFHMIN-NEXT:    slli a2, a3, 5
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB28_64
+; RV32-ZVFHMIN-NEXT:  .LBB28_127: # %cond.load172
+; RV32-ZVFHMIN-NEXT:    lh a2, 116(a0)
+; RV32-ZVFHMIN-NEXT:    li a4, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a4, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 59
+; RV32-ZVFHMIN-NEXT:    li a4, 58
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a4
+; RV32-ZVFHMIN-NEXT:    slli a2, a3, 4
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB28_65
+; RV32-ZVFHMIN-NEXT:  .LBB28_128: # %cond.load175
+; RV32-ZVFHMIN-NEXT:    lh a2, 118(a0)
+; RV32-ZVFHMIN-NEXT:    li a4, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a4, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 60
+; RV32-ZVFHMIN-NEXT:    li a4, 59
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a4
+; RV32-ZVFHMIN-NEXT:    slli a2, a3, 3
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB28_66
+; RV32-ZVFHMIN-NEXT:  .LBB28_129: # %cond.load178
+; RV32-ZVFHMIN-NEXT:    lh a2, 120(a0)
+; RV32-ZVFHMIN-NEXT:    li a4, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a4, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 61
+; RV32-ZVFHMIN-NEXT:    li a4, 60
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a4
+; RV32-ZVFHMIN-NEXT:    slli a2, a3, 2
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB28_67
+; RV32-ZVFHMIN-NEXT:    j .LBB28_68
+; RV32-ZVFHMIN-NEXT:  .LBB28_130:
+; RV32-ZVFHMIN-NEXT:    # implicit-def: $v16m8
+; RV32-ZVFHMIN-NEXT:    andi a3, a2, 2
+; RV32-ZVFHMIN-NEXT:    beqz a3, .LBB28_132
+; RV32-ZVFHMIN-NEXT:  .LBB28_131: # %cond.load193
+; RV32-ZVFHMIN-NEXT:    lh a3, 130(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v25, a3
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v16, v25, 1
+; RV32-ZVFHMIN-NEXT:  .LBB28_132: # %else194
+; RV32-ZVFHMIN-NEXT:    andi a3, a2, 4
+; RV32-ZVFHMIN-NEXT:    bnez a3, .LBB28_197
+; RV32-ZVFHMIN-NEXT:  # %bb.133: # %else197
+; RV32-ZVFHMIN-NEXT:    andi a3, a2, 8
+; RV32-ZVFHMIN-NEXT:    bnez a3, .LBB28_198
+; RV32-ZVFHMIN-NEXT:  .LBB28_134: # %else200
+; RV32-ZVFHMIN-NEXT:    andi a3, a2, 16
+; RV32-ZVFHMIN-NEXT:    bnez a3, .LBB28_199
+; RV32-ZVFHMIN-NEXT:  .LBB28_135: # %else203
+; RV32-ZVFHMIN-NEXT:    andi a3, a2, 32
+; RV32-ZVFHMIN-NEXT:    bnez a3, .LBB28_200
+; RV32-ZVFHMIN-NEXT:  .LBB28_136: # %else206
+; RV32-ZVFHMIN-NEXT:    andi a3, a2, 64
+; RV32-ZVFHMIN-NEXT:    bnez a3, .LBB28_201
+; RV32-ZVFHMIN-NEXT:  .LBB28_137: # %else209
+; RV32-ZVFHMIN-NEXT:    andi a3, a2, 128
+; RV32-ZVFHMIN-NEXT:    bnez a3, .LBB28_202
+; RV32-ZVFHMIN-NEXT:  .LBB28_138: # %else212
+; RV32-ZVFHMIN-NEXT:    andi a3, a2, 256
+; RV32-ZVFHMIN-NEXT:    bnez a3, .LBB28_203
+; RV32-ZVFHMIN-NEXT:  .LBB28_139: # %else215
+; RV32-ZVFHMIN-NEXT:    andi a3, a2, 512
+; RV32-ZVFHMIN-NEXT:    bnez a3, .LBB28_204
+; RV32-ZVFHMIN-NEXT:  .LBB28_140: # %else218
+; RV32-ZVFHMIN-NEXT:    andi a3, a2, 1024
+; RV32-ZVFHMIN-NEXT:    bnez a3, .LBB28_205
+; RV32-ZVFHMIN-NEXT:  .LBB28_141: # %else221
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 20
+; RV32-ZVFHMIN-NEXT:    bltz a3, .LBB28_206
+; RV32-ZVFHMIN-NEXT:  .LBB28_142: # %else224
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 19
+; RV32-ZVFHMIN-NEXT:    bltz a3, .LBB28_207
+; RV32-ZVFHMIN-NEXT:  .LBB28_143: # %else227
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 18
+; RV32-ZVFHMIN-NEXT:    bltz a3, .LBB28_208
+; RV32-ZVFHMIN-NEXT:  .LBB28_144: # %else230
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 17
+; RV32-ZVFHMIN-NEXT:    bltz a3, .LBB28_209
+; RV32-ZVFHMIN-NEXT:  .LBB28_145: # %else233
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 16
+; RV32-ZVFHMIN-NEXT:    bltz a3, .LBB28_210
+; RV32-ZVFHMIN-NEXT:  .LBB28_146: # %else236
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 15
+; RV32-ZVFHMIN-NEXT:    bltz a3, .LBB28_211
+; RV32-ZVFHMIN-NEXT:  .LBB28_147: # %else239
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 14
+; RV32-ZVFHMIN-NEXT:    bltz a3, .LBB28_212
+; RV32-ZVFHMIN-NEXT:  .LBB28_148: # %else242
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 13
+; RV32-ZVFHMIN-NEXT:    bltz a3, .LBB28_213
+; RV32-ZVFHMIN-NEXT:  .LBB28_149: # %else245
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 12
+; RV32-ZVFHMIN-NEXT:    bltz a3, .LBB28_214
+; RV32-ZVFHMIN-NEXT:  .LBB28_150: # %else248
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 11
+; RV32-ZVFHMIN-NEXT:    bltz a3, .LBB28_215
+; RV32-ZVFHMIN-NEXT:  .LBB28_151: # %else251
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 10
+; RV32-ZVFHMIN-NEXT:    bltz a3, .LBB28_216
+; RV32-ZVFHMIN-NEXT:  .LBB28_152: # %else254
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 9
+; RV32-ZVFHMIN-NEXT:    bltz a3, .LBB28_217
+; RV32-ZVFHMIN-NEXT:  .LBB28_153: # %else257
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 8
+; RV32-ZVFHMIN-NEXT:    bltz a3, .LBB28_218
+; RV32-ZVFHMIN-NEXT:  .LBB28_154: # %else260
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 7
+; RV32-ZVFHMIN-NEXT:    bltz a3, .LBB28_219
+; RV32-ZVFHMIN-NEXT:  .LBB28_155: # %else263
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 6
+; RV32-ZVFHMIN-NEXT:    bltz a3, .LBB28_220
+; RV32-ZVFHMIN-NEXT:  .LBB28_156: # %else266
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 5
+; RV32-ZVFHMIN-NEXT:    bltz a3, .LBB28_221
+; RV32-ZVFHMIN-NEXT:  .LBB28_157: # %else269
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 4
+; RV32-ZVFHMIN-NEXT:    bltz a3, .LBB28_222
+; RV32-ZVFHMIN-NEXT:  .LBB28_158: # %else272
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 3
+; RV32-ZVFHMIN-NEXT:    bltz a3, .LBB28_223
+; RV32-ZVFHMIN-NEXT:  .LBB28_159: # %else275
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 2
+; RV32-ZVFHMIN-NEXT:    bgez a3, .LBB28_161
+; RV32-ZVFHMIN-NEXT:  .LBB28_160: # %cond.load277
+; RV32-ZVFHMIN-NEXT:    lh a3, 186(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 30, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v28, a3
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v16, v28, 29
+; RV32-ZVFHMIN-NEXT:  .LBB28_161: # %else278
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 1
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vsrl.vx v24, v24, a1
+; RV32-ZVFHMIN-NEXT:    bgez a3, .LBB28_163
+; RV32-ZVFHMIN-NEXT:  # %bb.162: # %cond.load280
+; RV32-ZVFHMIN-NEXT:    lh a1, 188(a0)
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v28, a1
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 31, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v16, v28, 30
+; RV32-ZVFHMIN-NEXT:  .LBB28_163: # %else281
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a1, v24
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB28_224
+; RV32-ZVFHMIN-NEXT:  # %bb.164: # %else284
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 1
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB28_225
+; RV32-ZVFHMIN-NEXT:  .LBB28_165: # %else287
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 2
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB28_226
+; RV32-ZVFHMIN-NEXT:  .LBB28_166: # %else290
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 4
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB28_227
+; RV32-ZVFHMIN-NEXT:  .LBB28_167: # %else293
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 8
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB28_228
+; RV32-ZVFHMIN-NEXT:  .LBB28_168: # %else296
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 16
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB28_229
+; RV32-ZVFHMIN-NEXT:  .LBB28_169: # %else299
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 32
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB28_230
+; RV32-ZVFHMIN-NEXT:  .LBB28_170: # %else302
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 64
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB28_231
+; RV32-ZVFHMIN-NEXT:  .LBB28_171: # %else305
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 128
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB28_232
+; RV32-ZVFHMIN-NEXT:  .LBB28_172: # %else308
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 256
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB28_233
+; RV32-ZVFHMIN-NEXT:  .LBB28_173: # %else311
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 512
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB28_234
+; RV32-ZVFHMIN-NEXT:  .LBB28_174: # %else314
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 1024
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB28_235
+; RV32-ZVFHMIN-NEXT:  .LBB28_175: # %else317
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 20
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB28_236
+; RV32-ZVFHMIN-NEXT:  .LBB28_176: # %else320
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 19
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB28_237
+; RV32-ZVFHMIN-NEXT:  .LBB28_177: # %else323
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 18
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB28_238
+; RV32-ZVFHMIN-NEXT:  .LBB28_178: # %else326
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 17
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB28_239
+; RV32-ZVFHMIN-NEXT:  .LBB28_179: # %else329
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 16
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB28_240
+; RV32-ZVFHMIN-NEXT:  .LBB28_180: # %else332
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 15
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB28_241
+; RV32-ZVFHMIN-NEXT:  .LBB28_181: # %else335
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 14
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB28_242
+; RV32-ZVFHMIN-NEXT:  .LBB28_182: # %else338
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 13
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB28_243
+; RV32-ZVFHMIN-NEXT:  .LBB28_183: # %else341
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 12
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB28_244
+; RV32-ZVFHMIN-NEXT:  .LBB28_184: # %else344
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 11
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB28_245
+; RV32-ZVFHMIN-NEXT:  .LBB28_185: # %else347
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 10
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB28_246
+; RV32-ZVFHMIN-NEXT:  .LBB28_186: # %else350
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 9
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB28_247
+; RV32-ZVFHMIN-NEXT:  .LBB28_187: # %else353
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 8
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB28_248
+; RV32-ZVFHMIN-NEXT:  .LBB28_188: # %else356
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 7
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB28_249
+; RV32-ZVFHMIN-NEXT:  .LBB28_189: # %else359
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 6
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB28_250
+; RV32-ZVFHMIN-NEXT:  .LBB28_190: # %else362
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 5
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB28_251
+; RV32-ZVFHMIN-NEXT:  .LBB28_191: # %else365
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 4
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB28_252
+; RV32-ZVFHMIN-NEXT:  .LBB28_192: # %else368
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 3
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB28_253
+; RV32-ZVFHMIN-NEXT:  .LBB28_193: # %else371
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 2
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB28_254
+; RV32-ZVFHMIN-NEXT:  .LBB28_194: # %else374
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 1
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB28_255
+; RV32-ZVFHMIN-NEXT:  .LBB28_195: # %else377
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB28_256
+; RV32-ZVFHMIN-NEXT:  .LBB28_196: # %else380
+; RV32-ZVFHMIN-NEXT:    ret
+; RV32-ZVFHMIN-NEXT:  .LBB28_197: # %cond.load196
+; RV32-ZVFHMIN-NEXT:    lh a3, 132(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v25, a3
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v16, v25, 2
+; RV32-ZVFHMIN-NEXT:    andi a3, a2, 8
+; RV32-ZVFHMIN-NEXT:    beqz a3, .LBB28_134
+; RV32-ZVFHMIN-NEXT:  .LBB28_198: # %cond.load199
+; RV32-ZVFHMIN-NEXT:    lh a3, 134(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v25, a3
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v16, v25, 3
+; RV32-ZVFHMIN-NEXT:    andi a3, a2, 16
+; RV32-ZVFHMIN-NEXT:    beqz a3, .LBB28_135
+; RV32-ZVFHMIN-NEXT:  .LBB28_199: # %cond.load202
+; RV32-ZVFHMIN-NEXT:    lh a3, 136(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v25, a3
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v16, v25, 4
+; RV32-ZVFHMIN-NEXT:    andi a3, a2, 32
+; RV32-ZVFHMIN-NEXT:    beqz a3, .LBB28_136
+; RV32-ZVFHMIN-NEXT:  .LBB28_200: # %cond.load205
+; RV32-ZVFHMIN-NEXT:    lh a3, 138(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v25, a3
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v16, v25, 5
+; RV32-ZVFHMIN-NEXT:    andi a3, a2, 64
+; RV32-ZVFHMIN-NEXT:    beqz a3, .LBB28_137
+; RV32-ZVFHMIN-NEXT:  .LBB28_201: # %cond.load208
+; RV32-ZVFHMIN-NEXT:    lh a3, 140(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v25, a3
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v16, v25, 6
+; RV32-ZVFHMIN-NEXT:    andi a3, a2, 128
+; RV32-ZVFHMIN-NEXT:    beqz a3, .LBB28_138
+; RV32-ZVFHMIN-NEXT:  .LBB28_202: # %cond.load211
+; RV32-ZVFHMIN-NEXT:    lh a3, 142(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v25, a3
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v16, v25, 7
+; RV32-ZVFHMIN-NEXT:    andi a3, a2, 256
+; RV32-ZVFHMIN-NEXT:    beqz a3, .LBB28_139
+; RV32-ZVFHMIN-NEXT:  .LBB28_203: # %cond.load214
+; RV32-ZVFHMIN-NEXT:    lh a3, 144(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 9, e16, m2, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v26, a3
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v16, v26, 8
+; RV32-ZVFHMIN-NEXT:    andi a3, a2, 512
+; RV32-ZVFHMIN-NEXT:    beqz a3, .LBB28_140
+; RV32-ZVFHMIN-NEXT:  .LBB28_204: # %cond.load217
+; RV32-ZVFHMIN-NEXT:    lh a3, 146(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 10, e16, m2, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v26, a3
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v16, v26, 9
+; RV32-ZVFHMIN-NEXT:    andi a3, a2, 1024
+; RV32-ZVFHMIN-NEXT:    beqz a3, .LBB28_141
+; RV32-ZVFHMIN-NEXT:  .LBB28_205: # %cond.load220
+; RV32-ZVFHMIN-NEXT:    lh a3, 148(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 11, e16, m2, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v26, a3
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v16, v26, 10
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 20
+; RV32-ZVFHMIN-NEXT:    bgez a3, .LBB28_142
+; RV32-ZVFHMIN-NEXT:  .LBB28_206: # %cond.load223
+; RV32-ZVFHMIN-NEXT:    lh a3, 150(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 12, e16, m2, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v26, a3
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v16, v26, 11
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 19
+; RV32-ZVFHMIN-NEXT:    bgez a3, .LBB28_143
+; RV32-ZVFHMIN-NEXT:  .LBB28_207: # %cond.load226
+; RV32-ZVFHMIN-NEXT:    lh a3, 152(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 13, e16, m2, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v26, a3
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v16, v26, 12
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 18
+; RV32-ZVFHMIN-NEXT:    bgez a3, .LBB28_144
+; RV32-ZVFHMIN-NEXT:  .LBB28_208: # %cond.load229
+; RV32-ZVFHMIN-NEXT:    lh a3, 154(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 14, e16, m2, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v26, a3
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v16, v26, 13
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 17
+; RV32-ZVFHMIN-NEXT:    bgez a3, .LBB28_145
+; RV32-ZVFHMIN-NEXT:  .LBB28_209: # %cond.load232
+; RV32-ZVFHMIN-NEXT:    lh a3, 156(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 15, e16, m2, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v26, a3
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v16, v26, 14
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 16
+; RV32-ZVFHMIN-NEXT:    bgez a3, .LBB28_146
+; RV32-ZVFHMIN-NEXT:  .LBB28_210: # %cond.load235
+; RV32-ZVFHMIN-NEXT:    lh a3, 158(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v26, a3
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v16, v26, 15
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 15
+; RV32-ZVFHMIN-NEXT:    bgez a3, .LBB28_147
+; RV32-ZVFHMIN-NEXT:  .LBB28_211: # %cond.load238
+; RV32-ZVFHMIN-NEXT:    lh a3, 160(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 17, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v28, a3
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v16, v28, 16
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 14
+; RV32-ZVFHMIN-NEXT:    bgez a3, .LBB28_148
+; RV32-ZVFHMIN-NEXT:  .LBB28_212: # %cond.load241
+; RV32-ZVFHMIN-NEXT:    lh a3, 162(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 18, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v28, a3
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v16, v28, 17
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 13
+; RV32-ZVFHMIN-NEXT:    bgez a3, .LBB28_149
+; RV32-ZVFHMIN-NEXT:  .LBB28_213: # %cond.load244
+; RV32-ZVFHMIN-NEXT:    lh a3, 164(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 19, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v28, a3
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v16, v28, 18
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 12
+; RV32-ZVFHMIN-NEXT:    bgez a3, .LBB28_150
+; RV32-ZVFHMIN-NEXT:  .LBB28_214: # %cond.load247
+; RV32-ZVFHMIN-NEXT:    lh a3, 166(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 20, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v28, a3
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v16, v28, 19
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 11
+; RV32-ZVFHMIN-NEXT:    bgez a3, .LBB28_151
+; RV32-ZVFHMIN-NEXT:  .LBB28_215: # %cond.load250
+; RV32-ZVFHMIN-NEXT:    lh a3, 168(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 21, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v28, a3
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v16, v28, 20
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 10
+; RV32-ZVFHMIN-NEXT:    bgez a3, .LBB28_152
+; RV32-ZVFHMIN-NEXT:  .LBB28_216: # %cond.load253
+; RV32-ZVFHMIN-NEXT:    lh a3, 170(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 22, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v28, a3
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v16, v28, 21
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 9
+; RV32-ZVFHMIN-NEXT:    bgez a3, .LBB28_153
+; RV32-ZVFHMIN-NEXT:  .LBB28_217: # %cond.load256
+; RV32-ZVFHMIN-NEXT:    lh a3, 172(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 23, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v28, a3
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v16, v28, 22
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 8
+; RV32-ZVFHMIN-NEXT:    bgez a3, .LBB28_154
+; RV32-ZVFHMIN-NEXT:  .LBB28_218: # %cond.load259
+; RV32-ZVFHMIN-NEXT:    lh a3, 174(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 24, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v28, a3
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v16, v28, 23
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 7
+; RV32-ZVFHMIN-NEXT:    bgez a3, .LBB28_155
+; RV32-ZVFHMIN-NEXT:  .LBB28_219: # %cond.load262
+; RV32-ZVFHMIN-NEXT:    lh a3, 176(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 25, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v28, a3
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v16, v28, 24
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 6
+; RV32-ZVFHMIN-NEXT:    bgez a3, .LBB28_156
+; RV32-ZVFHMIN-NEXT:  .LBB28_220: # %cond.load265
+; RV32-ZVFHMIN-NEXT:    lh a3, 178(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 26, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v28, a3
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v16, v28, 25
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 5
+; RV32-ZVFHMIN-NEXT:    bgez a3, .LBB28_157
+; RV32-ZVFHMIN-NEXT:  .LBB28_221: # %cond.load268
+; RV32-ZVFHMIN-NEXT:    lh a3, 180(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 27, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v28, a3
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v16, v28, 26
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 4
+; RV32-ZVFHMIN-NEXT:    bgez a3, .LBB28_158
+; RV32-ZVFHMIN-NEXT:  .LBB28_222: # %cond.load271
+; RV32-ZVFHMIN-NEXT:    lh a3, 182(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 28, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v28, a3
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v16, v28, 27
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 3
+; RV32-ZVFHMIN-NEXT:    bgez a3, .LBB28_159
+; RV32-ZVFHMIN-NEXT:  .LBB28_223: # %cond.load274
+; RV32-ZVFHMIN-NEXT:    lh a3, 184(a0)
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 29, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v28, a3
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v16, v28, 28
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 2
+; RV32-ZVFHMIN-NEXT:    bltz a3, .LBB28_160
+; RV32-ZVFHMIN-NEXT:    j .LBB28_161
+; RV32-ZVFHMIN-NEXT:  .LBB28_224: # %cond.load283
+; RV32-ZVFHMIN-NEXT:    lh a2, 190(a0)
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 32
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vi v16, v24, 31
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 1
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB28_165
+; RV32-ZVFHMIN-NEXT:  .LBB28_225: # %cond.load286
+; RV32-ZVFHMIN-NEXT:    lh a2, 192(a0)
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 33
+; RV32-ZVFHMIN-NEXT:    li a3, 32
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a3
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 2
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB28_166
+; RV32-ZVFHMIN-NEXT:  .LBB28_226: # %cond.load289
+; RV32-ZVFHMIN-NEXT:    lh a2, 194(a0)
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 34
+; RV32-ZVFHMIN-NEXT:    li a3, 33
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a3
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 4
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB28_167
+; RV32-ZVFHMIN-NEXT:  .LBB28_227: # %cond.load292
+; RV32-ZVFHMIN-NEXT:    lh a2, 196(a0)
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 35
+; RV32-ZVFHMIN-NEXT:    li a3, 34
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a3
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 8
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB28_168
+; RV32-ZVFHMIN-NEXT:  .LBB28_228: # %cond.load295
+; RV32-ZVFHMIN-NEXT:    lh a2, 198(a0)
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 36
+; RV32-ZVFHMIN-NEXT:    li a3, 35
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a3
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 16
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB28_169
+; RV32-ZVFHMIN-NEXT:  .LBB28_229: # %cond.load298
+; RV32-ZVFHMIN-NEXT:    lh a2, 200(a0)
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 37
+; RV32-ZVFHMIN-NEXT:    li a3, 36
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a3
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 32
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB28_170
+; RV32-ZVFHMIN-NEXT:  .LBB28_230: # %cond.load301
+; RV32-ZVFHMIN-NEXT:    lh a2, 202(a0)
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 38
+; RV32-ZVFHMIN-NEXT:    li a3, 37
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a3
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 64
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB28_171
+; RV32-ZVFHMIN-NEXT:  .LBB28_231: # %cond.load304
+; RV32-ZVFHMIN-NEXT:    lh a2, 204(a0)
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 39
+; RV32-ZVFHMIN-NEXT:    li a3, 38
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a3
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 128
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB28_172
+; RV32-ZVFHMIN-NEXT:  .LBB28_232: # %cond.load307
+; RV32-ZVFHMIN-NEXT:    lh a2, 206(a0)
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 40
+; RV32-ZVFHMIN-NEXT:    li a3, 39
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a3
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 256
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB28_173
+; RV32-ZVFHMIN-NEXT:  .LBB28_233: # %cond.load310
+; RV32-ZVFHMIN-NEXT:    lh a2, 208(a0)
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 41
+; RV32-ZVFHMIN-NEXT:    li a3, 40
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a3
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 512
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB28_174
+; RV32-ZVFHMIN-NEXT:  .LBB28_234: # %cond.load313
+; RV32-ZVFHMIN-NEXT:    lh a2, 210(a0)
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 42
+; RV32-ZVFHMIN-NEXT:    li a3, 41
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a3
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 1024
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB28_175
+; RV32-ZVFHMIN-NEXT:  .LBB28_235: # %cond.load316
+; RV32-ZVFHMIN-NEXT:    lh a2, 212(a0)
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 43
+; RV32-ZVFHMIN-NEXT:    li a3, 42
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a3
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 20
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB28_176
+; RV32-ZVFHMIN-NEXT:  .LBB28_236: # %cond.load319
+; RV32-ZVFHMIN-NEXT:    lh a2, 214(a0)
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 44
+; RV32-ZVFHMIN-NEXT:    li a3, 43
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a3
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 19
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB28_177
+; RV32-ZVFHMIN-NEXT:  .LBB28_237: # %cond.load322
+; RV32-ZVFHMIN-NEXT:    lh a2, 216(a0)
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 45
+; RV32-ZVFHMIN-NEXT:    li a3, 44
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a3
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 18
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB28_178
+; RV32-ZVFHMIN-NEXT:  .LBB28_238: # %cond.load325
+; RV32-ZVFHMIN-NEXT:    lh a2, 218(a0)
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 46
+; RV32-ZVFHMIN-NEXT:    li a3, 45
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a3
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 17
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB28_179
+; RV32-ZVFHMIN-NEXT:  .LBB28_239: # %cond.load328
+; RV32-ZVFHMIN-NEXT:    lh a2, 220(a0)
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 47
+; RV32-ZVFHMIN-NEXT:    li a3, 46
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a3
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 16
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB28_180
+; RV32-ZVFHMIN-NEXT:  .LBB28_240: # %cond.load331
+; RV32-ZVFHMIN-NEXT:    lh a2, 222(a0)
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 48
+; RV32-ZVFHMIN-NEXT:    li a3, 47
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a3
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 15
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB28_181
+; RV32-ZVFHMIN-NEXT:  .LBB28_241: # %cond.load334
+; RV32-ZVFHMIN-NEXT:    lh a2, 224(a0)
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 49
+; RV32-ZVFHMIN-NEXT:    li a3, 48
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a3
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 14
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB28_182
+; RV32-ZVFHMIN-NEXT:  .LBB28_242: # %cond.load337
+; RV32-ZVFHMIN-NEXT:    lh a2, 226(a0)
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 50
+; RV32-ZVFHMIN-NEXT:    li a3, 49
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a3
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 13
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB28_183
+; RV32-ZVFHMIN-NEXT:  .LBB28_243: # %cond.load340
+; RV32-ZVFHMIN-NEXT:    lh a2, 228(a0)
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 51
+; RV32-ZVFHMIN-NEXT:    li a3, 50
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a3
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 12
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB28_184
+; RV32-ZVFHMIN-NEXT:  .LBB28_244: # %cond.load343
+; RV32-ZVFHMIN-NEXT:    lh a2, 230(a0)
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 52
+; RV32-ZVFHMIN-NEXT:    li a3, 51
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a3
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 11
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB28_185
+; RV32-ZVFHMIN-NEXT:  .LBB28_245: # %cond.load346
+; RV32-ZVFHMIN-NEXT:    lh a2, 232(a0)
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 53
+; RV32-ZVFHMIN-NEXT:    li a3, 52
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a3
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 10
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB28_186
+; RV32-ZVFHMIN-NEXT:  .LBB28_246: # %cond.load349
+; RV32-ZVFHMIN-NEXT:    lh a2, 234(a0)
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 54
+; RV32-ZVFHMIN-NEXT:    li a3, 53
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a3
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 9
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB28_187
+; RV32-ZVFHMIN-NEXT:  .LBB28_247: # %cond.load352
+; RV32-ZVFHMIN-NEXT:    lh a2, 236(a0)
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 55
+; RV32-ZVFHMIN-NEXT:    li a3, 54
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a3
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 8
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB28_188
+; RV32-ZVFHMIN-NEXT:  .LBB28_248: # %cond.load355
+; RV32-ZVFHMIN-NEXT:    lh a2, 238(a0)
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 56
+; RV32-ZVFHMIN-NEXT:    li a3, 55
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a3
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 7
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB28_189
+; RV32-ZVFHMIN-NEXT:  .LBB28_249: # %cond.load358
+; RV32-ZVFHMIN-NEXT:    lh a2, 240(a0)
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 57
+; RV32-ZVFHMIN-NEXT:    li a3, 56
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a3
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 6
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB28_190
+; RV32-ZVFHMIN-NEXT:  .LBB28_250: # %cond.load361
+; RV32-ZVFHMIN-NEXT:    lh a2, 242(a0)
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 58
+; RV32-ZVFHMIN-NEXT:    li a3, 57
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a3
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 5
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB28_191
+; RV32-ZVFHMIN-NEXT:  .LBB28_251: # %cond.load364
+; RV32-ZVFHMIN-NEXT:    lh a2, 244(a0)
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 59
+; RV32-ZVFHMIN-NEXT:    li a3, 58
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a3
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 4
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB28_192
+; RV32-ZVFHMIN-NEXT:  .LBB28_252: # %cond.load367
+; RV32-ZVFHMIN-NEXT:    lh a2, 246(a0)
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 60
+; RV32-ZVFHMIN-NEXT:    li a3, 59
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a3
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 3
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB28_193
+; RV32-ZVFHMIN-NEXT:  .LBB28_253: # %cond.load370
+; RV32-ZVFHMIN-NEXT:    lh a2, 248(a0)
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 61
+; RV32-ZVFHMIN-NEXT:    li a3, 60
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a3
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 2
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB28_194
+; RV32-ZVFHMIN-NEXT:  .LBB28_254: # %cond.load373
+; RV32-ZVFHMIN-NEXT:    lh a2, 250(a0)
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 62
+; RV32-ZVFHMIN-NEXT:    li a3, 61
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a3
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 1
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB28_195
+; RV32-ZVFHMIN-NEXT:  .LBB28_255: # %cond.load376
+; RV32-ZVFHMIN-NEXT:    lh a2, 252(a0)
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV32-ZVFHMIN-NEXT:    li a2, 63
+; RV32-ZVFHMIN-NEXT:    li a3, 62
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a3
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB28_196
+; RV32-ZVFHMIN-NEXT:  .LBB28_256: # %cond.load379
+; RV32-ZVFHMIN-NEXT:    lh a0, 254(a0)
+; RV32-ZVFHMIN-NEXT:    li a1, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.s.x v24, a0
+; RV32-ZVFHMIN-NEXT:    li a0, 63
+; RV32-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a0
+; RV32-ZVFHMIN-NEXT:    ret
+;
+; RV64-ZVFHMIN-LABEL: masked_load_v128f16:
+; RV64-ZVFHMIN:       # %bb.0:
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v0
+; RV64-ZVFHMIN-NEXT:    andi a1, a2, 1
+; RV64-ZVFHMIN-NEXT:    beqz a1, .LBB28_2
+; RV64-ZVFHMIN-NEXT:  # %bb.1: # %cond.load
+; RV64-ZVFHMIN-NEXT:    lh a1, 0(a0)
+; RV64-ZVFHMIN-NEXT:    fmv.x.h a3, fa5
+; RV64-ZVFHMIN-NEXT:    li a4, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a4, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.v.x v8, a3
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v8, a1
+; RV64-ZVFHMIN-NEXT:    andi a1, a2, 2
+; RV64-ZVFHMIN-NEXT:    bnez a1, .LBB28_3
+; RV64-ZVFHMIN-NEXT:    j .LBB28_4
+; RV64-ZVFHMIN-NEXT:  .LBB28_2:
+; RV64-ZVFHMIN-NEXT:    # implicit-def: $v8m8
+; RV64-ZVFHMIN-NEXT:    andi a1, a2, 2
+; RV64-ZVFHMIN-NEXT:    beqz a1, .LBB28_4
+; RV64-ZVFHMIN-NEXT:  .LBB28_3: # %cond.load1
+; RV64-ZVFHMIN-NEXT:    lh a1, 2(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 1
+; RV64-ZVFHMIN-NEXT:  .LBB28_4: # %else2
+; RV64-ZVFHMIN-NEXT:    andi a1, a2, 4
+; RV64-ZVFHMIN-NEXT:    bnez a1, .LBB28_71
+; RV64-ZVFHMIN-NEXT:  # %bb.5: # %else5
+; RV64-ZVFHMIN-NEXT:    andi a1, a2, 8
+; RV64-ZVFHMIN-NEXT:    bnez a1, .LBB28_72
+; RV64-ZVFHMIN-NEXT:  .LBB28_6: # %else8
+; RV64-ZVFHMIN-NEXT:    andi a1, a2, 16
+; RV64-ZVFHMIN-NEXT:    bnez a1, .LBB28_73
+; RV64-ZVFHMIN-NEXT:  .LBB28_7: # %else11
+; RV64-ZVFHMIN-NEXT:    andi a1, a2, 32
+; RV64-ZVFHMIN-NEXT:    bnez a1, .LBB28_74
+; RV64-ZVFHMIN-NEXT:  .LBB28_8: # %else14
+; RV64-ZVFHMIN-NEXT:    andi a1, a2, 64
+; RV64-ZVFHMIN-NEXT:    bnez a1, .LBB28_75
+; RV64-ZVFHMIN-NEXT:  .LBB28_9: # %else17
+; RV64-ZVFHMIN-NEXT:    andi a1, a2, 128
+; RV64-ZVFHMIN-NEXT:    bnez a1, .LBB28_76
+; RV64-ZVFHMIN-NEXT:  .LBB28_10: # %else20
+; RV64-ZVFHMIN-NEXT:    andi a1, a2, 256
+; RV64-ZVFHMIN-NEXT:    bnez a1, .LBB28_77
+; RV64-ZVFHMIN-NEXT:  .LBB28_11: # %else23
+; RV64-ZVFHMIN-NEXT:    andi a1, a2, 512
+; RV64-ZVFHMIN-NEXT:    bnez a1, .LBB28_78
+; RV64-ZVFHMIN-NEXT:  .LBB28_12: # %else26
+; RV64-ZVFHMIN-NEXT:    andi a1, a2, 1024
+; RV64-ZVFHMIN-NEXT:    bnez a1, .LBB28_79
+; RV64-ZVFHMIN-NEXT:  .LBB28_13: # %else29
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 52
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_80
+; RV64-ZVFHMIN-NEXT:  .LBB28_14: # %else32
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 51
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_81
+; RV64-ZVFHMIN-NEXT:  .LBB28_15: # %else35
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 50
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_82
+; RV64-ZVFHMIN-NEXT:  .LBB28_16: # %else38
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 49
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_83
+; RV64-ZVFHMIN-NEXT:  .LBB28_17: # %else41
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 48
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_84
+; RV64-ZVFHMIN-NEXT:  .LBB28_18: # %else44
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 47
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_85
+; RV64-ZVFHMIN-NEXT:  .LBB28_19: # %else47
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 46
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_86
+; RV64-ZVFHMIN-NEXT:  .LBB28_20: # %else50
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 45
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_87
+; RV64-ZVFHMIN-NEXT:  .LBB28_21: # %else53
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 44
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_88
+; RV64-ZVFHMIN-NEXT:  .LBB28_22: # %else56
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 43
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_89
+; RV64-ZVFHMIN-NEXT:  .LBB28_23: # %else59
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 42
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_90
+; RV64-ZVFHMIN-NEXT:  .LBB28_24: # %else62
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 41
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_91
+; RV64-ZVFHMIN-NEXT:  .LBB28_25: # %else65
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 40
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_92
+; RV64-ZVFHMIN-NEXT:  .LBB28_26: # %else68
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 39
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_93
+; RV64-ZVFHMIN-NEXT:  .LBB28_27: # %else71
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 38
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_94
+; RV64-ZVFHMIN-NEXT:  .LBB28_28: # %else74
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 37
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_95
+; RV64-ZVFHMIN-NEXT:  .LBB28_29: # %else77
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 36
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_96
+; RV64-ZVFHMIN-NEXT:  .LBB28_30: # %else80
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 35
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_97
+; RV64-ZVFHMIN-NEXT:  .LBB28_31: # %else83
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 34
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_98
+; RV64-ZVFHMIN-NEXT:  .LBB28_32: # %else86
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 33
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_99
+; RV64-ZVFHMIN-NEXT:  .LBB28_33: # %else89
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 32
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_100
+; RV64-ZVFHMIN-NEXT:  .LBB28_34: # %else92
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 31
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_101
+; RV64-ZVFHMIN-NEXT:  .LBB28_35: # %else95
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 30
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_102
+; RV64-ZVFHMIN-NEXT:  .LBB28_36: # %else98
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 29
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_103
+; RV64-ZVFHMIN-NEXT:  .LBB28_37: # %else101
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 28
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_104
+; RV64-ZVFHMIN-NEXT:  .LBB28_38: # %else104
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 27
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_105
+; RV64-ZVFHMIN-NEXT:  .LBB28_39: # %else107
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 26
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_106
+; RV64-ZVFHMIN-NEXT:  .LBB28_40: # %else110
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 25
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_107
+; RV64-ZVFHMIN-NEXT:  .LBB28_41: # %else113
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 24
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_108
+; RV64-ZVFHMIN-NEXT:  .LBB28_42: # %else116
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 23
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_109
+; RV64-ZVFHMIN-NEXT:  .LBB28_43: # %else119
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 22
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_110
+; RV64-ZVFHMIN-NEXT:  .LBB28_44: # %else122
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 21
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_111
+; RV64-ZVFHMIN-NEXT:  .LBB28_45: # %else125
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 20
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_112
+; RV64-ZVFHMIN-NEXT:  .LBB28_46: # %else128
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 19
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_113
+; RV64-ZVFHMIN-NEXT:  .LBB28_47: # %else131
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 18
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_114
+; RV64-ZVFHMIN-NEXT:  .LBB28_48: # %else134
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 17
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_115
+; RV64-ZVFHMIN-NEXT:  .LBB28_49: # %else137
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 16
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_116
+; RV64-ZVFHMIN-NEXT:  .LBB28_50: # %else140
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 15
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_117
+; RV64-ZVFHMIN-NEXT:  .LBB28_51: # %else143
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 14
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_118
+; RV64-ZVFHMIN-NEXT:  .LBB28_52: # %else146
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 13
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_119
+; RV64-ZVFHMIN-NEXT:  .LBB28_53: # %else149
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 12
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_120
+; RV64-ZVFHMIN-NEXT:  .LBB28_54: # %else152
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 11
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_121
+; RV64-ZVFHMIN-NEXT:  .LBB28_55: # %else155
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 10
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_122
+; RV64-ZVFHMIN-NEXT:  .LBB28_56: # %else158
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 9
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_123
+; RV64-ZVFHMIN-NEXT:  .LBB28_57: # %else161
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 8
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_124
+; RV64-ZVFHMIN-NEXT:  .LBB28_58: # %else164
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 7
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_125
+; RV64-ZVFHMIN-NEXT:  .LBB28_59: # %else167
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 6
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_126
+; RV64-ZVFHMIN-NEXT:  .LBB28_60: # %else170
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 5
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_127
+; RV64-ZVFHMIN-NEXT:  .LBB28_61: # %else173
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 4
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_128
+; RV64-ZVFHMIN-NEXT:  .LBB28_62: # %else176
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 3
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_129
+; RV64-ZVFHMIN-NEXT:  .LBB28_63: # %else179
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 2
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_65
+; RV64-ZVFHMIN-NEXT:  .LBB28_64: # %cond.load181
+; RV64-ZVFHMIN-NEXT:    lh a1, 122(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV64-ZVFHMIN-NEXT:    li a1, 62
+; RV64-ZVFHMIN-NEXT:    li a3, 61
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV64-ZVFHMIN-NEXT:  .LBB28_65: # %else182
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 1
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v16, v0, 1
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_67
+; RV64-ZVFHMIN-NEXT:  # %bb.66: # %cond.load184
+; RV64-ZVFHMIN-NEXT:    lh a1, 124(a0)
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a1
+; RV64-ZVFHMIN-NEXT:    li a1, 63
+; RV64-ZVFHMIN-NEXT:    li a3, 62
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v24, a3
+; RV64-ZVFHMIN-NEXT:  .LBB28_67: # %else185
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a1, v16
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_69
+; RV64-ZVFHMIN-NEXT:  # %bb.68: # %cond.load187
+; RV64-ZVFHMIN-NEXT:    lh a2, 126(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 63
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a2
+; RV64-ZVFHMIN-NEXT:  .LBB28_69: # %else188
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 1
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB28_130
+; RV64-ZVFHMIN-NEXT:  # %bb.70: # %cond.load190
+; RV64-ZVFHMIN-NEXT:    lh a2, 128(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a2
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 2
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB28_131
+; RV64-ZVFHMIN-NEXT:    j .LBB28_132
+; RV64-ZVFHMIN-NEXT:  .LBB28_71: # %cond.load4
+; RV64-ZVFHMIN-NEXT:    lh a1, 4(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 2
+; RV64-ZVFHMIN-NEXT:    andi a1, a2, 8
+; RV64-ZVFHMIN-NEXT:    beqz a1, .LBB28_6
+; RV64-ZVFHMIN-NEXT:  .LBB28_72: # %cond.load7
+; RV64-ZVFHMIN-NEXT:    lh a1, 6(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 3
+; RV64-ZVFHMIN-NEXT:    andi a1, a2, 16
+; RV64-ZVFHMIN-NEXT:    beqz a1, .LBB28_7
+; RV64-ZVFHMIN-NEXT:  .LBB28_73: # %cond.load10
+; RV64-ZVFHMIN-NEXT:    lh a1, 8(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 4
+; RV64-ZVFHMIN-NEXT:    andi a1, a2, 32
+; RV64-ZVFHMIN-NEXT:    beqz a1, .LBB28_8
+; RV64-ZVFHMIN-NEXT:  .LBB28_74: # %cond.load13
+; RV64-ZVFHMIN-NEXT:    lh a1, 10(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 5
+; RV64-ZVFHMIN-NEXT:    andi a1, a2, 64
+; RV64-ZVFHMIN-NEXT:    beqz a1, .LBB28_9
+; RV64-ZVFHMIN-NEXT:  .LBB28_75: # %cond.load16
+; RV64-ZVFHMIN-NEXT:    lh a1, 12(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 6
+; RV64-ZVFHMIN-NEXT:    andi a1, a2, 128
+; RV64-ZVFHMIN-NEXT:    beqz a1, .LBB28_10
+; RV64-ZVFHMIN-NEXT:  .LBB28_76: # %cond.load19
+; RV64-ZVFHMIN-NEXT:    lh a1, 14(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 7
+; RV64-ZVFHMIN-NEXT:    andi a1, a2, 256
+; RV64-ZVFHMIN-NEXT:    beqz a1, .LBB28_11
+; RV64-ZVFHMIN-NEXT:  .LBB28_77: # %cond.load22
+; RV64-ZVFHMIN-NEXT:    lh a1, 16(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 9, e16, m2, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 8
+; RV64-ZVFHMIN-NEXT:    andi a1, a2, 512
+; RV64-ZVFHMIN-NEXT:    beqz a1, .LBB28_12
+; RV64-ZVFHMIN-NEXT:  .LBB28_78: # %cond.load25
+; RV64-ZVFHMIN-NEXT:    lh a1, 18(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 10, e16, m2, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 9
+; RV64-ZVFHMIN-NEXT:    andi a1, a2, 1024
+; RV64-ZVFHMIN-NEXT:    beqz a1, .LBB28_13
+; RV64-ZVFHMIN-NEXT:  .LBB28_79: # %cond.load28
+; RV64-ZVFHMIN-NEXT:    lh a1, 20(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 11, e16, m2, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 10
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 52
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_14
+; RV64-ZVFHMIN-NEXT:  .LBB28_80: # %cond.load31
+; RV64-ZVFHMIN-NEXT:    lh a1, 22(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 12, e16, m2, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 11
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 51
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_15
+; RV64-ZVFHMIN-NEXT:  .LBB28_81: # %cond.load34
+; RV64-ZVFHMIN-NEXT:    lh a1, 24(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 13, e16, m2, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 12
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 50
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_16
+; RV64-ZVFHMIN-NEXT:  .LBB28_82: # %cond.load37
+; RV64-ZVFHMIN-NEXT:    lh a1, 26(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 14, e16, m2, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 13
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 49
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_17
+; RV64-ZVFHMIN-NEXT:  .LBB28_83: # %cond.load40
+; RV64-ZVFHMIN-NEXT:    lh a1, 28(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 15, e16, m2, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 14
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 48
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_18
+; RV64-ZVFHMIN-NEXT:  .LBB28_84: # %cond.load43
+; RV64-ZVFHMIN-NEXT:    lh a1, 30(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 15
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 47
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_19
+; RV64-ZVFHMIN-NEXT:  .LBB28_85: # %cond.load46
+; RV64-ZVFHMIN-NEXT:    lh a1, 32(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 17, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 16
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 46
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_20
+; RV64-ZVFHMIN-NEXT:  .LBB28_86: # %cond.load49
+; RV64-ZVFHMIN-NEXT:    lh a1, 34(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 18, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 17
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 45
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_21
+; RV64-ZVFHMIN-NEXT:  .LBB28_87: # %cond.load52
+; RV64-ZVFHMIN-NEXT:    lh a1, 36(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 19, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 18
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 44
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_22
+; RV64-ZVFHMIN-NEXT:  .LBB28_88: # %cond.load55
+; RV64-ZVFHMIN-NEXT:    lh a1, 38(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 20, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 19
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 43
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_23
+; RV64-ZVFHMIN-NEXT:  .LBB28_89: # %cond.load58
+; RV64-ZVFHMIN-NEXT:    lh a1, 40(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 21, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 20
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 42
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_24
+; RV64-ZVFHMIN-NEXT:  .LBB28_90: # %cond.load61
+; RV64-ZVFHMIN-NEXT:    lh a1, 42(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 22, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 21
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 41
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_25
+; RV64-ZVFHMIN-NEXT:  .LBB28_91: # %cond.load64
+; RV64-ZVFHMIN-NEXT:    lh a1, 44(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 23, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 22
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 40
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_26
+; RV64-ZVFHMIN-NEXT:  .LBB28_92: # %cond.load67
+; RV64-ZVFHMIN-NEXT:    lh a1, 46(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 24, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 23
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 39
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_27
+; RV64-ZVFHMIN-NEXT:  .LBB28_93: # %cond.load70
+; RV64-ZVFHMIN-NEXT:    lh a1, 48(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 25, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 24
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 38
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_28
+; RV64-ZVFHMIN-NEXT:  .LBB28_94: # %cond.load73
+; RV64-ZVFHMIN-NEXT:    lh a1, 50(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 26, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 25
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 37
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_29
+; RV64-ZVFHMIN-NEXT:  .LBB28_95: # %cond.load76
+; RV64-ZVFHMIN-NEXT:    lh a1, 52(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 27, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV64-ZVFHMIN-NEXT:    vmv8r.v v24, v8
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 26
+; RV64-ZVFHMIN-NEXT:    vmv4r.v v24, v8
+; RV64-ZVFHMIN-NEXT:    vmv8r.v v8, v24
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 36
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_30
+; RV64-ZVFHMIN-NEXT:  .LBB28_96: # %cond.load79
+; RV64-ZVFHMIN-NEXT:    lh a1, 54(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 28, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV64-ZVFHMIN-NEXT:    vmv8r.v v24, v8
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 27
+; RV64-ZVFHMIN-NEXT:    vmv4r.v v24, v8
+; RV64-ZVFHMIN-NEXT:    vmv8r.v v8, v24
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 35
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_31
+; RV64-ZVFHMIN-NEXT:  .LBB28_97: # %cond.load82
+; RV64-ZVFHMIN-NEXT:    lh a1, 56(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 29, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV64-ZVFHMIN-NEXT:    vmv8r.v v24, v8
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 28
+; RV64-ZVFHMIN-NEXT:    vmv4r.v v24, v8
+; RV64-ZVFHMIN-NEXT:    vmv8r.v v8, v24
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 34
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_32
+; RV64-ZVFHMIN-NEXT:  .LBB28_98: # %cond.load85
+; RV64-ZVFHMIN-NEXT:    lh a1, 58(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 30, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV64-ZVFHMIN-NEXT:    vmv8r.v v24, v8
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 29
+; RV64-ZVFHMIN-NEXT:    vmv4r.v v24, v8
+; RV64-ZVFHMIN-NEXT:    vmv8r.v v8, v24
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 33
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_33
+; RV64-ZVFHMIN-NEXT:  .LBB28_99: # %cond.load88
+; RV64-ZVFHMIN-NEXT:    lh a1, 60(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 31, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV64-ZVFHMIN-NEXT:    vmv8r.v v24, v8
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 30
+; RV64-ZVFHMIN-NEXT:    vmv4r.v v24, v8
+; RV64-ZVFHMIN-NEXT:    vmv8r.v v8, v24
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 32
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_34
+; RV64-ZVFHMIN-NEXT:  .LBB28_100: # %cond.load91
+; RV64-ZVFHMIN-NEXT:    lh a1, 62(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV64-ZVFHMIN-NEXT:    vmv8r.v v24, v8
+; RV64-ZVFHMIN-NEXT:    li a1, 32
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v8, v16, 31
+; RV64-ZVFHMIN-NEXT:    vmv4r.v v24, v8
+; RV64-ZVFHMIN-NEXT:    vmv8r.v v8, v24
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 31
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_35
+; RV64-ZVFHMIN-NEXT:  .LBB28_101: # %cond.load94
+; RV64-ZVFHMIN-NEXT:    lh a1, 64(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV64-ZVFHMIN-NEXT:    li a1, 33
+; RV64-ZVFHMIN-NEXT:    li a3, 32
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 30
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_36
+; RV64-ZVFHMIN-NEXT:  .LBB28_102: # %cond.load97
+; RV64-ZVFHMIN-NEXT:    lh a1, 66(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV64-ZVFHMIN-NEXT:    li a1, 34
+; RV64-ZVFHMIN-NEXT:    li a3, 33
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 29
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_37
+; RV64-ZVFHMIN-NEXT:  .LBB28_103: # %cond.load100
+; RV64-ZVFHMIN-NEXT:    lh a1, 68(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV64-ZVFHMIN-NEXT:    li a1, 35
+; RV64-ZVFHMIN-NEXT:    li a3, 34
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 28
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_38
+; RV64-ZVFHMIN-NEXT:  .LBB28_104: # %cond.load103
+; RV64-ZVFHMIN-NEXT:    lh a1, 70(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV64-ZVFHMIN-NEXT:    li a1, 36
+; RV64-ZVFHMIN-NEXT:    li a3, 35
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 27
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_39
+; RV64-ZVFHMIN-NEXT:  .LBB28_105: # %cond.load106
+; RV64-ZVFHMIN-NEXT:    lh a1, 72(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV64-ZVFHMIN-NEXT:    li a1, 37
+; RV64-ZVFHMIN-NEXT:    li a3, 36
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 26
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_40
+; RV64-ZVFHMIN-NEXT:  .LBB28_106: # %cond.load109
+; RV64-ZVFHMIN-NEXT:    lh a1, 74(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV64-ZVFHMIN-NEXT:    li a1, 38
+; RV64-ZVFHMIN-NEXT:    li a3, 37
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 25
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_41
+; RV64-ZVFHMIN-NEXT:  .LBB28_107: # %cond.load112
+; RV64-ZVFHMIN-NEXT:    lh a1, 76(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV64-ZVFHMIN-NEXT:    li a1, 39
+; RV64-ZVFHMIN-NEXT:    li a3, 38
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 24
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_42
+; RV64-ZVFHMIN-NEXT:  .LBB28_108: # %cond.load115
+; RV64-ZVFHMIN-NEXT:    lh a1, 78(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV64-ZVFHMIN-NEXT:    li a1, 40
+; RV64-ZVFHMIN-NEXT:    li a3, 39
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 23
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_43
+; RV64-ZVFHMIN-NEXT:  .LBB28_109: # %cond.load118
+; RV64-ZVFHMIN-NEXT:    lh a1, 80(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV64-ZVFHMIN-NEXT:    li a1, 41
+; RV64-ZVFHMIN-NEXT:    li a3, 40
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 22
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_44
+; RV64-ZVFHMIN-NEXT:  .LBB28_110: # %cond.load121
+; RV64-ZVFHMIN-NEXT:    lh a1, 82(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV64-ZVFHMIN-NEXT:    li a1, 42
+; RV64-ZVFHMIN-NEXT:    li a3, 41
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 21
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_45
+; RV64-ZVFHMIN-NEXT:  .LBB28_111: # %cond.load124
+; RV64-ZVFHMIN-NEXT:    lh a1, 84(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV64-ZVFHMIN-NEXT:    li a1, 43
+; RV64-ZVFHMIN-NEXT:    li a3, 42
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 20
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_46
+; RV64-ZVFHMIN-NEXT:  .LBB28_112: # %cond.load127
+; RV64-ZVFHMIN-NEXT:    lh a1, 86(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV64-ZVFHMIN-NEXT:    li a1, 44
+; RV64-ZVFHMIN-NEXT:    li a3, 43
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 19
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_47
+; RV64-ZVFHMIN-NEXT:  .LBB28_113: # %cond.load130
+; RV64-ZVFHMIN-NEXT:    lh a1, 88(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV64-ZVFHMIN-NEXT:    li a1, 45
+; RV64-ZVFHMIN-NEXT:    li a3, 44
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 18
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_48
+; RV64-ZVFHMIN-NEXT:  .LBB28_114: # %cond.load133
+; RV64-ZVFHMIN-NEXT:    lh a1, 90(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV64-ZVFHMIN-NEXT:    li a1, 46
+; RV64-ZVFHMIN-NEXT:    li a3, 45
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 17
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_49
+; RV64-ZVFHMIN-NEXT:  .LBB28_115: # %cond.load136
+; RV64-ZVFHMIN-NEXT:    lh a1, 92(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV64-ZVFHMIN-NEXT:    li a1, 47
+; RV64-ZVFHMIN-NEXT:    li a3, 46
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 16
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_50
+; RV64-ZVFHMIN-NEXT:  .LBB28_116: # %cond.load139
+; RV64-ZVFHMIN-NEXT:    lh a1, 94(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV64-ZVFHMIN-NEXT:    li a1, 48
+; RV64-ZVFHMIN-NEXT:    li a3, 47
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 15
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_51
+; RV64-ZVFHMIN-NEXT:  .LBB28_117: # %cond.load142
+; RV64-ZVFHMIN-NEXT:    lh a1, 96(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV64-ZVFHMIN-NEXT:    li a1, 49
+; RV64-ZVFHMIN-NEXT:    li a3, 48
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 14
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_52
+; RV64-ZVFHMIN-NEXT:  .LBB28_118: # %cond.load145
+; RV64-ZVFHMIN-NEXT:    lh a1, 98(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV64-ZVFHMIN-NEXT:    li a1, 50
+; RV64-ZVFHMIN-NEXT:    li a3, 49
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 13
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_53
+; RV64-ZVFHMIN-NEXT:  .LBB28_119: # %cond.load148
+; RV64-ZVFHMIN-NEXT:    lh a1, 100(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV64-ZVFHMIN-NEXT:    li a1, 51
+; RV64-ZVFHMIN-NEXT:    li a3, 50
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 12
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_54
+; RV64-ZVFHMIN-NEXT:  .LBB28_120: # %cond.load151
+; RV64-ZVFHMIN-NEXT:    lh a1, 102(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV64-ZVFHMIN-NEXT:    li a1, 52
+; RV64-ZVFHMIN-NEXT:    li a3, 51
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 11
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_55
+; RV64-ZVFHMIN-NEXT:  .LBB28_121: # %cond.load154
+; RV64-ZVFHMIN-NEXT:    lh a1, 104(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV64-ZVFHMIN-NEXT:    li a1, 53
+; RV64-ZVFHMIN-NEXT:    li a3, 52
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 10
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_56
+; RV64-ZVFHMIN-NEXT:  .LBB28_122: # %cond.load157
+; RV64-ZVFHMIN-NEXT:    lh a1, 106(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV64-ZVFHMIN-NEXT:    li a1, 54
+; RV64-ZVFHMIN-NEXT:    li a3, 53
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 9
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_57
+; RV64-ZVFHMIN-NEXT:  .LBB28_123: # %cond.load160
+; RV64-ZVFHMIN-NEXT:    lh a1, 108(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV64-ZVFHMIN-NEXT:    li a1, 55
+; RV64-ZVFHMIN-NEXT:    li a3, 54
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 8
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_58
+; RV64-ZVFHMIN-NEXT:  .LBB28_124: # %cond.load163
+; RV64-ZVFHMIN-NEXT:    lh a1, 110(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV64-ZVFHMIN-NEXT:    li a1, 56
+; RV64-ZVFHMIN-NEXT:    li a3, 55
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 7
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_59
+; RV64-ZVFHMIN-NEXT:  .LBB28_125: # %cond.load166
+; RV64-ZVFHMIN-NEXT:    lh a1, 112(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV64-ZVFHMIN-NEXT:    li a1, 57
+; RV64-ZVFHMIN-NEXT:    li a3, 56
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 6
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_60
+; RV64-ZVFHMIN-NEXT:  .LBB28_126: # %cond.load169
+; RV64-ZVFHMIN-NEXT:    lh a1, 114(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV64-ZVFHMIN-NEXT:    li a1, 58
+; RV64-ZVFHMIN-NEXT:    li a3, 57
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 5
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_61
+; RV64-ZVFHMIN-NEXT:  .LBB28_127: # %cond.load172
+; RV64-ZVFHMIN-NEXT:    lh a1, 116(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV64-ZVFHMIN-NEXT:    li a1, 59
+; RV64-ZVFHMIN-NEXT:    li a3, 58
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 4
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_62
+; RV64-ZVFHMIN-NEXT:  .LBB28_128: # %cond.load175
+; RV64-ZVFHMIN-NEXT:    lh a1, 118(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV64-ZVFHMIN-NEXT:    li a1, 60
+; RV64-ZVFHMIN-NEXT:    li a3, 59
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 3
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_63
+; RV64-ZVFHMIN-NEXT:  .LBB28_129: # %cond.load178
+; RV64-ZVFHMIN-NEXT:    lh a1, 120(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v16, a1
+; RV64-ZVFHMIN-NEXT:    li a1, 61
+; RV64-ZVFHMIN-NEXT:    li a3, 60
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v8, v16, a3
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 2
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_64
+; RV64-ZVFHMIN-NEXT:    j .LBB28_65
+; RV64-ZVFHMIN-NEXT:  .LBB28_130:
+; RV64-ZVFHMIN-NEXT:    # implicit-def: $v16m8
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 2
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB28_132
+; RV64-ZVFHMIN-NEXT:  .LBB28_131: # %cond.load193
+; RV64-ZVFHMIN-NEXT:    lh a2, 130(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v16, v24, 1
+; RV64-ZVFHMIN-NEXT:  .LBB28_132: # %else194
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 4
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB28_195
+; RV64-ZVFHMIN-NEXT:  # %bb.133: # %else197
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 8
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB28_196
+; RV64-ZVFHMIN-NEXT:  .LBB28_134: # %else200
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 16
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB28_197
+; RV64-ZVFHMIN-NEXT:  .LBB28_135: # %else203
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 32
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB28_198
+; RV64-ZVFHMIN-NEXT:  .LBB28_136: # %else206
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 64
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB28_199
+; RV64-ZVFHMIN-NEXT:  .LBB28_137: # %else209
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 128
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB28_200
+; RV64-ZVFHMIN-NEXT:  .LBB28_138: # %else212
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 256
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB28_201
+; RV64-ZVFHMIN-NEXT:  .LBB28_139: # %else215
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 512
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB28_202
+; RV64-ZVFHMIN-NEXT:  .LBB28_140: # %else218
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 1024
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB28_203
+; RV64-ZVFHMIN-NEXT:  .LBB28_141: # %else221
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 52
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_204
+; RV64-ZVFHMIN-NEXT:  .LBB28_142: # %else224
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 51
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_205
+; RV64-ZVFHMIN-NEXT:  .LBB28_143: # %else227
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 50
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_206
+; RV64-ZVFHMIN-NEXT:  .LBB28_144: # %else230
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 49
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_207
+; RV64-ZVFHMIN-NEXT:  .LBB28_145: # %else233
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 48
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_208
+; RV64-ZVFHMIN-NEXT:  .LBB28_146: # %else236
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 47
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_209
+; RV64-ZVFHMIN-NEXT:  .LBB28_147: # %else239
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 46
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_210
+; RV64-ZVFHMIN-NEXT:  .LBB28_148: # %else242
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 45
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_211
+; RV64-ZVFHMIN-NEXT:  .LBB28_149: # %else245
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 44
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_212
+; RV64-ZVFHMIN-NEXT:  .LBB28_150: # %else248
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 43
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_213
+; RV64-ZVFHMIN-NEXT:  .LBB28_151: # %else251
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 42
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_214
+; RV64-ZVFHMIN-NEXT:  .LBB28_152: # %else254
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 41
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_215
+; RV64-ZVFHMIN-NEXT:  .LBB28_153: # %else257
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 40
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_216
+; RV64-ZVFHMIN-NEXT:  .LBB28_154: # %else260
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 39
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_217
+; RV64-ZVFHMIN-NEXT:  .LBB28_155: # %else263
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 38
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_218
+; RV64-ZVFHMIN-NEXT:  .LBB28_156: # %else266
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 37
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_219
+; RV64-ZVFHMIN-NEXT:  .LBB28_157: # %else269
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 36
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_220
+; RV64-ZVFHMIN-NEXT:  .LBB28_158: # %else272
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 35
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_221
+; RV64-ZVFHMIN-NEXT:  .LBB28_159: # %else275
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 34
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_222
+; RV64-ZVFHMIN-NEXT:  .LBB28_160: # %else278
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 33
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_223
+; RV64-ZVFHMIN-NEXT:  .LBB28_161: # %else281
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 32
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_224
+; RV64-ZVFHMIN-NEXT:  .LBB28_162: # %else284
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 31
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_225
+; RV64-ZVFHMIN-NEXT:  .LBB28_163: # %else287
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 30
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_226
+; RV64-ZVFHMIN-NEXT:  .LBB28_164: # %else290
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 29
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_227
+; RV64-ZVFHMIN-NEXT:  .LBB28_165: # %else293
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 28
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_228
+; RV64-ZVFHMIN-NEXT:  .LBB28_166: # %else296
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 27
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_229
+; RV64-ZVFHMIN-NEXT:  .LBB28_167: # %else299
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 26
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_230
+; RV64-ZVFHMIN-NEXT:  .LBB28_168: # %else302
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 25
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_231
+; RV64-ZVFHMIN-NEXT:  .LBB28_169: # %else305
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 24
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_232
+; RV64-ZVFHMIN-NEXT:  .LBB28_170: # %else308
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 23
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_233
+; RV64-ZVFHMIN-NEXT:  .LBB28_171: # %else311
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 22
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_234
+; RV64-ZVFHMIN-NEXT:  .LBB28_172: # %else314
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 21
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_235
+; RV64-ZVFHMIN-NEXT:  .LBB28_173: # %else317
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 20
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_236
+; RV64-ZVFHMIN-NEXT:  .LBB28_174: # %else320
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 19
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_237
+; RV64-ZVFHMIN-NEXT:  .LBB28_175: # %else323
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 18
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_238
+; RV64-ZVFHMIN-NEXT:  .LBB28_176: # %else326
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 17
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_239
+; RV64-ZVFHMIN-NEXT:  .LBB28_177: # %else329
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 16
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_240
+; RV64-ZVFHMIN-NEXT:  .LBB28_178: # %else332
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 15
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_241
+; RV64-ZVFHMIN-NEXT:  .LBB28_179: # %else335
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 14
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_242
+; RV64-ZVFHMIN-NEXT:  .LBB28_180: # %else338
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 13
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_243
+; RV64-ZVFHMIN-NEXT:  .LBB28_181: # %else341
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 12
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_244
+; RV64-ZVFHMIN-NEXT:  .LBB28_182: # %else344
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 11
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_245
+; RV64-ZVFHMIN-NEXT:  .LBB28_183: # %else347
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 10
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_246
+; RV64-ZVFHMIN-NEXT:  .LBB28_184: # %else350
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 9
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_247
+; RV64-ZVFHMIN-NEXT:  .LBB28_185: # %else353
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 8
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_248
+; RV64-ZVFHMIN-NEXT:  .LBB28_186: # %else356
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 7
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_249
+; RV64-ZVFHMIN-NEXT:  .LBB28_187: # %else359
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 6
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_250
+; RV64-ZVFHMIN-NEXT:  .LBB28_188: # %else362
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 5
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_251
+; RV64-ZVFHMIN-NEXT:  .LBB28_189: # %else365
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 4
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_252
+; RV64-ZVFHMIN-NEXT:  .LBB28_190: # %else368
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 3
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_253
+; RV64-ZVFHMIN-NEXT:  .LBB28_191: # %else371
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 2
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_254
+; RV64-ZVFHMIN-NEXT:  .LBB28_192: # %else374
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 1
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_255
+; RV64-ZVFHMIN-NEXT:  .LBB28_193: # %else377
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_256
+; RV64-ZVFHMIN-NEXT:  .LBB28_194: # %else380
+; RV64-ZVFHMIN-NEXT:    ret
+; RV64-ZVFHMIN-NEXT:  .LBB28_195: # %cond.load196
+; RV64-ZVFHMIN-NEXT:    lh a2, 132(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v16, v24, 2
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 8
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB28_134
+; RV64-ZVFHMIN-NEXT:  .LBB28_196: # %cond.load199
+; RV64-ZVFHMIN-NEXT:    lh a2, 134(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v16, v24, 3
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 16
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB28_135
+; RV64-ZVFHMIN-NEXT:  .LBB28_197: # %cond.load202
+; RV64-ZVFHMIN-NEXT:    lh a2, 136(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v16, v24, 4
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 32
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB28_136
+; RV64-ZVFHMIN-NEXT:  .LBB28_198: # %cond.load205
+; RV64-ZVFHMIN-NEXT:    lh a2, 138(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v16, v24, 5
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 64
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB28_137
+; RV64-ZVFHMIN-NEXT:  .LBB28_199: # %cond.load208
+; RV64-ZVFHMIN-NEXT:    lh a2, 140(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v16, v24, 6
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 128
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB28_138
+; RV64-ZVFHMIN-NEXT:  .LBB28_200: # %cond.load211
+; RV64-ZVFHMIN-NEXT:    lh a2, 142(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v16, v24, 7
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 256
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB28_139
+; RV64-ZVFHMIN-NEXT:  .LBB28_201: # %cond.load214
+; RV64-ZVFHMIN-NEXT:    lh a2, 144(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 9, e16, m2, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v16, v24, 8
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 512
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB28_140
+; RV64-ZVFHMIN-NEXT:  .LBB28_202: # %cond.load217
+; RV64-ZVFHMIN-NEXT:    lh a2, 146(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 10, e16, m2, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v16, v24, 9
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 1024
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB28_141
+; RV64-ZVFHMIN-NEXT:  .LBB28_203: # %cond.load220
+; RV64-ZVFHMIN-NEXT:    lh a2, 148(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 11, e16, m2, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v16, v24, 10
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 52
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_142
+; RV64-ZVFHMIN-NEXT:  .LBB28_204: # %cond.load223
+; RV64-ZVFHMIN-NEXT:    lh a2, 150(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 12, e16, m2, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v16, v24, 11
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 51
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_143
+; RV64-ZVFHMIN-NEXT:  .LBB28_205: # %cond.load226
+; RV64-ZVFHMIN-NEXT:    lh a2, 152(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 13, e16, m2, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v16, v24, 12
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 50
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_144
+; RV64-ZVFHMIN-NEXT:  .LBB28_206: # %cond.load229
+; RV64-ZVFHMIN-NEXT:    lh a2, 154(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 14, e16, m2, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v16, v24, 13
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 49
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_145
+; RV64-ZVFHMIN-NEXT:  .LBB28_207: # %cond.load232
+; RV64-ZVFHMIN-NEXT:    lh a2, 156(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 15, e16, m2, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v16, v24, 14
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 48
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_146
+; RV64-ZVFHMIN-NEXT:  .LBB28_208: # %cond.load235
+; RV64-ZVFHMIN-NEXT:    lh a2, 158(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 16, e16, m2, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v16, v24, 15
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 47
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_147
+; RV64-ZVFHMIN-NEXT:  .LBB28_209: # %cond.load238
+; RV64-ZVFHMIN-NEXT:    lh a2, 160(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 17, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v16, v24, 16
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 46
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_148
+; RV64-ZVFHMIN-NEXT:  .LBB28_210: # %cond.load241
+; RV64-ZVFHMIN-NEXT:    lh a2, 162(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 18, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v16, v24, 17
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 45
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_149
+; RV64-ZVFHMIN-NEXT:  .LBB28_211: # %cond.load244
+; RV64-ZVFHMIN-NEXT:    lh a2, 164(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 19, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v16, v24, 18
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 44
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_150
+; RV64-ZVFHMIN-NEXT:  .LBB28_212: # %cond.load247
+; RV64-ZVFHMIN-NEXT:    lh a2, 166(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 20, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v16, v24, 19
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 43
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_151
+; RV64-ZVFHMIN-NEXT:  .LBB28_213: # %cond.load250
+; RV64-ZVFHMIN-NEXT:    lh a2, 168(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 21, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v16, v24, 20
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 42
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_152
+; RV64-ZVFHMIN-NEXT:  .LBB28_214: # %cond.load253
+; RV64-ZVFHMIN-NEXT:    lh a2, 170(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 22, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v16, v24, 21
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 41
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_153
+; RV64-ZVFHMIN-NEXT:  .LBB28_215: # %cond.load256
+; RV64-ZVFHMIN-NEXT:    lh a2, 172(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 23, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v16, v24, 22
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 40
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_154
+; RV64-ZVFHMIN-NEXT:  .LBB28_216: # %cond.load259
+; RV64-ZVFHMIN-NEXT:    lh a2, 174(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 24, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v16, v24, 23
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 39
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_155
+; RV64-ZVFHMIN-NEXT:  .LBB28_217: # %cond.load262
+; RV64-ZVFHMIN-NEXT:    lh a2, 176(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 25, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v16, v24, 24
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 38
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_156
+; RV64-ZVFHMIN-NEXT:  .LBB28_218: # %cond.load265
+; RV64-ZVFHMIN-NEXT:    lh a2, 178(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 26, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v16, v24, 25
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 37
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_157
+; RV64-ZVFHMIN-NEXT:  .LBB28_219: # %cond.load268
+; RV64-ZVFHMIN-NEXT:    lh a2, 180(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 27, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v16, v24, 26
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 36
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_158
+; RV64-ZVFHMIN-NEXT:  .LBB28_220: # %cond.load271
+; RV64-ZVFHMIN-NEXT:    lh a2, 182(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 28, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v16, v24, 27
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 35
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_159
+; RV64-ZVFHMIN-NEXT:  .LBB28_221: # %cond.load274
+; RV64-ZVFHMIN-NEXT:    lh a2, 184(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 29, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v16, v24, 28
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 34
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_160
+; RV64-ZVFHMIN-NEXT:  .LBB28_222: # %cond.load277
+; RV64-ZVFHMIN-NEXT:    lh a2, 186(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 30, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v16, v24, 29
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 33
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_161
+; RV64-ZVFHMIN-NEXT:  .LBB28_223: # %cond.load280
+; RV64-ZVFHMIN-NEXT:    lh a2, 188(a0)
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 31, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v16, v24, 30
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 32
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_162
+; RV64-ZVFHMIN-NEXT:  .LBB28_224: # %cond.load283
+; RV64-ZVFHMIN-NEXT:    lh a2, 190(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 32
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vi v16, v24, 31
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 31
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_163
+; RV64-ZVFHMIN-NEXT:  .LBB28_225: # %cond.load286
+; RV64-ZVFHMIN-NEXT:    lh a2, 192(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 33
+; RV64-ZVFHMIN-NEXT:    li a3, 32
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a3
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 30
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_164
+; RV64-ZVFHMIN-NEXT:  .LBB28_226: # %cond.load289
+; RV64-ZVFHMIN-NEXT:    lh a2, 194(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 34
+; RV64-ZVFHMIN-NEXT:    li a3, 33
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a3
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 29
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_165
+; RV64-ZVFHMIN-NEXT:  .LBB28_227: # %cond.load292
+; RV64-ZVFHMIN-NEXT:    lh a2, 196(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 35
+; RV64-ZVFHMIN-NEXT:    li a3, 34
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a3
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 28
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_166
+; RV64-ZVFHMIN-NEXT:  .LBB28_228: # %cond.load295
+; RV64-ZVFHMIN-NEXT:    lh a2, 198(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 36
+; RV64-ZVFHMIN-NEXT:    li a3, 35
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a3
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 27
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_167
+; RV64-ZVFHMIN-NEXT:  .LBB28_229: # %cond.load298
+; RV64-ZVFHMIN-NEXT:    lh a2, 200(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 37
+; RV64-ZVFHMIN-NEXT:    li a3, 36
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a3
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 26
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_168
+; RV64-ZVFHMIN-NEXT:  .LBB28_230: # %cond.load301
+; RV64-ZVFHMIN-NEXT:    lh a2, 202(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 38
+; RV64-ZVFHMIN-NEXT:    li a3, 37
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a3
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 25
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_169
+; RV64-ZVFHMIN-NEXT:  .LBB28_231: # %cond.load304
+; RV64-ZVFHMIN-NEXT:    lh a2, 204(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 39
+; RV64-ZVFHMIN-NEXT:    li a3, 38
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a3
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 24
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_170
+; RV64-ZVFHMIN-NEXT:  .LBB28_232: # %cond.load307
+; RV64-ZVFHMIN-NEXT:    lh a2, 206(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 40
+; RV64-ZVFHMIN-NEXT:    li a3, 39
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a3
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 23
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_171
+; RV64-ZVFHMIN-NEXT:  .LBB28_233: # %cond.load310
+; RV64-ZVFHMIN-NEXT:    lh a2, 208(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 41
+; RV64-ZVFHMIN-NEXT:    li a3, 40
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a3
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 22
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_172
+; RV64-ZVFHMIN-NEXT:  .LBB28_234: # %cond.load313
+; RV64-ZVFHMIN-NEXT:    lh a2, 210(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 42
+; RV64-ZVFHMIN-NEXT:    li a3, 41
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a3
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 21
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_173
+; RV64-ZVFHMIN-NEXT:  .LBB28_235: # %cond.load316
+; RV64-ZVFHMIN-NEXT:    lh a2, 212(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 43
+; RV64-ZVFHMIN-NEXT:    li a3, 42
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a3
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 20
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_174
+; RV64-ZVFHMIN-NEXT:  .LBB28_236: # %cond.load319
+; RV64-ZVFHMIN-NEXT:    lh a2, 214(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 44
+; RV64-ZVFHMIN-NEXT:    li a3, 43
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a3
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 19
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_175
+; RV64-ZVFHMIN-NEXT:  .LBB28_237: # %cond.load322
+; RV64-ZVFHMIN-NEXT:    lh a2, 216(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 45
+; RV64-ZVFHMIN-NEXT:    li a3, 44
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a3
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 18
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_176
+; RV64-ZVFHMIN-NEXT:  .LBB28_238: # %cond.load325
+; RV64-ZVFHMIN-NEXT:    lh a2, 218(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 46
+; RV64-ZVFHMIN-NEXT:    li a3, 45
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a3
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 17
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_177
+; RV64-ZVFHMIN-NEXT:  .LBB28_239: # %cond.load328
+; RV64-ZVFHMIN-NEXT:    lh a2, 220(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 47
+; RV64-ZVFHMIN-NEXT:    li a3, 46
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a3
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 16
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_178
+; RV64-ZVFHMIN-NEXT:  .LBB28_240: # %cond.load331
+; RV64-ZVFHMIN-NEXT:    lh a2, 222(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 48
+; RV64-ZVFHMIN-NEXT:    li a3, 47
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a3
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 15
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_179
+; RV64-ZVFHMIN-NEXT:  .LBB28_241: # %cond.load334
+; RV64-ZVFHMIN-NEXT:    lh a2, 224(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 49
+; RV64-ZVFHMIN-NEXT:    li a3, 48
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a3
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 14
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_180
+; RV64-ZVFHMIN-NEXT:  .LBB28_242: # %cond.load337
+; RV64-ZVFHMIN-NEXT:    lh a2, 226(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 50
+; RV64-ZVFHMIN-NEXT:    li a3, 49
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a3
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 13
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_181
+; RV64-ZVFHMIN-NEXT:  .LBB28_243: # %cond.load340
+; RV64-ZVFHMIN-NEXT:    lh a2, 228(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 51
+; RV64-ZVFHMIN-NEXT:    li a3, 50
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a3
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 12
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_182
+; RV64-ZVFHMIN-NEXT:  .LBB28_244: # %cond.load343
+; RV64-ZVFHMIN-NEXT:    lh a2, 230(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 52
+; RV64-ZVFHMIN-NEXT:    li a3, 51
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a3
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 11
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_183
+; RV64-ZVFHMIN-NEXT:  .LBB28_245: # %cond.load346
+; RV64-ZVFHMIN-NEXT:    lh a2, 232(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 53
+; RV64-ZVFHMIN-NEXT:    li a3, 52
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a3
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 10
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_184
+; RV64-ZVFHMIN-NEXT:  .LBB28_246: # %cond.load349
+; RV64-ZVFHMIN-NEXT:    lh a2, 234(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 54
+; RV64-ZVFHMIN-NEXT:    li a3, 53
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a3
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 9
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_185
+; RV64-ZVFHMIN-NEXT:  .LBB28_247: # %cond.load352
+; RV64-ZVFHMIN-NEXT:    lh a2, 236(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 55
+; RV64-ZVFHMIN-NEXT:    li a3, 54
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a3
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 8
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_186
+; RV64-ZVFHMIN-NEXT:  .LBB28_248: # %cond.load355
+; RV64-ZVFHMIN-NEXT:    lh a2, 238(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 56
+; RV64-ZVFHMIN-NEXT:    li a3, 55
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a3
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 7
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_187
+; RV64-ZVFHMIN-NEXT:  .LBB28_249: # %cond.load358
+; RV64-ZVFHMIN-NEXT:    lh a2, 240(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 57
+; RV64-ZVFHMIN-NEXT:    li a3, 56
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a3
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 6
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_188
+; RV64-ZVFHMIN-NEXT:  .LBB28_250: # %cond.load361
+; RV64-ZVFHMIN-NEXT:    lh a2, 242(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 58
+; RV64-ZVFHMIN-NEXT:    li a3, 57
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a3
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 5
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_189
+; RV64-ZVFHMIN-NEXT:  .LBB28_251: # %cond.load364
+; RV64-ZVFHMIN-NEXT:    lh a2, 244(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 59
+; RV64-ZVFHMIN-NEXT:    li a3, 58
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a3
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 4
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_190
+; RV64-ZVFHMIN-NEXT:  .LBB28_252: # %cond.load367
+; RV64-ZVFHMIN-NEXT:    lh a2, 246(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 60
+; RV64-ZVFHMIN-NEXT:    li a3, 59
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a3
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 3
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_191
+; RV64-ZVFHMIN-NEXT:  .LBB28_253: # %cond.load370
+; RV64-ZVFHMIN-NEXT:    lh a2, 248(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 61
+; RV64-ZVFHMIN-NEXT:    li a3, 60
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a3
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 2
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_192
+; RV64-ZVFHMIN-NEXT:  .LBB28_254: # %cond.load373
+; RV64-ZVFHMIN-NEXT:    lh a2, 250(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 62
+; RV64-ZVFHMIN-NEXT:    li a3, 61
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a3
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 1
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_193
+; RV64-ZVFHMIN-NEXT:  .LBB28_255: # %cond.load376
+; RV64-ZVFHMIN-NEXT:    lh a2, 252(a0)
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a2
+; RV64-ZVFHMIN-NEXT:    li a2, 63
+; RV64-ZVFHMIN-NEXT:    li a3, 62
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, tu, ma
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a3
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_194
+; RV64-ZVFHMIN-NEXT:  .LBB28_256: # %cond.load379
+; RV64-ZVFHMIN-NEXT:    lh a0, 254(a0)
+; RV64-ZVFHMIN-NEXT:    li a1, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.s.x v24, a0
+; RV64-ZVFHMIN-NEXT:    li a0, 63
+; RV64-ZVFHMIN-NEXT:    vslideup.vx v16, v24, a0
+; RV64-ZVFHMIN-NEXT:    ret
   %load = call <128 x half> @llvm.masked.load.v128f16(ptr %a, i32 8, <128 x i1> %mask, <128 x half> undef)
-  store <128 x half> %load, ptr %res_ptr
-  ret void
+  ret <128 x half> %load
 }
-declare <128 x half> @llvm.masked.load.v128f16(ptr, i32, <128 x i1>, <128 x half>)
+
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV32-ZVFH: {{.*}}
+; RV64-ZVFH: {{.*}}

diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll
index 2f20caa6eb1894..4f3313f3760bee 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll
@@ -1,531 +1,332 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
 
-define void @masked_load_v1i8(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
+define <1 x i8> @masked_load_v1i8(ptr %a, <1 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v1i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT:    vle8.v v8, (a1)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
 ; CHECK-NEXT:    vle8.v v8, (a0), v0.t
-; CHECK-NEXT:    vse8.v v8, (a2)
 ; CHECK-NEXT:    ret
-  %m = load <1 x i8>, ptr %m_ptr
-  %mask = icmp eq <1 x i8> %m, zeroinitializer
   %load = call <1 x i8> @llvm.masked.load.v1i8(ptr %a, i32 8, <1 x i1> %mask, <1 x i8> undef)
-  store <1 x i8> %load, ptr %res_ptr
-  ret void
+  ret <1 x i8> %load
 }
-declare <1 x i8> @llvm.masked.load.v1i8(ptr, i32, <1 x i1>, <1 x i8>)
 
-define void @masked_load_v1i16(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
+define <1 x i16> @masked_load_v1i16(ptr %a, <1 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v1i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-NEXT:    vle16.v v8, (a1)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
 ; CHECK-NEXT:    vle16.v v8, (a0), v0.t
-; CHECK-NEXT:    vse16.v v8, (a2)
 ; CHECK-NEXT:    ret
-  %m = load <1 x i16>, ptr %m_ptr
-  %mask = icmp eq <1 x i16> %m, zeroinitializer
   %load = call <1 x i16> @llvm.masked.load.v1i16(ptr %a, i32 8, <1 x i1> %mask, <1 x i16> undef)
-  store <1 x i16> %load, ptr %res_ptr
-  ret void
+  ret <1 x i16> %load
 }
-declare <1 x i16> @llvm.masked.load.v1i16(ptr, i32, <1 x i1>, <1 x i16>)
 
-define void @masked_load_v1i32(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
+define <1 x i32> @masked_load_v1i32(ptr %a, <1 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v1i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a1)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
 ; CHECK-NEXT:    vle32.v v8, (a0), v0.t
-; CHECK-NEXT:    vse32.v v8, (a2)
 ; CHECK-NEXT:    ret
-  %m = load <1 x i32>, ptr %m_ptr
-  %mask = icmp eq <1 x i32> %m, zeroinitializer
   %load = call <1 x i32> @llvm.masked.load.v1i32(ptr %a, i32 8, <1 x i1> %mask, <1 x i32> undef)
-  store <1 x i32> %load, ptr %res_ptr
-  ret void
+  ret <1 x i32> %load
 }
-declare <1 x i32> @llvm.masked.load.v1i32(ptr, i32, <1 x i1>, <1 x i32>)
 
-define void @masked_load_v1i64(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
+define <1 x i64> @masked_load_v1i64(ptr %a, <1 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v1i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a1)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
 ; CHECK-NEXT:    vle64.v v8, (a0), v0.t
-; CHECK-NEXT:    vse64.v v8, (a2)
 ; CHECK-NEXT:    ret
-  %m = load <1 x i64>, ptr %m_ptr
-  %mask = icmp eq <1 x i64> %m, zeroinitializer
   %load = call <1 x i64> @llvm.masked.load.v1i64(ptr %a, i32 8, <1 x i1> %mask, <1 x i64> undef)
-  store <1 x i64> %load, ptr %res_ptr
-  ret void
+  ret <1 x i64> %load
 }
-declare <1 x i64> @llvm.masked.load.v1i64(ptr, i32, <1 x i1>, <1 x i64>)
 
-define void @masked_load_v2i8(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
+define <2 x i8> @masked_load_v2i8(ptr %a, <2 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v2i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
-; CHECK-NEXT:    vle8.v v8, (a1)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
 ; CHECK-NEXT:    vle8.v v8, (a0), v0.t
-; CHECK-NEXT:    vse8.v v8, (a2)
 ; CHECK-NEXT:    ret
-  %m = load <2 x i8>, ptr %m_ptr
-  %mask = icmp eq <2 x i8> %m, zeroinitializer
   %load = call <2 x i8> @llvm.masked.load.v2i8(ptr %a, i32 8, <2 x i1> %mask, <2 x i8> undef)
-  store <2 x i8> %load, ptr %res_ptr
-  ret void
+  ret <2 x i8> %load
 }
-declare <2 x i8> @llvm.masked.load.v2i8(ptr, i32, <2 x i1>, <2 x i8>)
 
-define void @masked_load_v2i16(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
+define <2 x i16> @masked_load_v2i16(ptr %a, <2 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v2i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT:    vle16.v v8, (a1)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
 ; CHECK-NEXT:    vle16.v v8, (a0), v0.t
-; CHECK-NEXT:    vse16.v v8, (a2)
 ; CHECK-NEXT:    ret
-  %m = load <2 x i16>, ptr %m_ptr
-  %mask = icmp eq <2 x i16> %m, zeroinitializer
   %load = call <2 x i16> @llvm.masked.load.v2i16(ptr %a, i32 8, <2 x i1> %mask, <2 x i16> undef)
-  store <2 x i16> %load, ptr %res_ptr
-  ret void
+  ret <2 x i16> %load
 }
-declare <2 x i16> @llvm.masked.load.v2i16(ptr, i32, <2 x i1>, <2 x i16>)
 
-define void @masked_load_v2i32(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
+define <2 x i32> @masked_load_v2i32(ptr %a, <2 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a1)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
 ; CHECK-NEXT:    vle32.v v8, (a0), v0.t
-; CHECK-NEXT:    vse32.v v8, (a2)
 ; CHECK-NEXT:    ret
-  %m = load <2 x i32>, ptr %m_ptr
-  %mask = icmp eq <2 x i32> %m, zeroinitializer
   %load = call <2 x i32> @llvm.masked.load.v2i32(ptr %a, i32 8, <2 x i1> %mask, <2 x i32> undef)
-  store <2 x i32> %load, ptr %res_ptr
-  ret void
+  ret <2 x i32> %load
 }
-declare <2 x i32> @llvm.masked.load.v2i32(ptr, i32, <2 x i1>, <2 x i32>)
 
-define void @masked_load_v2i64(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
+define <2 x i64> @masked_load_v2i64(ptr %a, <2 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v2i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a1)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
 ; CHECK-NEXT:    vle64.v v8, (a0), v0.t
-; CHECK-NEXT:    vse64.v v8, (a2)
 ; CHECK-NEXT:    ret
-  %m = load <2 x i64>, ptr %m_ptr
-  %mask = icmp eq <2 x i64> %m, zeroinitializer
   %load = call <2 x i64> @llvm.masked.load.v2i64(ptr %a, i32 8, <2 x i1> %mask, <2 x i64> undef)
-  store <2 x i64> %load, ptr %res_ptr
-  ret void
+  ret <2 x i64> %load
 }
-declare <2 x i64> @llvm.masked.load.v2i64(ptr, i32, <2 x i1>, <2 x i64>)
 
-define void @masked_load_v4i8(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
+define <4 x i8> @masked_load_v4i8(ptr %a, <4 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT:    vle8.v v8, (a1)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
 ; CHECK-NEXT:    vle8.v v8, (a0), v0.t
-; CHECK-NEXT:    vse8.v v8, (a2)
 ; CHECK-NEXT:    ret
-  %m = load <4 x i8>, ptr %m_ptr
-  %mask = icmp eq <4 x i8> %m, zeroinitializer
   %load = call <4 x i8> @llvm.masked.load.v4i8(ptr %a, i32 8, <4 x i1> %mask, <4 x i8> undef)
-  store <4 x i8> %load, ptr %res_ptr
-  ret void
+  ret <4 x i8> %load
 }
-declare <4 x i8> @llvm.masked.load.v4i8(ptr, i32, <4 x i1>, <4 x i8>)
 
-define void @masked_load_v4i16(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
+define <4 x i16> @masked_load_v4i16(ptr %a, <4 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v4i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT:    vle16.v v8, (a1)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
 ; CHECK-NEXT:    vle16.v v8, (a0), v0.t
-; CHECK-NEXT:    vse16.v v8, (a2)
 ; CHECK-NEXT:    ret
-  %m = load <4 x i16>, ptr %m_ptr
-  %mask = icmp eq <4 x i16> %m, zeroinitializer
   %load = call <4 x i16> @llvm.masked.load.v4i16(ptr %a, i32 8, <4 x i1> %mask, <4 x i16> undef)
-  store <4 x i16> %load, ptr %res_ptr
-  ret void
+  ret <4 x i16> %load
 }
-declare <4 x i16> @llvm.masked.load.v4i16(ptr, i32, <4 x i1>, <4 x i16>)
 
-define void @masked_load_v4i32(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
+define <4 x i32> @masked_load_v4i32(ptr %a, <4 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a1)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
 ; CHECK-NEXT:    vle32.v v8, (a0), v0.t
-; CHECK-NEXT:    vse32.v v8, (a2)
 ; CHECK-NEXT:    ret
-  %m = load <4 x i32>, ptr %m_ptr
-  %mask = icmp eq <4 x i32> %m, zeroinitializer
   %load = call <4 x i32> @llvm.masked.load.v4i32(ptr %a, i32 8, <4 x i1> %mask, <4 x i32> undef)
-  store <4 x i32> %load, ptr %res_ptr
-  ret void
+  ret <4 x i32> %load
 }
-declare <4 x i32> @llvm.masked.load.v4i32(ptr, i32, <4 x i1>, <4 x i32>)
 
-define void @masked_load_v4i64(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
+define <4 x i64> @masked_load_v4i64(ptr %a, <4 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v4i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a1)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
 ; CHECK-NEXT:    vle64.v v8, (a0), v0.t
-; CHECK-NEXT:    vse64.v v8, (a2)
 ; CHECK-NEXT:    ret
-  %m = load <4 x i64>, ptr %m_ptr
-  %mask = icmp eq <4 x i64> %m, zeroinitializer
   %load = call <4 x i64> @llvm.masked.load.v4i64(ptr %a, i32 8, <4 x i1> %mask, <4 x i64> undef)
-  store <4 x i64> %load, ptr %res_ptr
-  ret void
+  ret <4 x i64> %load
 }
-declare <4 x i64> @llvm.masked.load.v4i64(ptr, i32, <4 x i1>, <4 x i64>)
 
-define void @masked_load_v8i8(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
+define <8 x i8> @masked_load_v8i8(ptr %a, <8 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT:    vle8.v v8, (a1)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
 ; CHECK-NEXT:    vle8.v v8, (a0), v0.t
-; CHECK-NEXT:    vse8.v v8, (a2)
 ; CHECK-NEXT:    ret
-  %m = load <8 x i8>, ptr %m_ptr
-  %mask = icmp eq <8 x i8> %m, zeroinitializer
   %load = call <8 x i8> @llvm.masked.load.v8i8(ptr %a, i32 8, <8 x i1> %mask, <8 x i8> undef)
-  store <8 x i8> %load, ptr %res_ptr
-  ret void
+  ret <8 x i8> %load
 }
-declare <8 x i8> @llvm.masked.load.v8i8(ptr, i32, <8 x i1>, <8 x i8>)
 
-define void @masked_load_v8i16(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
+define <8 x i16> @masked_load_v8i16(ptr %a, <8 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v8i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v8, (a1)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
 ; CHECK-NEXT:    vle16.v v8, (a0), v0.t
-; CHECK-NEXT:    vse16.v v8, (a2)
 ; CHECK-NEXT:    ret
-  %m = load <8 x i16>, ptr %m_ptr
-  %mask = icmp eq <8 x i16> %m, zeroinitializer
   %load = call <8 x i16> @llvm.masked.load.v8i16(ptr %a, i32 8, <8 x i1> %mask, <8 x i16> undef)
-  store <8 x i16> %load, ptr %res_ptr
-  ret void
+  ret <8 x i16> %load
 }
-declare <8 x i16> @llvm.masked.load.v8i16(ptr, i32, <8 x i1>, <8 x i16>)
 
-define void @masked_load_v8i32(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
+define <8 x i32> @masked_load_v8i32(ptr %a, <8 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a1)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
 ; CHECK-NEXT:    vle32.v v8, (a0), v0.t
-; CHECK-NEXT:    vse32.v v8, (a2)
 ; CHECK-NEXT:    ret
-  %m = load <8 x i32>, ptr %m_ptr
-  %mask = icmp eq <8 x i32> %m, zeroinitializer
   %load = call <8 x i32> @llvm.masked.load.v8i32(ptr %a, i32 8, <8 x i1> %mask, <8 x i32> undef)
-  store <8 x i32> %load, ptr %res_ptr
-  ret void
+  ret <8 x i32> %load
 }
-declare <8 x i32> @llvm.masked.load.v8i32(ptr, i32, <8 x i1>, <8 x i32>)
 
-define void @masked_load_v8i64(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
+define <8 x i64> @masked_load_v8i64(ptr %a, <8 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a1)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
 ; CHECK-NEXT:    vle64.v v8, (a0), v0.t
-; CHECK-NEXT:    vse64.v v8, (a2)
 ; CHECK-NEXT:    ret
-  %m = load <8 x i64>, ptr %m_ptr
-  %mask = icmp eq <8 x i64> %m, zeroinitializer
   %load = call <8 x i64> @llvm.masked.load.v8i64(ptr %a, i32 8, <8 x i1> %mask, <8 x i64> undef)
-  store <8 x i64> %load, ptr %res_ptr
-  ret void
+  ret <8 x i64> %load
 }
-declare <8 x i64> @llvm.masked.load.v8i64(ptr, i32, <8 x i1>, <8 x i64>)
 
-define void @masked_load_v16i8(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
+define <16 x i8> @masked_load_v16i8(ptr %a, <16 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v16i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT:    vle8.v v8, (a1)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
 ; CHECK-NEXT:    vle8.v v8, (a0), v0.t
-; CHECK-NEXT:    vse8.v v8, (a2)
 ; CHECK-NEXT:    ret
-  %m = load <16 x i8>, ptr %m_ptr
-  %mask = icmp eq <16 x i8> %m, zeroinitializer
   %load = call <16 x i8> @llvm.masked.load.v16i8(ptr %a, i32 8, <16 x i1> %mask, <16 x i8> undef)
-  store <16 x i8> %load, ptr %res_ptr
-  ret void
+  ret <16 x i8> %load
 }
-declare <16 x i8> @llvm.masked.load.v16i8(ptr, i32, <16 x i1>, <16 x i8>)
 
-define void @masked_load_v16i16(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
+define <16 x i16> @masked_load_v16i16(ptr %a, <16 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v16i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vle16.v v8, (a1)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
 ; CHECK-NEXT:    vle16.v v8, (a0), v0.t
-; CHECK-NEXT:    vse16.v v8, (a2)
 ; CHECK-NEXT:    ret
-  %m = load <16 x i16>, ptr %m_ptr
-  %mask = icmp eq <16 x i16> %m, zeroinitializer
   %load = call <16 x i16> @llvm.masked.load.v16i16(ptr %a, i32 8, <16 x i1> %mask, <16 x i16> undef)
-  store <16 x i16> %load, ptr %res_ptr
-  ret void
+  ret <16 x i16> %load
 }
-declare <16 x i16> @llvm.masked.load.v16i16(ptr, i32, <16 x i1>, <16 x i16>)
 
-define void @masked_load_v16i32(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
+define <16 x i32> @masked_load_v16i32(ptr %a, <16 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v16i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a1)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
 ; CHECK-NEXT:    vle32.v v8, (a0), v0.t
-; CHECK-NEXT:    vse32.v v8, (a2)
 ; CHECK-NEXT:    ret
-  %m = load <16 x i32>, ptr %m_ptr
-  %mask = icmp eq <16 x i32> %m, zeroinitializer
   %load = call <16 x i32> @llvm.masked.load.v16i32(ptr %a, i32 8, <16 x i1> %mask, <16 x i32> undef)
-  store <16 x i32> %load, ptr %res_ptr
-  ret void
+  ret <16 x i32> %load
 }
-declare <16 x i32> @llvm.masked.load.v16i32(ptr, i32, <16 x i1>, <16 x i32>)
 
-define void @masked_load_v16i64(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
+define <16 x i64> @masked_load_v16i64(ptr %a, <16 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v16i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a1)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
 ; CHECK-NEXT:    vle64.v v8, (a0), v0.t
-; CHECK-NEXT:    vse64.v v8, (a2)
 ; CHECK-NEXT:    ret
-  %m = load <16 x i64>, ptr %m_ptr
-  %mask = icmp eq <16 x i64> %m, zeroinitializer
   %load = call <16 x i64> @llvm.masked.load.v16i64(ptr %a, i32 8, <16 x i1> %mask, <16 x i64> undef)
-  store <16 x i64> %load, ptr %res_ptr
-  ret void
+  ret <16 x i64> %load
 }
-declare <16 x i64> @llvm.masked.load.v16i64(ptr, i32, <16 x i1>, <16 x i64>)
 
-define void @masked_load_v32i8(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
+define <32 x i8> @masked_load_v32i8(ptr %a, <32 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v32i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a3, 32
-; CHECK-NEXT:    vsetvli zero, a3, e8, m2, ta, ma
-; CHECK-NEXT:    vle8.v v8, (a1)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-NEXT:    li a1, 32
+; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0), v0.t
-; CHECK-NEXT:    vse8.v v8, (a2)
 ; CHECK-NEXT:    ret
-  %m = load <32 x i8>, ptr %m_ptr
-  %mask = icmp eq <32 x i8> %m, zeroinitializer
   %load = call <32 x i8> @llvm.masked.load.v32i8(ptr %a, i32 8, <32 x i1> %mask, <32 x i8> undef)
-  store <32 x i8> %load, ptr %res_ptr
-  ret void
+  ret <32 x i8> %load
 }
-declare <32 x i8> @llvm.masked.load.v32i8(ptr, i32, <32 x i1>, <32 x i8>)
 
-define void @masked_load_v32i16(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
+define <32 x i16> @masked_load_v32i16(ptr %a, <32 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v32i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a3, 32
-; CHECK-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
-; CHECK-NEXT:    vle16.v v8, (a1)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-NEXT:    li a1, 32
+; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a0), v0.t
-; CHECK-NEXT:    vse16.v v8, (a2)
 ; CHECK-NEXT:    ret
-  %m = load <32 x i16>, ptr %m_ptr
-  %mask = icmp eq <32 x i16> %m, zeroinitializer
   %load = call <32 x i16> @llvm.masked.load.v32i16(ptr %a, i32 8, <32 x i1> %mask, <32 x i16> undef)
-  store <32 x i16> %load, ptr %res_ptr
-  ret void
+  ret <32 x i16> %load
 }
-declare <32 x i16> @llvm.masked.load.v32i16(ptr, i32, <32 x i1>, <32 x i16>)
 
-define void @masked_load_v32i32(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
+define <32 x i32> @masked_load_v32i32(ptr %a, <32 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v32i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a3, 32
-; CHECK-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a1)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-NEXT:    li a1, 32
+; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0), v0.t
-; CHECK-NEXT:    vse32.v v8, (a2)
 ; CHECK-NEXT:    ret
-  %m = load <32 x i32>, ptr %m_ptr
-  %mask = icmp eq <32 x i32> %m, zeroinitializer
   %load = call <32 x i32> @llvm.masked.load.v32i32(ptr %a, i32 8, <32 x i1> %mask, <32 x i32> undef)
-  store <32 x i32> %load, ptr %res_ptr
-  ret void
+  ret <32 x i32> %load
 }
-declare <32 x i32> @llvm.masked.load.v32i32(ptr, i32, <32 x i1>, <32 x i32>)
 
-define void @masked_load_v32i64(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
+define <32 x i64> @masked_load_v32i64(ptr %a, <32 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v32i64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a3, a1, 128
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT:    vle64.v v16, (a1)
-; CHECK-NEXT:    vle64.v v24, (a3)
-; CHECK-NEXT:    vmseq.vi v8, v16, 0
-; CHECK-NEXT:    vmseq.vi v0, v24, 0
-; CHECK-NEXT:    addi a1, a0, 128
-; CHECK-NEXT:    vle64.v v16, (a1), v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    vle64.v v8, (a0), v0.t
-; CHECK-NEXT:    vse64.v v8, (a2)
-; CHECK-NEXT:    addi a0, a2, 128
-; CHECK-NEXT:    vse64.v v16, (a0)
+; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
+; CHECK-NEXT:    vslidedown.vi v0, v0, 2
+; CHECK-NEXT:    addi a0, a0, 128
+; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; CHECK-NEXT:    vle64.v v16, (a0), v0.t
 ; CHECK-NEXT:    ret
-  %m = load <32 x i64>, ptr %m_ptr
-  %mask = icmp eq <32 x i64> %m, zeroinitializer
   %load = call <32 x i64> @llvm.masked.load.v32i64(ptr %a, i32 8, <32 x i1> %mask, <32 x i64> undef)
-  store <32 x i64> %load, ptr %res_ptr
-  ret void
+  ret <32 x i64> %load
 }
-declare <32 x i64> @llvm.masked.load.v32i64(ptr, i32, <32 x i1>, <32 x i64>)
 
-define void @masked_load_v64i8(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
+define <64 x i8> @masked_load_v64i8(ptr %a, <64 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v64i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a3, 64
-; CHECK-NEXT:    vsetvli zero, a3, e8, m4, ta, ma
-; CHECK-NEXT:    vle8.v v8, (a1)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-NEXT:    li a1, 64
+; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0), v0.t
-; CHECK-NEXT:    vse8.v v8, (a2)
 ; CHECK-NEXT:    ret
-  %m = load <64 x i8>, ptr %m_ptr
-  %mask = icmp eq <64 x i8> %m, zeroinitializer
   %load = call <64 x i8> @llvm.masked.load.v64i8(ptr %a, i32 8, <64 x i1> %mask, <64 x i8> undef)
-  store <64 x i8> %load, ptr %res_ptr
-  ret void
+  ret <64 x i8> %load
 }
-declare <64 x i8> @llvm.masked.load.v64i8(ptr, i32, <64 x i1>, <64 x i8>)
 
-define void @masked_load_v64i16(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
+define <64 x i16> @masked_load_v64i16(ptr %a, <64 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v64i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a3, 64
-; CHECK-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
-; CHECK-NEXT:    vle16.v v8, (a1)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-NEXT:    li a1, 64
+; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a0), v0.t
-; CHECK-NEXT:    vse16.v v8, (a2)
 ; CHECK-NEXT:    ret
-  %m = load <64 x i16>, ptr %m_ptr
-  %mask = icmp eq <64 x i16> %m, zeroinitializer
   %load = call <64 x i16> @llvm.masked.load.v64i16(ptr %a, i32 8, <64 x i1> %mask, <64 x i16> undef)
-  store <64 x i16> %load, ptr %res_ptr
-  ret void
+  ret <64 x i16> %load
 }
-declare <64 x i16> @llvm.masked.load.v64i16(ptr, i32, <64 x i1>, <64 x i16>)
 
-define void @masked_load_v64i32(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
+define <64 x i32> @masked_load_v64i32(ptr %a, <64 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v64i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a3, a1, 128
-; CHECK-NEXT:    li a4, 32
-; CHECK-NEXT:    vsetvli zero, a4, e32, m8, ta, ma
-; CHECK-NEXT:    vle32.v v16, (a1)
-; CHECK-NEXT:    vle32.v v24, (a3)
-; CHECK-NEXT:    vmseq.vi v8, v16, 0
-; CHECK-NEXT:    vmseq.vi v0, v24, 0
-; CHECK-NEXT:    addi a1, a0, 128
-; CHECK-NEXT:    vle32.v v16, (a1), v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    li a1, 32
+; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0), v0.t
-; CHECK-NEXT:    vse32.v v8, (a2)
-; CHECK-NEXT:    addi a0, a2, 128
-; CHECK-NEXT:    vse32.v v16, (a0)
+; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
+; CHECK-NEXT:    vslidedown.vi v0, v0, 4
+; CHECK-NEXT:    addi a0, a0, 128
+; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT:    vle32.v v16, (a0), v0.t
 ; CHECK-NEXT:    ret
-  %m = load <64 x i32>, ptr %m_ptr
-  %mask = icmp eq <64 x i32> %m, zeroinitializer
   %load = call <64 x i32> @llvm.masked.load.v64i32(ptr %a, i32 8, <64 x i1> %mask, <64 x i32> undef)
-  store <64 x i32> %load, ptr %res_ptr
-  ret void
+  ret <64 x i32> %load
 }
-declare <64 x i32> @llvm.masked.load.v64i32(ptr, i32, <64 x i1>, <64 x i32>)
 
-define void @masked_load_v128i8(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
+define <128 x i8> @masked_load_v128i8(ptr %a, <128 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v128i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a3, 128
-; CHECK-NEXT:    vsetvli zero, a3, e8, m8, ta, ma
-; CHECK-NEXT:    vle8.v v8, (a1)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
+; CHECK-NEXT:    li a1, 128
+; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0), v0.t
-; CHECK-NEXT:    vse8.v v8, (a2)
 ; CHECK-NEXT:    ret
-  %m = load <128 x i8>, ptr %m_ptr
-  %mask = icmp eq <128 x i8> %m, zeroinitializer
   %load = call <128 x i8> @llvm.masked.load.v128i8(ptr %a, i32 8, <128 x i1> %mask, <128 x i8> undef)
-  store <128 x i8> %load, ptr %res_ptr
-  ret void
+  ret <128 x i8> %load
 }
-declare <128 x i8> @llvm.masked.load.v128i8(ptr, i32, <128 x i1>, <128 x i8>)
 
-define void @masked_load_v256i8(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
+define <128 x i16> @masked_load_v128i16(ptr %a, <128 x i1> %mask) {
+; CHECK-LABEL: masked_load_v128i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 64
+; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0), v0.t
+; CHECK-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
+; CHECK-NEXT:    vslidedown.vi v0, v0, 8
+; CHECK-NEXT:    addi a0, a0, 128
+; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT:    vle16.v v16, (a0), v0.t
+; CHECK-NEXT:    ret
+  %load = call <128 x i16> @llvm.masked.load.v128i16(ptr %a, i32 8, <128 x i1> %mask, <128 x i16> undef)
+  ret <128 x i16> %load
+}
+
+define <256 x i8> @masked_load_v256i8(ptr %a, <256 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v256i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a3, a1, 128
-; CHECK-NEXT:    li a4, 128
-; CHECK-NEXT:    vsetvli zero, a4, e8, m8, ta, ma
-; CHECK-NEXT:    vle8.v v16, (a1)
-; CHECK-NEXT:    vle8.v v24, (a3)
-; CHECK-NEXT:    vmseq.vi v8, v16, 0
-; CHECK-NEXT:    vmseq.vi v0, v24, 0
-; CHECK-NEXT:    addi a1, a0, 128
-; CHECK-NEXT:    vle8.v v16, (a1), v0.t
-; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vmv1r.v v16, v8
+; CHECK-NEXT:    li a1, 128
+; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0), v0.t
-; CHECK-NEXT:    vse8.v v8, (a2)
-; CHECK-NEXT:    addi a0, a2, 128
-; CHECK-NEXT:    vse8.v v16, (a0)
+; CHECK-NEXT:    addi a0, a0, 128
+; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vle8.v v16, (a0), v0.t
 ; CHECK-NEXT:    ret
-  %m = load <256 x i8>, ptr %m_ptr
-  %mask = icmp eq <256 x i8> %m, zeroinitializer
   %load = call <256 x i8> @llvm.masked.load.v256i8(ptr %a, i32 8, <256 x i1> %mask, <256 x i8> undef)
-  store <256 x i8> %load, ptr %res_ptr
-  ret void
+  ret <256 x i8> %load
 }
-declare <256 x i8> @llvm.masked.load.v256i8(ptr, i32, <256 x i1>, <256 x i8>)
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; RV32: {{.*}}
-; RV64: {{.*}}
+

diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-fp.ll
index 683ead4f1c308f..9f3879fd850f21 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-fp.ll
@@ -1,566 +1,13687 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv32 -mattr=+v,+zvfh,+zvfbfmin,+zfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV32,RV32-ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfh,+zvfbfmin,+zfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV64,RV64-ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+v,+zvfhmin,+zvfbfmin,+zfhmin,+zfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV32,RV32-ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfhmin,+zvfbfmin,+zfhmin,+zfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV64,RV64-ZVFHMIN
 
-define void @masked_store_v1f16(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
-; CHECK-LABEL: masked_store_v1f16:
+define void @masked_store_v1bf16(<1 x bfloat> %val, ptr %a, <1 x i1> %mask) {
+; CHECK-LABEL: masked_store_v1bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-NEXT:    vle16.v v8, (a2)
-; CHECK-NEXT:    vle16.v v9, (a0)
-; CHECK-NEXT:    fmv.h.x fa5, zero
-; CHECK-NEXT:    vmfeq.vf v0, v8, fa5
-; CHECK-NEXT:    vse16.v v9, (a1), v0.t
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
+; CHECK-NEXT:    vfirst.m a1, v0
+; CHECK-NEXT:    bnez a1, .LBB0_2
+; CHECK-NEXT:  # %bb.1: # %cond.store
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vmv.x.s a1, v8
+; CHECK-NEXT:    fmv.h.x fa5, a1
+; CHECK-NEXT:    fsh fa5, 0(a0)
+; CHECK-NEXT:  .LBB0_2: # %else
 ; CHECK-NEXT:    ret
-  %m = load <1 x half>, ptr %m_ptr
-  %mask = fcmp oeq <1 x half> %m, zeroinitializer
-  %val = load <1 x half>, ptr %val_ptr
-  call void @llvm.masked.store.v1f16.p0v1f16(<1 x half> %val, ptr %a, i32 8, <1 x i1> %mask)
+  call void @llvm.masked.store.v1bf16.p0(<1 x bfloat> %val, ptr %a, i32 8, <1 x i1> %mask)
   ret void
 }
-declare void @llvm.masked.store.v1f16.p0v1f16(<1 x half>, ptr, i32, <1 x i1>)
 
-define void @masked_store_v1f32(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
+define void @masked_store_v1f16(<1 x half> %val, ptr %a, <1 x i1> %mask) {
+; ZVFH-LABEL: masked_store_v1f16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
+; ZVFH-NEXT:    vse16.v v8, (a0), v0.t
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: masked_store_v1f16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
+; ZVFHMIN-NEXT:    vfirst.m a1, v0
+; ZVFHMIN-NEXT:    bnez a1, .LBB1_2
+; ZVFHMIN-NEXT:  # %bb.1: # %cond.store
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vmv.x.s a1, v8
+; ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; ZVFHMIN-NEXT:    fsh fa5, 0(a0)
+; ZVFHMIN-NEXT:  .LBB1_2: # %else
+; ZVFHMIN-NEXT:    ret
+  call void @llvm.masked.store.v1f16.p0(<1 x half> %val, ptr %a, i32 8, <1 x i1> %mask)
+  ret void
+}
+
+define void @masked_store_v1f32(<1 x float> %val, ptr %a, <1 x i1> %mask) {
 ; CHECK-LABEL: masked_store_v1f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a2)
-; CHECK-NEXT:    vle32.v v9, (a0)
-; CHECK-NEXT:    fmv.w.x fa5, zero
-; CHECK-NEXT:    vmfeq.vf v0, v8, fa5
-; CHECK-NEXT:    vse32.v v9, (a1), v0.t
+; CHECK-NEXT:    vse32.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
-  %m = load <1 x float>, ptr %m_ptr
-  %mask = fcmp oeq <1 x float> %m, zeroinitializer
-  %val = load <1 x float>, ptr %val_ptr
-  call void @llvm.masked.store.v1f32.p0v1f32(<1 x float> %val, ptr %a, i32 8, <1 x i1> %mask)
+  call void @llvm.masked.store.v1f32.p0(<1 x float> %val, ptr %a, i32 8, <1 x i1> %mask)
   ret void
 }
-declare void @llvm.masked.store.v1f32.p0v1f32(<1 x float>, ptr, i32, <1 x i1>)
 
-define void @masked_store_v1f64(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
-; RV32-LABEL: masked_store_v1f64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT:    vle64.v v8, (a2)
-; RV32-NEXT:    vle64.v v9, (a0)
-; RV32-NEXT:    fcvt.d.w fa5, zero
-; RV32-NEXT:    vmfeq.vf v0, v8, fa5
-; RV32-NEXT:    vse64.v v9, (a1), v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: masked_store_v1f64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT:    vle64.v v8, (a2)
-; RV64-NEXT:    vle64.v v9, (a0)
-; RV64-NEXT:    fmv.d.x fa5, zero
-; RV64-NEXT:    vmfeq.vf v0, v8, fa5
-; RV64-NEXT:    vse64.v v9, (a1), v0.t
-; RV64-NEXT:    ret
-  %m = load <1 x double>, ptr %m_ptr
-  %mask = fcmp oeq <1 x double> %m, zeroinitializer
-  %val = load <1 x double>, ptr %val_ptr
-  call void @llvm.masked.store.v1f64.p0v1f64(<1 x double> %val, ptr %a, i32 8, <1 x i1> %mask)
+define void @masked_store_v1f64(<1 x double> %val, ptr %a, <1 x i1> %mask) {
+; CHECK-LABEL: masked_store_v1f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT:    vse64.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+  call void @llvm.masked.store.v1f64.p0(<1 x double> %val, ptr %a, i32 8, <1 x i1> %mask)
   ret void
 }
-declare void @llvm.masked.store.v1f64.p0v1f64(<1 x double>, ptr, i32, <1 x i1>)
 
-define void @masked_store_v2f16(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
-; CHECK-LABEL: masked_store_v2f16:
+define void @masked_store_v2bf16(<2 x bfloat> %val, ptr %a, <2 x i1> %mask) {
+; CHECK-LABEL: masked_store_v2bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT:    vle16.v v8, (a2)
-; CHECK-NEXT:    vle16.v v9, (a0)
-; CHECK-NEXT:    fmv.h.x fa5, zero
-; CHECK-NEXT:    vmfeq.vf v0, v8, fa5
-; CHECK-NEXT:    vse16.v v9, (a1), v0.t
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv.x.s a1, v0
+; CHECK-NEXT:    andi a2, a1, 1
+; CHECK-NEXT:    bnez a2, .LBB4_3
+; CHECK-NEXT:  # %bb.1: # %else
+; CHECK-NEXT:    andi a1, a1, 2
+; CHECK-NEXT:    bnez a1, .LBB4_4
+; CHECK-NEXT:  .LBB4_2: # %else2
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB4_3: # %cond.store
+; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vmv.x.s a2, v8
+; CHECK-NEXT:    fmv.h.x fa5, a2
+; CHECK-NEXT:    fsh fa5, 0(a0)
+; CHECK-NEXT:    andi a1, a1, 2
+; CHECK-NEXT:    beqz a1, .LBB4_2
+; CHECK-NEXT:  .LBB4_4: # %cond.store1
+; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT:    vslidedown.vi v8, v8, 1
+; CHECK-NEXT:    vmv.x.s a1, v8
+; CHECK-NEXT:    fmv.h.x fa5, a1
+; CHECK-NEXT:    fsh fa5, 2(a0)
 ; CHECK-NEXT:    ret
-  %m = load <2 x half>, ptr %m_ptr
-  %mask = fcmp oeq <2 x half> %m, zeroinitializer
-  %val = load <2 x half>, ptr %val_ptr
-  call void @llvm.masked.store.v2f16.p0v2f16(<2 x half> %val, ptr %a, i32 8, <2 x i1> %mask)
+  call void @llvm.masked.store.v2bf16.p0(<2 x bfloat> %val, ptr %a, i32 8, <2 x i1> %mask)
+  ret void
+}
+
+define void @masked_store_v2f16(<2 x half> %val, ptr %a, <2 x i1> %mask) {
+; ZVFH-LABEL: masked_store_v2f16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFH-NEXT:    vse16.v v8, (a0), v0.t
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: masked_store_v2f16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; ZVFHMIN-NEXT:    vmv.x.s a1, v0
+; ZVFHMIN-NEXT:    andi a2, a1, 1
+; ZVFHMIN-NEXT:    bnez a2, .LBB5_3
+; ZVFHMIN-NEXT:  # %bb.1: # %else
+; ZVFHMIN-NEXT:    andi a1, a1, 2
+; ZVFHMIN-NEXT:    bnez a1, .LBB5_4
+; ZVFHMIN-NEXT:  .LBB5_2: # %else2
+; ZVFHMIN-NEXT:    ret
+; ZVFHMIN-NEXT:  .LBB5_3: # %cond.store
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vmv.x.s a2, v8
+; ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; ZVFHMIN-NEXT:    fsh fa5, 0(a0)
+; ZVFHMIN-NEXT:    andi a1, a1, 2
+; ZVFHMIN-NEXT:    beqz a1, .LBB5_2
+; ZVFHMIN-NEXT:  .LBB5_4: # %cond.store1
+; ZVFHMIN-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 1
+; ZVFHMIN-NEXT:    vmv.x.s a1, v8
+; ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; ZVFHMIN-NEXT:    fsh fa5, 2(a0)
+; ZVFHMIN-NEXT:    ret
+  call void @llvm.masked.store.v2f16.p0(<2 x half> %val, ptr %a, i32 8, <2 x i1> %mask)
   ret void
 }
-declare void @llvm.masked.store.v2f16.p0v2f16(<2 x half>, ptr, i32, <2 x i1>)
 
-define void @masked_store_v2f32(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
+define void @masked_store_v2f32(<2 x float> %val, ptr %a, <2 x i1> %mask) {
 ; CHECK-LABEL: masked_store_v2f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a2)
-; CHECK-NEXT:    vle32.v v9, (a0)
-; CHECK-NEXT:    fmv.w.x fa5, zero
-; CHECK-NEXT:    vmfeq.vf v0, v8, fa5
-; CHECK-NEXT:    vse32.v v9, (a1), v0.t
+; CHECK-NEXT:    vse32.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
-  %m = load <2 x float>, ptr %m_ptr
-  %mask = fcmp oeq <2 x float> %m, zeroinitializer
-  %val = load <2 x float>, ptr %val_ptr
-  call void @llvm.masked.store.v2f32.p0v2f32(<2 x float> %val, ptr %a, i32 8, <2 x i1> %mask)
+  call void @llvm.masked.store.v2f32.p0(<2 x float> %val, ptr %a, i32 8, <2 x i1> %mask)
   ret void
 }
-declare void @llvm.masked.store.v2f32.p0v2f32(<2 x float>, ptr, i32, <2 x i1>)
 
-define void @masked_store_v2f64(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
-; RV32-LABEL: masked_store_v2f64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT:    vle64.v v8, (a2)
-; RV32-NEXT:    vle64.v v9, (a0)
-; RV32-NEXT:    fcvt.d.w fa5, zero
-; RV32-NEXT:    vmfeq.vf v0, v8, fa5
-; RV32-NEXT:    vse64.v v9, (a1), v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: masked_store_v2f64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV64-NEXT:    vle64.v v8, (a2)
-; RV64-NEXT:    vle64.v v9, (a0)
-; RV64-NEXT:    fmv.d.x fa5, zero
-; RV64-NEXT:    vmfeq.vf v0, v8, fa5
-; RV64-NEXT:    vse64.v v9, (a1), v0.t
-; RV64-NEXT:    ret
-  %m = load <2 x double>, ptr %m_ptr
-  %mask = fcmp oeq <2 x double> %m, zeroinitializer
-  %val = load <2 x double>, ptr %val_ptr
-  call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %val, ptr %a, i32 8, <2 x i1> %mask)
+define void @masked_store_v2f64(<2 x double> %val, ptr %a, <2 x i1> %mask) {
+; CHECK-LABEL: masked_store_v2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-NEXT:    vse64.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+  call void @llvm.masked.store.v2f64.p0(<2 x double> %val, ptr %a, i32 8, <2 x i1> %mask)
   ret void
 }
-declare void @llvm.masked.store.v2f64.p0v2f64(<2 x double>, ptr, i32, <2 x i1>)
 
-define void @masked_store_v4f16(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
-; CHECK-LABEL: masked_store_v4f16:
+define void @masked_store_v4bf16(<4 x bfloat> %val, ptr %a, <4 x i1> %mask) {
+; CHECK-LABEL: masked_store_v4bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT:    vle16.v v8, (a2)
-; CHECK-NEXT:    vle16.v v9, (a0)
-; CHECK-NEXT:    fmv.h.x fa5, zero
-; CHECK-NEXT:    vmfeq.vf v0, v8, fa5
-; CHECK-NEXT:    vse16.v v9, (a1), v0.t
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv.x.s a1, v0
+; CHECK-NEXT:    andi a2, a1, 1
+; CHECK-NEXT:    bnez a2, .LBB8_5
+; CHECK-NEXT:  # %bb.1: # %else
+; CHECK-NEXT:    andi a2, a1, 2
+; CHECK-NEXT:    bnez a2, .LBB8_6
+; CHECK-NEXT:  .LBB8_2: # %else2
+; CHECK-NEXT:    andi a2, a1, 4
+; CHECK-NEXT:    bnez a2, .LBB8_7
+; CHECK-NEXT:  .LBB8_3: # %else4
+; CHECK-NEXT:    andi a1, a1, 8
+; CHECK-NEXT:    bnez a1, .LBB8_8
+; CHECK-NEXT:  .LBB8_4: # %else6
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB8_5: # %cond.store
+; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vmv.x.s a2, v8
+; CHECK-NEXT:    fmv.h.x fa5, a2
+; CHECK-NEXT:    fsh fa5, 0(a0)
+; CHECK-NEXT:    andi a2, a1, 2
+; CHECK-NEXT:    beqz a2, .LBB8_2
+; CHECK-NEXT:  .LBB8_6: # %cond.store1
+; CHECK-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
+; CHECK-NEXT:    vslidedown.vi v9, v8, 1
+; CHECK-NEXT:    vmv.x.s a2, v9
+; CHECK-NEXT:    fmv.h.x fa5, a2
+; CHECK-NEXT:    fsh fa5, 2(a0)
+; CHECK-NEXT:    andi a2, a1, 4
+; CHECK-NEXT:    beqz a2, .LBB8_3
+; CHECK-NEXT:  .LBB8_7: # %cond.store3
+; CHECK-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
+; CHECK-NEXT:    vslidedown.vi v9, v8, 2
+; CHECK-NEXT:    vmv.x.s a2, v9
+; CHECK-NEXT:    fmv.h.x fa5, a2
+; CHECK-NEXT:    fsh fa5, 4(a0)
+; CHECK-NEXT:    andi a1, a1, 8
+; CHECK-NEXT:    beqz a1, .LBB8_4
+; CHECK-NEXT:  .LBB8_8: # %cond.store5
+; CHECK-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
+; CHECK-NEXT:    vslidedown.vi v8, v8, 3
+; CHECK-NEXT:    vmv.x.s a1, v8
+; CHECK-NEXT:    fmv.h.x fa5, a1
+; CHECK-NEXT:    fsh fa5, 6(a0)
 ; CHECK-NEXT:    ret
-  %m = load <4 x half>, ptr %m_ptr
-  %mask = fcmp oeq <4 x half> %m, zeroinitializer
-  %val = load <4 x half>, ptr %val_ptr
-  call void @llvm.masked.store.v4f16.p0v4f16(<4 x half> %val, ptr %a, i32 8, <4 x i1> %mask)
+  call void @llvm.masked.store.v4bf16.p0(<4 x bfloat> %val, ptr %a, i32 8, <4 x i1> %mask)
   ret void
 }
-declare void @llvm.masked.store.v4f16.p0v4f16(<4 x half>, ptr, i32, <4 x i1>)
 
-define void @masked_store_v4f32(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
+define void @masked_store_v4f16(<4 x half> %val, ptr %a, <4 x i1> %mask) {
+; ZVFH-LABEL: masked_store_v4f16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFH-NEXT:    vse16.v v8, (a0), v0.t
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: masked_store_v4f16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; ZVFHMIN-NEXT:    vmv.x.s a1, v0
+; ZVFHMIN-NEXT:    andi a2, a1, 1
+; ZVFHMIN-NEXT:    bnez a2, .LBB9_5
+; ZVFHMIN-NEXT:  # %bb.1: # %else
+; ZVFHMIN-NEXT:    andi a2, a1, 2
+; ZVFHMIN-NEXT:    bnez a2, .LBB9_6
+; ZVFHMIN-NEXT:  .LBB9_2: # %else2
+; ZVFHMIN-NEXT:    andi a2, a1, 4
+; ZVFHMIN-NEXT:    bnez a2, .LBB9_7
+; ZVFHMIN-NEXT:  .LBB9_3: # %else4
+; ZVFHMIN-NEXT:    andi a1, a1, 8
+; ZVFHMIN-NEXT:    bnez a1, .LBB9_8
+; ZVFHMIN-NEXT:  .LBB9_4: # %else6
+; ZVFHMIN-NEXT:    ret
+; ZVFHMIN-NEXT:  .LBB9_5: # %cond.store
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vmv.x.s a2, v8
+; ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; ZVFHMIN-NEXT:    fsh fa5, 0(a0)
+; ZVFHMIN-NEXT:    andi a2, a1, 2
+; ZVFHMIN-NEXT:    beqz a2, .LBB9_2
+; ZVFHMIN-NEXT:  .LBB9_6: # %cond.store1
+; ZVFHMIN-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vslidedown.vi v9, v8, 1
+; ZVFHMIN-NEXT:    vmv.x.s a2, v9
+; ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; ZVFHMIN-NEXT:    fsh fa5, 2(a0)
+; ZVFHMIN-NEXT:    andi a2, a1, 4
+; ZVFHMIN-NEXT:    beqz a2, .LBB9_3
+; ZVFHMIN-NEXT:  .LBB9_7: # %cond.store3
+; ZVFHMIN-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vslidedown.vi v9, v8, 2
+; ZVFHMIN-NEXT:    vmv.x.s a2, v9
+; ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; ZVFHMIN-NEXT:    fsh fa5, 4(a0)
+; ZVFHMIN-NEXT:    andi a1, a1, 8
+; ZVFHMIN-NEXT:    beqz a1, .LBB9_4
+; ZVFHMIN-NEXT:  .LBB9_8: # %cond.store5
+; ZVFHMIN-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 3
+; ZVFHMIN-NEXT:    vmv.x.s a1, v8
+; ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; ZVFHMIN-NEXT:    fsh fa5, 6(a0)
+; ZVFHMIN-NEXT:    ret
+  call void @llvm.masked.store.v4f16.p0(<4 x half> %val, ptr %a, i32 8, <4 x i1> %mask)
+  ret void
+}
+
+define void @masked_store_v4f32(<4 x float> %val, ptr %a, <4 x i1> %mask) {
 ; CHECK-LABEL: masked_store_v4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a2)
-; CHECK-NEXT:    vle32.v v9, (a0)
-; CHECK-NEXT:    fmv.w.x fa5, zero
-; CHECK-NEXT:    vmfeq.vf v0, v8, fa5
-; CHECK-NEXT:    vse32.v v9, (a1), v0.t
+; CHECK-NEXT:    vse32.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
-  %m = load <4 x float>, ptr %m_ptr
-  %mask = fcmp oeq <4 x float> %m, zeroinitializer
-  %val = load <4 x float>, ptr %val_ptr
-  call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %val, ptr %a, i32 8, <4 x i1> %mask)
+  call void @llvm.masked.store.v4f32.p0(<4 x float> %val, ptr %a, i32 8, <4 x i1> %mask)
   ret void
 }
-declare void @llvm.masked.store.v4f32.p0v4f32(<4 x float>, ptr, i32, <4 x i1>)
 
-define void @masked_store_v4f64(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
-; RV32-LABEL: masked_store_v4f64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT:    vle64.v v8, (a2)
-; RV32-NEXT:    vle64.v v10, (a0)
-; RV32-NEXT:    fcvt.d.w fa5, zero
-; RV32-NEXT:    vmfeq.vf v0, v8, fa5
-; RV32-NEXT:    vse64.v v10, (a1), v0.t
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: masked_store_v4f64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV64-NEXT:    vle64.v v8, (a2)
-; RV64-NEXT:    vle64.v v10, (a0)
-; RV64-NEXT:    fmv.d.x fa5, zero
-; RV64-NEXT:    vmfeq.vf v0, v8, fa5
-; RV64-NEXT:    vse64.v v10, (a1), v0.t
-; RV64-NEXT:    ret
-  %m = load <4 x double>, ptr %m_ptr
-  %mask = fcmp oeq <4 x double> %m, zeroinitializer
-  %val = load <4 x double>, ptr %val_ptr
-  call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> %val, ptr %a, i32 8, <4 x i1> %mask)
+define void @masked_store_v4f64(<4 x double> %val, ptr %a, <4 x i1> %mask) {
+; CHECK-LABEL: masked_store_v4f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT:    vse64.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+  call void @llvm.masked.store.v4f64.p0(<4 x double> %val, ptr %a, i32 8, <4 x i1> %mask)
   ret void
 }
-declare void @llvm.masked.store.v4f64.p0v4f64(<4 x double>, ptr, i32, <4 x i1>)
 
-define void @masked_store_v8f16(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
-; CHECK-LABEL: masked_store_v8f16:
+define void @masked_store_v8bf16(<8 x bfloat> %val, ptr %a, <8 x i1> %mask) {
+; CHECK-LABEL: masked_store_v8bf16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v8, (a2)
-; CHECK-NEXT:    vle16.v v9, (a0)
-; CHECK-NEXT:    fmv.h.x fa5, zero
-; CHECK-NEXT:    vmfeq.vf v0, v8, fa5
-; CHECK-NEXT:    vse16.v v9, (a1), v0.t
+; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT:    vmv.x.s a1, v0
+; CHECK-NEXT:    andi a2, a1, 1
+; CHECK-NEXT:    bnez a2, .LBB12_9
+; CHECK-NEXT:  # %bb.1: # %else
+; CHECK-NEXT:    andi a2, a1, 2
+; CHECK-NEXT:    bnez a2, .LBB12_10
+; CHECK-NEXT:  .LBB12_2: # %else2
+; CHECK-NEXT:    andi a2, a1, 4
+; CHECK-NEXT:    bnez a2, .LBB12_11
+; CHECK-NEXT:  .LBB12_3: # %else4
+; CHECK-NEXT:    andi a2, a1, 8
+; CHECK-NEXT:    bnez a2, .LBB12_12
+; CHECK-NEXT:  .LBB12_4: # %else6
+; CHECK-NEXT:    andi a2, a1, 16
+; CHECK-NEXT:    bnez a2, .LBB12_13
+; CHECK-NEXT:  .LBB12_5: # %else8
+; CHECK-NEXT:    andi a2, a1, 32
+; CHECK-NEXT:    bnez a2, .LBB12_14
+; CHECK-NEXT:  .LBB12_6: # %else10
+; CHECK-NEXT:    andi a2, a1, 64
+; CHECK-NEXT:    bnez a2, .LBB12_15
+; CHECK-NEXT:  .LBB12_7: # %else12
+; CHECK-NEXT:    andi a1, a1, -128
+; CHECK-NEXT:    bnez a1, .LBB12_16
+; CHECK-NEXT:  .LBB12_8: # %else14
 ; CHECK-NEXT:    ret
-  %m = load <8 x half>, ptr %m_ptr
-  %mask = fcmp oeq <8 x half> %m, zeroinitializer
-  %val = load <8 x half>, ptr %val_ptr
-  call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %val, ptr %a, i32 8, <8 x i1> %mask)
+; CHECK-NEXT:  .LBB12_9: # %cond.store
+; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vmv.x.s a2, v8
+; CHECK-NEXT:    fmv.h.x fa5, a2
+; CHECK-NEXT:    fsh fa5, 0(a0)
+; CHECK-NEXT:    andi a2, a1, 2
+; CHECK-NEXT:    beqz a2, .LBB12_2
+; CHECK-NEXT:  .LBB12_10: # %cond.store1
+; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT:    vslidedown.vi v9, v8, 1
+; CHECK-NEXT:    vmv.x.s a2, v9
+; CHECK-NEXT:    fmv.h.x fa5, a2
+; CHECK-NEXT:    fsh fa5, 2(a0)
+; CHECK-NEXT:    andi a2, a1, 4
+; CHECK-NEXT:    beqz a2, .LBB12_3
+; CHECK-NEXT:  .LBB12_11: # %cond.store3
+; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT:    vslidedown.vi v9, v8, 2
+; CHECK-NEXT:    vmv.x.s a2, v9
+; CHECK-NEXT:    fmv.h.x fa5, a2
+; CHECK-NEXT:    fsh fa5, 4(a0)
+; CHECK-NEXT:    andi a2, a1, 8
+; CHECK-NEXT:    beqz a2, .LBB12_4
+; CHECK-NEXT:  .LBB12_12: # %cond.store5
+; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT:    vslidedown.vi v9, v8, 3
+; CHECK-NEXT:    vmv.x.s a2, v9
+; CHECK-NEXT:    fmv.h.x fa5, a2
+; CHECK-NEXT:    fsh fa5, 6(a0)
+; CHECK-NEXT:    andi a2, a1, 16
+; CHECK-NEXT:    beqz a2, .LBB12_5
+; CHECK-NEXT:  .LBB12_13: # %cond.store7
+; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT:    vslidedown.vi v9, v8, 4
+; CHECK-NEXT:    vmv.x.s a2, v9
+; CHECK-NEXT:    fmv.h.x fa5, a2
+; CHECK-NEXT:    fsh fa5, 8(a0)
+; CHECK-NEXT:    andi a2, a1, 32
+; CHECK-NEXT:    beqz a2, .LBB12_6
+; CHECK-NEXT:  .LBB12_14: # %cond.store9
+; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT:    vslidedown.vi v9, v8, 5
+; CHECK-NEXT:    vmv.x.s a2, v9
+; CHECK-NEXT:    fmv.h.x fa5, a2
+; CHECK-NEXT:    fsh fa5, 10(a0)
+; CHECK-NEXT:    andi a2, a1, 64
+; CHECK-NEXT:    beqz a2, .LBB12_7
+; CHECK-NEXT:  .LBB12_15: # %cond.store11
+; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT:    vslidedown.vi v9, v8, 6
+; CHECK-NEXT:    vmv.x.s a2, v9
+; CHECK-NEXT:    fmv.h.x fa5, a2
+; CHECK-NEXT:    fsh fa5, 12(a0)
+; CHECK-NEXT:    andi a1, a1, -128
+; CHECK-NEXT:    beqz a1, .LBB12_8
+; CHECK-NEXT:  .LBB12_16: # %cond.store13
+; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT:    vslidedown.vi v8, v8, 7
+; CHECK-NEXT:    vmv.x.s a1, v8
+; CHECK-NEXT:    fmv.h.x fa5, a1
+; CHECK-NEXT:    fsh fa5, 14(a0)
+; CHECK-NEXT:    ret
+  call void @llvm.masked.store.v8bf16.p0(<8 x bfloat> %val, ptr %a, i32 8, <8 x i1> %mask)
   ret void
 }
-declare void @llvm.masked.store.v8f16.p0v8f16(<8 x half>, ptr, i32, <8 x i1>)
 
-define void @masked_store_v8f32(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
+define void @masked_store_v8f16(<8 x half> %val, ptr %a, <8 x i1> %mask) {
+; ZVFH-LABEL: masked_store_v8f16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; ZVFH-NEXT:    vse16.v v8, (a0), v0.t
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: masked_store_v8f16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
+; ZVFHMIN-NEXT:    vmv.x.s a1, v0
+; ZVFHMIN-NEXT:    andi a2, a1, 1
+; ZVFHMIN-NEXT:    bnez a2, .LBB13_9
+; ZVFHMIN-NEXT:  # %bb.1: # %else
+; ZVFHMIN-NEXT:    andi a2, a1, 2
+; ZVFHMIN-NEXT:    bnez a2, .LBB13_10
+; ZVFHMIN-NEXT:  .LBB13_2: # %else2
+; ZVFHMIN-NEXT:    andi a2, a1, 4
+; ZVFHMIN-NEXT:    bnez a2, .LBB13_11
+; ZVFHMIN-NEXT:  .LBB13_3: # %else4
+; ZVFHMIN-NEXT:    andi a2, a1, 8
+; ZVFHMIN-NEXT:    bnez a2, .LBB13_12
+; ZVFHMIN-NEXT:  .LBB13_4: # %else6
+; ZVFHMIN-NEXT:    andi a2, a1, 16
+; ZVFHMIN-NEXT:    bnez a2, .LBB13_13
+; ZVFHMIN-NEXT:  .LBB13_5: # %else8
+; ZVFHMIN-NEXT:    andi a2, a1, 32
+; ZVFHMIN-NEXT:    bnez a2, .LBB13_14
+; ZVFHMIN-NEXT:  .LBB13_6: # %else10
+; ZVFHMIN-NEXT:    andi a2, a1, 64
+; ZVFHMIN-NEXT:    bnez a2, .LBB13_15
+; ZVFHMIN-NEXT:  .LBB13_7: # %else12
+; ZVFHMIN-NEXT:    andi a1, a1, -128
+; ZVFHMIN-NEXT:    bnez a1, .LBB13_16
+; ZVFHMIN-NEXT:  .LBB13_8: # %else14
+; ZVFHMIN-NEXT:    ret
+; ZVFHMIN-NEXT:  .LBB13_9: # %cond.store
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT:    vmv.x.s a2, v8
+; ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; ZVFHMIN-NEXT:    fsh fa5, 0(a0)
+; ZVFHMIN-NEXT:    andi a2, a1, 2
+; ZVFHMIN-NEXT:    beqz a2, .LBB13_2
+; ZVFHMIN-NEXT:  .LBB13_10: # %cond.store1
+; ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vslidedown.vi v9, v8, 1
+; ZVFHMIN-NEXT:    vmv.x.s a2, v9
+; ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; ZVFHMIN-NEXT:    fsh fa5, 2(a0)
+; ZVFHMIN-NEXT:    andi a2, a1, 4
+; ZVFHMIN-NEXT:    beqz a2, .LBB13_3
+; ZVFHMIN-NEXT:  .LBB13_11: # %cond.store3
+; ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vslidedown.vi v9, v8, 2
+; ZVFHMIN-NEXT:    vmv.x.s a2, v9
+; ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; ZVFHMIN-NEXT:    fsh fa5, 4(a0)
+; ZVFHMIN-NEXT:    andi a2, a1, 8
+; ZVFHMIN-NEXT:    beqz a2, .LBB13_4
+; ZVFHMIN-NEXT:  .LBB13_12: # %cond.store5
+; ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vslidedown.vi v9, v8, 3
+; ZVFHMIN-NEXT:    vmv.x.s a2, v9
+; ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; ZVFHMIN-NEXT:    fsh fa5, 6(a0)
+; ZVFHMIN-NEXT:    andi a2, a1, 16
+; ZVFHMIN-NEXT:    beqz a2, .LBB13_5
+; ZVFHMIN-NEXT:  .LBB13_13: # %cond.store7
+; ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vslidedown.vi v9, v8, 4
+; ZVFHMIN-NEXT:    vmv.x.s a2, v9
+; ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; ZVFHMIN-NEXT:    fsh fa5, 8(a0)
+; ZVFHMIN-NEXT:    andi a2, a1, 32
+; ZVFHMIN-NEXT:    beqz a2, .LBB13_6
+; ZVFHMIN-NEXT:  .LBB13_14: # %cond.store9
+; ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vslidedown.vi v9, v8, 5
+; ZVFHMIN-NEXT:    vmv.x.s a2, v9
+; ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; ZVFHMIN-NEXT:    fsh fa5, 10(a0)
+; ZVFHMIN-NEXT:    andi a2, a1, 64
+; ZVFHMIN-NEXT:    beqz a2, .LBB13_7
+; ZVFHMIN-NEXT:  .LBB13_15: # %cond.store11
+; ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vslidedown.vi v9, v8, 6
+; ZVFHMIN-NEXT:    vmv.x.s a2, v9
+; ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; ZVFHMIN-NEXT:    fsh fa5, 12(a0)
+; ZVFHMIN-NEXT:    andi a1, a1, -128
+; ZVFHMIN-NEXT:    beqz a1, .LBB13_8
+; ZVFHMIN-NEXT:  .LBB13_16: # %cond.store13
+; ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 7
+; ZVFHMIN-NEXT:    vmv.x.s a1, v8
+; ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; ZVFHMIN-NEXT:    fsh fa5, 14(a0)
+; ZVFHMIN-NEXT:    ret
+  call void @llvm.masked.store.v8f16.p0(<8 x half> %val, ptr %a, i32 8, <8 x i1> %mask)
+  ret void
+}
+
+define void @masked_store_v8f32(<8 x float> %val, ptr %a, <8 x i1> %mask) {
 ; CHECK-LABEL: masked_store_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a2)
-; CHECK-NEXT:    vle32.v v10, (a0)
-; CHECK-NEXT:    fmv.w.x fa5, zero
-; CHECK-NEXT:    vmfeq.vf v0, v8, fa5
-; CHECK-NEXT:    vse32.v v10, (a1), v0.t
+; CHECK-NEXT:    vse32.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
-  %m = load <8 x float>, ptr %m_ptr
-  %mask = fcmp oeq <8 x float> %m, zeroinitializer
-  %val = load <8 x float>, ptr %val_ptr
-  call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %val, ptr %a, i32 8, <8 x i1> %mask)
+  call void @llvm.masked.store.v8f32.p0(<8 x float> %val, ptr %a, i32 8, <8 x i1> %mask)
   ret void
 }
-declare void @llvm.masked.store.v8f32.p0v8f32(<8 x float>, ptr, i32, <8 x i1>)
 
-define void @masked_store_v8f64(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
-; RV32-LABEL: masked_store_v8f64:
+define void @masked_store_v8f64(<8 x double> %val, ptr %a, <8 x i1> %mask) {
+; CHECK-LABEL: masked_store_v8f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; CHECK-NEXT:    vse64.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+  call void @llvm.masked.store.v8f64.p0(<8 x double> %val, ptr %a, i32 8, <8 x i1> %mask)
+  ret void
+}
+
+define void @masked_store_v16bf16(<16 x bfloat> %val, ptr %a, <16 x i1> %mask) {
+; RV32-LABEL: masked_store_v16bf16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-NEXT:    vle64.v v8, (a2)
-; RV32-NEXT:    vle64.v v12, (a0)
-; RV32-NEXT:    fcvt.d.w fa5, zero
-; RV32-NEXT:    vmfeq.vf v0, v8, fa5
-; RV32-NEXT:    vse64.v v12, (a1), v0.t
+; RV32-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT:    vmv.x.s a1, v0
+; RV32-NEXT:    andi a2, a1, 1
+; RV32-NEXT:    bnez a2, .LBB16_19
+; RV32-NEXT:  # %bb.1: # %else
+; RV32-NEXT:    andi a2, a1, 2
+; RV32-NEXT:    bnez a2, .LBB16_20
+; RV32-NEXT:  .LBB16_2: # %else2
+; RV32-NEXT:    andi a2, a1, 4
+; RV32-NEXT:    bnez a2, .LBB16_21
+; RV32-NEXT:  .LBB16_3: # %else4
+; RV32-NEXT:    andi a2, a1, 8
+; RV32-NEXT:    bnez a2, .LBB16_22
+; RV32-NEXT:  .LBB16_4: # %else6
+; RV32-NEXT:    andi a2, a1, 16
+; RV32-NEXT:    bnez a2, .LBB16_23
+; RV32-NEXT:  .LBB16_5: # %else8
+; RV32-NEXT:    andi a2, a1, 32
+; RV32-NEXT:    bnez a2, .LBB16_24
+; RV32-NEXT:  .LBB16_6: # %else10
+; RV32-NEXT:    andi a2, a1, 64
+; RV32-NEXT:    bnez a2, .LBB16_25
+; RV32-NEXT:  .LBB16_7: # %else12
+; RV32-NEXT:    andi a2, a1, 128
+; RV32-NEXT:    bnez a2, .LBB16_26
+; RV32-NEXT:  .LBB16_8: # %else14
+; RV32-NEXT:    andi a2, a1, 256
+; RV32-NEXT:    bnez a2, .LBB16_27
+; RV32-NEXT:  .LBB16_9: # %else16
+; RV32-NEXT:    andi a2, a1, 512
+; RV32-NEXT:    bnez a2, .LBB16_28
+; RV32-NEXT:  .LBB16_10: # %else18
+; RV32-NEXT:    andi a2, a1, 1024
+; RV32-NEXT:    bnez a2, .LBB16_29
+; RV32-NEXT:  .LBB16_11: # %else20
+; RV32-NEXT:    slli a2, a1, 20
+; RV32-NEXT:    bltz a2, .LBB16_30
+; RV32-NEXT:  .LBB16_12: # %else22
+; RV32-NEXT:    slli a2, a1, 19
+; RV32-NEXT:    bltz a2, .LBB16_31
+; RV32-NEXT:  .LBB16_13: # %else24
+; RV32-NEXT:    slli a2, a1, 18
+; RV32-NEXT:    bltz a2, .LBB16_32
+; RV32-NEXT:  .LBB16_14: # %else26
+; RV32-NEXT:    slli a2, a1, 17
+; RV32-NEXT:    bgez a2, .LBB16_16
+; RV32-NEXT:  .LBB16_15: # %cond.store27
+; RV32-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-NEXT:    vslidedown.vi v10, v8, 14
+; RV32-NEXT:    vmv.x.s a2, v10
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 28(a0)
+; RV32-NEXT:  .LBB16_16: # %else28
+; RV32-NEXT:    lui a2, 1048568
+; RV32-NEXT:    and a1, a1, a2
+; RV32-NEXT:    beqz a1, .LBB16_18
+; RV32-NEXT:  # %bb.17: # %cond.store29
+; RV32-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-NEXT:    vslidedown.vi v8, v8, 15
+; RV32-NEXT:    vmv.x.s a1, v8
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 30(a0)
+; RV32-NEXT:  .LBB16_18: # %else30
 ; RV32-NEXT:    ret
+; RV32-NEXT:  .LBB16_19: # %cond.store
+; RV32-NEXT:    vmv.x.s a2, v8
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 0(a0)
+; RV32-NEXT:    andi a2, a1, 2
+; RV32-NEXT:    beqz a2, .LBB16_2
+; RV32-NEXT:  .LBB16_20: # %cond.store1
+; RV32-NEXT:    vslidedown.vi v10, v8, 1
+; RV32-NEXT:    vmv.x.s a2, v10
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 2(a0)
+; RV32-NEXT:    andi a2, a1, 4
+; RV32-NEXT:    beqz a2, .LBB16_3
+; RV32-NEXT:  .LBB16_21: # %cond.store3
+; RV32-NEXT:    vslidedown.vi v10, v8, 2
+; RV32-NEXT:    vmv.x.s a2, v10
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 4(a0)
+; RV32-NEXT:    andi a2, a1, 8
+; RV32-NEXT:    beqz a2, .LBB16_4
+; RV32-NEXT:  .LBB16_22: # %cond.store5
+; RV32-NEXT:    vslidedown.vi v10, v8, 3
+; RV32-NEXT:    vmv.x.s a2, v10
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 6(a0)
+; RV32-NEXT:    andi a2, a1, 16
+; RV32-NEXT:    beqz a2, .LBB16_5
+; RV32-NEXT:  .LBB16_23: # %cond.store7
+; RV32-NEXT:    vslidedown.vi v10, v8, 4
+; RV32-NEXT:    vmv.x.s a2, v10
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 8(a0)
+; RV32-NEXT:    andi a2, a1, 32
+; RV32-NEXT:    beqz a2, .LBB16_6
+; RV32-NEXT:  .LBB16_24: # %cond.store9
+; RV32-NEXT:    vslidedown.vi v10, v8, 5
+; RV32-NEXT:    vmv.x.s a2, v10
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 10(a0)
+; RV32-NEXT:    andi a2, a1, 64
+; RV32-NEXT:    beqz a2, .LBB16_7
+; RV32-NEXT:  .LBB16_25: # %cond.store11
+; RV32-NEXT:    vslidedown.vi v10, v8, 6
+; RV32-NEXT:    vmv.x.s a2, v10
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 12(a0)
+; RV32-NEXT:    andi a2, a1, 128
+; RV32-NEXT:    beqz a2, .LBB16_8
+; RV32-NEXT:  .LBB16_26: # %cond.store13
+; RV32-NEXT:    vslidedown.vi v10, v8, 7
+; RV32-NEXT:    vmv.x.s a2, v10
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 14(a0)
+; RV32-NEXT:    andi a2, a1, 256
+; RV32-NEXT:    beqz a2, .LBB16_9
+; RV32-NEXT:  .LBB16_27: # %cond.store15
+; RV32-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-NEXT:    vslidedown.vi v10, v8, 8
+; RV32-NEXT:    vmv.x.s a2, v10
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 16(a0)
+; RV32-NEXT:    andi a2, a1, 512
+; RV32-NEXT:    beqz a2, .LBB16_10
+; RV32-NEXT:  .LBB16_28: # %cond.store17
+; RV32-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-NEXT:    vslidedown.vi v10, v8, 9
+; RV32-NEXT:    vmv.x.s a2, v10
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 18(a0)
+; RV32-NEXT:    andi a2, a1, 1024
+; RV32-NEXT:    beqz a2, .LBB16_11
+; RV32-NEXT:  .LBB16_29: # %cond.store19
+; RV32-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-NEXT:    vslidedown.vi v10, v8, 10
+; RV32-NEXT:    vmv.x.s a2, v10
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 20(a0)
+; RV32-NEXT:    slli a2, a1, 20
+; RV32-NEXT:    bgez a2, .LBB16_12
+; RV32-NEXT:  .LBB16_30: # %cond.store21
+; RV32-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-NEXT:    vslidedown.vi v10, v8, 11
+; RV32-NEXT:    vmv.x.s a2, v10
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 22(a0)
+; RV32-NEXT:    slli a2, a1, 19
+; RV32-NEXT:    bgez a2, .LBB16_13
+; RV32-NEXT:  .LBB16_31: # %cond.store23
+; RV32-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-NEXT:    vslidedown.vi v10, v8, 12
+; RV32-NEXT:    vmv.x.s a2, v10
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 24(a0)
+; RV32-NEXT:    slli a2, a1, 18
+; RV32-NEXT:    bgez a2, .LBB16_14
+; RV32-NEXT:  .LBB16_32: # %cond.store25
+; RV32-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-NEXT:    vslidedown.vi v10, v8, 13
+; RV32-NEXT:    vmv.x.s a2, v10
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 26(a0)
+; RV32-NEXT:    slli a2, a1, 17
+; RV32-NEXT:    bltz a2, .LBB16_15
+; RV32-NEXT:    j .LBB16_16
 ;
-; RV64-LABEL: masked_store_v8f64:
+; RV64-LABEL: masked_store_v16bf16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT:    vle64.v v8, (a2)
-; RV64-NEXT:    vle64.v v12, (a0)
-; RV64-NEXT:    fmv.d.x fa5, zero
-; RV64-NEXT:    vmfeq.vf v0, v8, fa5
-; RV64-NEXT:    vse64.v v12, (a1), v0.t
+; RV64-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT:    vmv.x.s a1, v0
+; RV64-NEXT:    andi a2, a1, 1
+; RV64-NEXT:    bnez a2, .LBB16_19
+; RV64-NEXT:  # %bb.1: # %else
+; RV64-NEXT:    andi a2, a1, 2
+; RV64-NEXT:    bnez a2, .LBB16_20
+; RV64-NEXT:  .LBB16_2: # %else2
+; RV64-NEXT:    andi a2, a1, 4
+; RV64-NEXT:    bnez a2, .LBB16_21
+; RV64-NEXT:  .LBB16_3: # %else4
+; RV64-NEXT:    andi a2, a1, 8
+; RV64-NEXT:    bnez a2, .LBB16_22
+; RV64-NEXT:  .LBB16_4: # %else6
+; RV64-NEXT:    andi a2, a1, 16
+; RV64-NEXT:    bnez a2, .LBB16_23
+; RV64-NEXT:  .LBB16_5: # %else8
+; RV64-NEXT:    andi a2, a1, 32
+; RV64-NEXT:    bnez a2, .LBB16_24
+; RV64-NEXT:  .LBB16_6: # %else10
+; RV64-NEXT:    andi a2, a1, 64
+; RV64-NEXT:    bnez a2, .LBB16_25
+; RV64-NEXT:  .LBB16_7: # %else12
+; RV64-NEXT:    andi a2, a1, 128
+; RV64-NEXT:    bnez a2, .LBB16_26
+; RV64-NEXT:  .LBB16_8: # %else14
+; RV64-NEXT:    andi a2, a1, 256
+; RV64-NEXT:    bnez a2, .LBB16_27
+; RV64-NEXT:  .LBB16_9: # %else16
+; RV64-NEXT:    andi a2, a1, 512
+; RV64-NEXT:    bnez a2, .LBB16_28
+; RV64-NEXT:  .LBB16_10: # %else18
+; RV64-NEXT:    andi a2, a1, 1024
+; RV64-NEXT:    bnez a2, .LBB16_29
+; RV64-NEXT:  .LBB16_11: # %else20
+; RV64-NEXT:    slli a2, a1, 52
+; RV64-NEXT:    bltz a2, .LBB16_30
+; RV64-NEXT:  .LBB16_12: # %else22
+; RV64-NEXT:    slli a2, a1, 51
+; RV64-NEXT:    bltz a2, .LBB16_31
+; RV64-NEXT:  .LBB16_13: # %else24
+; RV64-NEXT:    slli a2, a1, 50
+; RV64-NEXT:    bltz a2, .LBB16_32
+; RV64-NEXT:  .LBB16_14: # %else26
+; RV64-NEXT:    slli a2, a1, 49
+; RV64-NEXT:    bgez a2, .LBB16_16
+; RV64-NEXT:  .LBB16_15: # %cond.store27
+; RV64-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-NEXT:    vslidedown.vi v10, v8, 14
+; RV64-NEXT:    vmv.x.s a2, v10
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 28(a0)
+; RV64-NEXT:  .LBB16_16: # %else28
+; RV64-NEXT:    lui a2, 1048568
+; RV64-NEXT:    and a1, a1, a2
+; RV64-NEXT:    beqz a1, .LBB16_18
+; RV64-NEXT:  # %bb.17: # %cond.store29
+; RV64-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-NEXT:    vslidedown.vi v8, v8, 15
+; RV64-NEXT:    vmv.x.s a1, v8
+; RV64-NEXT:    fmv.h.x fa5, a1
+; RV64-NEXT:    fsh fa5, 30(a0)
+; RV64-NEXT:  .LBB16_18: # %else30
 ; RV64-NEXT:    ret
-  %m = load <8 x double>, ptr %m_ptr
-  %mask = fcmp oeq <8 x double> %m, zeroinitializer
-  %val = load <8 x double>, ptr %val_ptr
-  call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> %val, ptr %a, i32 8, <8 x i1> %mask)
+; RV64-NEXT:  .LBB16_19: # %cond.store
+; RV64-NEXT:    vmv.x.s a2, v8
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 0(a0)
+; RV64-NEXT:    andi a2, a1, 2
+; RV64-NEXT:    beqz a2, .LBB16_2
+; RV64-NEXT:  .LBB16_20: # %cond.store1
+; RV64-NEXT:    vslidedown.vi v10, v8, 1
+; RV64-NEXT:    vmv.x.s a2, v10
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 2(a0)
+; RV64-NEXT:    andi a2, a1, 4
+; RV64-NEXT:    beqz a2, .LBB16_3
+; RV64-NEXT:  .LBB16_21: # %cond.store3
+; RV64-NEXT:    vslidedown.vi v10, v8, 2
+; RV64-NEXT:    vmv.x.s a2, v10
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 4(a0)
+; RV64-NEXT:    andi a2, a1, 8
+; RV64-NEXT:    beqz a2, .LBB16_4
+; RV64-NEXT:  .LBB16_22: # %cond.store5
+; RV64-NEXT:    vslidedown.vi v10, v8, 3
+; RV64-NEXT:    vmv.x.s a2, v10
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 6(a0)
+; RV64-NEXT:    andi a2, a1, 16
+; RV64-NEXT:    beqz a2, .LBB16_5
+; RV64-NEXT:  .LBB16_23: # %cond.store7
+; RV64-NEXT:    vslidedown.vi v10, v8, 4
+; RV64-NEXT:    vmv.x.s a2, v10
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 8(a0)
+; RV64-NEXT:    andi a2, a1, 32
+; RV64-NEXT:    beqz a2, .LBB16_6
+; RV64-NEXT:  .LBB16_24: # %cond.store9
+; RV64-NEXT:    vslidedown.vi v10, v8, 5
+; RV64-NEXT:    vmv.x.s a2, v10
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 10(a0)
+; RV64-NEXT:    andi a2, a1, 64
+; RV64-NEXT:    beqz a2, .LBB16_7
+; RV64-NEXT:  .LBB16_25: # %cond.store11
+; RV64-NEXT:    vslidedown.vi v10, v8, 6
+; RV64-NEXT:    vmv.x.s a2, v10
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 12(a0)
+; RV64-NEXT:    andi a2, a1, 128
+; RV64-NEXT:    beqz a2, .LBB16_8
+; RV64-NEXT:  .LBB16_26: # %cond.store13
+; RV64-NEXT:    vslidedown.vi v10, v8, 7
+; RV64-NEXT:    vmv.x.s a2, v10
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 14(a0)
+; RV64-NEXT:    andi a2, a1, 256
+; RV64-NEXT:    beqz a2, .LBB16_9
+; RV64-NEXT:  .LBB16_27: # %cond.store15
+; RV64-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-NEXT:    vslidedown.vi v10, v8, 8
+; RV64-NEXT:    vmv.x.s a2, v10
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 16(a0)
+; RV64-NEXT:    andi a2, a1, 512
+; RV64-NEXT:    beqz a2, .LBB16_10
+; RV64-NEXT:  .LBB16_28: # %cond.store17
+; RV64-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-NEXT:    vslidedown.vi v10, v8, 9
+; RV64-NEXT:    vmv.x.s a2, v10
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 18(a0)
+; RV64-NEXT:    andi a2, a1, 1024
+; RV64-NEXT:    beqz a2, .LBB16_11
+; RV64-NEXT:  .LBB16_29: # %cond.store19
+; RV64-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-NEXT:    vslidedown.vi v10, v8, 10
+; RV64-NEXT:    vmv.x.s a2, v10
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 20(a0)
+; RV64-NEXT:    slli a2, a1, 52
+; RV64-NEXT:    bgez a2, .LBB16_12
+; RV64-NEXT:  .LBB16_30: # %cond.store21
+; RV64-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-NEXT:    vslidedown.vi v10, v8, 11
+; RV64-NEXT:    vmv.x.s a2, v10
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 22(a0)
+; RV64-NEXT:    slli a2, a1, 51
+; RV64-NEXT:    bgez a2, .LBB16_13
+; RV64-NEXT:  .LBB16_31: # %cond.store23
+; RV64-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-NEXT:    vslidedown.vi v10, v8, 12
+; RV64-NEXT:    vmv.x.s a2, v10
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 24(a0)
+; RV64-NEXT:    slli a2, a1, 50
+; RV64-NEXT:    bgez a2, .LBB16_14
+; RV64-NEXT:  .LBB16_32: # %cond.store25
+; RV64-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-NEXT:    vslidedown.vi v10, v8, 13
+; RV64-NEXT:    vmv.x.s a2, v10
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 26(a0)
+; RV64-NEXT:    slli a2, a1, 49
+; RV64-NEXT:    bltz a2, .LBB16_15
+; RV64-NEXT:    j .LBB16_16
+  call void @llvm.masked.store.v16bf16.p0(<16 x bfloat> %val, ptr %a, i32 8, <16 x i1> %mask)
   ret void
 }
-declare void @llvm.masked.store.v8f64.p0v8f64(<8 x double>, ptr, i32, <8 x i1>)
 
-define void @masked_store_v16f16(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
-; CHECK-LABEL: masked_store_v16f16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vle16.v v8, (a2)
-; CHECK-NEXT:    vle16.v v10, (a0)
-; CHECK-NEXT:    fmv.h.x fa5, zero
-; CHECK-NEXT:    vmfeq.vf v0, v8, fa5
-; CHECK-NEXT:    vse16.v v10, (a1), v0.t
-; CHECK-NEXT:    ret
-  %m = load <16 x half>, ptr %m_ptr
-  %mask = fcmp oeq <16 x half> %m, zeroinitializer
-  %val = load <16 x half>, ptr %val_ptr
-  call void @llvm.masked.store.v16f16.p0v16f16(<16 x half> %val, ptr %a, i32 8, <16 x i1> %mask)
+define void @masked_store_v16f16(<16 x half> %val, ptr %a, <16 x i1> %mask) {
+; ZVFH-LABEL: masked_store_v16f16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; ZVFH-NEXT:    vse16.v v8, (a0), v0.t
+; ZVFH-NEXT:    ret
+;
+; RV32-ZVFHMIN-LABEL: masked_store_v16f16:
+; RV32-ZVFHMIN:       # %bb.0:
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a1, v0
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 1
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB17_19
+; RV32-ZVFHMIN-NEXT:  # %bb.1: # %else
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 2
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB17_20
+; RV32-ZVFHMIN-NEXT:  .LBB17_2: # %else2
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 4
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB17_21
+; RV32-ZVFHMIN-NEXT:  .LBB17_3: # %else4
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 8
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB17_22
+; RV32-ZVFHMIN-NEXT:  .LBB17_4: # %else6
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 16
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB17_23
+; RV32-ZVFHMIN-NEXT:  .LBB17_5: # %else8
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 32
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB17_24
+; RV32-ZVFHMIN-NEXT:  .LBB17_6: # %else10
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 64
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB17_25
+; RV32-ZVFHMIN-NEXT:  .LBB17_7: # %else12
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 128
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB17_26
+; RV32-ZVFHMIN-NEXT:  .LBB17_8: # %else14
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 256
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB17_27
+; RV32-ZVFHMIN-NEXT:  .LBB17_9: # %else16
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 512
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB17_28
+; RV32-ZVFHMIN-NEXT:  .LBB17_10: # %else18
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 1024
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB17_29
+; RV32-ZVFHMIN-NEXT:  .LBB17_11: # %else20
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 20
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB17_30
+; RV32-ZVFHMIN-NEXT:  .LBB17_12: # %else22
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 19
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB17_31
+; RV32-ZVFHMIN-NEXT:  .LBB17_13: # %else24
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 18
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB17_32
+; RV32-ZVFHMIN-NEXT:  .LBB17_14: # %else26
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 17
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB17_16
+; RV32-ZVFHMIN-NEXT:  .LBB17_15: # %cond.store27
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 14
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a2, v10
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 28(a0)
+; RV32-ZVFHMIN-NEXT:  .LBB17_16: # %else28
+; RV32-ZVFHMIN-NEXT:    lui a2, 1048568
+; RV32-ZVFHMIN-NEXT:    and a1, a1, a2
+; RV32-ZVFHMIN-NEXT:    beqz a1, .LBB17_18
+; RV32-ZVFHMIN-NEXT:  # %bb.17: # %cond.store29
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 15
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a1, v8
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 30(a0)
+; RV32-ZVFHMIN-NEXT:  .LBB17_18: # %else30
+; RV32-ZVFHMIN-NEXT:    ret
+; RV32-ZVFHMIN-NEXT:  .LBB17_19: # %cond.store
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a2, v8
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 0(a0)
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 2
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB17_2
+; RV32-ZVFHMIN-NEXT:  .LBB17_20: # %cond.store1
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 1
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a2, v10
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 2(a0)
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 4
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB17_3
+; RV32-ZVFHMIN-NEXT:  .LBB17_21: # %cond.store3
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 2
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a2, v10
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 4(a0)
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 8
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB17_4
+; RV32-ZVFHMIN-NEXT:  .LBB17_22: # %cond.store5
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 3
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a2, v10
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 6(a0)
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 16
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB17_5
+; RV32-ZVFHMIN-NEXT:  .LBB17_23: # %cond.store7
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 4
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a2, v10
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 8(a0)
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 32
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB17_6
+; RV32-ZVFHMIN-NEXT:  .LBB17_24: # %cond.store9
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 5
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a2, v10
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 10(a0)
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 64
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB17_7
+; RV32-ZVFHMIN-NEXT:  .LBB17_25: # %cond.store11
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 6
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a2, v10
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 12(a0)
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 128
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB17_8
+; RV32-ZVFHMIN-NEXT:  .LBB17_26: # %cond.store13
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 7
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a2, v10
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 14(a0)
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 256
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB17_9
+; RV32-ZVFHMIN-NEXT:  .LBB17_27: # %cond.store15
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 8
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a2, v10
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 16(a0)
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 512
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB17_10
+; RV32-ZVFHMIN-NEXT:  .LBB17_28: # %cond.store17
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 9
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a2, v10
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 18(a0)
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 1024
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB17_11
+; RV32-ZVFHMIN-NEXT:  .LBB17_29: # %cond.store19
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 10
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a2, v10
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 20(a0)
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 20
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB17_12
+; RV32-ZVFHMIN-NEXT:  .LBB17_30: # %cond.store21
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 11
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a2, v10
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 22(a0)
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 19
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB17_13
+; RV32-ZVFHMIN-NEXT:  .LBB17_31: # %cond.store23
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 12
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a2, v10
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 24(a0)
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 18
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB17_14
+; RV32-ZVFHMIN-NEXT:  .LBB17_32: # %cond.store25
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 13
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a2, v10
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 26(a0)
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 17
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB17_15
+; RV32-ZVFHMIN-NEXT:    j .LBB17_16
+;
+; RV64-ZVFHMIN-LABEL: masked_store_v16f16:
+; RV64-ZVFHMIN:       # %bb.0:
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a1, v0
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 1
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB17_19
+; RV64-ZVFHMIN-NEXT:  # %bb.1: # %else
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 2
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB17_20
+; RV64-ZVFHMIN-NEXT:  .LBB17_2: # %else2
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 4
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB17_21
+; RV64-ZVFHMIN-NEXT:  .LBB17_3: # %else4
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 8
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB17_22
+; RV64-ZVFHMIN-NEXT:  .LBB17_4: # %else6
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 16
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB17_23
+; RV64-ZVFHMIN-NEXT:  .LBB17_5: # %else8
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 32
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB17_24
+; RV64-ZVFHMIN-NEXT:  .LBB17_6: # %else10
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 64
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB17_25
+; RV64-ZVFHMIN-NEXT:  .LBB17_7: # %else12
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 128
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB17_26
+; RV64-ZVFHMIN-NEXT:  .LBB17_8: # %else14
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 256
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB17_27
+; RV64-ZVFHMIN-NEXT:  .LBB17_9: # %else16
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 512
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB17_28
+; RV64-ZVFHMIN-NEXT:  .LBB17_10: # %else18
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 1024
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB17_29
+; RV64-ZVFHMIN-NEXT:  .LBB17_11: # %else20
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 52
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB17_30
+; RV64-ZVFHMIN-NEXT:  .LBB17_12: # %else22
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 51
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB17_31
+; RV64-ZVFHMIN-NEXT:  .LBB17_13: # %else24
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 50
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB17_32
+; RV64-ZVFHMIN-NEXT:  .LBB17_14: # %else26
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 49
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB17_16
+; RV64-ZVFHMIN-NEXT:  .LBB17_15: # %cond.store27
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 14
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v10
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 28(a0)
+; RV64-ZVFHMIN-NEXT:  .LBB17_16: # %else28
+; RV64-ZVFHMIN-NEXT:    lui a2, 1048568
+; RV64-ZVFHMIN-NEXT:    and a1, a1, a2
+; RV64-ZVFHMIN-NEXT:    beqz a1, .LBB17_18
+; RV64-ZVFHMIN-NEXT:  # %bb.17: # %cond.store29
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v8, v8, 15
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a1, v8
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 30(a0)
+; RV64-ZVFHMIN-NEXT:  .LBB17_18: # %else30
+; RV64-ZVFHMIN-NEXT:    ret
+; RV64-ZVFHMIN-NEXT:  .LBB17_19: # %cond.store
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v8
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 0(a0)
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 2
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB17_2
+; RV64-ZVFHMIN-NEXT:  .LBB17_20: # %cond.store1
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 1
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v10
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 2(a0)
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 4
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB17_3
+; RV64-ZVFHMIN-NEXT:  .LBB17_21: # %cond.store3
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 2
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v10
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 4(a0)
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 8
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB17_4
+; RV64-ZVFHMIN-NEXT:  .LBB17_22: # %cond.store5
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 3
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v10
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 6(a0)
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 16
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB17_5
+; RV64-ZVFHMIN-NEXT:  .LBB17_23: # %cond.store7
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 4
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v10
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 8(a0)
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 32
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB17_6
+; RV64-ZVFHMIN-NEXT:  .LBB17_24: # %cond.store9
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 5
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v10
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 10(a0)
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 64
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB17_7
+; RV64-ZVFHMIN-NEXT:  .LBB17_25: # %cond.store11
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 6
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v10
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 12(a0)
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 128
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB17_8
+; RV64-ZVFHMIN-NEXT:  .LBB17_26: # %cond.store13
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 7
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v10
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 14(a0)
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 256
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB17_9
+; RV64-ZVFHMIN-NEXT:  .LBB17_27: # %cond.store15
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 8
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v10
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 16(a0)
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 512
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB17_10
+; RV64-ZVFHMIN-NEXT:  .LBB17_28: # %cond.store17
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 9
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v10
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 18(a0)
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 1024
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB17_11
+; RV64-ZVFHMIN-NEXT:  .LBB17_29: # %cond.store19
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 10
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v10
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 20(a0)
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 52
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB17_12
+; RV64-ZVFHMIN-NEXT:  .LBB17_30: # %cond.store21
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 11
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v10
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 22(a0)
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 51
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB17_13
+; RV64-ZVFHMIN-NEXT:  .LBB17_31: # %cond.store23
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 12
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v10
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 24(a0)
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 50
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB17_14
+; RV64-ZVFHMIN-NEXT:  .LBB17_32: # %cond.store25
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v10, v8, 13
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v10
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 26(a0)
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 49
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB17_15
+; RV64-ZVFHMIN-NEXT:    j .LBB17_16
+  call void @llvm.masked.store.v16f16.p0(<16 x half> %val, ptr %a, i32 8, <16 x i1> %mask)
   ret void
 }
-declare void @llvm.masked.store.v16f16.p0v16f16(<16 x half>, ptr, i32, <16 x i1>)
 
-define void @masked_store_v16f32(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
+define void @masked_store_v16f32(<16 x float> %val, ptr %a, <16 x i1> %mask) {
 ; CHECK-LABEL: masked_store_v16f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a2)
-; CHECK-NEXT:    vle32.v v12, (a0)
-; CHECK-NEXT:    fmv.w.x fa5, zero
-; CHECK-NEXT:    vmfeq.vf v0, v8, fa5
-; CHECK-NEXT:    vse32.v v12, (a1), v0.t
+; CHECK-NEXT:    vse32.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+  call void @llvm.masked.store.v16f32.p0(<16 x float> %val, ptr %a, i32 8, <16 x i1> %mask)
+  ret void
+}
+
+define void @masked_store_v16f64(<16 x double> %val, ptr %a, <16 x i1> %mask) {
+; CHECK-LABEL: masked_store_v16f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; CHECK-NEXT:    vse64.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
-  %m = load <16 x float>, ptr %m_ptr
-  %mask = fcmp oeq <16 x float> %m, zeroinitializer
-  %val = load <16 x float>, ptr %val_ptr
-  call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> %val, ptr %a, i32 8, <16 x i1> %mask)
+  call void @llvm.masked.store.v16f64.p0(<16 x double> %val, ptr %a, i32 8, <16 x i1> %mask)
   ret void
 }
-declare void @llvm.masked.store.v16f32.p0v16f32(<16 x float>, ptr, i32, <16 x i1>)
 
-define void @masked_store_v16f64(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
-; RV32-LABEL: masked_store_v16f64:
+define void @masked_store_v32bf16(<32 x bfloat> %val, ptr %a, <32 x i1> %mask) {
+; RV32-LABEL: masked_store_v32bf16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vle64.v v8, (a2)
-; RV32-NEXT:    vle64.v v16, (a0)
-; RV32-NEXT:    fcvt.d.w fa5, zero
-; RV32-NEXT:    vmfeq.vf v0, v8, fa5
-; RV32-NEXT:    vse64.v v16, (a1), v0.t
+; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT:    vmv.x.s a1, v0
+; RV32-NEXT:    andi a2, a1, 1
+; RV32-NEXT:    bnez a2, .LBB20_35
+; RV32-NEXT:  # %bb.1: # %else
+; RV32-NEXT:    andi a2, a1, 2
+; RV32-NEXT:    bnez a2, .LBB20_36
+; RV32-NEXT:  .LBB20_2: # %else2
+; RV32-NEXT:    andi a2, a1, 4
+; RV32-NEXT:    bnez a2, .LBB20_37
+; RV32-NEXT:  .LBB20_3: # %else4
+; RV32-NEXT:    andi a2, a1, 8
+; RV32-NEXT:    bnez a2, .LBB20_38
+; RV32-NEXT:  .LBB20_4: # %else6
+; RV32-NEXT:    andi a2, a1, 16
+; RV32-NEXT:    bnez a2, .LBB20_39
+; RV32-NEXT:  .LBB20_5: # %else8
+; RV32-NEXT:    andi a2, a1, 32
+; RV32-NEXT:    bnez a2, .LBB20_40
+; RV32-NEXT:  .LBB20_6: # %else10
+; RV32-NEXT:    andi a2, a1, 64
+; RV32-NEXT:    bnez a2, .LBB20_41
+; RV32-NEXT:  .LBB20_7: # %else12
+; RV32-NEXT:    andi a2, a1, 128
+; RV32-NEXT:    bnez a2, .LBB20_42
+; RV32-NEXT:  .LBB20_8: # %else14
+; RV32-NEXT:    andi a2, a1, 256
+; RV32-NEXT:    bnez a2, .LBB20_43
+; RV32-NEXT:  .LBB20_9: # %else16
+; RV32-NEXT:    andi a2, a1, 512
+; RV32-NEXT:    bnez a2, .LBB20_44
+; RV32-NEXT:  .LBB20_10: # %else18
+; RV32-NEXT:    andi a2, a1, 1024
+; RV32-NEXT:    bnez a2, .LBB20_45
+; RV32-NEXT:  .LBB20_11: # %else20
+; RV32-NEXT:    slli a2, a1, 20
+; RV32-NEXT:    bltz a2, .LBB20_46
+; RV32-NEXT:  .LBB20_12: # %else22
+; RV32-NEXT:    slli a2, a1, 19
+; RV32-NEXT:    bltz a2, .LBB20_47
+; RV32-NEXT:  .LBB20_13: # %else24
+; RV32-NEXT:    slli a2, a1, 18
+; RV32-NEXT:    bltz a2, .LBB20_48
+; RV32-NEXT:  .LBB20_14: # %else26
+; RV32-NEXT:    slli a2, a1, 17
+; RV32-NEXT:    bltz a2, .LBB20_49
+; RV32-NEXT:  .LBB20_15: # %else28
+; RV32-NEXT:    slli a2, a1, 16
+; RV32-NEXT:    bgez a2, .LBB20_17
+; RV32-NEXT:  .LBB20_16: # %cond.store29
+; RV32-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-NEXT:    vslidedown.vi v12, v8, 15
+; RV32-NEXT:    vmv.x.s a2, v12
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 30(a0)
+; RV32-NEXT:  .LBB20_17: # %else30
+; RV32-NEXT:    addi sp, sp, -1152
+; RV32-NEXT:    .cfi_def_cfa_offset 1152
+; RV32-NEXT:    sw ra, 1148(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s0, 1144(sp) # 4-byte Folded Spill
+; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    .cfi_offset s0, -8
+; RV32-NEXT:    addi s0, sp, 1152
+; RV32-NEXT:    .cfi_def_cfa s0, 0
+; RV32-NEXT:    .cfi_remember_state
+; RV32-NEXT:    andi sp, sp, -64
+; RV32-NEXT:    slli a2, a1, 15
+; RV32-NEXT:    bltz a2, .LBB20_50
+; RV32-NEXT:  # %bb.18: # %else32
+; RV32-NEXT:    slli a2, a1, 14
+; RV32-NEXT:    bltz a2, .LBB20_51
+; RV32-NEXT:  .LBB20_19: # %else34
+; RV32-NEXT:    slli a2, a1, 13
+; RV32-NEXT:    bltz a2, .LBB20_52
+; RV32-NEXT:  .LBB20_20: # %else36
+; RV32-NEXT:    slli a2, a1, 12
+; RV32-NEXT:    bltz a2, .LBB20_53
+; RV32-NEXT:  .LBB20_21: # %else38
+; RV32-NEXT:    slli a2, a1, 11
+; RV32-NEXT:    bltz a2, .LBB20_54
+; RV32-NEXT:  .LBB20_22: # %else40
+; RV32-NEXT:    slli a2, a1, 10
+; RV32-NEXT:    bltz a2, .LBB20_55
+; RV32-NEXT:  .LBB20_23: # %else42
+; RV32-NEXT:    slli a2, a1, 9
+; RV32-NEXT:    bltz a2, .LBB20_56
+; RV32-NEXT:  .LBB20_24: # %else44
+; RV32-NEXT:    slli a2, a1, 8
+; RV32-NEXT:    bltz a2, .LBB20_57
+; RV32-NEXT:  .LBB20_25: # %else46
+; RV32-NEXT:    slli a2, a1, 7
+; RV32-NEXT:    bltz a2, .LBB20_58
+; RV32-NEXT:  .LBB20_26: # %else48
+; RV32-NEXT:    slli a2, a1, 6
+; RV32-NEXT:    bltz a2, .LBB20_59
+; RV32-NEXT:  .LBB20_27: # %else50
+; RV32-NEXT:    slli a2, a1, 5
+; RV32-NEXT:    bltz a2, .LBB20_60
+; RV32-NEXT:  .LBB20_28: # %else52
+; RV32-NEXT:    slli a2, a1, 4
+; RV32-NEXT:    bltz a2, .LBB20_61
+; RV32-NEXT:  .LBB20_29: # %else54
+; RV32-NEXT:    slli a2, a1, 3
+; RV32-NEXT:    bltz a2, .LBB20_62
+; RV32-NEXT:  .LBB20_30: # %else56
+; RV32-NEXT:    slli a2, a1, 2
+; RV32-NEXT:    bltz a2, .LBB20_63
+; RV32-NEXT:  .LBB20_31: # %else58
+; RV32-NEXT:    slli a2, a1, 1
+; RV32-NEXT:    bltz a2, .LBB20_64
+; RV32-NEXT:  .LBB20_32: # %else60
+; RV32-NEXT:    bgez a1, .LBB20_34
+; RV32-NEXT:  .LBB20_33: # %cond.store61
+; RV32-NEXT:    li a1, 32
+; RV32-NEXT:    addi a2, sp, 64
+; RV32-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
+; RV32-NEXT:    vse16.v v8, (a2)
+; RV32-NEXT:    lh a1, 126(sp)
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 62(a0)
+; RV32-NEXT:  .LBB20_34: # %else62
+; RV32-NEXT:    addi sp, s0, -1152
+; RV32-NEXT:    .cfi_def_cfa sp, 1152
+; RV32-NEXT:    lw ra, 1148(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s0, 1144(sp) # 4-byte Folded Reload
+; RV32-NEXT:    .cfi_restore ra
+; RV32-NEXT:    .cfi_restore s0
+; RV32-NEXT:    addi sp, sp, 1152
+; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
+; RV32-NEXT:  .LBB20_35: # %cond.store
+; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; RV32-NEXT:    vmv.x.s a2, v8
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 0(a0)
+; RV32-NEXT:    andi a2, a1, 2
+; RV32-NEXT:    beqz a2, .LBB20_2
+; RV32-NEXT:  .LBB20_36: # %cond.store1
+; RV32-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT:    vslidedown.vi v12, v8, 1
+; RV32-NEXT:    vmv.x.s a2, v12
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 2(a0)
+; RV32-NEXT:    andi a2, a1, 4
+; RV32-NEXT:    beqz a2, .LBB20_3
+; RV32-NEXT:  .LBB20_37: # %cond.store3
+; RV32-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT:    vslidedown.vi v12, v8, 2
+; RV32-NEXT:    vmv.x.s a2, v12
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 4(a0)
+; RV32-NEXT:    andi a2, a1, 8
+; RV32-NEXT:    beqz a2, .LBB20_4
+; RV32-NEXT:  .LBB20_38: # %cond.store5
+; RV32-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT:    vslidedown.vi v12, v8, 3
+; RV32-NEXT:    vmv.x.s a2, v12
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 6(a0)
+; RV32-NEXT:    andi a2, a1, 16
+; RV32-NEXT:    beqz a2, .LBB20_5
+; RV32-NEXT:  .LBB20_39: # %cond.store7
+; RV32-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT:    vslidedown.vi v12, v8, 4
+; RV32-NEXT:    vmv.x.s a2, v12
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 8(a0)
+; RV32-NEXT:    andi a2, a1, 32
+; RV32-NEXT:    beqz a2, .LBB20_6
+; RV32-NEXT:  .LBB20_40: # %cond.store9
+; RV32-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT:    vslidedown.vi v12, v8, 5
+; RV32-NEXT:    vmv.x.s a2, v12
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 10(a0)
+; RV32-NEXT:    andi a2, a1, 64
+; RV32-NEXT:    beqz a2, .LBB20_7
+; RV32-NEXT:  .LBB20_41: # %cond.store11
+; RV32-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT:    vslidedown.vi v12, v8, 6
+; RV32-NEXT:    vmv.x.s a2, v12
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 12(a0)
+; RV32-NEXT:    andi a2, a1, 128
+; RV32-NEXT:    beqz a2, .LBB20_8
+; RV32-NEXT:  .LBB20_42: # %cond.store13
+; RV32-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT:    vslidedown.vi v12, v8, 7
+; RV32-NEXT:    vmv.x.s a2, v12
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 14(a0)
+; RV32-NEXT:    andi a2, a1, 256
+; RV32-NEXT:    beqz a2, .LBB20_9
+; RV32-NEXT:  .LBB20_43: # %cond.store15
+; RV32-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-NEXT:    vslidedown.vi v12, v8, 8
+; RV32-NEXT:    vmv.x.s a2, v12
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 16(a0)
+; RV32-NEXT:    andi a2, a1, 512
+; RV32-NEXT:    beqz a2, .LBB20_10
+; RV32-NEXT:  .LBB20_44: # %cond.store17
+; RV32-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-NEXT:    vslidedown.vi v12, v8, 9
+; RV32-NEXT:    vmv.x.s a2, v12
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 18(a0)
+; RV32-NEXT:    andi a2, a1, 1024
+; RV32-NEXT:    beqz a2, .LBB20_11
+; RV32-NEXT:  .LBB20_45: # %cond.store19
+; RV32-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-NEXT:    vslidedown.vi v12, v8, 10
+; RV32-NEXT:    vmv.x.s a2, v12
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 20(a0)
+; RV32-NEXT:    slli a2, a1, 20
+; RV32-NEXT:    bgez a2, .LBB20_12
+; RV32-NEXT:  .LBB20_46: # %cond.store21
+; RV32-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-NEXT:    vslidedown.vi v12, v8, 11
+; RV32-NEXT:    vmv.x.s a2, v12
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 22(a0)
+; RV32-NEXT:    slli a2, a1, 19
+; RV32-NEXT:    bgez a2, .LBB20_13
+; RV32-NEXT:  .LBB20_47: # %cond.store23
+; RV32-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-NEXT:    vslidedown.vi v12, v8, 12
+; RV32-NEXT:    vmv.x.s a2, v12
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 24(a0)
+; RV32-NEXT:    slli a2, a1, 18
+; RV32-NEXT:    bgez a2, .LBB20_14
+; RV32-NEXT:  .LBB20_48: # %cond.store25
+; RV32-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-NEXT:    vslidedown.vi v12, v8, 13
+; RV32-NEXT:    vmv.x.s a2, v12
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 26(a0)
+; RV32-NEXT:    slli a2, a1, 17
+; RV32-NEXT:    bgez a2, .LBB20_15
+; RV32-NEXT:  .LBB20_49: # %cond.store27
+; RV32-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-NEXT:    vslidedown.vi v12, v8, 14
+; RV32-NEXT:    vmv.x.s a2, v12
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 28(a0)
+; RV32-NEXT:    slli a2, a1, 16
+; RV32-NEXT:    bltz a2, .LBB20_16
+; RV32-NEXT:    j .LBB20_17
+; RV32-NEXT:  .LBB20_50: # %cond.store31
+; RV32-NEXT:    .cfi_restore_state
+; RV32-NEXT:    li a2, 32
+; RV32-NEXT:    addi a3, sp, 1024
+; RV32-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; RV32-NEXT:    vse16.v v8, (a3)
+; RV32-NEXT:    lh a2, 1056(sp)
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 32(a0)
+; RV32-NEXT:    slli a2, a1, 14
+; RV32-NEXT:    bgez a2, .LBB20_19
+; RV32-NEXT:  .LBB20_51: # %cond.store33
+; RV32-NEXT:    li a2, 32
+; RV32-NEXT:    addi a3, sp, 960
+; RV32-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; RV32-NEXT:    vse16.v v8, (a3)
+; RV32-NEXT:    lh a2, 994(sp)
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 34(a0)
+; RV32-NEXT:    slli a2, a1, 13
+; RV32-NEXT:    bgez a2, .LBB20_20
+; RV32-NEXT:  .LBB20_52: # %cond.store35
+; RV32-NEXT:    li a2, 32
+; RV32-NEXT:    addi a3, sp, 896
+; RV32-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; RV32-NEXT:    vse16.v v8, (a3)
+; RV32-NEXT:    lh a2, 932(sp)
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 36(a0)
+; RV32-NEXT:    slli a2, a1, 12
+; RV32-NEXT:    bgez a2, .LBB20_21
+; RV32-NEXT:  .LBB20_53: # %cond.store37
+; RV32-NEXT:    li a2, 32
+; RV32-NEXT:    addi a3, sp, 832
+; RV32-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; RV32-NEXT:    vse16.v v8, (a3)
+; RV32-NEXT:    lh a2, 870(sp)
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 38(a0)
+; RV32-NEXT:    slli a2, a1, 11
+; RV32-NEXT:    bgez a2, .LBB20_22
+; RV32-NEXT:  .LBB20_54: # %cond.store39
+; RV32-NEXT:    li a2, 32
+; RV32-NEXT:    addi a3, sp, 768
+; RV32-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; RV32-NEXT:    vse16.v v8, (a3)
+; RV32-NEXT:    lh a2, 808(sp)
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 40(a0)
+; RV32-NEXT:    slli a2, a1, 10
+; RV32-NEXT:    bgez a2, .LBB20_23
+; RV32-NEXT:  .LBB20_55: # %cond.store41
+; RV32-NEXT:    li a2, 32
+; RV32-NEXT:    addi a3, sp, 704
+; RV32-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; RV32-NEXT:    vse16.v v8, (a3)
+; RV32-NEXT:    lh a2, 746(sp)
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 42(a0)
+; RV32-NEXT:    slli a2, a1, 9
+; RV32-NEXT:    bgez a2, .LBB20_24
+; RV32-NEXT:  .LBB20_56: # %cond.store43
+; RV32-NEXT:    li a2, 32
+; RV32-NEXT:    addi a3, sp, 640
+; RV32-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; RV32-NEXT:    vse16.v v8, (a3)
+; RV32-NEXT:    lh a2, 684(sp)
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 44(a0)
+; RV32-NEXT:    slli a2, a1, 8
+; RV32-NEXT:    bgez a2, .LBB20_25
+; RV32-NEXT:  .LBB20_57: # %cond.store45
+; RV32-NEXT:    li a2, 32
+; RV32-NEXT:    addi a3, sp, 576
+; RV32-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; RV32-NEXT:    vse16.v v8, (a3)
+; RV32-NEXT:    lh a2, 622(sp)
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 46(a0)
+; RV32-NEXT:    slli a2, a1, 7
+; RV32-NEXT:    bgez a2, .LBB20_26
+; RV32-NEXT:  .LBB20_58: # %cond.store47
+; RV32-NEXT:    li a2, 32
+; RV32-NEXT:    addi a3, sp, 512
+; RV32-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; RV32-NEXT:    vse16.v v8, (a3)
+; RV32-NEXT:    lh a2, 560(sp)
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 48(a0)
+; RV32-NEXT:    slli a2, a1, 6
+; RV32-NEXT:    bgez a2, .LBB20_27
+; RV32-NEXT:  .LBB20_59: # %cond.store49
+; RV32-NEXT:    li a2, 32
+; RV32-NEXT:    addi a3, sp, 448
+; RV32-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; RV32-NEXT:    vse16.v v8, (a3)
+; RV32-NEXT:    lh a2, 498(sp)
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 50(a0)
+; RV32-NEXT:    slli a2, a1, 5
+; RV32-NEXT:    bgez a2, .LBB20_28
+; RV32-NEXT:  .LBB20_60: # %cond.store51
+; RV32-NEXT:    li a2, 32
+; RV32-NEXT:    addi a3, sp, 384
+; RV32-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; RV32-NEXT:    vse16.v v8, (a3)
+; RV32-NEXT:    lh a2, 436(sp)
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 52(a0)
+; RV32-NEXT:    slli a2, a1, 4
+; RV32-NEXT:    bgez a2, .LBB20_29
+; RV32-NEXT:  .LBB20_61: # %cond.store53
+; RV32-NEXT:    li a2, 32
+; RV32-NEXT:    addi a3, sp, 320
+; RV32-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; RV32-NEXT:    vse16.v v8, (a3)
+; RV32-NEXT:    lh a2, 374(sp)
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 54(a0)
+; RV32-NEXT:    slli a2, a1, 3
+; RV32-NEXT:    bgez a2, .LBB20_30
+; RV32-NEXT:  .LBB20_62: # %cond.store55
+; RV32-NEXT:    li a2, 32
+; RV32-NEXT:    addi a3, sp, 256
+; RV32-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; RV32-NEXT:    vse16.v v8, (a3)
+; RV32-NEXT:    lh a2, 312(sp)
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 56(a0)
+; RV32-NEXT:    slli a2, a1, 2
+; RV32-NEXT:    bgez a2, .LBB20_31
+; RV32-NEXT:  .LBB20_63: # %cond.store57
+; RV32-NEXT:    li a2, 32
+; RV32-NEXT:    addi a3, sp, 192
+; RV32-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; RV32-NEXT:    vse16.v v8, (a3)
+; RV32-NEXT:    lh a2, 250(sp)
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 58(a0)
+; RV32-NEXT:    slli a2, a1, 1
+; RV32-NEXT:    bgez a2, .LBB20_32
+; RV32-NEXT:  .LBB20_64: # %cond.store59
+; RV32-NEXT:    li a2, 32
+; RV32-NEXT:    addi a3, sp, 128
+; RV32-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; RV32-NEXT:    vse16.v v8, (a3)
+; RV32-NEXT:    lh a2, 188(sp)
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 60(a0)
+; RV32-NEXT:    bltz a1, .LBB20_33
+; RV32-NEXT:    j .LBB20_34
 ;
-; RV64-LABEL: masked_store_v16f64:
+; RV64-LABEL: masked_store_v32bf16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT:    vle64.v v8, (a2)
-; RV64-NEXT:    vle64.v v16, (a0)
-; RV64-NEXT:    fmv.d.x fa5, zero
-; RV64-NEXT:    vmfeq.vf v0, v8, fa5
-; RV64-NEXT:    vse64.v v16, (a1), v0.t
+; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
+; RV64-NEXT:    vmv.x.s a1, v0
+; RV64-NEXT:    andi a2, a1, 1
+; RV64-NEXT:    bnez a2, .LBB20_36
+; RV64-NEXT:  # %bb.1: # %else
+; RV64-NEXT:    andi a2, a1, 2
+; RV64-NEXT:    bnez a2, .LBB20_37
+; RV64-NEXT:  .LBB20_2: # %else2
+; RV64-NEXT:    andi a2, a1, 4
+; RV64-NEXT:    bnez a2, .LBB20_38
+; RV64-NEXT:  .LBB20_3: # %else4
+; RV64-NEXT:    andi a2, a1, 8
+; RV64-NEXT:    bnez a2, .LBB20_39
+; RV64-NEXT:  .LBB20_4: # %else6
+; RV64-NEXT:    andi a2, a1, 16
+; RV64-NEXT:    bnez a2, .LBB20_40
+; RV64-NEXT:  .LBB20_5: # %else8
+; RV64-NEXT:    andi a2, a1, 32
+; RV64-NEXT:    bnez a2, .LBB20_41
+; RV64-NEXT:  .LBB20_6: # %else10
+; RV64-NEXT:    andi a2, a1, 64
+; RV64-NEXT:    bnez a2, .LBB20_42
+; RV64-NEXT:  .LBB20_7: # %else12
+; RV64-NEXT:    andi a2, a1, 128
+; RV64-NEXT:    bnez a2, .LBB20_43
+; RV64-NEXT:  .LBB20_8: # %else14
+; RV64-NEXT:    andi a2, a1, 256
+; RV64-NEXT:    bnez a2, .LBB20_44
+; RV64-NEXT:  .LBB20_9: # %else16
+; RV64-NEXT:    andi a2, a1, 512
+; RV64-NEXT:    bnez a2, .LBB20_45
+; RV64-NEXT:  .LBB20_10: # %else18
+; RV64-NEXT:    andi a2, a1, 1024
+; RV64-NEXT:    bnez a2, .LBB20_46
+; RV64-NEXT:  .LBB20_11: # %else20
+; RV64-NEXT:    slli a2, a1, 52
+; RV64-NEXT:    bltz a2, .LBB20_47
+; RV64-NEXT:  .LBB20_12: # %else22
+; RV64-NEXT:    slli a2, a1, 51
+; RV64-NEXT:    bltz a2, .LBB20_48
+; RV64-NEXT:  .LBB20_13: # %else24
+; RV64-NEXT:    slli a2, a1, 50
+; RV64-NEXT:    bltz a2, .LBB20_49
+; RV64-NEXT:  .LBB20_14: # %else26
+; RV64-NEXT:    slli a2, a1, 49
+; RV64-NEXT:    bltz a2, .LBB20_50
+; RV64-NEXT:  .LBB20_15: # %else28
+; RV64-NEXT:    slli a2, a1, 48
+; RV64-NEXT:    bgez a2, .LBB20_17
+; RV64-NEXT:  .LBB20_16: # %cond.store29
+; RV64-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-NEXT:    vslidedown.vi v12, v8, 15
+; RV64-NEXT:    vmv.x.s a2, v12
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 30(a0)
+; RV64-NEXT:  .LBB20_17: # %else30
+; RV64-NEXT:    addi sp, sp, -1152
+; RV64-NEXT:    .cfi_def_cfa_offset 1152
+; RV64-NEXT:    sd ra, 1144(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s0, 1136(sp) # 8-byte Folded Spill
+; RV64-NEXT:    .cfi_offset ra, -8
+; RV64-NEXT:    .cfi_offset s0, -16
+; RV64-NEXT:    addi s0, sp, 1152
+; RV64-NEXT:    .cfi_def_cfa s0, 0
+; RV64-NEXT:    .cfi_remember_state
+; RV64-NEXT:    andi sp, sp, -64
+; RV64-NEXT:    slli a2, a1, 47
+; RV64-NEXT:    bltz a2, .LBB20_51
+; RV64-NEXT:  # %bb.18: # %else32
+; RV64-NEXT:    slli a2, a1, 46
+; RV64-NEXT:    bltz a2, .LBB20_52
+; RV64-NEXT:  .LBB20_19: # %else34
+; RV64-NEXT:    slli a2, a1, 45
+; RV64-NEXT:    bltz a2, .LBB20_53
+; RV64-NEXT:  .LBB20_20: # %else36
+; RV64-NEXT:    slli a2, a1, 44
+; RV64-NEXT:    bltz a2, .LBB20_54
+; RV64-NEXT:  .LBB20_21: # %else38
+; RV64-NEXT:    slli a2, a1, 43
+; RV64-NEXT:    bltz a2, .LBB20_55
+; RV64-NEXT:  .LBB20_22: # %else40
+; RV64-NEXT:    slli a2, a1, 42
+; RV64-NEXT:    bltz a2, .LBB20_56
+; RV64-NEXT:  .LBB20_23: # %else42
+; RV64-NEXT:    slli a2, a1, 41
+; RV64-NEXT:    bltz a2, .LBB20_57
+; RV64-NEXT:  .LBB20_24: # %else44
+; RV64-NEXT:    slli a2, a1, 40
+; RV64-NEXT:    bltz a2, .LBB20_58
+; RV64-NEXT:  .LBB20_25: # %else46
+; RV64-NEXT:    slli a2, a1, 39
+; RV64-NEXT:    bltz a2, .LBB20_59
+; RV64-NEXT:  .LBB20_26: # %else48
+; RV64-NEXT:    slli a2, a1, 38
+; RV64-NEXT:    bltz a2, .LBB20_60
+; RV64-NEXT:  .LBB20_27: # %else50
+; RV64-NEXT:    slli a2, a1, 37
+; RV64-NEXT:    bltz a2, .LBB20_61
+; RV64-NEXT:  .LBB20_28: # %else52
+; RV64-NEXT:    slli a2, a1, 36
+; RV64-NEXT:    bltz a2, .LBB20_62
+; RV64-NEXT:  .LBB20_29: # %else54
+; RV64-NEXT:    slli a2, a1, 35
+; RV64-NEXT:    bltz a2, .LBB20_63
+; RV64-NEXT:  .LBB20_30: # %else56
+; RV64-NEXT:    slli a2, a1, 34
+; RV64-NEXT:    bltz a2, .LBB20_64
+; RV64-NEXT:  .LBB20_31: # %else58
+; RV64-NEXT:    slli a2, a1, 33
+; RV64-NEXT:    bgez a2, .LBB20_33
+; RV64-NEXT:  .LBB20_32: # %cond.store59
+; RV64-NEXT:    li a2, 32
+; RV64-NEXT:    addi a3, sp, 128
+; RV64-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; RV64-NEXT:    vse16.v v8, (a3)
+; RV64-NEXT:    lh a2, 188(sp)
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 60(a0)
+; RV64-NEXT:  .LBB20_33: # %else60
+; RV64-NEXT:    lui a2, 524288
+; RV64-NEXT:    and a1, a1, a2
+; RV64-NEXT:    beqz a1, .LBB20_35
+; RV64-NEXT:  # %bb.34: # %cond.store61
+; RV64-NEXT:    li a1, 32
+; RV64-NEXT:    addi a2, sp, 64
+; RV64-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
+; RV64-NEXT:    vse16.v v8, (a2)
+; RV64-NEXT:    lh a1, 126(sp)
+; RV64-NEXT:    fmv.h.x fa5, a1
+; RV64-NEXT:    fsh fa5, 62(a0)
+; RV64-NEXT:  .LBB20_35: # %else62
+; RV64-NEXT:    addi sp, s0, -1152
+; RV64-NEXT:    .cfi_def_cfa sp, 1152
+; RV64-NEXT:    ld ra, 1144(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s0, 1136(sp) # 8-byte Folded Reload
+; RV64-NEXT:    .cfi_restore ra
+; RV64-NEXT:    .cfi_restore s0
+; RV64-NEXT:    addi sp, sp, 1152
+; RV64-NEXT:    .cfi_def_cfa_offset 0
 ; RV64-NEXT:    ret
-  %m = load <16 x double>, ptr %m_ptr
-  %mask = fcmp oeq <16 x double> %m, zeroinitializer
-  %val = load <16 x double>, ptr %val_ptr
-  call void @llvm.masked.store.v16f64.p0v16f64(<16 x double> %val, ptr %a, i32 8, <16 x i1> %mask)
+; RV64-NEXT:  .LBB20_36: # %cond.store
+; RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; RV64-NEXT:    vmv.x.s a2, v8
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 0(a0)
+; RV64-NEXT:    andi a2, a1, 2
+; RV64-NEXT:    beqz a2, .LBB20_2
+; RV64-NEXT:  .LBB20_37: # %cond.store1
+; RV64-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT:    vslidedown.vi v12, v8, 1
+; RV64-NEXT:    vmv.x.s a2, v12
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 2(a0)
+; RV64-NEXT:    andi a2, a1, 4
+; RV64-NEXT:    beqz a2, .LBB20_3
+; RV64-NEXT:  .LBB20_38: # %cond.store3
+; RV64-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT:    vslidedown.vi v12, v8, 2
+; RV64-NEXT:    vmv.x.s a2, v12
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 4(a0)
+; RV64-NEXT:    andi a2, a1, 8
+; RV64-NEXT:    beqz a2, .LBB20_4
+; RV64-NEXT:  .LBB20_39: # %cond.store5
+; RV64-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT:    vslidedown.vi v12, v8, 3
+; RV64-NEXT:    vmv.x.s a2, v12
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 6(a0)
+; RV64-NEXT:    andi a2, a1, 16
+; RV64-NEXT:    beqz a2, .LBB20_5
+; RV64-NEXT:  .LBB20_40: # %cond.store7
+; RV64-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT:    vslidedown.vi v12, v8, 4
+; RV64-NEXT:    vmv.x.s a2, v12
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 8(a0)
+; RV64-NEXT:    andi a2, a1, 32
+; RV64-NEXT:    beqz a2, .LBB20_6
+; RV64-NEXT:  .LBB20_41: # %cond.store9
+; RV64-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT:    vslidedown.vi v12, v8, 5
+; RV64-NEXT:    vmv.x.s a2, v12
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 10(a0)
+; RV64-NEXT:    andi a2, a1, 64
+; RV64-NEXT:    beqz a2, .LBB20_7
+; RV64-NEXT:  .LBB20_42: # %cond.store11
+; RV64-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT:    vslidedown.vi v12, v8, 6
+; RV64-NEXT:    vmv.x.s a2, v12
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 12(a0)
+; RV64-NEXT:    andi a2, a1, 128
+; RV64-NEXT:    beqz a2, .LBB20_8
+; RV64-NEXT:  .LBB20_43: # %cond.store13
+; RV64-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT:    vslidedown.vi v12, v8, 7
+; RV64-NEXT:    vmv.x.s a2, v12
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 14(a0)
+; RV64-NEXT:    andi a2, a1, 256
+; RV64-NEXT:    beqz a2, .LBB20_9
+; RV64-NEXT:  .LBB20_44: # %cond.store15
+; RV64-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-NEXT:    vslidedown.vi v12, v8, 8
+; RV64-NEXT:    vmv.x.s a2, v12
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 16(a0)
+; RV64-NEXT:    andi a2, a1, 512
+; RV64-NEXT:    beqz a2, .LBB20_10
+; RV64-NEXT:  .LBB20_45: # %cond.store17
+; RV64-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-NEXT:    vslidedown.vi v12, v8, 9
+; RV64-NEXT:    vmv.x.s a2, v12
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 18(a0)
+; RV64-NEXT:    andi a2, a1, 1024
+; RV64-NEXT:    beqz a2, .LBB20_11
+; RV64-NEXT:  .LBB20_46: # %cond.store19
+; RV64-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-NEXT:    vslidedown.vi v12, v8, 10
+; RV64-NEXT:    vmv.x.s a2, v12
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 20(a0)
+; RV64-NEXT:    slli a2, a1, 52
+; RV64-NEXT:    bgez a2, .LBB20_12
+; RV64-NEXT:  .LBB20_47: # %cond.store21
+; RV64-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-NEXT:    vslidedown.vi v12, v8, 11
+; RV64-NEXT:    vmv.x.s a2, v12
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 22(a0)
+; RV64-NEXT:    slli a2, a1, 51
+; RV64-NEXT:    bgez a2, .LBB20_13
+; RV64-NEXT:  .LBB20_48: # %cond.store23
+; RV64-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-NEXT:    vslidedown.vi v12, v8, 12
+; RV64-NEXT:    vmv.x.s a2, v12
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 24(a0)
+; RV64-NEXT:    slli a2, a1, 50
+; RV64-NEXT:    bgez a2, .LBB20_14
+; RV64-NEXT:  .LBB20_49: # %cond.store25
+; RV64-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-NEXT:    vslidedown.vi v12, v8, 13
+; RV64-NEXT:    vmv.x.s a2, v12
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 26(a0)
+; RV64-NEXT:    slli a2, a1, 49
+; RV64-NEXT:    bgez a2, .LBB20_15
+; RV64-NEXT:  .LBB20_50: # %cond.store27
+; RV64-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-NEXT:    vslidedown.vi v12, v8, 14
+; RV64-NEXT:    vmv.x.s a2, v12
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 28(a0)
+; RV64-NEXT:    slli a2, a1, 48
+; RV64-NEXT:    bltz a2, .LBB20_16
+; RV64-NEXT:    j .LBB20_17
+; RV64-NEXT:  .LBB20_51: # %cond.store31
+; RV64-NEXT:    .cfi_restore_state
+; RV64-NEXT:    li a2, 32
+; RV64-NEXT:    addi a3, sp, 1024
+; RV64-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; RV64-NEXT:    vse16.v v8, (a3)
+; RV64-NEXT:    lh a2, 1056(sp)
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 32(a0)
+; RV64-NEXT:    slli a2, a1, 46
+; RV64-NEXT:    bgez a2, .LBB20_19
+; RV64-NEXT:  .LBB20_52: # %cond.store33
+; RV64-NEXT:    li a2, 32
+; RV64-NEXT:    addi a3, sp, 960
+; RV64-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; RV64-NEXT:    vse16.v v8, (a3)
+; RV64-NEXT:    lh a2, 994(sp)
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 34(a0)
+; RV64-NEXT:    slli a2, a1, 45
+; RV64-NEXT:    bgez a2, .LBB20_20
+; RV64-NEXT:  .LBB20_53: # %cond.store35
+; RV64-NEXT:    li a2, 32
+; RV64-NEXT:    addi a3, sp, 896
+; RV64-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; RV64-NEXT:    vse16.v v8, (a3)
+; RV64-NEXT:    lh a2, 932(sp)
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 36(a0)
+; RV64-NEXT:    slli a2, a1, 44
+; RV64-NEXT:    bgez a2, .LBB20_21
+; RV64-NEXT:  .LBB20_54: # %cond.store37
+; RV64-NEXT:    li a2, 32
+; RV64-NEXT:    addi a3, sp, 832
+; RV64-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; RV64-NEXT:    vse16.v v8, (a3)
+; RV64-NEXT:    lh a2, 870(sp)
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 38(a0)
+; RV64-NEXT:    slli a2, a1, 43
+; RV64-NEXT:    bgez a2, .LBB20_22
+; RV64-NEXT:  .LBB20_55: # %cond.store39
+; RV64-NEXT:    li a2, 32
+; RV64-NEXT:    addi a3, sp, 768
+; RV64-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; RV64-NEXT:    vse16.v v8, (a3)
+; RV64-NEXT:    lh a2, 808(sp)
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 40(a0)
+; RV64-NEXT:    slli a2, a1, 42
+; RV64-NEXT:    bgez a2, .LBB20_23
+; RV64-NEXT:  .LBB20_56: # %cond.store41
+; RV64-NEXT:    li a2, 32
+; RV64-NEXT:    addi a3, sp, 704
+; RV64-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; RV64-NEXT:    vse16.v v8, (a3)
+; RV64-NEXT:    lh a2, 746(sp)
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 42(a0)
+; RV64-NEXT:    slli a2, a1, 41
+; RV64-NEXT:    bgez a2, .LBB20_24
+; RV64-NEXT:  .LBB20_57: # %cond.store43
+; RV64-NEXT:    li a2, 32
+; RV64-NEXT:    addi a3, sp, 640
+; RV64-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; RV64-NEXT:    vse16.v v8, (a3)
+; RV64-NEXT:    lh a2, 684(sp)
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 44(a0)
+; RV64-NEXT:    slli a2, a1, 40
+; RV64-NEXT:    bgez a2, .LBB20_25
+; RV64-NEXT:  .LBB20_58: # %cond.store45
+; RV64-NEXT:    li a2, 32
+; RV64-NEXT:    addi a3, sp, 576
+; RV64-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; RV64-NEXT:    vse16.v v8, (a3)
+; RV64-NEXT:    lh a2, 622(sp)
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 46(a0)
+; RV64-NEXT:    slli a2, a1, 39
+; RV64-NEXT:    bgez a2, .LBB20_26
+; RV64-NEXT:  .LBB20_59: # %cond.store47
+; RV64-NEXT:    li a2, 32
+; RV64-NEXT:    addi a3, sp, 512
+; RV64-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; RV64-NEXT:    vse16.v v8, (a3)
+; RV64-NEXT:    lh a2, 560(sp)
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 48(a0)
+; RV64-NEXT:    slli a2, a1, 38
+; RV64-NEXT:    bgez a2, .LBB20_27
+; RV64-NEXT:  .LBB20_60: # %cond.store49
+; RV64-NEXT:    li a2, 32
+; RV64-NEXT:    addi a3, sp, 448
+; RV64-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; RV64-NEXT:    vse16.v v8, (a3)
+; RV64-NEXT:    lh a2, 498(sp)
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 50(a0)
+; RV64-NEXT:    slli a2, a1, 37
+; RV64-NEXT:    bgez a2, .LBB20_28
+; RV64-NEXT:  .LBB20_61: # %cond.store51
+; RV64-NEXT:    li a2, 32
+; RV64-NEXT:    addi a3, sp, 384
+; RV64-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; RV64-NEXT:    vse16.v v8, (a3)
+; RV64-NEXT:    lh a2, 436(sp)
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 52(a0)
+; RV64-NEXT:    slli a2, a1, 36
+; RV64-NEXT:    bgez a2, .LBB20_29
+; RV64-NEXT:  .LBB20_62: # %cond.store53
+; RV64-NEXT:    li a2, 32
+; RV64-NEXT:    addi a3, sp, 320
+; RV64-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; RV64-NEXT:    vse16.v v8, (a3)
+; RV64-NEXT:    lh a2, 374(sp)
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 54(a0)
+; RV64-NEXT:    slli a2, a1, 35
+; RV64-NEXT:    bgez a2, .LBB20_30
+; RV64-NEXT:  .LBB20_63: # %cond.store55
+; RV64-NEXT:    li a2, 32
+; RV64-NEXT:    addi a3, sp, 256
+; RV64-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; RV64-NEXT:    vse16.v v8, (a3)
+; RV64-NEXT:    lh a2, 312(sp)
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 56(a0)
+; RV64-NEXT:    slli a2, a1, 34
+; RV64-NEXT:    bgez a2, .LBB20_31
+; RV64-NEXT:  .LBB20_64: # %cond.store57
+; RV64-NEXT:    li a2, 32
+; RV64-NEXT:    addi a3, sp, 192
+; RV64-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; RV64-NEXT:    vse16.v v8, (a3)
+; RV64-NEXT:    lh a2, 250(sp)
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 58(a0)
+; RV64-NEXT:    slli a2, a1, 33
+; RV64-NEXT:    bltz a2, .LBB20_32
+; RV64-NEXT:    j .LBB20_33
+  call void @llvm.masked.store.v32bf16.p0(<32 x bfloat> %val, ptr %a, i32 8, <32 x i1> %mask)
   ret void
 }
-declare void @llvm.masked.store.v16f64.p0v16f64(<16 x double>, ptr, i32, <16 x i1>)
 
-define void @masked_store_v32f16(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
-; CHECK-LABEL: masked_store_v32f16:
+define void @masked_store_v32f16(<32 x half> %val, ptr %a, <32 x i1> %mask) {
+; ZVFH-LABEL: masked_store_v32f16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    li a1, 32
+; ZVFH-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
+; ZVFH-NEXT:    vse16.v v8, (a0), v0.t
+; ZVFH-NEXT:    ret
+;
+; RV32-ZVFHMIN-LABEL: masked_store_v32f16:
+; RV32-ZVFHMIN:       # %bb.0:
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a1, v0
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 1
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB21_35
+; RV32-ZVFHMIN-NEXT:  # %bb.1: # %else
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 2
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB21_36
+; RV32-ZVFHMIN-NEXT:  .LBB21_2: # %else2
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 4
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB21_37
+; RV32-ZVFHMIN-NEXT:  .LBB21_3: # %else4
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 8
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB21_38
+; RV32-ZVFHMIN-NEXT:  .LBB21_4: # %else6
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 16
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB21_39
+; RV32-ZVFHMIN-NEXT:  .LBB21_5: # %else8
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 32
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB21_40
+; RV32-ZVFHMIN-NEXT:  .LBB21_6: # %else10
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 64
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB21_41
+; RV32-ZVFHMIN-NEXT:  .LBB21_7: # %else12
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 128
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB21_42
+; RV32-ZVFHMIN-NEXT:  .LBB21_8: # %else14
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 256
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB21_43
+; RV32-ZVFHMIN-NEXT:  .LBB21_9: # %else16
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 512
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB21_44
+; RV32-ZVFHMIN-NEXT:  .LBB21_10: # %else18
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 1024
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB21_45
+; RV32-ZVFHMIN-NEXT:  .LBB21_11: # %else20
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 20
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB21_46
+; RV32-ZVFHMIN-NEXT:  .LBB21_12: # %else22
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 19
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB21_47
+; RV32-ZVFHMIN-NEXT:  .LBB21_13: # %else24
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 18
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB21_48
+; RV32-ZVFHMIN-NEXT:  .LBB21_14: # %else26
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 17
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB21_49
+; RV32-ZVFHMIN-NEXT:  .LBB21_15: # %else28
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 16
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB21_17
+; RV32-ZVFHMIN-NEXT:  .LBB21_16: # %cond.store29
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v12, v8, 15
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a2, v12
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 30(a0)
+; RV32-ZVFHMIN-NEXT:  .LBB21_17: # %else30
+; RV32-ZVFHMIN-NEXT:    addi sp, sp, -1152
+; RV32-ZVFHMIN-NEXT:    .cfi_def_cfa_offset 1152
+; RV32-ZVFHMIN-NEXT:    sw ra, 1148(sp) # 4-byte Folded Spill
+; RV32-ZVFHMIN-NEXT:    sw s0, 1144(sp) # 4-byte Folded Spill
+; RV32-ZVFHMIN-NEXT:    .cfi_offset ra, -4
+; RV32-ZVFHMIN-NEXT:    .cfi_offset s0, -8
+; RV32-ZVFHMIN-NEXT:    addi s0, sp, 1152
+; RV32-ZVFHMIN-NEXT:    .cfi_def_cfa s0, 0
+; RV32-ZVFHMIN-NEXT:    .cfi_remember_state
+; RV32-ZVFHMIN-NEXT:    andi sp, sp, -64
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 15
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB21_50
+; RV32-ZVFHMIN-NEXT:  # %bb.18: # %else32
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 14
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB21_51
+; RV32-ZVFHMIN-NEXT:  .LBB21_19: # %else34
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 13
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB21_52
+; RV32-ZVFHMIN-NEXT:  .LBB21_20: # %else36
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 12
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB21_53
+; RV32-ZVFHMIN-NEXT:  .LBB21_21: # %else38
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 11
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB21_54
+; RV32-ZVFHMIN-NEXT:  .LBB21_22: # %else40
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 10
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB21_55
+; RV32-ZVFHMIN-NEXT:  .LBB21_23: # %else42
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 9
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB21_56
+; RV32-ZVFHMIN-NEXT:  .LBB21_24: # %else44
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 8
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB21_57
+; RV32-ZVFHMIN-NEXT:  .LBB21_25: # %else46
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 7
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB21_58
+; RV32-ZVFHMIN-NEXT:  .LBB21_26: # %else48
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 6
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB21_59
+; RV32-ZVFHMIN-NEXT:  .LBB21_27: # %else50
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 5
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB21_60
+; RV32-ZVFHMIN-NEXT:  .LBB21_28: # %else52
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 4
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB21_61
+; RV32-ZVFHMIN-NEXT:  .LBB21_29: # %else54
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 3
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB21_62
+; RV32-ZVFHMIN-NEXT:  .LBB21_30: # %else56
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 2
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB21_63
+; RV32-ZVFHMIN-NEXT:  .LBB21_31: # %else58
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 1
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB21_64
+; RV32-ZVFHMIN-NEXT:  .LBB21_32: # %else60
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB21_34
+; RV32-ZVFHMIN-NEXT:  .LBB21_33: # %cond.store61
+; RV32-ZVFHMIN-NEXT:    li a1, 32
+; RV32-ZVFHMIN-NEXT:    addi a2, sp, 64
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a2)
+; RV32-ZVFHMIN-NEXT:    lh a1, 126(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 62(a0)
+; RV32-ZVFHMIN-NEXT:  .LBB21_34: # %else62
+; RV32-ZVFHMIN-NEXT:    addi sp, s0, -1152
+; RV32-ZVFHMIN-NEXT:    .cfi_def_cfa sp, 1152
+; RV32-ZVFHMIN-NEXT:    lw ra, 1148(sp) # 4-byte Folded Reload
+; RV32-ZVFHMIN-NEXT:    lw s0, 1144(sp) # 4-byte Folded Reload
+; RV32-ZVFHMIN-NEXT:    .cfi_restore ra
+; RV32-ZVFHMIN-NEXT:    .cfi_restore s0
+; RV32-ZVFHMIN-NEXT:    addi sp, sp, 1152
+; RV32-ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
+; RV32-ZVFHMIN-NEXT:    ret
+; RV32-ZVFHMIN-NEXT:  .LBB21_35: # %cond.store
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a2, v8
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 0(a0)
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 2
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB21_2
+; RV32-ZVFHMIN-NEXT:  .LBB21_36: # %cond.store1
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v12, v8, 1
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a2, v12
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 2(a0)
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 4
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB21_3
+; RV32-ZVFHMIN-NEXT:  .LBB21_37: # %cond.store3
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v12, v8, 2
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a2, v12
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 4(a0)
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 8
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB21_4
+; RV32-ZVFHMIN-NEXT:  .LBB21_38: # %cond.store5
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v12, v8, 3
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a2, v12
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 6(a0)
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 16
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB21_5
+; RV32-ZVFHMIN-NEXT:  .LBB21_39: # %cond.store7
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v12, v8, 4
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a2, v12
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 8(a0)
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 32
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB21_6
+; RV32-ZVFHMIN-NEXT:  .LBB21_40: # %cond.store9
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v12, v8, 5
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a2, v12
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 10(a0)
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 64
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB21_7
+; RV32-ZVFHMIN-NEXT:  .LBB21_41: # %cond.store11
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v12, v8, 6
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a2, v12
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 12(a0)
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 128
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB21_8
+; RV32-ZVFHMIN-NEXT:  .LBB21_42: # %cond.store13
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v12, v8, 7
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a2, v12
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 14(a0)
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 256
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB21_9
+; RV32-ZVFHMIN-NEXT:  .LBB21_43: # %cond.store15
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v12, v8, 8
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a2, v12
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 16(a0)
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 512
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB21_10
+; RV32-ZVFHMIN-NEXT:  .LBB21_44: # %cond.store17
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v12, v8, 9
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a2, v12
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 18(a0)
+; RV32-ZVFHMIN-NEXT:    andi a2, a1, 1024
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB21_11
+; RV32-ZVFHMIN-NEXT:  .LBB21_45: # %cond.store19
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v12, v8, 10
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a2, v12
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 20(a0)
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 20
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB21_12
+; RV32-ZVFHMIN-NEXT:  .LBB21_46: # %cond.store21
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v12, v8, 11
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a2, v12
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 22(a0)
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 19
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB21_13
+; RV32-ZVFHMIN-NEXT:  .LBB21_47: # %cond.store23
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v12, v8, 12
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a2, v12
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 24(a0)
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 18
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB21_14
+; RV32-ZVFHMIN-NEXT:  .LBB21_48: # %cond.store25
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v12, v8, 13
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a2, v12
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 26(a0)
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 17
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB21_15
+; RV32-ZVFHMIN-NEXT:  .LBB21_49: # %cond.store27
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v12, v8, 14
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a2, v12
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 28(a0)
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 16
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB21_16
+; RV32-ZVFHMIN-NEXT:    j .LBB21_17
+; RV32-ZVFHMIN-NEXT:  .LBB21_50: # %cond.store31
+; RV32-ZVFHMIN-NEXT:    .cfi_restore_state
+; RV32-ZVFHMIN-NEXT:    li a2, 32
+; RV32-ZVFHMIN-NEXT:    addi a3, sp, 1024
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a3)
+; RV32-ZVFHMIN-NEXT:    lh a2, 1056(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 32(a0)
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 14
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB21_19
+; RV32-ZVFHMIN-NEXT:  .LBB21_51: # %cond.store33
+; RV32-ZVFHMIN-NEXT:    li a2, 32
+; RV32-ZVFHMIN-NEXT:    addi a3, sp, 960
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a3)
+; RV32-ZVFHMIN-NEXT:    lh a2, 994(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 34(a0)
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 13
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB21_20
+; RV32-ZVFHMIN-NEXT:  .LBB21_52: # %cond.store35
+; RV32-ZVFHMIN-NEXT:    li a2, 32
+; RV32-ZVFHMIN-NEXT:    addi a3, sp, 896
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a3)
+; RV32-ZVFHMIN-NEXT:    lh a2, 932(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 36(a0)
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 12
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB21_21
+; RV32-ZVFHMIN-NEXT:  .LBB21_53: # %cond.store37
+; RV32-ZVFHMIN-NEXT:    li a2, 32
+; RV32-ZVFHMIN-NEXT:    addi a3, sp, 832
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a3)
+; RV32-ZVFHMIN-NEXT:    lh a2, 870(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 38(a0)
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 11
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB21_22
+; RV32-ZVFHMIN-NEXT:  .LBB21_54: # %cond.store39
+; RV32-ZVFHMIN-NEXT:    li a2, 32
+; RV32-ZVFHMIN-NEXT:    addi a3, sp, 768
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a3)
+; RV32-ZVFHMIN-NEXT:    lh a2, 808(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 40(a0)
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 10
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB21_23
+; RV32-ZVFHMIN-NEXT:  .LBB21_55: # %cond.store41
+; RV32-ZVFHMIN-NEXT:    li a2, 32
+; RV32-ZVFHMIN-NEXT:    addi a3, sp, 704
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a3)
+; RV32-ZVFHMIN-NEXT:    lh a2, 746(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 42(a0)
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 9
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB21_24
+; RV32-ZVFHMIN-NEXT:  .LBB21_56: # %cond.store43
+; RV32-ZVFHMIN-NEXT:    li a2, 32
+; RV32-ZVFHMIN-NEXT:    addi a3, sp, 640
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a3)
+; RV32-ZVFHMIN-NEXT:    lh a2, 684(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 44(a0)
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 8
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB21_25
+; RV32-ZVFHMIN-NEXT:  .LBB21_57: # %cond.store45
+; RV32-ZVFHMIN-NEXT:    li a2, 32
+; RV32-ZVFHMIN-NEXT:    addi a3, sp, 576
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a3)
+; RV32-ZVFHMIN-NEXT:    lh a2, 622(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 46(a0)
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 7
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB21_26
+; RV32-ZVFHMIN-NEXT:  .LBB21_58: # %cond.store47
+; RV32-ZVFHMIN-NEXT:    li a2, 32
+; RV32-ZVFHMIN-NEXT:    addi a3, sp, 512
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a3)
+; RV32-ZVFHMIN-NEXT:    lh a2, 560(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 48(a0)
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 6
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB21_27
+; RV32-ZVFHMIN-NEXT:  .LBB21_59: # %cond.store49
+; RV32-ZVFHMIN-NEXT:    li a2, 32
+; RV32-ZVFHMIN-NEXT:    addi a3, sp, 448
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a3)
+; RV32-ZVFHMIN-NEXT:    lh a2, 498(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 50(a0)
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 5
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB21_28
+; RV32-ZVFHMIN-NEXT:  .LBB21_60: # %cond.store51
+; RV32-ZVFHMIN-NEXT:    li a2, 32
+; RV32-ZVFHMIN-NEXT:    addi a3, sp, 384
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a3)
+; RV32-ZVFHMIN-NEXT:    lh a2, 436(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 52(a0)
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 4
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB21_29
+; RV32-ZVFHMIN-NEXT:  .LBB21_61: # %cond.store53
+; RV32-ZVFHMIN-NEXT:    li a2, 32
+; RV32-ZVFHMIN-NEXT:    addi a3, sp, 320
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a3)
+; RV32-ZVFHMIN-NEXT:    lh a2, 374(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 54(a0)
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 3
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB21_30
+; RV32-ZVFHMIN-NEXT:  .LBB21_62: # %cond.store55
+; RV32-ZVFHMIN-NEXT:    li a2, 32
+; RV32-ZVFHMIN-NEXT:    addi a3, sp, 256
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a3)
+; RV32-ZVFHMIN-NEXT:    lh a2, 312(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 56(a0)
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 2
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB21_31
+; RV32-ZVFHMIN-NEXT:  .LBB21_63: # %cond.store57
+; RV32-ZVFHMIN-NEXT:    li a2, 32
+; RV32-ZVFHMIN-NEXT:    addi a3, sp, 192
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a3)
+; RV32-ZVFHMIN-NEXT:    lh a2, 250(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 58(a0)
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 1
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB21_32
+; RV32-ZVFHMIN-NEXT:  .LBB21_64: # %cond.store59
+; RV32-ZVFHMIN-NEXT:    li a2, 32
+; RV32-ZVFHMIN-NEXT:    addi a3, sp, 128
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a3)
+; RV32-ZVFHMIN-NEXT:    lh a2, 188(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 60(a0)
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB21_33
+; RV32-ZVFHMIN-NEXT:    j .LBB21_34
+;
+; RV64-ZVFHMIN-LABEL: masked_store_v32f16:
+; RV64-ZVFHMIN:       # %bb.0:
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a1, v0
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 1
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB21_36
+; RV64-ZVFHMIN-NEXT:  # %bb.1: # %else
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 2
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB21_37
+; RV64-ZVFHMIN-NEXT:  .LBB21_2: # %else2
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 4
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB21_38
+; RV64-ZVFHMIN-NEXT:  .LBB21_3: # %else4
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 8
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB21_39
+; RV64-ZVFHMIN-NEXT:  .LBB21_4: # %else6
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 16
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB21_40
+; RV64-ZVFHMIN-NEXT:  .LBB21_5: # %else8
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 32
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB21_41
+; RV64-ZVFHMIN-NEXT:  .LBB21_6: # %else10
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 64
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB21_42
+; RV64-ZVFHMIN-NEXT:  .LBB21_7: # %else12
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 128
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB21_43
+; RV64-ZVFHMIN-NEXT:  .LBB21_8: # %else14
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 256
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB21_44
+; RV64-ZVFHMIN-NEXT:  .LBB21_9: # %else16
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 512
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB21_45
+; RV64-ZVFHMIN-NEXT:  .LBB21_10: # %else18
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 1024
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB21_46
+; RV64-ZVFHMIN-NEXT:  .LBB21_11: # %else20
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 52
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB21_47
+; RV64-ZVFHMIN-NEXT:  .LBB21_12: # %else22
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 51
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB21_48
+; RV64-ZVFHMIN-NEXT:  .LBB21_13: # %else24
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 50
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB21_49
+; RV64-ZVFHMIN-NEXT:  .LBB21_14: # %else26
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 49
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB21_50
+; RV64-ZVFHMIN-NEXT:  .LBB21_15: # %else28
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 48
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB21_17
+; RV64-ZVFHMIN-NEXT:  .LBB21_16: # %cond.store29
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v12, v8, 15
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v12
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 30(a0)
+; RV64-ZVFHMIN-NEXT:  .LBB21_17: # %else30
+; RV64-ZVFHMIN-NEXT:    addi sp, sp, -1152
+; RV64-ZVFHMIN-NEXT:    .cfi_def_cfa_offset 1152
+; RV64-ZVFHMIN-NEXT:    sd ra, 1144(sp) # 8-byte Folded Spill
+; RV64-ZVFHMIN-NEXT:    sd s0, 1136(sp) # 8-byte Folded Spill
+; RV64-ZVFHMIN-NEXT:    .cfi_offset ra, -8
+; RV64-ZVFHMIN-NEXT:    .cfi_offset s0, -16
+; RV64-ZVFHMIN-NEXT:    addi s0, sp, 1152
+; RV64-ZVFHMIN-NEXT:    .cfi_def_cfa s0, 0
+; RV64-ZVFHMIN-NEXT:    .cfi_remember_state
+; RV64-ZVFHMIN-NEXT:    andi sp, sp, -64
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 47
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB21_51
+; RV64-ZVFHMIN-NEXT:  # %bb.18: # %else32
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 46
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB21_52
+; RV64-ZVFHMIN-NEXT:  .LBB21_19: # %else34
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 45
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB21_53
+; RV64-ZVFHMIN-NEXT:  .LBB21_20: # %else36
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 44
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB21_54
+; RV64-ZVFHMIN-NEXT:  .LBB21_21: # %else38
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 43
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB21_55
+; RV64-ZVFHMIN-NEXT:  .LBB21_22: # %else40
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 42
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB21_56
+; RV64-ZVFHMIN-NEXT:  .LBB21_23: # %else42
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 41
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB21_57
+; RV64-ZVFHMIN-NEXT:  .LBB21_24: # %else44
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 40
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB21_58
+; RV64-ZVFHMIN-NEXT:  .LBB21_25: # %else46
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 39
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB21_59
+; RV64-ZVFHMIN-NEXT:  .LBB21_26: # %else48
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 38
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB21_60
+; RV64-ZVFHMIN-NEXT:  .LBB21_27: # %else50
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 37
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB21_61
+; RV64-ZVFHMIN-NEXT:  .LBB21_28: # %else52
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 36
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB21_62
+; RV64-ZVFHMIN-NEXT:  .LBB21_29: # %else54
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 35
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB21_63
+; RV64-ZVFHMIN-NEXT:  .LBB21_30: # %else56
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 34
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB21_64
+; RV64-ZVFHMIN-NEXT:  .LBB21_31: # %else58
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 33
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB21_33
+; RV64-ZVFHMIN-NEXT:  .LBB21_32: # %cond.store59
+; RV64-ZVFHMIN-NEXT:    li a2, 32
+; RV64-ZVFHMIN-NEXT:    addi a3, sp, 128
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a3)
+; RV64-ZVFHMIN-NEXT:    lh a2, 188(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 60(a0)
+; RV64-ZVFHMIN-NEXT:  .LBB21_33: # %else60
+; RV64-ZVFHMIN-NEXT:    lui a2, 524288
+; RV64-ZVFHMIN-NEXT:    and a1, a1, a2
+; RV64-ZVFHMIN-NEXT:    beqz a1, .LBB21_35
+; RV64-ZVFHMIN-NEXT:  # %bb.34: # %cond.store61
+; RV64-ZVFHMIN-NEXT:    li a1, 32
+; RV64-ZVFHMIN-NEXT:    addi a2, sp, 64
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a2)
+; RV64-ZVFHMIN-NEXT:    lh a1, 126(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 62(a0)
+; RV64-ZVFHMIN-NEXT:  .LBB21_35: # %else62
+; RV64-ZVFHMIN-NEXT:    addi sp, s0, -1152
+; RV64-ZVFHMIN-NEXT:    .cfi_def_cfa sp, 1152
+; RV64-ZVFHMIN-NEXT:    ld ra, 1144(sp) # 8-byte Folded Reload
+; RV64-ZVFHMIN-NEXT:    ld s0, 1136(sp) # 8-byte Folded Reload
+; RV64-ZVFHMIN-NEXT:    .cfi_restore ra
+; RV64-ZVFHMIN-NEXT:    .cfi_restore s0
+; RV64-ZVFHMIN-NEXT:    addi sp, sp, 1152
+; RV64-ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
+; RV64-ZVFHMIN-NEXT:    ret
+; RV64-ZVFHMIN-NEXT:  .LBB21_36: # %cond.store
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v8
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 0(a0)
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 2
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB21_2
+; RV64-ZVFHMIN-NEXT:  .LBB21_37: # %cond.store1
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v12, v8, 1
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v12
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 2(a0)
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 4
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB21_3
+; RV64-ZVFHMIN-NEXT:  .LBB21_38: # %cond.store3
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v12, v8, 2
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v12
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 4(a0)
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 8
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB21_4
+; RV64-ZVFHMIN-NEXT:  .LBB21_39: # %cond.store5
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v12, v8, 3
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v12
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 6(a0)
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 16
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB21_5
+; RV64-ZVFHMIN-NEXT:  .LBB21_40: # %cond.store7
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v12, v8, 4
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v12
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 8(a0)
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 32
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB21_6
+; RV64-ZVFHMIN-NEXT:  .LBB21_41: # %cond.store9
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v12, v8, 5
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v12
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 10(a0)
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 64
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB21_7
+; RV64-ZVFHMIN-NEXT:  .LBB21_42: # %cond.store11
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v12, v8, 6
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v12
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 12(a0)
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 128
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB21_8
+; RV64-ZVFHMIN-NEXT:  .LBB21_43: # %cond.store13
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v12, v8, 7
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v12
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 14(a0)
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 256
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB21_9
+; RV64-ZVFHMIN-NEXT:  .LBB21_44: # %cond.store15
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v12, v8, 8
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v12
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 16(a0)
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 512
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB21_10
+; RV64-ZVFHMIN-NEXT:  .LBB21_45: # %cond.store17
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v12, v8, 9
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v12
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 18(a0)
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 1024
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB21_11
+; RV64-ZVFHMIN-NEXT:  .LBB21_46: # %cond.store19
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v12, v8, 10
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v12
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 20(a0)
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 52
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB21_12
+; RV64-ZVFHMIN-NEXT:  .LBB21_47: # %cond.store21
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v12, v8, 11
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v12
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 22(a0)
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 51
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB21_13
+; RV64-ZVFHMIN-NEXT:  .LBB21_48: # %cond.store23
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v12, v8, 12
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v12
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 24(a0)
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 50
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB21_14
+; RV64-ZVFHMIN-NEXT:  .LBB21_49: # %cond.store25
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v12, v8, 13
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v12
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 26(a0)
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 49
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB21_15
+; RV64-ZVFHMIN-NEXT:  .LBB21_50: # %cond.store27
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v12, v8, 14
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v12
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 28(a0)
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 48
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB21_16
+; RV64-ZVFHMIN-NEXT:    j .LBB21_17
+; RV64-ZVFHMIN-NEXT:  .LBB21_51: # %cond.store31
+; RV64-ZVFHMIN-NEXT:    .cfi_restore_state
+; RV64-ZVFHMIN-NEXT:    li a2, 32
+; RV64-ZVFHMIN-NEXT:    addi a3, sp, 1024
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a3)
+; RV64-ZVFHMIN-NEXT:    lh a2, 1056(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 32(a0)
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 46
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB21_19
+; RV64-ZVFHMIN-NEXT:  .LBB21_52: # %cond.store33
+; RV64-ZVFHMIN-NEXT:    li a2, 32
+; RV64-ZVFHMIN-NEXT:    addi a3, sp, 960
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a3)
+; RV64-ZVFHMIN-NEXT:    lh a2, 994(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 34(a0)
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 45
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB21_20
+; RV64-ZVFHMIN-NEXT:  .LBB21_53: # %cond.store35
+; RV64-ZVFHMIN-NEXT:    li a2, 32
+; RV64-ZVFHMIN-NEXT:    addi a3, sp, 896
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a3)
+; RV64-ZVFHMIN-NEXT:    lh a2, 932(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 36(a0)
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 44
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB21_21
+; RV64-ZVFHMIN-NEXT:  .LBB21_54: # %cond.store37
+; RV64-ZVFHMIN-NEXT:    li a2, 32
+; RV64-ZVFHMIN-NEXT:    addi a3, sp, 832
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a3)
+; RV64-ZVFHMIN-NEXT:    lh a2, 870(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 38(a0)
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 43
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB21_22
+; RV64-ZVFHMIN-NEXT:  .LBB21_55: # %cond.store39
+; RV64-ZVFHMIN-NEXT:    li a2, 32
+; RV64-ZVFHMIN-NEXT:    addi a3, sp, 768
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a3)
+; RV64-ZVFHMIN-NEXT:    lh a2, 808(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 40(a0)
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 42
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB21_23
+; RV64-ZVFHMIN-NEXT:  .LBB21_56: # %cond.store41
+; RV64-ZVFHMIN-NEXT:    li a2, 32
+; RV64-ZVFHMIN-NEXT:    addi a3, sp, 704
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a3)
+; RV64-ZVFHMIN-NEXT:    lh a2, 746(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 42(a0)
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 41
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB21_24
+; RV64-ZVFHMIN-NEXT:  .LBB21_57: # %cond.store43
+; RV64-ZVFHMIN-NEXT:    li a2, 32
+; RV64-ZVFHMIN-NEXT:    addi a3, sp, 640
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a3)
+; RV64-ZVFHMIN-NEXT:    lh a2, 684(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 44(a0)
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 40
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB21_25
+; RV64-ZVFHMIN-NEXT:  .LBB21_58: # %cond.store45
+; RV64-ZVFHMIN-NEXT:    li a2, 32
+; RV64-ZVFHMIN-NEXT:    addi a3, sp, 576
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a3)
+; RV64-ZVFHMIN-NEXT:    lh a2, 622(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 46(a0)
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 39
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB21_26
+; RV64-ZVFHMIN-NEXT:  .LBB21_59: # %cond.store47
+; RV64-ZVFHMIN-NEXT:    li a2, 32
+; RV64-ZVFHMIN-NEXT:    addi a3, sp, 512
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a3)
+; RV64-ZVFHMIN-NEXT:    lh a2, 560(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 48(a0)
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 38
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB21_27
+; RV64-ZVFHMIN-NEXT:  .LBB21_60: # %cond.store49
+; RV64-ZVFHMIN-NEXT:    li a2, 32
+; RV64-ZVFHMIN-NEXT:    addi a3, sp, 448
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a3)
+; RV64-ZVFHMIN-NEXT:    lh a2, 498(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 50(a0)
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 37
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB21_28
+; RV64-ZVFHMIN-NEXT:  .LBB21_61: # %cond.store51
+; RV64-ZVFHMIN-NEXT:    li a2, 32
+; RV64-ZVFHMIN-NEXT:    addi a3, sp, 384
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a3)
+; RV64-ZVFHMIN-NEXT:    lh a2, 436(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 52(a0)
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 36
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB21_29
+; RV64-ZVFHMIN-NEXT:  .LBB21_62: # %cond.store53
+; RV64-ZVFHMIN-NEXT:    li a2, 32
+; RV64-ZVFHMIN-NEXT:    addi a3, sp, 320
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a3)
+; RV64-ZVFHMIN-NEXT:    lh a2, 374(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 54(a0)
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 35
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB21_30
+; RV64-ZVFHMIN-NEXT:  .LBB21_63: # %cond.store55
+; RV64-ZVFHMIN-NEXT:    li a2, 32
+; RV64-ZVFHMIN-NEXT:    addi a3, sp, 256
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a3)
+; RV64-ZVFHMIN-NEXT:    lh a2, 312(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 56(a0)
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 34
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB21_31
+; RV64-ZVFHMIN-NEXT:  .LBB21_64: # %cond.store57
+; RV64-ZVFHMIN-NEXT:    li a2, 32
+; RV64-ZVFHMIN-NEXT:    addi a3, sp, 192
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a3)
+; RV64-ZVFHMIN-NEXT:    lh a2, 250(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 58(a0)
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 33
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB21_32
+; RV64-ZVFHMIN-NEXT:    j .LBB21_33
+  call void @llvm.masked.store.v32f16.p0(<32 x half> %val, ptr %a, i32 8, <32 x i1> %mask)
+  ret void
+}
+
+define void @masked_store_v32f32(<32 x float> %val, ptr %a, <32 x i1> %mask) {
+; CHECK-LABEL: masked_store_v32f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a3, 32
-; CHECK-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
-; CHECK-NEXT:    vle16.v v8, (a2)
-; CHECK-NEXT:    vle16.v v12, (a0)
-; CHECK-NEXT:    fmv.h.x fa5, zero
-; CHECK-NEXT:    vmfeq.vf v0, v8, fa5
-; CHECK-NEXT:    vse16.v v12, (a1), v0.t
+; CHECK-NEXT:    li a1, 32
+; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT:    vse32.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
-  %m = load <32 x half>, ptr %m_ptr
-  %mask = fcmp oeq <32 x half> %m, zeroinitializer
-  %val = load <32 x half>, ptr %val_ptr
-  call void @llvm.masked.store.v32f16.p0v32f16(<32 x half> %val, ptr %a, i32 8, <32 x i1> %mask)
+  call void @llvm.masked.store.v32f32.p0(<32 x float> %val, ptr %a, i32 8, <32 x i1> %mask)
   ret void
 }
-declare void @llvm.masked.store.v32f16.p0v32f16(<32 x half>, ptr, i32, <32 x i1>)
 
-define void @masked_store_v32f32(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
-; CHECK-LABEL: masked_store_v32f32:
+define void @masked_store_v32f64(<32 x double> %val, ptr %a, <32 x i1> %mask) {
+; CHECK-LABEL: masked_store_v32f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a3, 32
-; CHECK-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a2)
-; CHECK-NEXT:    vle32.v v16, (a0)
-; CHECK-NEXT:    fmv.w.x fa5, zero
-; CHECK-NEXT:    vmfeq.vf v0, v8, fa5
-; CHECK-NEXT:    vse32.v v16, (a1), v0.t
+; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; CHECK-NEXT:    vse64.v v8, (a0), v0.t
+; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
+; CHECK-NEXT:    vslidedown.vi v0, v0, 2
+; CHECK-NEXT:    addi a0, a0, 128
+; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; CHECK-NEXT:    vse64.v v16, (a0), v0.t
 ; CHECK-NEXT:    ret
-  %m = load <32 x float>, ptr %m_ptr
-  %mask = fcmp oeq <32 x float> %m, zeroinitializer
-  %val = load <32 x float>, ptr %val_ptr
-  call void @llvm.masked.store.v32f32.p0v32f32(<32 x float> %val, ptr %a, i32 8, <32 x i1> %mask)
+  call void @llvm.masked.store.v32f64.p0(<32 x double> %val, ptr %a, i32 8, <32 x i1> %mask)
   ret void
 }
-declare void @llvm.masked.store.v32f32.p0v32f32(<32 x float>, ptr, i32, <32 x i1>)
 
-define void @masked_store_v32f64(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
-; RV32-LABEL: masked_store_v32f64:
+define void @masked_store_v64bf16(<64 x bfloat> %val, ptr %a, <64 x i1> %mask) {
+; RV32-LABEL: masked_store_v64bf16:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    slli a3, a3, 4
-; RV32-NEXT:    sub sp, sp, a3
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vle64.v v8, (a2)
-; RV32-NEXT:    addi a2, a2, 128
-; RV32-NEXT:    vle64.v v16, (a2)
-; RV32-NEXT:    csrr a2, vlenb
-; RV32-NEXT:    slli a2, a2, 3
-; RV32-NEXT:    add a2, sp, a2
-; RV32-NEXT:    addi a2, a2, 16
-; RV32-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
-; RV32-NEXT:    fcvt.d.w fa5, zero
-; RV32-NEXT:    vmfeq.vf v0, v8, fa5
-; RV32-NEXT:    vle64.v v24, (a0)
-; RV32-NEXT:    addi a0, a0, 128
-; RV32-NEXT:    vle64.v v8, (a0)
-; RV32-NEXT:    addi a0, sp, 16
-; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vmfeq.vf v8, v16, fa5
-; RV32-NEXT:    vse64.v v24, (a1), v0.t
-; RV32-NEXT:    addi a0, a1, 128
-; RV32-NEXT:    vmv1r.v v0, v8
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
-; RV32-NEXT:    vse64.v v8, (a0), v0.t
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
-; RV32-NEXT:    add sp, sp, a0
-; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT:    vmv.x.s a2, v0
+; RV32-NEXT:    andi a1, a2, 1
+; RV32-NEXT:    bnez a1, .LBB24_71
+; RV32-NEXT:  # %bb.1: # %else
+; RV32-NEXT:    andi a1, a2, 2
+; RV32-NEXT:    bnez a1, .LBB24_72
+; RV32-NEXT:  .LBB24_2: # %else2
+; RV32-NEXT:    andi a1, a2, 4
+; RV32-NEXT:    bnez a1, .LBB24_73
+; RV32-NEXT:  .LBB24_3: # %else4
+; RV32-NEXT:    andi a1, a2, 8
+; RV32-NEXT:    bnez a1, .LBB24_74
+; RV32-NEXT:  .LBB24_4: # %else6
+; RV32-NEXT:    andi a1, a2, 16
+; RV32-NEXT:    bnez a1, .LBB24_75
+; RV32-NEXT:  .LBB24_5: # %else8
+; RV32-NEXT:    andi a1, a2, 32
+; RV32-NEXT:    bnez a1, .LBB24_76
+; RV32-NEXT:  .LBB24_6: # %else10
+; RV32-NEXT:    andi a1, a2, 64
+; RV32-NEXT:    bnez a1, .LBB24_77
+; RV32-NEXT:  .LBB24_7: # %else12
+; RV32-NEXT:    andi a1, a2, 128
+; RV32-NEXT:    bnez a1, .LBB24_78
+; RV32-NEXT:  .LBB24_8: # %else14
+; RV32-NEXT:    andi a1, a2, 256
+; RV32-NEXT:    bnez a1, .LBB24_79
+; RV32-NEXT:  .LBB24_9: # %else16
+; RV32-NEXT:    andi a1, a2, 512
+; RV32-NEXT:    bnez a1, .LBB24_80
+; RV32-NEXT:  .LBB24_10: # %else18
+; RV32-NEXT:    andi a1, a2, 1024
+; RV32-NEXT:    bnez a1, .LBB24_81
+; RV32-NEXT:  .LBB24_11: # %else20
+; RV32-NEXT:    slli a1, a2, 20
+; RV32-NEXT:    bltz a1, .LBB24_82
+; RV32-NEXT:  .LBB24_12: # %else22
+; RV32-NEXT:    slli a1, a2, 19
+; RV32-NEXT:    bltz a1, .LBB24_83
+; RV32-NEXT:  .LBB24_13: # %else24
+; RV32-NEXT:    slli a1, a2, 18
+; RV32-NEXT:    bltz a1, .LBB24_84
+; RV32-NEXT:  .LBB24_14: # %else26
+; RV32-NEXT:    slli a1, a2, 17
+; RV32-NEXT:    bltz a1, .LBB24_85
+; RV32-NEXT:  .LBB24_15: # %else28
+; RV32-NEXT:    slli a1, a2, 16
+; RV32-NEXT:    bgez a1, .LBB24_17
+; RV32-NEXT:  .LBB24_16: # %cond.store29
+; RV32-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-NEXT:    vslidedown.vi v16, v8, 15
+; RV32-NEXT:    vmv.x.s a1, v16
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 30(a0)
+; RV32-NEXT:  .LBB24_17: # %else30
+; RV32-NEXT:    addi sp, sp, -2032
+; RV32-NEXT:    .cfi_def_cfa_offset 2032
+; RV32-NEXT:    sw ra, 2028(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s0, 2024(sp) # 4-byte Folded Spill
+; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    .cfi_offset s0, -8
+; RV32-NEXT:    addi s0, sp, 2032
+; RV32-NEXT:    .cfi_def_cfa s0, 0
+; RV32-NEXT:    .cfi_remember_state
+; RV32-NEXT:    lui a1, 1
+; RV32-NEXT:    addi a1, a1, 272
+; RV32-NEXT:    sub sp, sp, a1
+; RV32-NEXT:    andi sp, sp, -128
+; RV32-NEXT:    slli a1, a2, 15
+; RV32-NEXT:    lui a3, 1
+; RV32-NEXT:    addi a3, a3, 190
+; RV32-NEXT:    add a3, sp, a3
+; RV32-NEXT:    bltz a1, .LBB24_86
+; RV32-NEXT:  # %bb.18: # %else32
+; RV32-NEXT:    slli a1, a2, 14
+; RV32-NEXT:    bltz a1, .LBB24_87
+; RV32-NEXT:  .LBB24_19: # %else34
+; RV32-NEXT:    slli a1, a2, 13
+; RV32-NEXT:    bltz a1, .LBB24_88
+; RV32-NEXT:  .LBB24_20: # %else36
+; RV32-NEXT:    slli a1, a2, 12
+; RV32-NEXT:    bltz a1, .LBB24_89
+; RV32-NEXT:  .LBB24_21: # %else38
+; RV32-NEXT:    slli a1, a2, 11
+; RV32-NEXT:    bltz a1, .LBB24_90
+; RV32-NEXT:  .LBB24_22: # %else40
+; RV32-NEXT:    slli a1, a2, 10
+; RV32-NEXT:    bltz a1, .LBB24_91
+; RV32-NEXT:  .LBB24_23: # %else42
+; RV32-NEXT:    slli a1, a2, 9
+; RV32-NEXT:    bltz a1, .LBB24_92
+; RV32-NEXT:  .LBB24_24: # %else44
+; RV32-NEXT:    slli a1, a2, 8
+; RV32-NEXT:    bltz a1, .LBB24_93
+; RV32-NEXT:  .LBB24_25: # %else46
+; RV32-NEXT:    slli a1, a2, 7
+; RV32-NEXT:    bltz a1, .LBB24_94
+; RV32-NEXT:  .LBB24_26: # %else48
+; RV32-NEXT:    slli a1, a2, 6
+; RV32-NEXT:    bltz a1, .LBB24_95
+; RV32-NEXT:  .LBB24_27: # %else50
+; RV32-NEXT:    slli a1, a2, 5
+; RV32-NEXT:    bltz a1, .LBB24_96
+; RV32-NEXT:  .LBB24_28: # %else52
+; RV32-NEXT:    slli a1, a2, 4
+; RV32-NEXT:    bltz a1, .LBB24_97
+; RV32-NEXT:  .LBB24_29: # %else54
+; RV32-NEXT:    slli a1, a2, 3
+; RV32-NEXT:    bgez a1, .LBB24_31
+; RV32-NEXT:  .LBB24_30: # %cond.store55
+; RV32-NEXT:    li a1, 64
+; RV32-NEXT:    li a4, 9
+; RV32-NEXT:    slli a4, a4, 9
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a4)
+; RV32-NEXT:    lh a1, 378(a3)
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 56(a0)
+; RV32-NEXT:  .LBB24_31: # %else56
+; RV32-NEXT:    slli a4, a2, 2
+; RV32-NEXT:    li a1, 32
+; RV32-NEXT:    bgez a4, .LBB24_33
+; RV32-NEXT:  # %bb.32: # %cond.store57
+; RV32-NEXT:    li a4, 64
+; RV32-NEXT:    lui a5, 1
+; RV32-NEXT:    addi a5, a5, 384
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    vsetvli zero, a4, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a5)
+; RV32-NEXT:    lh a4, 252(a3)
+; RV32-NEXT:    fmv.h.x fa5, a4
+; RV32-NEXT:    fsh fa5, 58(a0)
+; RV32-NEXT:  .LBB24_33: # %else58
+; RV32-NEXT:    slli a4, a2, 1
+; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT:    vsrl.vx v16, v0, a1
+; RV32-NEXT:    bgez a4, .LBB24_35
+; RV32-NEXT:  # %bb.34: # %cond.store59
+; RV32-NEXT:    li a1, 64
+; RV32-NEXT:    li a4, 17
+; RV32-NEXT:    slli a4, a4, 8
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a4)
+; RV32-NEXT:    lh a1, 126(a3)
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 60(a0)
+; RV32-NEXT:  .LBB24_35: # %else60
+; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT:    vmv.x.s a1, v16
+; RV32-NEXT:    bgez a2, .LBB24_37
+; RV32-NEXT:  # %bb.36: # %cond.store61
+; RV32-NEXT:    li a2, 64
+; RV32-NEXT:    lui a4, 1
+; RV32-NEXT:    addi a4, a4, 128
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a4)
+; RV32-NEXT:    lh a2, 0(a3)
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 62(a0)
+; RV32-NEXT:  .LBB24_37: # %else62
+; RV32-NEXT:    andi a3, a1, 1
+; RV32-NEXT:    addi a2, sp, 2047
+; RV32-NEXT:    addi a2, a2, 97
+; RV32-NEXT:    bnez a3, .LBB24_98
+; RV32-NEXT:  # %bb.38: # %else64
+; RV32-NEXT:    andi a3, a1, 2
+; RV32-NEXT:    bnez a3, .LBB24_99
+; RV32-NEXT:  .LBB24_39: # %else66
+; RV32-NEXT:    andi a3, a1, 4
+; RV32-NEXT:    bnez a3, .LBB24_100
+; RV32-NEXT:  .LBB24_40: # %else68
+; RV32-NEXT:    andi a3, a1, 8
+; RV32-NEXT:    bnez a3, .LBB24_101
+; RV32-NEXT:  .LBB24_41: # %else70
+; RV32-NEXT:    andi a3, a1, 16
+; RV32-NEXT:    bnez a3, .LBB24_102
+; RV32-NEXT:  .LBB24_42: # %else72
+; RV32-NEXT:    andi a3, a1, 32
+; RV32-NEXT:    bnez a3, .LBB24_103
+; RV32-NEXT:  .LBB24_43: # %else74
+; RV32-NEXT:    andi a3, a1, 64
+; RV32-NEXT:    bnez a3, .LBB24_104
+; RV32-NEXT:  .LBB24_44: # %else76
+; RV32-NEXT:    andi a3, a1, 128
+; RV32-NEXT:    bnez a3, .LBB24_105
+; RV32-NEXT:  .LBB24_45: # %else78
+; RV32-NEXT:    andi a3, a1, 256
+; RV32-NEXT:    bnez a3, .LBB24_106
+; RV32-NEXT:  .LBB24_46: # %else80
+; RV32-NEXT:    andi a3, a1, 512
+; RV32-NEXT:    bnez a3, .LBB24_107
+; RV32-NEXT:  .LBB24_47: # %else82
+; RV32-NEXT:    andi a3, a1, 1024
+; RV32-NEXT:    bnez a3, .LBB24_108
+; RV32-NEXT:  .LBB24_48: # %else84
+; RV32-NEXT:    slli a3, a1, 20
+; RV32-NEXT:    bltz a3, .LBB24_109
+; RV32-NEXT:  .LBB24_49: # %else86
+; RV32-NEXT:    slli a3, a1, 19
+; RV32-NEXT:    bltz a3, .LBB24_110
+; RV32-NEXT:  .LBB24_50: # %else88
+; RV32-NEXT:    slli a3, a1, 18
+; RV32-NEXT:    bltz a3, .LBB24_111
+; RV32-NEXT:  .LBB24_51: # %else90
+; RV32-NEXT:    slli a3, a1, 17
+; RV32-NEXT:    bltz a3, .LBB24_112
+; RV32-NEXT:  .LBB24_52: # %else92
+; RV32-NEXT:    slli a3, a1, 16
+; RV32-NEXT:    bltz a3, .LBB24_113
+; RV32-NEXT:  .LBB24_53: # %else94
+; RV32-NEXT:    slli a3, a1, 15
+; RV32-NEXT:    bltz a3, .LBB24_114
+; RV32-NEXT:  .LBB24_54: # %else96
+; RV32-NEXT:    slli a2, a1, 14
+; RV32-NEXT:    bltz a2, .LBB24_115
+; RV32-NEXT:  .LBB24_55: # %else98
+; RV32-NEXT:    slli a2, a1, 13
+; RV32-NEXT:    bltz a2, .LBB24_116
+; RV32-NEXT:  .LBB24_56: # %else100
+; RV32-NEXT:    slli a2, a1, 12
+; RV32-NEXT:    bltz a2, .LBB24_117
+; RV32-NEXT:  .LBB24_57: # %else102
+; RV32-NEXT:    slli a2, a1, 11
+; RV32-NEXT:    bltz a2, .LBB24_118
+; RV32-NEXT:  .LBB24_58: # %else104
+; RV32-NEXT:    slli a2, a1, 10
+; RV32-NEXT:    bltz a2, .LBB24_119
+; RV32-NEXT:  .LBB24_59: # %else106
+; RV32-NEXT:    slli a2, a1, 9
+; RV32-NEXT:    bltz a2, .LBB24_120
+; RV32-NEXT:  .LBB24_60: # %else108
+; RV32-NEXT:    slli a2, a1, 8
+; RV32-NEXT:    bltz a2, .LBB24_121
+; RV32-NEXT:  .LBB24_61: # %else110
+; RV32-NEXT:    slli a2, a1, 7
+; RV32-NEXT:    bltz a2, .LBB24_122
+; RV32-NEXT:  .LBB24_62: # %else112
+; RV32-NEXT:    slli a2, a1, 6
+; RV32-NEXT:    bltz a2, .LBB24_123
+; RV32-NEXT:  .LBB24_63: # %else114
+; RV32-NEXT:    slli a2, a1, 5
+; RV32-NEXT:    bltz a2, .LBB24_124
+; RV32-NEXT:  .LBB24_64: # %else116
+; RV32-NEXT:    slli a2, a1, 4
+; RV32-NEXT:    bltz a2, .LBB24_125
+; RV32-NEXT:  .LBB24_65: # %else118
+; RV32-NEXT:    slli a2, a1, 3
+; RV32-NEXT:    bltz a2, .LBB24_126
+; RV32-NEXT:  .LBB24_66: # %else120
+; RV32-NEXT:    slli a2, a1, 2
+; RV32-NEXT:    bltz a2, .LBB24_127
+; RV32-NEXT:  .LBB24_67: # %else122
+; RV32-NEXT:    slli a2, a1, 1
+; RV32-NEXT:    bltz a2, .LBB24_128
+; RV32-NEXT:  .LBB24_68: # %else124
+; RV32-NEXT:    bgez a1, .LBB24_70
+; RV32-NEXT:  .LBB24_69: # %cond.store125
+; RV32-NEXT:    li a1, 64
+; RV32-NEXT:    addi a2, sp, 128
+; RV32-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a2)
+; RV32-NEXT:    lh a1, 254(sp)
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 126(a0)
+; RV32-NEXT:  .LBB24_70: # %else126
+; RV32-NEXT:    addi sp, s0, -2032
+; RV32-NEXT:    .cfi_def_cfa sp, 2032
+; RV32-NEXT:    lw ra, 2028(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s0, 2024(sp) # 4-byte Folded Reload
+; RV32-NEXT:    .cfi_restore ra
+; RV32-NEXT:    .cfi_restore s0
+; RV32-NEXT:    addi sp, sp, 2032
+; RV32-NEXT:    .cfi_def_cfa_offset 0
 ; RV32-NEXT:    ret
+; RV32-NEXT:  .LBB24_71: # %cond.store
+; RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; RV32-NEXT:    vmv.x.s a1, v8
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 0(a0)
+; RV32-NEXT:    andi a1, a2, 2
+; RV32-NEXT:    beqz a1, .LBB24_2
+; RV32-NEXT:  .LBB24_72: # %cond.store1
+; RV32-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT:    vslidedown.vi v16, v8, 1
+; RV32-NEXT:    vmv.x.s a1, v16
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 2(a0)
+; RV32-NEXT:    andi a1, a2, 4
+; RV32-NEXT:    beqz a1, .LBB24_3
+; RV32-NEXT:  .LBB24_73: # %cond.store3
+; RV32-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT:    vslidedown.vi v16, v8, 2
+; RV32-NEXT:    vmv.x.s a1, v16
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 4(a0)
+; RV32-NEXT:    andi a1, a2, 8
+; RV32-NEXT:    beqz a1, .LBB24_4
+; RV32-NEXT:  .LBB24_74: # %cond.store5
+; RV32-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT:    vslidedown.vi v16, v8, 3
+; RV32-NEXT:    vmv.x.s a1, v16
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 6(a0)
+; RV32-NEXT:    andi a1, a2, 16
+; RV32-NEXT:    beqz a1, .LBB24_5
+; RV32-NEXT:  .LBB24_75: # %cond.store7
+; RV32-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT:    vslidedown.vi v16, v8, 4
+; RV32-NEXT:    vmv.x.s a1, v16
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 8(a0)
+; RV32-NEXT:    andi a1, a2, 32
+; RV32-NEXT:    beqz a1, .LBB24_6
+; RV32-NEXT:  .LBB24_76: # %cond.store9
+; RV32-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT:    vslidedown.vi v16, v8, 5
+; RV32-NEXT:    vmv.x.s a1, v16
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 10(a0)
+; RV32-NEXT:    andi a1, a2, 64
+; RV32-NEXT:    beqz a1, .LBB24_7
+; RV32-NEXT:  .LBB24_77: # %cond.store11
+; RV32-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT:    vslidedown.vi v16, v8, 6
+; RV32-NEXT:    vmv.x.s a1, v16
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 12(a0)
+; RV32-NEXT:    andi a1, a2, 128
+; RV32-NEXT:    beqz a1, .LBB24_8
+; RV32-NEXT:  .LBB24_78: # %cond.store13
+; RV32-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT:    vslidedown.vi v16, v8, 7
+; RV32-NEXT:    vmv.x.s a1, v16
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 14(a0)
+; RV32-NEXT:    andi a1, a2, 256
+; RV32-NEXT:    beqz a1, .LBB24_9
+; RV32-NEXT:  .LBB24_79: # %cond.store15
+; RV32-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-NEXT:    vslidedown.vi v16, v8, 8
+; RV32-NEXT:    vmv.x.s a1, v16
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 16(a0)
+; RV32-NEXT:    andi a1, a2, 512
+; RV32-NEXT:    beqz a1, .LBB24_10
+; RV32-NEXT:  .LBB24_80: # %cond.store17
+; RV32-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-NEXT:    vslidedown.vi v16, v8, 9
+; RV32-NEXT:    vmv.x.s a1, v16
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 18(a0)
+; RV32-NEXT:    andi a1, a2, 1024
+; RV32-NEXT:    beqz a1, .LBB24_11
+; RV32-NEXT:  .LBB24_81: # %cond.store19
+; RV32-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-NEXT:    vslidedown.vi v16, v8, 10
+; RV32-NEXT:    vmv.x.s a1, v16
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 20(a0)
+; RV32-NEXT:    slli a1, a2, 20
+; RV32-NEXT:    bgez a1, .LBB24_12
+; RV32-NEXT:  .LBB24_82: # %cond.store21
+; RV32-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-NEXT:    vslidedown.vi v16, v8, 11
+; RV32-NEXT:    vmv.x.s a1, v16
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 22(a0)
+; RV32-NEXT:    slli a1, a2, 19
+; RV32-NEXT:    bgez a1, .LBB24_13
+; RV32-NEXT:  .LBB24_83: # %cond.store23
+; RV32-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-NEXT:    vslidedown.vi v16, v8, 12
+; RV32-NEXT:    vmv.x.s a1, v16
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 24(a0)
+; RV32-NEXT:    slli a1, a2, 18
+; RV32-NEXT:    bgez a1, .LBB24_14
+; RV32-NEXT:  .LBB24_84: # %cond.store25
+; RV32-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-NEXT:    vslidedown.vi v16, v8, 13
+; RV32-NEXT:    vmv.x.s a1, v16
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 26(a0)
+; RV32-NEXT:    slli a1, a2, 17
+; RV32-NEXT:    bgez a1, .LBB24_15
+; RV32-NEXT:  .LBB24_85: # %cond.store27
+; RV32-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-NEXT:    vslidedown.vi v16, v8, 14
+; RV32-NEXT:    vmv.x.s a1, v16
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 28(a0)
+; RV32-NEXT:    slli a1, a2, 16
+; RV32-NEXT:    bltz a1, .LBB24_16
+; RV32-NEXT:    j .LBB24_17
+; RV32-NEXT:  .LBB24_86: # %cond.store31
+; RV32-NEXT:    .cfi_restore_state
+; RV32-NEXT:    li a1, 64
+; RV32-NEXT:    li a4, 3
+; RV32-NEXT:    slli a4, a4, 11
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a4)
+; RV32-NEXT:    lh a1, 1890(a3)
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 32(a0)
+; RV32-NEXT:    slli a1, a2, 14
+; RV32-NEXT:    bgez a1, .LBB24_19
+; RV32-NEXT:  .LBB24_87: # %cond.store33
+; RV32-NEXT:    li a1, 64
+; RV32-NEXT:    lui a4, 1
+; RV32-NEXT:    addi a4, a4, 1920
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a4)
+; RV32-NEXT:    lh a1, 1764(a3)
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 34(a0)
+; RV32-NEXT:    slli a1, a2, 13
+; RV32-NEXT:    bgez a1, .LBB24_20
+; RV32-NEXT:  .LBB24_88: # %cond.store35
+; RV32-NEXT:    li a1, 64
+; RV32-NEXT:    li a4, 23
+; RV32-NEXT:    slli a4, a4, 8
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a4)
+; RV32-NEXT:    lh a1, 1638(a3)
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 36(a0)
+; RV32-NEXT:    slli a1, a2, 12
+; RV32-NEXT:    bgez a1, .LBB24_21
+; RV32-NEXT:  .LBB24_89: # %cond.store37
+; RV32-NEXT:    li a1, 64
+; RV32-NEXT:    lui a4, 1
+; RV32-NEXT:    addi a4, a4, 1664
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a4)
+; RV32-NEXT:    lh a1, 1512(a3)
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 38(a0)
+; RV32-NEXT:    slli a1, a2, 11
+; RV32-NEXT:    bgez a1, .LBB24_22
+; RV32-NEXT:  .LBB24_90: # %cond.store39
+; RV32-NEXT:    li a1, 64
+; RV32-NEXT:    li a4, 11
+; RV32-NEXT:    slli a4, a4, 9
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a4)
+; RV32-NEXT:    lh a1, 1386(a3)
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 40(a0)
+; RV32-NEXT:    slli a1, a2, 10
+; RV32-NEXT:    bgez a1, .LBB24_23
+; RV32-NEXT:  .LBB24_91: # %cond.store41
+; RV32-NEXT:    li a1, 64
+; RV32-NEXT:    lui a4, 1
+; RV32-NEXT:    addi a4, a4, 1408
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a4)
+; RV32-NEXT:    lh a1, 1260(a3)
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 42(a0)
+; RV32-NEXT:    slli a1, a2, 9
+; RV32-NEXT:    bgez a1, .LBB24_24
+; RV32-NEXT:  .LBB24_92: # %cond.store43
+; RV32-NEXT:    li a1, 64
+; RV32-NEXT:    li a4, 21
+; RV32-NEXT:    slli a4, a4, 8
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a4)
+; RV32-NEXT:    lh a1, 1134(a3)
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 44(a0)
+; RV32-NEXT:    slli a1, a2, 8
+; RV32-NEXT:    bgez a1, .LBB24_25
+; RV32-NEXT:  .LBB24_93: # %cond.store45
+; RV32-NEXT:    li a1, 64
+; RV32-NEXT:    lui a4, 1
+; RV32-NEXT:    addi a4, a4, 1152
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a4)
+; RV32-NEXT:    lh a1, 1008(a3)
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 46(a0)
+; RV32-NEXT:    slli a1, a2, 7
+; RV32-NEXT:    bgez a1, .LBB24_26
+; RV32-NEXT:  .LBB24_94: # %cond.store47
+; RV32-NEXT:    li a1, 64
+; RV32-NEXT:    li a4, 5
+; RV32-NEXT:    slli a4, a4, 10
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a4)
+; RV32-NEXT:    lh a1, 882(a3)
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 48(a0)
+; RV32-NEXT:    slli a1, a2, 6
+; RV32-NEXT:    bgez a1, .LBB24_27
+; RV32-NEXT:  .LBB24_95: # %cond.store49
+; RV32-NEXT:    li a1, 64
+; RV32-NEXT:    lui a4, 1
+; RV32-NEXT:    addi a4, a4, 896
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a4)
+; RV32-NEXT:    lh a1, 756(a3)
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 50(a0)
+; RV32-NEXT:    slli a1, a2, 5
+; RV32-NEXT:    bgez a1, .LBB24_28
+; RV32-NEXT:  .LBB24_96: # %cond.store51
+; RV32-NEXT:    li a1, 64
+; RV32-NEXT:    li a4, 19
+; RV32-NEXT:    slli a4, a4, 8
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a4)
+; RV32-NEXT:    lh a1, 630(a3)
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 52(a0)
+; RV32-NEXT:    slli a1, a2, 4
+; RV32-NEXT:    bgez a1, .LBB24_29
+; RV32-NEXT:  .LBB24_97: # %cond.store53
+; RV32-NEXT:    li a1, 64
+; RV32-NEXT:    lui a4, 1
+; RV32-NEXT:    addi a4, a4, 640
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a4)
+; RV32-NEXT:    lh a1, 504(a3)
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 54(a0)
+; RV32-NEXT:    slli a1, a2, 3
+; RV32-NEXT:    bltz a1, .LBB24_30
+; RV32-NEXT:    j .LBB24_31
+; RV32-NEXT:  .LBB24_98: # %cond.store63
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    lui a4, 1
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a4)
+; RV32-NEXT:    lh a3, 2016(a2)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 64(a0)
+; RV32-NEXT:    andi a3, a1, 2
+; RV32-NEXT:    beqz a3, .LBB24_39
+; RV32-NEXT:  .LBB24_99: # %cond.store65
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    addi a4, sp, 2047
+; RV32-NEXT:    addi a4, a4, 1921
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a4)
+; RV32-NEXT:    lh a3, 1890(a2)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 66(a0)
+; RV32-NEXT:    andi a3, a1, 4
+; RV32-NEXT:    beqz a3, .LBB24_40
+; RV32-NEXT:  .LBB24_100: # %cond.store67
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    addi a4, sp, 2047
+; RV32-NEXT:    addi a4, a4, 1793
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a4)
+; RV32-NEXT:    lh a3, 1764(a2)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 68(a0)
+; RV32-NEXT:    andi a3, a1, 8
+; RV32-NEXT:    beqz a3, .LBB24_41
+; RV32-NEXT:  .LBB24_101: # %cond.store69
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    addi a4, sp, 2047
+; RV32-NEXT:    addi a4, a4, 1665
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a4)
+; RV32-NEXT:    lh a3, 1638(a2)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 70(a0)
+; RV32-NEXT:    andi a3, a1, 16
+; RV32-NEXT:    beqz a3, .LBB24_42
+; RV32-NEXT:  .LBB24_102: # %cond.store71
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    addi a4, sp, 2047
+; RV32-NEXT:    addi a4, a4, 1537
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a4)
+; RV32-NEXT:    lh a3, 1512(a2)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 72(a0)
+; RV32-NEXT:    andi a3, a1, 32
+; RV32-NEXT:    beqz a3, .LBB24_43
+; RV32-NEXT:  .LBB24_103: # %cond.store73
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    addi a4, sp, 2047
+; RV32-NEXT:    addi a4, a4, 1409
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a4)
+; RV32-NEXT:    lh a3, 1386(a2)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 74(a0)
+; RV32-NEXT:    andi a3, a1, 64
+; RV32-NEXT:    beqz a3, .LBB24_44
+; RV32-NEXT:  .LBB24_104: # %cond.store75
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    addi a4, sp, 2047
+; RV32-NEXT:    addi a4, a4, 1281
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a4)
+; RV32-NEXT:    lh a3, 1260(a2)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 76(a0)
+; RV32-NEXT:    andi a3, a1, 128
+; RV32-NEXT:    beqz a3, .LBB24_45
+; RV32-NEXT:  .LBB24_105: # %cond.store77
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    addi a4, sp, 2047
+; RV32-NEXT:    addi a4, a4, 1153
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a4)
+; RV32-NEXT:    lh a3, 1134(a2)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 78(a0)
+; RV32-NEXT:    andi a3, a1, 256
+; RV32-NEXT:    beqz a3, .LBB24_46
+; RV32-NEXT:  .LBB24_106: # %cond.store79
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    addi a4, sp, 2047
+; RV32-NEXT:    addi a4, a4, 1025
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a4)
+; RV32-NEXT:    lh a3, 1008(a2)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 80(a0)
+; RV32-NEXT:    andi a3, a1, 512
+; RV32-NEXT:    beqz a3, .LBB24_47
+; RV32-NEXT:  .LBB24_107: # %cond.store81
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    addi a4, sp, 2047
+; RV32-NEXT:    addi a4, a4, 897
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a4)
+; RV32-NEXT:    lh a3, 882(a2)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 82(a0)
+; RV32-NEXT:    andi a3, a1, 1024
+; RV32-NEXT:    beqz a3, .LBB24_48
+; RV32-NEXT:  .LBB24_108: # %cond.store83
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    addi a4, sp, 2047
+; RV32-NEXT:    addi a4, a4, 769
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a4)
+; RV32-NEXT:    lh a3, 756(a2)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 84(a0)
+; RV32-NEXT:    slli a3, a1, 20
+; RV32-NEXT:    bgez a3, .LBB24_49
+; RV32-NEXT:  .LBB24_109: # %cond.store85
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    addi a4, sp, 2047
+; RV32-NEXT:    addi a4, a4, 641
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a4)
+; RV32-NEXT:    lh a3, 630(a2)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 86(a0)
+; RV32-NEXT:    slli a3, a1, 19
+; RV32-NEXT:    bgez a3, .LBB24_50
+; RV32-NEXT:  .LBB24_110: # %cond.store87
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    addi a4, sp, 2047
+; RV32-NEXT:    addi a4, a4, 513
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a4)
+; RV32-NEXT:    lh a3, 504(a2)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 88(a0)
+; RV32-NEXT:    slli a3, a1, 18
+; RV32-NEXT:    bgez a3, .LBB24_51
+; RV32-NEXT:  .LBB24_111: # %cond.store89
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    addi a4, sp, 2047
+; RV32-NEXT:    addi a4, a4, 385
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a4)
+; RV32-NEXT:    lh a3, 378(a2)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 90(a0)
+; RV32-NEXT:    slli a3, a1, 17
+; RV32-NEXT:    bgez a3, .LBB24_52
+; RV32-NEXT:  .LBB24_112: # %cond.store91
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    addi a4, sp, 2047
+; RV32-NEXT:    addi a4, a4, 257
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a4)
+; RV32-NEXT:    lh a3, 252(a2)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 92(a0)
+; RV32-NEXT:    slli a3, a1, 16
+; RV32-NEXT:    bgez a3, .LBB24_53
+; RV32-NEXT:  .LBB24_113: # %cond.store93
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    addi a4, sp, 2047
+; RV32-NEXT:    addi a4, a4, 129
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a4)
+; RV32-NEXT:    lh a3, 126(a2)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 94(a0)
+; RV32-NEXT:    slli a3, a1, 15
+; RV32-NEXT:    bgez a3, .LBB24_54
+; RV32-NEXT:  .LBB24_114: # %cond.store95
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    addi a4, sp, 2047
+; RV32-NEXT:    addi a4, a4, 1
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a4)
+; RV32-NEXT:    lh a2, 0(a2)
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 96(a0)
+; RV32-NEXT:    slli a2, a1, 14
+; RV32-NEXT:    bgez a2, .LBB24_55
+; RV32-NEXT:  .LBB24_115: # %cond.store97
+; RV32-NEXT:    li a2, 64
+; RV32-NEXT:    addi a3, sp, 1920
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a3)
+; RV32-NEXT:    lh a2, 2018(sp)
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 98(a0)
+; RV32-NEXT:    slli a2, a1, 13
+; RV32-NEXT:    bgez a2, .LBB24_56
+; RV32-NEXT:  .LBB24_116: # %cond.store99
+; RV32-NEXT:    li a2, 64
+; RV32-NEXT:    addi a3, sp, 1792
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a3)
+; RV32-NEXT:    lh a2, 1892(sp)
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 100(a0)
+; RV32-NEXT:    slli a2, a1, 12
+; RV32-NEXT:    bgez a2, .LBB24_57
+; RV32-NEXT:  .LBB24_117: # %cond.store101
+; RV32-NEXT:    li a2, 64
+; RV32-NEXT:    addi a3, sp, 1664
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a3)
+; RV32-NEXT:    lh a2, 1766(sp)
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 102(a0)
+; RV32-NEXT:    slli a2, a1, 11
+; RV32-NEXT:    bgez a2, .LBB24_58
+; RV32-NEXT:  .LBB24_118: # %cond.store103
+; RV32-NEXT:    li a2, 64
+; RV32-NEXT:    addi a3, sp, 1536
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a3)
+; RV32-NEXT:    lh a2, 1640(sp)
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 104(a0)
+; RV32-NEXT:    slli a2, a1, 10
+; RV32-NEXT:    bgez a2, .LBB24_59
+; RV32-NEXT:  .LBB24_119: # %cond.store105
+; RV32-NEXT:    li a2, 64
+; RV32-NEXT:    addi a3, sp, 1408
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a3)
+; RV32-NEXT:    lh a2, 1514(sp)
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 106(a0)
+; RV32-NEXT:    slli a2, a1, 9
+; RV32-NEXT:    bgez a2, .LBB24_60
+; RV32-NEXT:  .LBB24_120: # %cond.store107
+; RV32-NEXT:    li a2, 64
+; RV32-NEXT:    addi a3, sp, 1280
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a3)
+; RV32-NEXT:    lh a2, 1388(sp)
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 108(a0)
+; RV32-NEXT:    slli a2, a1, 8
+; RV32-NEXT:    bgez a2, .LBB24_61
+; RV32-NEXT:  .LBB24_121: # %cond.store109
+; RV32-NEXT:    li a2, 64
+; RV32-NEXT:    addi a3, sp, 1152
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a3)
+; RV32-NEXT:    lh a2, 1262(sp)
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 110(a0)
+; RV32-NEXT:    slli a2, a1, 7
+; RV32-NEXT:    bgez a2, .LBB24_62
+; RV32-NEXT:  .LBB24_122: # %cond.store111
+; RV32-NEXT:    li a2, 64
+; RV32-NEXT:    addi a3, sp, 1024
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a3)
+; RV32-NEXT:    lh a2, 1136(sp)
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 112(a0)
+; RV32-NEXT:    slli a2, a1, 6
+; RV32-NEXT:    bgez a2, .LBB24_63
+; RV32-NEXT:  .LBB24_123: # %cond.store113
+; RV32-NEXT:    li a2, 64
+; RV32-NEXT:    addi a3, sp, 896
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a3)
+; RV32-NEXT:    lh a2, 1010(sp)
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 114(a0)
+; RV32-NEXT:    slli a2, a1, 5
+; RV32-NEXT:    bgez a2, .LBB24_64
+; RV32-NEXT:  .LBB24_124: # %cond.store115
+; RV32-NEXT:    li a2, 64
+; RV32-NEXT:    addi a3, sp, 768
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a3)
+; RV32-NEXT:    lh a2, 884(sp)
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 116(a0)
+; RV32-NEXT:    slli a2, a1, 4
+; RV32-NEXT:    bgez a2, .LBB24_65
+; RV32-NEXT:  .LBB24_125: # %cond.store117
+; RV32-NEXT:    li a2, 64
+; RV32-NEXT:    addi a3, sp, 640
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a3)
+; RV32-NEXT:    lh a2, 758(sp)
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 118(a0)
+; RV32-NEXT:    slli a2, a1, 3
+; RV32-NEXT:    bgez a2, .LBB24_66
+; RV32-NEXT:  .LBB24_126: # %cond.store119
+; RV32-NEXT:    li a2, 64
+; RV32-NEXT:    addi a3, sp, 512
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a3)
+; RV32-NEXT:    lh a2, 632(sp)
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 120(a0)
+; RV32-NEXT:    slli a2, a1, 2
+; RV32-NEXT:    bgez a2, .LBB24_67
+; RV32-NEXT:  .LBB24_127: # %cond.store121
+; RV32-NEXT:    li a2, 64
+; RV32-NEXT:    addi a3, sp, 384
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a3)
+; RV32-NEXT:    lh a2, 506(sp)
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 122(a0)
+; RV32-NEXT:    slli a2, a1, 1
+; RV32-NEXT:    bgez a2, .LBB24_68
+; RV32-NEXT:  .LBB24_128: # %cond.store123
+; RV32-NEXT:    li a2, 64
+; RV32-NEXT:    addi a3, sp, 256
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a3)
+; RV32-NEXT:    lh a2, 380(sp)
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 124(a0)
+; RV32-NEXT:    bltz a1, .LBB24_69
+; RV32-NEXT:    j .LBB24_70
 ;
-; RV64-LABEL: masked_store_v32f64:
+; RV64-LABEL: masked_store_v64bf16:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    addi sp, sp, -16
-; RV64-NEXT:    csrr a3, vlenb
-; RV64-NEXT:    slli a3, a3, 4
-; RV64-NEXT:    sub sp, sp, a3
-; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT:    vle64.v v8, (a2)
-; RV64-NEXT:    addi a2, a2, 128
-; RV64-NEXT:    vle64.v v16, (a2)
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a2, a2, 3
+; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT:    vmv.x.s a1, v0
+; RV64-NEXT:    andi a2, a1, 1
+; RV64-NEXT:    bnez a2, .LBB24_68
+; RV64-NEXT:  # %bb.1: # %else
+; RV64-NEXT:    andi a2, a1, 2
+; RV64-NEXT:    bnez a2, .LBB24_69
+; RV64-NEXT:  .LBB24_2: # %else2
+; RV64-NEXT:    andi a2, a1, 4
+; RV64-NEXT:    bnez a2, .LBB24_70
+; RV64-NEXT:  .LBB24_3: # %else4
+; RV64-NEXT:    andi a2, a1, 8
+; RV64-NEXT:    bnez a2, .LBB24_71
+; RV64-NEXT:  .LBB24_4: # %else6
+; RV64-NEXT:    andi a2, a1, 16
+; RV64-NEXT:    bnez a2, .LBB24_72
+; RV64-NEXT:  .LBB24_5: # %else8
+; RV64-NEXT:    andi a2, a1, 32
+; RV64-NEXT:    bnez a2, .LBB24_73
+; RV64-NEXT:  .LBB24_6: # %else10
+; RV64-NEXT:    andi a2, a1, 64
+; RV64-NEXT:    bnez a2, .LBB24_74
+; RV64-NEXT:  .LBB24_7: # %else12
+; RV64-NEXT:    andi a2, a1, 128
+; RV64-NEXT:    bnez a2, .LBB24_75
+; RV64-NEXT:  .LBB24_8: # %else14
+; RV64-NEXT:    andi a2, a1, 256
+; RV64-NEXT:    bnez a2, .LBB24_76
+; RV64-NEXT:  .LBB24_9: # %else16
+; RV64-NEXT:    andi a2, a1, 512
+; RV64-NEXT:    bnez a2, .LBB24_77
+; RV64-NEXT:  .LBB24_10: # %else18
+; RV64-NEXT:    andi a2, a1, 1024
+; RV64-NEXT:    bnez a2, .LBB24_78
+; RV64-NEXT:  .LBB24_11: # %else20
+; RV64-NEXT:    slli a2, a1, 52
+; RV64-NEXT:    bltz a2, .LBB24_79
+; RV64-NEXT:  .LBB24_12: # %else22
+; RV64-NEXT:    slli a2, a1, 51
+; RV64-NEXT:    bltz a2, .LBB24_80
+; RV64-NEXT:  .LBB24_13: # %else24
+; RV64-NEXT:    slli a2, a1, 50
+; RV64-NEXT:    bltz a2, .LBB24_81
+; RV64-NEXT:  .LBB24_14: # %else26
+; RV64-NEXT:    slli a2, a1, 49
+; RV64-NEXT:    bltz a2, .LBB24_82
+; RV64-NEXT:  .LBB24_15: # %else28
+; RV64-NEXT:    slli a2, a1, 48
+; RV64-NEXT:    bgez a2, .LBB24_17
+; RV64-NEXT:  .LBB24_16: # %cond.store29
+; RV64-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-NEXT:    vslidedown.vi v16, v8, 15
+; RV64-NEXT:    vmv.x.s a2, v16
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 30(a0)
+; RV64-NEXT:  .LBB24_17: # %else30
+; RV64-NEXT:    addi sp, sp, -2032
+; RV64-NEXT:    .cfi_def_cfa_offset 2032
+; RV64-NEXT:    sd ra, 2024(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s0, 2016(sp) # 8-byte Folded Spill
+; RV64-NEXT:    .cfi_offset ra, -8
+; RV64-NEXT:    .cfi_offset s0, -16
+; RV64-NEXT:    addi s0, sp, 2032
+; RV64-NEXT:    .cfi_def_cfa s0, 0
+; RV64-NEXT:    .cfi_remember_state
+; RV64-NEXT:    lui a2, 1
+; RV64-NEXT:    addiw a2, a2, 272
+; RV64-NEXT:    sub sp, sp, a2
+; RV64-NEXT:    andi sp, sp, -128
+; RV64-NEXT:    slli a3, a1, 47
+; RV64-NEXT:    lui a2, 1
+; RV64-NEXT:    addiw a2, a2, 190
 ; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 16
-; RV64-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
-; RV64-NEXT:    fmv.d.x fa5, zero
-; RV64-NEXT:    vmfeq.vf v0, v8, fa5
-; RV64-NEXT:    vle64.v v24, (a0)
-; RV64-NEXT:    addi a0, a0, 128
-; RV64-NEXT:    vle64.v v8, (a0)
-; RV64-NEXT:    addi a0, sp, 16
-; RV64-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
-; RV64-NEXT:    add a0, sp, a0
-; RV64-NEXT:    addi a0, a0, 16
-; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV64-NEXT:    vmfeq.vf v8, v16, fa5
-; RV64-NEXT:    vse64.v v24, (a1), v0.t
-; RV64-NEXT:    addi a0, a1, 128
-; RV64-NEXT:    vmv1r.v v0, v8
-; RV64-NEXT:    addi a1, sp, 16
-; RV64-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
-; RV64-NEXT:    vse64.v v8, (a0), v0.t
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 4
-; RV64-NEXT:    add sp, sp, a0
-; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    bltz a3, .LBB24_83
+; RV64-NEXT:  # %bb.18: # %else32
+; RV64-NEXT:    slli a3, a1, 46
+; RV64-NEXT:    bltz a3, .LBB24_84
+; RV64-NEXT:  .LBB24_19: # %else34
+; RV64-NEXT:    slli a3, a1, 45
+; RV64-NEXT:    bltz a3, .LBB24_85
+; RV64-NEXT:  .LBB24_20: # %else36
+; RV64-NEXT:    slli a3, a1, 44
+; RV64-NEXT:    bltz a3, .LBB24_86
+; RV64-NEXT:  .LBB24_21: # %else38
+; RV64-NEXT:    slli a3, a1, 43
+; RV64-NEXT:    bltz a3, .LBB24_87
+; RV64-NEXT:  .LBB24_22: # %else40
+; RV64-NEXT:    slli a3, a1, 42
+; RV64-NEXT:    bltz a3, .LBB24_88
+; RV64-NEXT:  .LBB24_23: # %else42
+; RV64-NEXT:    slli a3, a1, 41
+; RV64-NEXT:    bltz a3, .LBB24_89
+; RV64-NEXT:  .LBB24_24: # %else44
+; RV64-NEXT:    slli a3, a1, 40
+; RV64-NEXT:    bltz a3, .LBB24_90
+; RV64-NEXT:  .LBB24_25: # %else46
+; RV64-NEXT:    slli a3, a1, 39
+; RV64-NEXT:    bltz a3, .LBB24_91
+; RV64-NEXT:  .LBB24_26: # %else48
+; RV64-NEXT:    slli a3, a1, 38
+; RV64-NEXT:    bltz a3, .LBB24_92
+; RV64-NEXT:  .LBB24_27: # %else50
+; RV64-NEXT:    slli a3, a1, 37
+; RV64-NEXT:    bltz a3, .LBB24_93
+; RV64-NEXT:  .LBB24_28: # %else52
+; RV64-NEXT:    slli a3, a1, 36
+; RV64-NEXT:    bltz a3, .LBB24_94
+; RV64-NEXT:  .LBB24_29: # %else54
+; RV64-NEXT:    slli a3, a1, 35
+; RV64-NEXT:    bltz a3, .LBB24_95
+; RV64-NEXT:  .LBB24_30: # %else56
+; RV64-NEXT:    slli a3, a1, 34
+; RV64-NEXT:    bltz a3, .LBB24_96
+; RV64-NEXT:  .LBB24_31: # %else58
+; RV64-NEXT:    slli a3, a1, 33
+; RV64-NEXT:    bltz a3, .LBB24_97
+; RV64-NEXT:  .LBB24_32: # %else60
+; RV64-NEXT:    slli a3, a1, 32
+; RV64-NEXT:    bgez a3, .LBB24_34
+; RV64-NEXT:  .LBB24_33: # %cond.store61
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    lui a4, 1
+; RV64-NEXT:    addiw a4, a4, 128
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a2, 0(a2)
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 62(a0)
+; RV64-NEXT:  .LBB24_34: # %else62
+; RV64-NEXT:    slli a3, a1, 31
+; RV64-NEXT:    addi a2, sp, 2047
+; RV64-NEXT:    addi a2, a2, 97
+; RV64-NEXT:    bltz a3, .LBB24_98
+; RV64-NEXT:  # %bb.35: # %else64
+; RV64-NEXT:    slli a3, a1, 30
+; RV64-NEXT:    bltz a3, .LBB24_99
+; RV64-NEXT:  .LBB24_36: # %else66
+; RV64-NEXT:    slli a3, a1, 29
+; RV64-NEXT:    bltz a3, .LBB24_100
+; RV64-NEXT:  .LBB24_37: # %else68
+; RV64-NEXT:    slli a3, a1, 28
+; RV64-NEXT:    bltz a3, .LBB24_101
+; RV64-NEXT:  .LBB24_38: # %else70
+; RV64-NEXT:    slli a3, a1, 27
+; RV64-NEXT:    bltz a3, .LBB24_102
+; RV64-NEXT:  .LBB24_39: # %else72
+; RV64-NEXT:    slli a3, a1, 26
+; RV64-NEXT:    bltz a3, .LBB24_103
+; RV64-NEXT:  .LBB24_40: # %else74
+; RV64-NEXT:    slli a3, a1, 25
+; RV64-NEXT:    bltz a3, .LBB24_104
+; RV64-NEXT:  .LBB24_41: # %else76
+; RV64-NEXT:    slli a3, a1, 24
+; RV64-NEXT:    bltz a3, .LBB24_105
+; RV64-NEXT:  .LBB24_42: # %else78
+; RV64-NEXT:    slli a3, a1, 23
+; RV64-NEXT:    bltz a3, .LBB24_106
+; RV64-NEXT:  .LBB24_43: # %else80
+; RV64-NEXT:    slli a3, a1, 22
+; RV64-NEXT:    bltz a3, .LBB24_107
+; RV64-NEXT:  .LBB24_44: # %else82
+; RV64-NEXT:    slli a3, a1, 21
+; RV64-NEXT:    bltz a3, .LBB24_108
+; RV64-NEXT:  .LBB24_45: # %else84
+; RV64-NEXT:    slli a3, a1, 20
+; RV64-NEXT:    bltz a3, .LBB24_109
+; RV64-NEXT:  .LBB24_46: # %else86
+; RV64-NEXT:    slli a3, a1, 19
+; RV64-NEXT:    bltz a3, .LBB24_110
+; RV64-NEXT:  .LBB24_47: # %else88
+; RV64-NEXT:    slli a3, a1, 18
+; RV64-NEXT:    bltz a3, .LBB24_111
+; RV64-NEXT:  .LBB24_48: # %else90
+; RV64-NEXT:    slli a3, a1, 17
+; RV64-NEXT:    bltz a3, .LBB24_112
+; RV64-NEXT:  .LBB24_49: # %else92
+; RV64-NEXT:    slli a3, a1, 16
+; RV64-NEXT:    bltz a3, .LBB24_113
+; RV64-NEXT:  .LBB24_50: # %else94
+; RV64-NEXT:    slli a3, a1, 15
+; RV64-NEXT:    bltz a3, .LBB24_114
+; RV64-NEXT:  .LBB24_51: # %else96
+; RV64-NEXT:    slli a2, a1, 14
+; RV64-NEXT:    bltz a2, .LBB24_115
+; RV64-NEXT:  .LBB24_52: # %else98
+; RV64-NEXT:    slli a2, a1, 13
+; RV64-NEXT:    bltz a2, .LBB24_116
+; RV64-NEXT:  .LBB24_53: # %else100
+; RV64-NEXT:    slli a2, a1, 12
+; RV64-NEXT:    bltz a2, .LBB24_117
+; RV64-NEXT:  .LBB24_54: # %else102
+; RV64-NEXT:    slli a2, a1, 11
+; RV64-NEXT:    bltz a2, .LBB24_118
+; RV64-NEXT:  .LBB24_55: # %else104
+; RV64-NEXT:    slli a2, a1, 10
+; RV64-NEXT:    bltz a2, .LBB24_119
+; RV64-NEXT:  .LBB24_56: # %else106
+; RV64-NEXT:    slli a2, a1, 9
+; RV64-NEXT:    bltz a2, .LBB24_120
+; RV64-NEXT:  .LBB24_57: # %else108
+; RV64-NEXT:    slli a2, a1, 8
+; RV64-NEXT:    bltz a2, .LBB24_121
+; RV64-NEXT:  .LBB24_58: # %else110
+; RV64-NEXT:    slli a2, a1, 7
+; RV64-NEXT:    bltz a2, .LBB24_122
+; RV64-NEXT:  .LBB24_59: # %else112
+; RV64-NEXT:    slli a2, a1, 6
+; RV64-NEXT:    bltz a2, .LBB24_123
+; RV64-NEXT:  .LBB24_60: # %else114
+; RV64-NEXT:    slli a2, a1, 5
+; RV64-NEXT:    bltz a2, .LBB24_124
+; RV64-NEXT:  .LBB24_61: # %else116
+; RV64-NEXT:    slli a2, a1, 4
+; RV64-NEXT:    bltz a2, .LBB24_125
+; RV64-NEXT:  .LBB24_62: # %else118
+; RV64-NEXT:    slli a2, a1, 3
+; RV64-NEXT:    bltz a2, .LBB24_126
+; RV64-NEXT:  .LBB24_63: # %else120
+; RV64-NEXT:    slli a2, a1, 2
+; RV64-NEXT:    bltz a2, .LBB24_127
+; RV64-NEXT:  .LBB24_64: # %else122
+; RV64-NEXT:    slli a2, a1, 1
+; RV64-NEXT:    bltz a2, .LBB24_128
+; RV64-NEXT:  .LBB24_65: # %else124
+; RV64-NEXT:    bgez a1, .LBB24_67
+; RV64-NEXT:  .LBB24_66: # %cond.store125
+; RV64-NEXT:    li a1, 64
+; RV64-NEXT:    addi a2, sp, 128
+; RV64-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a2)
+; RV64-NEXT:    lh a1, 254(sp)
+; RV64-NEXT:    fmv.h.x fa5, a1
+; RV64-NEXT:    fsh fa5, 126(a0)
+; RV64-NEXT:  .LBB24_67: # %else126
+; RV64-NEXT:    addi sp, s0, -2032
+; RV64-NEXT:    .cfi_def_cfa sp, 2032
+; RV64-NEXT:    ld ra, 2024(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s0, 2016(sp) # 8-byte Folded Reload
+; RV64-NEXT:    .cfi_restore ra
+; RV64-NEXT:    .cfi_restore s0
+; RV64-NEXT:    addi sp, sp, 2032
+; RV64-NEXT:    .cfi_def_cfa_offset 0
 ; RV64-NEXT:    ret
-  %m = load <32 x double>, ptr %m_ptr
-  %mask = fcmp oeq <32 x double> %m, zeroinitializer
-  %val = load <32 x double>, ptr %val_ptr
-  call void @llvm.masked.store.v32f32.p0v32f64(<32 x double> %val, ptr %a, i32 8, <32 x i1> %mask)
+; RV64-NEXT:  .LBB24_68: # %cond.store
+; RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; RV64-NEXT:    vmv.x.s a2, v8
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 0(a0)
+; RV64-NEXT:    andi a2, a1, 2
+; RV64-NEXT:    beqz a2, .LBB24_2
+; RV64-NEXT:  .LBB24_69: # %cond.store1
+; RV64-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT:    vslidedown.vi v16, v8, 1
+; RV64-NEXT:    vmv.x.s a2, v16
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 2(a0)
+; RV64-NEXT:    andi a2, a1, 4
+; RV64-NEXT:    beqz a2, .LBB24_3
+; RV64-NEXT:  .LBB24_70: # %cond.store3
+; RV64-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT:    vslidedown.vi v16, v8, 2
+; RV64-NEXT:    vmv.x.s a2, v16
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 4(a0)
+; RV64-NEXT:    andi a2, a1, 8
+; RV64-NEXT:    beqz a2, .LBB24_4
+; RV64-NEXT:  .LBB24_71: # %cond.store5
+; RV64-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT:    vslidedown.vi v16, v8, 3
+; RV64-NEXT:    vmv.x.s a2, v16
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 6(a0)
+; RV64-NEXT:    andi a2, a1, 16
+; RV64-NEXT:    beqz a2, .LBB24_5
+; RV64-NEXT:  .LBB24_72: # %cond.store7
+; RV64-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT:    vslidedown.vi v16, v8, 4
+; RV64-NEXT:    vmv.x.s a2, v16
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 8(a0)
+; RV64-NEXT:    andi a2, a1, 32
+; RV64-NEXT:    beqz a2, .LBB24_6
+; RV64-NEXT:  .LBB24_73: # %cond.store9
+; RV64-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT:    vslidedown.vi v16, v8, 5
+; RV64-NEXT:    vmv.x.s a2, v16
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 10(a0)
+; RV64-NEXT:    andi a2, a1, 64
+; RV64-NEXT:    beqz a2, .LBB24_7
+; RV64-NEXT:  .LBB24_74: # %cond.store11
+; RV64-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT:    vslidedown.vi v16, v8, 6
+; RV64-NEXT:    vmv.x.s a2, v16
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 12(a0)
+; RV64-NEXT:    andi a2, a1, 128
+; RV64-NEXT:    beqz a2, .LBB24_8
+; RV64-NEXT:  .LBB24_75: # %cond.store13
+; RV64-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT:    vslidedown.vi v16, v8, 7
+; RV64-NEXT:    vmv.x.s a2, v16
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 14(a0)
+; RV64-NEXT:    andi a2, a1, 256
+; RV64-NEXT:    beqz a2, .LBB24_9
+; RV64-NEXT:  .LBB24_76: # %cond.store15
+; RV64-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-NEXT:    vslidedown.vi v16, v8, 8
+; RV64-NEXT:    vmv.x.s a2, v16
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 16(a0)
+; RV64-NEXT:    andi a2, a1, 512
+; RV64-NEXT:    beqz a2, .LBB24_10
+; RV64-NEXT:  .LBB24_77: # %cond.store17
+; RV64-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-NEXT:    vslidedown.vi v16, v8, 9
+; RV64-NEXT:    vmv.x.s a2, v16
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 18(a0)
+; RV64-NEXT:    andi a2, a1, 1024
+; RV64-NEXT:    beqz a2, .LBB24_11
+; RV64-NEXT:  .LBB24_78: # %cond.store19
+; RV64-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-NEXT:    vslidedown.vi v16, v8, 10
+; RV64-NEXT:    vmv.x.s a2, v16
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 20(a0)
+; RV64-NEXT:    slli a2, a1, 52
+; RV64-NEXT:    bgez a2, .LBB24_12
+; RV64-NEXT:  .LBB24_79: # %cond.store21
+; RV64-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-NEXT:    vslidedown.vi v16, v8, 11
+; RV64-NEXT:    vmv.x.s a2, v16
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 22(a0)
+; RV64-NEXT:    slli a2, a1, 51
+; RV64-NEXT:    bgez a2, .LBB24_13
+; RV64-NEXT:  .LBB24_80: # %cond.store23
+; RV64-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-NEXT:    vslidedown.vi v16, v8, 12
+; RV64-NEXT:    vmv.x.s a2, v16
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 24(a0)
+; RV64-NEXT:    slli a2, a1, 50
+; RV64-NEXT:    bgez a2, .LBB24_14
+; RV64-NEXT:  .LBB24_81: # %cond.store25
+; RV64-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-NEXT:    vslidedown.vi v16, v8, 13
+; RV64-NEXT:    vmv.x.s a2, v16
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 26(a0)
+; RV64-NEXT:    slli a2, a1, 49
+; RV64-NEXT:    bgez a2, .LBB24_15
+; RV64-NEXT:  .LBB24_82: # %cond.store27
+; RV64-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-NEXT:    vslidedown.vi v16, v8, 14
+; RV64-NEXT:    vmv.x.s a2, v16
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 28(a0)
+; RV64-NEXT:    slli a2, a1, 48
+; RV64-NEXT:    bltz a2, .LBB24_16
+; RV64-NEXT:    j .LBB24_17
+; RV64-NEXT:  .LBB24_83: # %cond.store31
+; RV64-NEXT:    .cfi_restore_state
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    li a4, 3
+; RV64-NEXT:    slli a4, a4, 11
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a3, 1890(a2)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 32(a0)
+; RV64-NEXT:    slli a3, a1, 46
+; RV64-NEXT:    bgez a3, .LBB24_19
+; RV64-NEXT:  .LBB24_84: # %cond.store33
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    lui a4, 1
+; RV64-NEXT:    addiw a4, a4, 1920
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a3, 1764(a2)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 34(a0)
+; RV64-NEXT:    slli a3, a1, 45
+; RV64-NEXT:    bgez a3, .LBB24_20
+; RV64-NEXT:  .LBB24_85: # %cond.store35
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    li a4, 23
+; RV64-NEXT:    slli a4, a4, 8
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a3, 1638(a2)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 36(a0)
+; RV64-NEXT:    slli a3, a1, 44
+; RV64-NEXT:    bgez a3, .LBB24_21
+; RV64-NEXT:  .LBB24_86: # %cond.store37
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    lui a4, 1
+; RV64-NEXT:    addiw a4, a4, 1664
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a3, 1512(a2)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 38(a0)
+; RV64-NEXT:    slli a3, a1, 43
+; RV64-NEXT:    bgez a3, .LBB24_22
+; RV64-NEXT:  .LBB24_87: # %cond.store39
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    li a4, 11
+; RV64-NEXT:    slli a4, a4, 9
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a3, 1386(a2)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 40(a0)
+; RV64-NEXT:    slli a3, a1, 42
+; RV64-NEXT:    bgez a3, .LBB24_23
+; RV64-NEXT:  .LBB24_88: # %cond.store41
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    lui a4, 1
+; RV64-NEXT:    addiw a4, a4, 1408
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a3, 1260(a2)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 42(a0)
+; RV64-NEXT:    slli a3, a1, 41
+; RV64-NEXT:    bgez a3, .LBB24_24
+; RV64-NEXT:  .LBB24_89: # %cond.store43
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    li a4, 21
+; RV64-NEXT:    slli a4, a4, 8
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a3, 1134(a2)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 44(a0)
+; RV64-NEXT:    slli a3, a1, 40
+; RV64-NEXT:    bgez a3, .LBB24_25
+; RV64-NEXT:  .LBB24_90: # %cond.store45
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    lui a4, 1
+; RV64-NEXT:    addiw a4, a4, 1152
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a3, 1008(a2)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 46(a0)
+; RV64-NEXT:    slli a3, a1, 39
+; RV64-NEXT:    bgez a3, .LBB24_26
+; RV64-NEXT:  .LBB24_91: # %cond.store47
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    li a4, 5
+; RV64-NEXT:    slli a4, a4, 10
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a3, 882(a2)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 48(a0)
+; RV64-NEXT:    slli a3, a1, 38
+; RV64-NEXT:    bgez a3, .LBB24_27
+; RV64-NEXT:  .LBB24_92: # %cond.store49
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    lui a4, 1
+; RV64-NEXT:    addiw a4, a4, 896
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a3, 756(a2)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 50(a0)
+; RV64-NEXT:    slli a3, a1, 37
+; RV64-NEXT:    bgez a3, .LBB24_28
+; RV64-NEXT:  .LBB24_93: # %cond.store51
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    li a4, 19
+; RV64-NEXT:    slli a4, a4, 8
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a3, 630(a2)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 52(a0)
+; RV64-NEXT:    slli a3, a1, 36
+; RV64-NEXT:    bgez a3, .LBB24_29
+; RV64-NEXT:  .LBB24_94: # %cond.store53
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    lui a4, 1
+; RV64-NEXT:    addiw a4, a4, 640
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a3, 504(a2)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 54(a0)
+; RV64-NEXT:    slli a3, a1, 35
+; RV64-NEXT:    bgez a3, .LBB24_30
+; RV64-NEXT:  .LBB24_95: # %cond.store55
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    li a4, 9
+; RV64-NEXT:    slli a4, a4, 9
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a3, 378(a2)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 56(a0)
+; RV64-NEXT:    slli a3, a1, 34
+; RV64-NEXT:    bgez a3, .LBB24_31
+; RV64-NEXT:  .LBB24_96: # %cond.store57
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    lui a4, 1
+; RV64-NEXT:    addiw a4, a4, 384
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a3, 252(a2)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 58(a0)
+; RV64-NEXT:    slli a3, a1, 33
+; RV64-NEXT:    bgez a3, .LBB24_32
+; RV64-NEXT:  .LBB24_97: # %cond.store59
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    li a4, 17
+; RV64-NEXT:    slli a4, a4, 8
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a3, 126(a2)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 60(a0)
+; RV64-NEXT:    slli a3, a1, 32
+; RV64-NEXT:    bltz a3, .LBB24_33
+; RV64-NEXT:    j .LBB24_34
+; RV64-NEXT:  .LBB24_98: # %cond.store63
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    lui a4, 1
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a3, 2016(a2)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 64(a0)
+; RV64-NEXT:    slli a3, a1, 30
+; RV64-NEXT:    bgez a3, .LBB24_36
+; RV64-NEXT:  .LBB24_99: # %cond.store65
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    addi a4, sp, 2047
+; RV64-NEXT:    addi a4, a4, 1921
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a3, 1890(a2)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 66(a0)
+; RV64-NEXT:    slli a3, a1, 29
+; RV64-NEXT:    bgez a3, .LBB24_37
+; RV64-NEXT:  .LBB24_100: # %cond.store67
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    addi a4, sp, 2047
+; RV64-NEXT:    addi a4, a4, 1793
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a3, 1764(a2)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 68(a0)
+; RV64-NEXT:    slli a3, a1, 28
+; RV64-NEXT:    bgez a3, .LBB24_38
+; RV64-NEXT:  .LBB24_101: # %cond.store69
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    addi a4, sp, 2047
+; RV64-NEXT:    addi a4, a4, 1665
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a3, 1638(a2)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 70(a0)
+; RV64-NEXT:    slli a3, a1, 27
+; RV64-NEXT:    bgez a3, .LBB24_39
+; RV64-NEXT:  .LBB24_102: # %cond.store71
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    addi a4, sp, 2047
+; RV64-NEXT:    addi a4, a4, 1537
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a3, 1512(a2)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 72(a0)
+; RV64-NEXT:    slli a3, a1, 26
+; RV64-NEXT:    bgez a3, .LBB24_40
+; RV64-NEXT:  .LBB24_103: # %cond.store73
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    addi a4, sp, 2047
+; RV64-NEXT:    addi a4, a4, 1409
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a3, 1386(a2)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 74(a0)
+; RV64-NEXT:    slli a3, a1, 25
+; RV64-NEXT:    bgez a3, .LBB24_41
+; RV64-NEXT:  .LBB24_104: # %cond.store75
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    addi a4, sp, 2047
+; RV64-NEXT:    addi a4, a4, 1281
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a3, 1260(a2)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 76(a0)
+; RV64-NEXT:    slli a3, a1, 24
+; RV64-NEXT:    bgez a3, .LBB24_42
+; RV64-NEXT:  .LBB24_105: # %cond.store77
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    addi a4, sp, 2047
+; RV64-NEXT:    addi a4, a4, 1153
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a3, 1134(a2)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 78(a0)
+; RV64-NEXT:    slli a3, a1, 23
+; RV64-NEXT:    bgez a3, .LBB24_43
+; RV64-NEXT:  .LBB24_106: # %cond.store79
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    addi a4, sp, 2047
+; RV64-NEXT:    addi a4, a4, 1025
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a3, 1008(a2)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 80(a0)
+; RV64-NEXT:    slli a3, a1, 22
+; RV64-NEXT:    bgez a3, .LBB24_44
+; RV64-NEXT:  .LBB24_107: # %cond.store81
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    addi a4, sp, 2047
+; RV64-NEXT:    addi a4, a4, 897
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a3, 882(a2)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 82(a0)
+; RV64-NEXT:    slli a3, a1, 21
+; RV64-NEXT:    bgez a3, .LBB24_45
+; RV64-NEXT:  .LBB24_108: # %cond.store83
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    addi a4, sp, 2047
+; RV64-NEXT:    addi a4, a4, 769
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a3, 756(a2)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 84(a0)
+; RV64-NEXT:    slli a3, a1, 20
+; RV64-NEXT:    bgez a3, .LBB24_46
+; RV64-NEXT:  .LBB24_109: # %cond.store85
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    addi a4, sp, 2047
+; RV64-NEXT:    addi a4, a4, 641
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a3, 630(a2)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 86(a0)
+; RV64-NEXT:    slli a3, a1, 19
+; RV64-NEXT:    bgez a3, .LBB24_47
+; RV64-NEXT:  .LBB24_110: # %cond.store87
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    addi a4, sp, 2047
+; RV64-NEXT:    addi a4, a4, 513
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a3, 504(a2)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 88(a0)
+; RV64-NEXT:    slli a3, a1, 18
+; RV64-NEXT:    bgez a3, .LBB24_48
+; RV64-NEXT:  .LBB24_111: # %cond.store89
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    addi a4, sp, 2047
+; RV64-NEXT:    addi a4, a4, 385
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a3, 378(a2)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 90(a0)
+; RV64-NEXT:    slli a3, a1, 17
+; RV64-NEXT:    bgez a3, .LBB24_49
+; RV64-NEXT:  .LBB24_112: # %cond.store91
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    addi a4, sp, 2047
+; RV64-NEXT:    addi a4, a4, 257
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a3, 252(a2)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 92(a0)
+; RV64-NEXT:    slli a3, a1, 16
+; RV64-NEXT:    bgez a3, .LBB24_50
+; RV64-NEXT:  .LBB24_113: # %cond.store93
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    addi a4, sp, 2047
+; RV64-NEXT:    addi a4, a4, 129
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a3, 126(a2)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 94(a0)
+; RV64-NEXT:    slli a3, a1, 15
+; RV64-NEXT:    bgez a3, .LBB24_51
+; RV64-NEXT:  .LBB24_114: # %cond.store95
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    addi a4, sp, 2047
+; RV64-NEXT:    addi a4, a4, 1
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a2, 0(a2)
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 96(a0)
+; RV64-NEXT:    slli a2, a1, 14
+; RV64-NEXT:    bgez a2, .LBB24_52
+; RV64-NEXT:  .LBB24_115: # %cond.store97
+; RV64-NEXT:    li a2, 64
+; RV64-NEXT:    addi a3, sp, 1920
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a3)
+; RV64-NEXT:    lh a2, 2018(sp)
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 98(a0)
+; RV64-NEXT:    slli a2, a1, 13
+; RV64-NEXT:    bgez a2, .LBB24_53
+; RV64-NEXT:  .LBB24_116: # %cond.store99
+; RV64-NEXT:    li a2, 64
+; RV64-NEXT:    addi a3, sp, 1792
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a3)
+; RV64-NEXT:    lh a2, 1892(sp)
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 100(a0)
+; RV64-NEXT:    slli a2, a1, 12
+; RV64-NEXT:    bgez a2, .LBB24_54
+; RV64-NEXT:  .LBB24_117: # %cond.store101
+; RV64-NEXT:    li a2, 64
+; RV64-NEXT:    addi a3, sp, 1664
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a3)
+; RV64-NEXT:    lh a2, 1766(sp)
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 102(a0)
+; RV64-NEXT:    slli a2, a1, 11
+; RV64-NEXT:    bgez a2, .LBB24_55
+; RV64-NEXT:  .LBB24_118: # %cond.store103
+; RV64-NEXT:    li a2, 64
+; RV64-NEXT:    addi a3, sp, 1536
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a3)
+; RV64-NEXT:    lh a2, 1640(sp)
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 104(a0)
+; RV64-NEXT:    slli a2, a1, 10
+; RV64-NEXT:    bgez a2, .LBB24_56
+; RV64-NEXT:  .LBB24_119: # %cond.store105
+; RV64-NEXT:    li a2, 64
+; RV64-NEXT:    addi a3, sp, 1408
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a3)
+; RV64-NEXT:    lh a2, 1514(sp)
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 106(a0)
+; RV64-NEXT:    slli a2, a1, 9
+; RV64-NEXT:    bgez a2, .LBB24_57
+; RV64-NEXT:  .LBB24_120: # %cond.store107
+; RV64-NEXT:    li a2, 64
+; RV64-NEXT:    addi a3, sp, 1280
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a3)
+; RV64-NEXT:    lh a2, 1388(sp)
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 108(a0)
+; RV64-NEXT:    slli a2, a1, 8
+; RV64-NEXT:    bgez a2, .LBB24_58
+; RV64-NEXT:  .LBB24_121: # %cond.store109
+; RV64-NEXT:    li a2, 64
+; RV64-NEXT:    addi a3, sp, 1152
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a3)
+; RV64-NEXT:    lh a2, 1262(sp)
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 110(a0)
+; RV64-NEXT:    slli a2, a1, 7
+; RV64-NEXT:    bgez a2, .LBB24_59
+; RV64-NEXT:  .LBB24_122: # %cond.store111
+; RV64-NEXT:    li a2, 64
+; RV64-NEXT:    addi a3, sp, 1024
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a3)
+; RV64-NEXT:    lh a2, 1136(sp)
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 112(a0)
+; RV64-NEXT:    slli a2, a1, 6
+; RV64-NEXT:    bgez a2, .LBB24_60
+; RV64-NEXT:  .LBB24_123: # %cond.store113
+; RV64-NEXT:    li a2, 64
+; RV64-NEXT:    addi a3, sp, 896
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a3)
+; RV64-NEXT:    lh a2, 1010(sp)
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 114(a0)
+; RV64-NEXT:    slli a2, a1, 5
+; RV64-NEXT:    bgez a2, .LBB24_61
+; RV64-NEXT:  .LBB24_124: # %cond.store115
+; RV64-NEXT:    li a2, 64
+; RV64-NEXT:    addi a3, sp, 768
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a3)
+; RV64-NEXT:    lh a2, 884(sp)
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 116(a0)
+; RV64-NEXT:    slli a2, a1, 4
+; RV64-NEXT:    bgez a2, .LBB24_62
+; RV64-NEXT:  .LBB24_125: # %cond.store117
+; RV64-NEXT:    li a2, 64
+; RV64-NEXT:    addi a3, sp, 640
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a3)
+; RV64-NEXT:    lh a2, 758(sp)
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 118(a0)
+; RV64-NEXT:    slli a2, a1, 3
+; RV64-NEXT:    bgez a2, .LBB24_63
+; RV64-NEXT:  .LBB24_126: # %cond.store119
+; RV64-NEXT:    li a2, 64
+; RV64-NEXT:    addi a3, sp, 512
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a3)
+; RV64-NEXT:    lh a2, 632(sp)
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 120(a0)
+; RV64-NEXT:    slli a2, a1, 2
+; RV64-NEXT:    bgez a2, .LBB24_64
+; RV64-NEXT:  .LBB24_127: # %cond.store121
+; RV64-NEXT:    li a2, 64
+; RV64-NEXT:    addi a3, sp, 384
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a3)
+; RV64-NEXT:    lh a2, 506(sp)
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 122(a0)
+; RV64-NEXT:    slli a2, a1, 1
+; RV64-NEXT:    bgez a2, .LBB24_65
+; RV64-NEXT:  .LBB24_128: # %cond.store123
+; RV64-NEXT:    li a2, 64
+; RV64-NEXT:    addi a3, sp, 256
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a3)
+; RV64-NEXT:    lh a2, 380(sp)
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 124(a0)
+; RV64-NEXT:    bltz a1, .LBB24_66
+; RV64-NEXT:    j .LBB24_67
+  call void @llvm.masked.store.v64bf16.p0(<64 x bfloat> %val, ptr %a, i32 8, <64 x i1> %mask)
   ret void
 }
-declare void @llvm.masked.store.v32f32.p0v32f64(<32 x double>, ptr, i32, <32 x i1>)
 
-define void @masked_store_v64f16(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
-; CHECK-LABEL: masked_store_v64f16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a3, 64
-; CHECK-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
-; CHECK-NEXT:    vle16.v v8, (a2)
-; CHECK-NEXT:    vle16.v v16, (a0)
-; CHECK-NEXT:    fmv.h.x fa5, zero
-; CHECK-NEXT:    vmfeq.vf v0, v8, fa5
-; CHECK-NEXT:    vse16.v v16, (a1), v0.t
-; CHECK-NEXT:    ret
-  %m = load <64 x half>, ptr %m_ptr
-  %mask = fcmp oeq <64 x half> %m, zeroinitializer
-  %val = load <64 x half>, ptr %val_ptr
-  call void @llvm.masked.store.v64f16.p0v64f16(<64 x half> %val, ptr %a, i32 8, <64 x i1> %mask)
+define void @masked_store_v64f16(<64 x half> %val, ptr %a, <64 x i1> %mask) {
+; ZVFH-LABEL: masked_store_v64f16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    li a1, 64
+; ZVFH-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; ZVFH-NEXT:    vse16.v v8, (a0), v0.t
+; ZVFH-NEXT:    ret
+;
+; RV32-ZVFHMIN-LABEL: masked_store_v64f16:
+; RV32-ZVFHMIN:       # %bb.0:
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a2, v0
+; RV32-ZVFHMIN-NEXT:    andi a1, a2, 1
+; RV32-ZVFHMIN-NEXT:    bnez a1, .LBB25_71
+; RV32-ZVFHMIN-NEXT:  # %bb.1: # %else
+; RV32-ZVFHMIN-NEXT:    andi a1, a2, 2
+; RV32-ZVFHMIN-NEXT:    bnez a1, .LBB25_72
+; RV32-ZVFHMIN-NEXT:  .LBB25_2: # %else2
+; RV32-ZVFHMIN-NEXT:    andi a1, a2, 4
+; RV32-ZVFHMIN-NEXT:    bnez a1, .LBB25_73
+; RV32-ZVFHMIN-NEXT:  .LBB25_3: # %else4
+; RV32-ZVFHMIN-NEXT:    andi a1, a2, 8
+; RV32-ZVFHMIN-NEXT:    bnez a1, .LBB25_74
+; RV32-ZVFHMIN-NEXT:  .LBB25_4: # %else6
+; RV32-ZVFHMIN-NEXT:    andi a1, a2, 16
+; RV32-ZVFHMIN-NEXT:    bnez a1, .LBB25_75
+; RV32-ZVFHMIN-NEXT:  .LBB25_5: # %else8
+; RV32-ZVFHMIN-NEXT:    andi a1, a2, 32
+; RV32-ZVFHMIN-NEXT:    bnez a1, .LBB25_76
+; RV32-ZVFHMIN-NEXT:  .LBB25_6: # %else10
+; RV32-ZVFHMIN-NEXT:    andi a1, a2, 64
+; RV32-ZVFHMIN-NEXT:    bnez a1, .LBB25_77
+; RV32-ZVFHMIN-NEXT:  .LBB25_7: # %else12
+; RV32-ZVFHMIN-NEXT:    andi a1, a2, 128
+; RV32-ZVFHMIN-NEXT:    bnez a1, .LBB25_78
+; RV32-ZVFHMIN-NEXT:  .LBB25_8: # %else14
+; RV32-ZVFHMIN-NEXT:    andi a1, a2, 256
+; RV32-ZVFHMIN-NEXT:    bnez a1, .LBB25_79
+; RV32-ZVFHMIN-NEXT:  .LBB25_9: # %else16
+; RV32-ZVFHMIN-NEXT:    andi a1, a2, 512
+; RV32-ZVFHMIN-NEXT:    bnez a1, .LBB25_80
+; RV32-ZVFHMIN-NEXT:  .LBB25_10: # %else18
+; RV32-ZVFHMIN-NEXT:    andi a1, a2, 1024
+; RV32-ZVFHMIN-NEXT:    bnez a1, .LBB25_81
+; RV32-ZVFHMIN-NEXT:  .LBB25_11: # %else20
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 20
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB25_82
+; RV32-ZVFHMIN-NEXT:  .LBB25_12: # %else22
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 19
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB25_83
+; RV32-ZVFHMIN-NEXT:  .LBB25_13: # %else24
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 18
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB25_84
+; RV32-ZVFHMIN-NEXT:  .LBB25_14: # %else26
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 17
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB25_85
+; RV32-ZVFHMIN-NEXT:  .LBB25_15: # %else28
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 16
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB25_17
+; RV32-ZVFHMIN-NEXT:  .LBB25_16: # %cond.store29
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v16, v8, 15
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a1, v16
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 30(a0)
+; RV32-ZVFHMIN-NEXT:  .LBB25_17: # %else30
+; RV32-ZVFHMIN-NEXT:    addi sp, sp, -2032
+; RV32-ZVFHMIN-NEXT:    .cfi_def_cfa_offset 2032
+; RV32-ZVFHMIN-NEXT:    sw ra, 2028(sp) # 4-byte Folded Spill
+; RV32-ZVFHMIN-NEXT:    sw s0, 2024(sp) # 4-byte Folded Spill
+; RV32-ZVFHMIN-NEXT:    .cfi_offset ra, -4
+; RV32-ZVFHMIN-NEXT:    .cfi_offset s0, -8
+; RV32-ZVFHMIN-NEXT:    addi s0, sp, 2032
+; RV32-ZVFHMIN-NEXT:    .cfi_def_cfa s0, 0
+; RV32-ZVFHMIN-NEXT:    .cfi_remember_state
+; RV32-ZVFHMIN-NEXT:    lui a1, 1
+; RV32-ZVFHMIN-NEXT:    addi a1, a1, 272
+; RV32-ZVFHMIN-NEXT:    sub sp, sp, a1
+; RV32-ZVFHMIN-NEXT:    andi sp, sp, -128
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 15
+; RV32-ZVFHMIN-NEXT:    lui a3, 1
+; RV32-ZVFHMIN-NEXT:    addi a3, a3, 190
+; RV32-ZVFHMIN-NEXT:    add a3, sp, a3
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB25_86
+; RV32-ZVFHMIN-NEXT:  # %bb.18: # %else32
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 14
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB25_87
+; RV32-ZVFHMIN-NEXT:  .LBB25_19: # %else34
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 13
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB25_88
+; RV32-ZVFHMIN-NEXT:  .LBB25_20: # %else36
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 12
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB25_89
+; RV32-ZVFHMIN-NEXT:  .LBB25_21: # %else38
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 11
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB25_90
+; RV32-ZVFHMIN-NEXT:  .LBB25_22: # %else40
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 10
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB25_91
+; RV32-ZVFHMIN-NEXT:  .LBB25_23: # %else42
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 9
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB25_92
+; RV32-ZVFHMIN-NEXT:  .LBB25_24: # %else44
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 8
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB25_93
+; RV32-ZVFHMIN-NEXT:  .LBB25_25: # %else46
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 7
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB25_94
+; RV32-ZVFHMIN-NEXT:  .LBB25_26: # %else48
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 6
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB25_95
+; RV32-ZVFHMIN-NEXT:  .LBB25_27: # %else50
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 5
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB25_96
+; RV32-ZVFHMIN-NEXT:  .LBB25_28: # %else52
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 4
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB25_97
+; RV32-ZVFHMIN-NEXT:  .LBB25_29: # %else54
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 3
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB25_31
+; RV32-ZVFHMIN-NEXT:  .LBB25_30: # %cond.store55
+; RV32-ZVFHMIN-NEXT:    li a1, 64
+; RV32-ZVFHMIN-NEXT:    li a4, 9
+; RV32-ZVFHMIN-NEXT:    slli a4, a4, 9
+; RV32-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a1, 378(a3)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 56(a0)
+; RV32-ZVFHMIN-NEXT:  .LBB25_31: # %else56
+; RV32-ZVFHMIN-NEXT:    slli a4, a2, 2
+; RV32-ZVFHMIN-NEXT:    li a1, 32
+; RV32-ZVFHMIN-NEXT:    bgez a4, .LBB25_33
+; RV32-ZVFHMIN-NEXT:  # %bb.32: # %cond.store57
+; RV32-ZVFHMIN-NEXT:    li a4, 64
+; RV32-ZVFHMIN-NEXT:    lui a5, 1
+; RV32-ZVFHMIN-NEXT:    addi a5, a5, 384
+; RV32-ZVFHMIN-NEXT:    add a5, sp, a5
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a4, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a5)
+; RV32-ZVFHMIN-NEXT:    lh a4, 252(a3)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a4
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 58(a0)
+; RV32-ZVFHMIN-NEXT:  .LBB25_33: # %else58
+; RV32-ZVFHMIN-NEXT:    slli a4, a2, 1
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vsrl.vx v16, v0, a1
+; RV32-ZVFHMIN-NEXT:    bgez a4, .LBB25_35
+; RV32-ZVFHMIN-NEXT:  # %bb.34: # %cond.store59
+; RV32-ZVFHMIN-NEXT:    li a1, 64
+; RV32-ZVFHMIN-NEXT:    li a4, 17
+; RV32-ZVFHMIN-NEXT:    slli a4, a4, 8
+; RV32-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a1, 126(a3)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 60(a0)
+; RV32-ZVFHMIN-NEXT:  .LBB25_35: # %else60
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a1, v16
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB25_37
+; RV32-ZVFHMIN-NEXT:  # %bb.36: # %cond.store61
+; RV32-ZVFHMIN-NEXT:    li a2, 64
+; RV32-ZVFHMIN-NEXT:    lui a4, 1
+; RV32-ZVFHMIN-NEXT:    addi a4, a4, 128
+; RV32-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a2, 0(a3)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 62(a0)
+; RV32-ZVFHMIN-NEXT:  .LBB25_37: # %else62
+; RV32-ZVFHMIN-NEXT:    andi a3, a1, 1
+; RV32-ZVFHMIN-NEXT:    addi a2, sp, 2047
+; RV32-ZVFHMIN-NEXT:    addi a2, a2, 97
+; RV32-ZVFHMIN-NEXT:    bnez a3, .LBB25_98
+; RV32-ZVFHMIN-NEXT:  # %bb.38: # %else64
+; RV32-ZVFHMIN-NEXT:    andi a3, a1, 2
+; RV32-ZVFHMIN-NEXT:    bnez a3, .LBB25_99
+; RV32-ZVFHMIN-NEXT:  .LBB25_39: # %else66
+; RV32-ZVFHMIN-NEXT:    andi a3, a1, 4
+; RV32-ZVFHMIN-NEXT:    bnez a3, .LBB25_100
+; RV32-ZVFHMIN-NEXT:  .LBB25_40: # %else68
+; RV32-ZVFHMIN-NEXT:    andi a3, a1, 8
+; RV32-ZVFHMIN-NEXT:    bnez a3, .LBB25_101
+; RV32-ZVFHMIN-NEXT:  .LBB25_41: # %else70
+; RV32-ZVFHMIN-NEXT:    andi a3, a1, 16
+; RV32-ZVFHMIN-NEXT:    bnez a3, .LBB25_102
+; RV32-ZVFHMIN-NEXT:  .LBB25_42: # %else72
+; RV32-ZVFHMIN-NEXT:    andi a3, a1, 32
+; RV32-ZVFHMIN-NEXT:    bnez a3, .LBB25_103
+; RV32-ZVFHMIN-NEXT:  .LBB25_43: # %else74
+; RV32-ZVFHMIN-NEXT:    andi a3, a1, 64
+; RV32-ZVFHMIN-NEXT:    bnez a3, .LBB25_104
+; RV32-ZVFHMIN-NEXT:  .LBB25_44: # %else76
+; RV32-ZVFHMIN-NEXT:    andi a3, a1, 128
+; RV32-ZVFHMIN-NEXT:    bnez a3, .LBB25_105
+; RV32-ZVFHMIN-NEXT:  .LBB25_45: # %else78
+; RV32-ZVFHMIN-NEXT:    andi a3, a1, 256
+; RV32-ZVFHMIN-NEXT:    bnez a3, .LBB25_106
+; RV32-ZVFHMIN-NEXT:  .LBB25_46: # %else80
+; RV32-ZVFHMIN-NEXT:    andi a3, a1, 512
+; RV32-ZVFHMIN-NEXT:    bnez a3, .LBB25_107
+; RV32-ZVFHMIN-NEXT:  .LBB25_47: # %else82
+; RV32-ZVFHMIN-NEXT:    andi a3, a1, 1024
+; RV32-ZVFHMIN-NEXT:    bnez a3, .LBB25_108
+; RV32-ZVFHMIN-NEXT:  .LBB25_48: # %else84
+; RV32-ZVFHMIN-NEXT:    slli a3, a1, 20
+; RV32-ZVFHMIN-NEXT:    bltz a3, .LBB25_109
+; RV32-ZVFHMIN-NEXT:  .LBB25_49: # %else86
+; RV32-ZVFHMIN-NEXT:    slli a3, a1, 19
+; RV32-ZVFHMIN-NEXT:    bltz a3, .LBB25_110
+; RV32-ZVFHMIN-NEXT:  .LBB25_50: # %else88
+; RV32-ZVFHMIN-NEXT:    slli a3, a1, 18
+; RV32-ZVFHMIN-NEXT:    bltz a3, .LBB25_111
+; RV32-ZVFHMIN-NEXT:  .LBB25_51: # %else90
+; RV32-ZVFHMIN-NEXT:    slli a3, a1, 17
+; RV32-ZVFHMIN-NEXT:    bltz a3, .LBB25_112
+; RV32-ZVFHMIN-NEXT:  .LBB25_52: # %else92
+; RV32-ZVFHMIN-NEXT:    slli a3, a1, 16
+; RV32-ZVFHMIN-NEXT:    bltz a3, .LBB25_113
+; RV32-ZVFHMIN-NEXT:  .LBB25_53: # %else94
+; RV32-ZVFHMIN-NEXT:    slli a3, a1, 15
+; RV32-ZVFHMIN-NEXT:    bltz a3, .LBB25_114
+; RV32-ZVFHMIN-NEXT:  .LBB25_54: # %else96
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 14
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB25_115
+; RV32-ZVFHMIN-NEXT:  .LBB25_55: # %else98
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 13
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB25_116
+; RV32-ZVFHMIN-NEXT:  .LBB25_56: # %else100
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 12
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB25_117
+; RV32-ZVFHMIN-NEXT:  .LBB25_57: # %else102
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 11
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB25_118
+; RV32-ZVFHMIN-NEXT:  .LBB25_58: # %else104
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 10
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB25_119
+; RV32-ZVFHMIN-NEXT:  .LBB25_59: # %else106
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 9
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB25_120
+; RV32-ZVFHMIN-NEXT:  .LBB25_60: # %else108
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 8
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB25_121
+; RV32-ZVFHMIN-NEXT:  .LBB25_61: # %else110
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 7
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB25_122
+; RV32-ZVFHMIN-NEXT:  .LBB25_62: # %else112
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 6
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB25_123
+; RV32-ZVFHMIN-NEXT:  .LBB25_63: # %else114
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 5
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB25_124
+; RV32-ZVFHMIN-NEXT:  .LBB25_64: # %else116
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 4
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB25_125
+; RV32-ZVFHMIN-NEXT:  .LBB25_65: # %else118
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 3
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB25_126
+; RV32-ZVFHMIN-NEXT:  .LBB25_66: # %else120
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 2
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB25_127
+; RV32-ZVFHMIN-NEXT:  .LBB25_67: # %else122
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 1
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB25_128
+; RV32-ZVFHMIN-NEXT:  .LBB25_68: # %else124
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB25_70
+; RV32-ZVFHMIN-NEXT:  .LBB25_69: # %cond.store125
+; RV32-ZVFHMIN-NEXT:    li a1, 64
+; RV32-ZVFHMIN-NEXT:    addi a2, sp, 128
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a2)
+; RV32-ZVFHMIN-NEXT:    lh a1, 254(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 126(a0)
+; RV32-ZVFHMIN-NEXT:  .LBB25_70: # %else126
+; RV32-ZVFHMIN-NEXT:    addi sp, s0, -2032
+; RV32-ZVFHMIN-NEXT:    .cfi_def_cfa sp, 2032
+; RV32-ZVFHMIN-NEXT:    lw ra, 2028(sp) # 4-byte Folded Reload
+; RV32-ZVFHMIN-NEXT:    lw s0, 2024(sp) # 4-byte Folded Reload
+; RV32-ZVFHMIN-NEXT:    .cfi_restore ra
+; RV32-ZVFHMIN-NEXT:    .cfi_restore s0
+; RV32-ZVFHMIN-NEXT:    addi sp, sp, 2032
+; RV32-ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
+; RV32-ZVFHMIN-NEXT:    ret
+; RV32-ZVFHMIN-NEXT:  .LBB25_71: # %cond.store
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a1, v8
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 0(a0)
+; RV32-ZVFHMIN-NEXT:    andi a1, a2, 2
+; RV32-ZVFHMIN-NEXT:    beqz a1, .LBB25_2
+; RV32-ZVFHMIN-NEXT:  .LBB25_72: # %cond.store1
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v16, v8, 1
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a1, v16
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 2(a0)
+; RV32-ZVFHMIN-NEXT:    andi a1, a2, 4
+; RV32-ZVFHMIN-NEXT:    beqz a1, .LBB25_3
+; RV32-ZVFHMIN-NEXT:  .LBB25_73: # %cond.store3
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v16, v8, 2
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a1, v16
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 4(a0)
+; RV32-ZVFHMIN-NEXT:    andi a1, a2, 8
+; RV32-ZVFHMIN-NEXT:    beqz a1, .LBB25_4
+; RV32-ZVFHMIN-NEXT:  .LBB25_74: # %cond.store5
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v16, v8, 3
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a1, v16
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 6(a0)
+; RV32-ZVFHMIN-NEXT:    andi a1, a2, 16
+; RV32-ZVFHMIN-NEXT:    beqz a1, .LBB25_5
+; RV32-ZVFHMIN-NEXT:  .LBB25_75: # %cond.store7
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v16, v8, 4
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a1, v16
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 8(a0)
+; RV32-ZVFHMIN-NEXT:    andi a1, a2, 32
+; RV32-ZVFHMIN-NEXT:    beqz a1, .LBB25_6
+; RV32-ZVFHMIN-NEXT:  .LBB25_76: # %cond.store9
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v16, v8, 5
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a1, v16
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 10(a0)
+; RV32-ZVFHMIN-NEXT:    andi a1, a2, 64
+; RV32-ZVFHMIN-NEXT:    beqz a1, .LBB25_7
+; RV32-ZVFHMIN-NEXT:  .LBB25_77: # %cond.store11
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v16, v8, 6
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a1, v16
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 12(a0)
+; RV32-ZVFHMIN-NEXT:    andi a1, a2, 128
+; RV32-ZVFHMIN-NEXT:    beqz a1, .LBB25_8
+; RV32-ZVFHMIN-NEXT:  .LBB25_78: # %cond.store13
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v16, v8, 7
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a1, v16
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 14(a0)
+; RV32-ZVFHMIN-NEXT:    andi a1, a2, 256
+; RV32-ZVFHMIN-NEXT:    beqz a1, .LBB25_9
+; RV32-ZVFHMIN-NEXT:  .LBB25_79: # %cond.store15
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v16, v8, 8
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a1, v16
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 16(a0)
+; RV32-ZVFHMIN-NEXT:    andi a1, a2, 512
+; RV32-ZVFHMIN-NEXT:    beqz a1, .LBB25_10
+; RV32-ZVFHMIN-NEXT:  .LBB25_80: # %cond.store17
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v16, v8, 9
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a1, v16
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 18(a0)
+; RV32-ZVFHMIN-NEXT:    andi a1, a2, 1024
+; RV32-ZVFHMIN-NEXT:    beqz a1, .LBB25_11
+; RV32-ZVFHMIN-NEXT:  .LBB25_81: # %cond.store19
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v16, v8, 10
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a1, v16
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 20(a0)
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 20
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB25_12
+; RV32-ZVFHMIN-NEXT:  .LBB25_82: # %cond.store21
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v16, v8, 11
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a1, v16
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 22(a0)
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 19
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB25_13
+; RV32-ZVFHMIN-NEXT:  .LBB25_83: # %cond.store23
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v16, v8, 12
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a1, v16
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 24(a0)
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 18
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB25_14
+; RV32-ZVFHMIN-NEXT:  .LBB25_84: # %cond.store25
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v16, v8, 13
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a1, v16
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 26(a0)
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 17
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB25_15
+; RV32-ZVFHMIN-NEXT:  .LBB25_85: # %cond.store27
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v16, v8, 14
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a1, v16
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 28(a0)
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 16
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB25_16
+; RV32-ZVFHMIN-NEXT:    j .LBB25_17
+; RV32-ZVFHMIN-NEXT:  .LBB25_86: # %cond.store31
+; RV32-ZVFHMIN-NEXT:    .cfi_restore_state
+; RV32-ZVFHMIN-NEXT:    li a1, 64
+; RV32-ZVFHMIN-NEXT:    li a4, 3
+; RV32-ZVFHMIN-NEXT:    slli a4, a4, 11
+; RV32-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a1, 1890(a3)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 32(a0)
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 14
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB25_19
+; RV32-ZVFHMIN-NEXT:  .LBB25_87: # %cond.store33
+; RV32-ZVFHMIN-NEXT:    li a1, 64
+; RV32-ZVFHMIN-NEXT:    lui a4, 1
+; RV32-ZVFHMIN-NEXT:    addi a4, a4, 1920
+; RV32-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a1, 1764(a3)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 34(a0)
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 13
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB25_20
+; RV32-ZVFHMIN-NEXT:  .LBB25_88: # %cond.store35
+; RV32-ZVFHMIN-NEXT:    li a1, 64
+; RV32-ZVFHMIN-NEXT:    li a4, 23
+; RV32-ZVFHMIN-NEXT:    slli a4, a4, 8
+; RV32-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a1, 1638(a3)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 36(a0)
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 12
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB25_21
+; RV32-ZVFHMIN-NEXT:  .LBB25_89: # %cond.store37
+; RV32-ZVFHMIN-NEXT:    li a1, 64
+; RV32-ZVFHMIN-NEXT:    lui a4, 1
+; RV32-ZVFHMIN-NEXT:    addi a4, a4, 1664
+; RV32-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a1, 1512(a3)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 38(a0)
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 11
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB25_22
+; RV32-ZVFHMIN-NEXT:  .LBB25_90: # %cond.store39
+; RV32-ZVFHMIN-NEXT:    li a1, 64
+; RV32-ZVFHMIN-NEXT:    li a4, 11
+; RV32-ZVFHMIN-NEXT:    slli a4, a4, 9
+; RV32-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a1, 1386(a3)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 40(a0)
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 10
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB25_23
+; RV32-ZVFHMIN-NEXT:  .LBB25_91: # %cond.store41
+; RV32-ZVFHMIN-NEXT:    li a1, 64
+; RV32-ZVFHMIN-NEXT:    lui a4, 1
+; RV32-ZVFHMIN-NEXT:    addi a4, a4, 1408
+; RV32-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a1, 1260(a3)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 42(a0)
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 9
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB25_24
+; RV32-ZVFHMIN-NEXT:  .LBB25_92: # %cond.store43
+; RV32-ZVFHMIN-NEXT:    li a1, 64
+; RV32-ZVFHMIN-NEXT:    li a4, 21
+; RV32-ZVFHMIN-NEXT:    slli a4, a4, 8
+; RV32-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a1, 1134(a3)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 44(a0)
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 8
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB25_25
+; RV32-ZVFHMIN-NEXT:  .LBB25_93: # %cond.store45
+; RV32-ZVFHMIN-NEXT:    li a1, 64
+; RV32-ZVFHMIN-NEXT:    lui a4, 1
+; RV32-ZVFHMIN-NEXT:    addi a4, a4, 1152
+; RV32-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a1, 1008(a3)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 46(a0)
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 7
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB25_26
+; RV32-ZVFHMIN-NEXT:  .LBB25_94: # %cond.store47
+; RV32-ZVFHMIN-NEXT:    li a1, 64
+; RV32-ZVFHMIN-NEXT:    li a4, 5
+; RV32-ZVFHMIN-NEXT:    slli a4, a4, 10
+; RV32-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a1, 882(a3)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 48(a0)
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 6
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB25_27
+; RV32-ZVFHMIN-NEXT:  .LBB25_95: # %cond.store49
+; RV32-ZVFHMIN-NEXT:    li a1, 64
+; RV32-ZVFHMIN-NEXT:    lui a4, 1
+; RV32-ZVFHMIN-NEXT:    addi a4, a4, 896
+; RV32-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a1, 756(a3)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 50(a0)
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 5
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB25_28
+; RV32-ZVFHMIN-NEXT:  .LBB25_96: # %cond.store51
+; RV32-ZVFHMIN-NEXT:    li a1, 64
+; RV32-ZVFHMIN-NEXT:    li a4, 19
+; RV32-ZVFHMIN-NEXT:    slli a4, a4, 8
+; RV32-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a1, 630(a3)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 52(a0)
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 4
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB25_29
+; RV32-ZVFHMIN-NEXT:  .LBB25_97: # %cond.store53
+; RV32-ZVFHMIN-NEXT:    li a1, 64
+; RV32-ZVFHMIN-NEXT:    lui a4, 1
+; RV32-ZVFHMIN-NEXT:    addi a4, a4, 640
+; RV32-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a1, 504(a3)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 54(a0)
+; RV32-ZVFHMIN-NEXT:    slli a1, a2, 3
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB25_30
+; RV32-ZVFHMIN-NEXT:    j .LBB25_31
+; RV32-ZVFHMIN-NEXT:  .LBB25_98: # %cond.store63
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    lui a4, 1
+; RV32-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a3, 2016(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 64(a0)
+; RV32-ZVFHMIN-NEXT:    andi a3, a1, 2
+; RV32-ZVFHMIN-NEXT:    beqz a3, .LBB25_39
+; RV32-ZVFHMIN-NEXT:  .LBB25_99: # %cond.store65
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV32-ZVFHMIN-NEXT:    addi a4, a4, 1921
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a3, 1890(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 66(a0)
+; RV32-ZVFHMIN-NEXT:    andi a3, a1, 4
+; RV32-ZVFHMIN-NEXT:    beqz a3, .LBB25_40
+; RV32-ZVFHMIN-NEXT:  .LBB25_100: # %cond.store67
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV32-ZVFHMIN-NEXT:    addi a4, a4, 1793
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a3, 1764(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 68(a0)
+; RV32-ZVFHMIN-NEXT:    andi a3, a1, 8
+; RV32-ZVFHMIN-NEXT:    beqz a3, .LBB25_41
+; RV32-ZVFHMIN-NEXT:  .LBB25_101: # %cond.store69
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV32-ZVFHMIN-NEXT:    addi a4, a4, 1665
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a3, 1638(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 70(a0)
+; RV32-ZVFHMIN-NEXT:    andi a3, a1, 16
+; RV32-ZVFHMIN-NEXT:    beqz a3, .LBB25_42
+; RV32-ZVFHMIN-NEXT:  .LBB25_102: # %cond.store71
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV32-ZVFHMIN-NEXT:    addi a4, a4, 1537
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a3, 1512(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 72(a0)
+; RV32-ZVFHMIN-NEXT:    andi a3, a1, 32
+; RV32-ZVFHMIN-NEXT:    beqz a3, .LBB25_43
+; RV32-ZVFHMIN-NEXT:  .LBB25_103: # %cond.store73
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV32-ZVFHMIN-NEXT:    addi a4, a4, 1409
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a3, 1386(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 74(a0)
+; RV32-ZVFHMIN-NEXT:    andi a3, a1, 64
+; RV32-ZVFHMIN-NEXT:    beqz a3, .LBB25_44
+; RV32-ZVFHMIN-NEXT:  .LBB25_104: # %cond.store75
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV32-ZVFHMIN-NEXT:    addi a4, a4, 1281
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a3, 1260(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 76(a0)
+; RV32-ZVFHMIN-NEXT:    andi a3, a1, 128
+; RV32-ZVFHMIN-NEXT:    beqz a3, .LBB25_45
+; RV32-ZVFHMIN-NEXT:  .LBB25_105: # %cond.store77
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV32-ZVFHMIN-NEXT:    addi a4, a4, 1153
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a3, 1134(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 78(a0)
+; RV32-ZVFHMIN-NEXT:    andi a3, a1, 256
+; RV32-ZVFHMIN-NEXT:    beqz a3, .LBB25_46
+; RV32-ZVFHMIN-NEXT:  .LBB25_106: # %cond.store79
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV32-ZVFHMIN-NEXT:    addi a4, a4, 1025
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a3, 1008(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 80(a0)
+; RV32-ZVFHMIN-NEXT:    andi a3, a1, 512
+; RV32-ZVFHMIN-NEXT:    beqz a3, .LBB25_47
+; RV32-ZVFHMIN-NEXT:  .LBB25_107: # %cond.store81
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV32-ZVFHMIN-NEXT:    addi a4, a4, 897
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a3, 882(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 82(a0)
+; RV32-ZVFHMIN-NEXT:    andi a3, a1, 1024
+; RV32-ZVFHMIN-NEXT:    beqz a3, .LBB25_48
+; RV32-ZVFHMIN-NEXT:  .LBB25_108: # %cond.store83
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV32-ZVFHMIN-NEXT:    addi a4, a4, 769
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a3, 756(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 84(a0)
+; RV32-ZVFHMIN-NEXT:    slli a3, a1, 20
+; RV32-ZVFHMIN-NEXT:    bgez a3, .LBB25_49
+; RV32-ZVFHMIN-NEXT:  .LBB25_109: # %cond.store85
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV32-ZVFHMIN-NEXT:    addi a4, a4, 641
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a3, 630(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 86(a0)
+; RV32-ZVFHMIN-NEXT:    slli a3, a1, 19
+; RV32-ZVFHMIN-NEXT:    bgez a3, .LBB25_50
+; RV32-ZVFHMIN-NEXT:  .LBB25_110: # %cond.store87
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV32-ZVFHMIN-NEXT:    addi a4, a4, 513
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a3, 504(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 88(a0)
+; RV32-ZVFHMIN-NEXT:    slli a3, a1, 18
+; RV32-ZVFHMIN-NEXT:    bgez a3, .LBB25_51
+; RV32-ZVFHMIN-NEXT:  .LBB25_111: # %cond.store89
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV32-ZVFHMIN-NEXT:    addi a4, a4, 385
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a3, 378(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 90(a0)
+; RV32-ZVFHMIN-NEXT:    slli a3, a1, 17
+; RV32-ZVFHMIN-NEXT:    bgez a3, .LBB25_52
+; RV32-ZVFHMIN-NEXT:  .LBB25_112: # %cond.store91
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV32-ZVFHMIN-NEXT:    addi a4, a4, 257
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a3, 252(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 92(a0)
+; RV32-ZVFHMIN-NEXT:    slli a3, a1, 16
+; RV32-ZVFHMIN-NEXT:    bgez a3, .LBB25_53
+; RV32-ZVFHMIN-NEXT:  .LBB25_113: # %cond.store93
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV32-ZVFHMIN-NEXT:    addi a4, a4, 129
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a3, 126(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 94(a0)
+; RV32-ZVFHMIN-NEXT:    slli a3, a1, 15
+; RV32-ZVFHMIN-NEXT:    bgez a3, .LBB25_54
+; RV32-ZVFHMIN-NEXT:  .LBB25_114: # %cond.store95
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV32-ZVFHMIN-NEXT:    addi a4, a4, 1
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a2, 0(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 96(a0)
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 14
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB25_55
+; RV32-ZVFHMIN-NEXT:  .LBB25_115: # %cond.store97
+; RV32-ZVFHMIN-NEXT:    li a2, 64
+; RV32-ZVFHMIN-NEXT:    addi a3, sp, 1920
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a3)
+; RV32-ZVFHMIN-NEXT:    lh a2, 2018(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 98(a0)
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 13
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB25_56
+; RV32-ZVFHMIN-NEXT:  .LBB25_116: # %cond.store99
+; RV32-ZVFHMIN-NEXT:    li a2, 64
+; RV32-ZVFHMIN-NEXT:    addi a3, sp, 1792
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a3)
+; RV32-ZVFHMIN-NEXT:    lh a2, 1892(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 100(a0)
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 12
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB25_57
+; RV32-ZVFHMIN-NEXT:  .LBB25_117: # %cond.store101
+; RV32-ZVFHMIN-NEXT:    li a2, 64
+; RV32-ZVFHMIN-NEXT:    addi a3, sp, 1664
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a3)
+; RV32-ZVFHMIN-NEXT:    lh a2, 1766(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 102(a0)
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 11
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB25_58
+; RV32-ZVFHMIN-NEXT:  .LBB25_118: # %cond.store103
+; RV32-ZVFHMIN-NEXT:    li a2, 64
+; RV32-ZVFHMIN-NEXT:    addi a3, sp, 1536
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a3)
+; RV32-ZVFHMIN-NEXT:    lh a2, 1640(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 104(a0)
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 10
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB25_59
+; RV32-ZVFHMIN-NEXT:  .LBB25_119: # %cond.store105
+; RV32-ZVFHMIN-NEXT:    li a2, 64
+; RV32-ZVFHMIN-NEXT:    addi a3, sp, 1408
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a3)
+; RV32-ZVFHMIN-NEXT:    lh a2, 1514(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 106(a0)
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 9
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB25_60
+; RV32-ZVFHMIN-NEXT:  .LBB25_120: # %cond.store107
+; RV32-ZVFHMIN-NEXT:    li a2, 64
+; RV32-ZVFHMIN-NEXT:    addi a3, sp, 1280
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a3)
+; RV32-ZVFHMIN-NEXT:    lh a2, 1388(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 108(a0)
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 8
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB25_61
+; RV32-ZVFHMIN-NEXT:  .LBB25_121: # %cond.store109
+; RV32-ZVFHMIN-NEXT:    li a2, 64
+; RV32-ZVFHMIN-NEXT:    addi a3, sp, 1152
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a3)
+; RV32-ZVFHMIN-NEXT:    lh a2, 1262(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 110(a0)
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 7
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB25_62
+; RV32-ZVFHMIN-NEXT:  .LBB25_122: # %cond.store111
+; RV32-ZVFHMIN-NEXT:    li a2, 64
+; RV32-ZVFHMIN-NEXT:    addi a3, sp, 1024
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a3)
+; RV32-ZVFHMIN-NEXT:    lh a2, 1136(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 112(a0)
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 6
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB25_63
+; RV32-ZVFHMIN-NEXT:  .LBB25_123: # %cond.store113
+; RV32-ZVFHMIN-NEXT:    li a2, 64
+; RV32-ZVFHMIN-NEXT:    addi a3, sp, 896
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a3)
+; RV32-ZVFHMIN-NEXT:    lh a2, 1010(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 114(a0)
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 5
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB25_64
+; RV32-ZVFHMIN-NEXT:  .LBB25_124: # %cond.store115
+; RV32-ZVFHMIN-NEXT:    li a2, 64
+; RV32-ZVFHMIN-NEXT:    addi a3, sp, 768
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a3)
+; RV32-ZVFHMIN-NEXT:    lh a2, 884(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 116(a0)
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 4
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB25_65
+; RV32-ZVFHMIN-NEXT:  .LBB25_125: # %cond.store117
+; RV32-ZVFHMIN-NEXT:    li a2, 64
+; RV32-ZVFHMIN-NEXT:    addi a3, sp, 640
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a3)
+; RV32-ZVFHMIN-NEXT:    lh a2, 758(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 118(a0)
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 3
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB25_66
+; RV32-ZVFHMIN-NEXT:  .LBB25_126: # %cond.store119
+; RV32-ZVFHMIN-NEXT:    li a2, 64
+; RV32-ZVFHMIN-NEXT:    addi a3, sp, 512
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a3)
+; RV32-ZVFHMIN-NEXT:    lh a2, 632(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 120(a0)
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 2
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB25_67
+; RV32-ZVFHMIN-NEXT:  .LBB25_127: # %cond.store121
+; RV32-ZVFHMIN-NEXT:    li a2, 64
+; RV32-ZVFHMIN-NEXT:    addi a3, sp, 384
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a3)
+; RV32-ZVFHMIN-NEXT:    lh a2, 506(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 122(a0)
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 1
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB25_68
+; RV32-ZVFHMIN-NEXT:  .LBB25_128: # %cond.store123
+; RV32-ZVFHMIN-NEXT:    li a2, 64
+; RV32-ZVFHMIN-NEXT:    addi a3, sp, 256
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a3)
+; RV32-ZVFHMIN-NEXT:    lh a2, 380(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 124(a0)
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB25_69
+; RV32-ZVFHMIN-NEXT:    j .LBB25_70
+;
+; RV64-ZVFHMIN-LABEL: masked_store_v64f16:
+; RV64-ZVFHMIN:       # %bb.0:
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a1, v0
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 1
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB25_68
+; RV64-ZVFHMIN-NEXT:  # %bb.1: # %else
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 2
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB25_69
+; RV64-ZVFHMIN-NEXT:  .LBB25_2: # %else2
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 4
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB25_70
+; RV64-ZVFHMIN-NEXT:  .LBB25_3: # %else4
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 8
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB25_71
+; RV64-ZVFHMIN-NEXT:  .LBB25_4: # %else6
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 16
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB25_72
+; RV64-ZVFHMIN-NEXT:  .LBB25_5: # %else8
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 32
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB25_73
+; RV64-ZVFHMIN-NEXT:  .LBB25_6: # %else10
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 64
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB25_74
+; RV64-ZVFHMIN-NEXT:  .LBB25_7: # %else12
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 128
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB25_75
+; RV64-ZVFHMIN-NEXT:  .LBB25_8: # %else14
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 256
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB25_76
+; RV64-ZVFHMIN-NEXT:  .LBB25_9: # %else16
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 512
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB25_77
+; RV64-ZVFHMIN-NEXT:  .LBB25_10: # %else18
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 1024
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB25_78
+; RV64-ZVFHMIN-NEXT:  .LBB25_11: # %else20
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 52
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_79
+; RV64-ZVFHMIN-NEXT:  .LBB25_12: # %else22
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 51
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_80
+; RV64-ZVFHMIN-NEXT:  .LBB25_13: # %else24
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 50
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_81
+; RV64-ZVFHMIN-NEXT:  .LBB25_14: # %else26
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 49
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_82
+; RV64-ZVFHMIN-NEXT:  .LBB25_15: # %else28
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 48
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_17
+; RV64-ZVFHMIN-NEXT:  .LBB25_16: # %cond.store29
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v16, v8, 15
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v16
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 30(a0)
+; RV64-ZVFHMIN-NEXT:  .LBB25_17: # %else30
+; RV64-ZVFHMIN-NEXT:    addi sp, sp, -2032
+; RV64-ZVFHMIN-NEXT:    .cfi_def_cfa_offset 2032
+; RV64-ZVFHMIN-NEXT:    sd ra, 2024(sp) # 8-byte Folded Spill
+; RV64-ZVFHMIN-NEXT:    sd s0, 2016(sp) # 8-byte Folded Spill
+; RV64-ZVFHMIN-NEXT:    .cfi_offset ra, -8
+; RV64-ZVFHMIN-NEXT:    .cfi_offset s0, -16
+; RV64-ZVFHMIN-NEXT:    addi s0, sp, 2032
+; RV64-ZVFHMIN-NEXT:    .cfi_def_cfa s0, 0
+; RV64-ZVFHMIN-NEXT:    .cfi_remember_state
+; RV64-ZVFHMIN-NEXT:    lui a2, 1
+; RV64-ZVFHMIN-NEXT:    addiw a2, a2, 272
+; RV64-ZVFHMIN-NEXT:    sub sp, sp, a2
+; RV64-ZVFHMIN-NEXT:    andi sp, sp, -128
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 47
+; RV64-ZVFHMIN-NEXT:    lui a2, 1
+; RV64-ZVFHMIN-NEXT:    addiw a2, a2, 190
+; RV64-ZVFHMIN-NEXT:    add a2, sp, a2
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB25_83
+; RV64-ZVFHMIN-NEXT:  # %bb.18: # %else32
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 46
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB25_84
+; RV64-ZVFHMIN-NEXT:  .LBB25_19: # %else34
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 45
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB25_85
+; RV64-ZVFHMIN-NEXT:  .LBB25_20: # %else36
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 44
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB25_86
+; RV64-ZVFHMIN-NEXT:  .LBB25_21: # %else38
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 43
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB25_87
+; RV64-ZVFHMIN-NEXT:  .LBB25_22: # %else40
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 42
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB25_88
+; RV64-ZVFHMIN-NEXT:  .LBB25_23: # %else42
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 41
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB25_89
+; RV64-ZVFHMIN-NEXT:  .LBB25_24: # %else44
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 40
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB25_90
+; RV64-ZVFHMIN-NEXT:  .LBB25_25: # %else46
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 39
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB25_91
+; RV64-ZVFHMIN-NEXT:  .LBB25_26: # %else48
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 38
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB25_92
+; RV64-ZVFHMIN-NEXT:  .LBB25_27: # %else50
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 37
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB25_93
+; RV64-ZVFHMIN-NEXT:  .LBB25_28: # %else52
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 36
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB25_94
+; RV64-ZVFHMIN-NEXT:  .LBB25_29: # %else54
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 35
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB25_95
+; RV64-ZVFHMIN-NEXT:  .LBB25_30: # %else56
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 34
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB25_96
+; RV64-ZVFHMIN-NEXT:  .LBB25_31: # %else58
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 33
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB25_97
+; RV64-ZVFHMIN-NEXT:  .LBB25_32: # %else60
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 32
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB25_34
+; RV64-ZVFHMIN-NEXT:  .LBB25_33: # %cond.store61
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    lui a4, 1
+; RV64-ZVFHMIN-NEXT:    addiw a4, a4, 128
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a2, 0(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 62(a0)
+; RV64-ZVFHMIN-NEXT:  .LBB25_34: # %else62
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 31
+; RV64-ZVFHMIN-NEXT:    addi a2, sp, 2047
+; RV64-ZVFHMIN-NEXT:    addi a2, a2, 97
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB25_98
+; RV64-ZVFHMIN-NEXT:  # %bb.35: # %else64
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 30
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB25_99
+; RV64-ZVFHMIN-NEXT:  .LBB25_36: # %else66
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 29
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB25_100
+; RV64-ZVFHMIN-NEXT:  .LBB25_37: # %else68
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 28
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB25_101
+; RV64-ZVFHMIN-NEXT:  .LBB25_38: # %else70
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 27
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB25_102
+; RV64-ZVFHMIN-NEXT:  .LBB25_39: # %else72
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 26
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB25_103
+; RV64-ZVFHMIN-NEXT:  .LBB25_40: # %else74
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 25
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB25_104
+; RV64-ZVFHMIN-NEXT:  .LBB25_41: # %else76
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 24
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB25_105
+; RV64-ZVFHMIN-NEXT:  .LBB25_42: # %else78
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 23
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB25_106
+; RV64-ZVFHMIN-NEXT:  .LBB25_43: # %else80
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 22
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB25_107
+; RV64-ZVFHMIN-NEXT:  .LBB25_44: # %else82
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 21
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB25_108
+; RV64-ZVFHMIN-NEXT:  .LBB25_45: # %else84
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 20
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB25_109
+; RV64-ZVFHMIN-NEXT:  .LBB25_46: # %else86
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 19
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB25_110
+; RV64-ZVFHMIN-NEXT:  .LBB25_47: # %else88
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 18
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB25_111
+; RV64-ZVFHMIN-NEXT:  .LBB25_48: # %else90
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 17
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB25_112
+; RV64-ZVFHMIN-NEXT:  .LBB25_49: # %else92
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 16
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB25_113
+; RV64-ZVFHMIN-NEXT:  .LBB25_50: # %else94
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 15
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB25_114
+; RV64-ZVFHMIN-NEXT:  .LBB25_51: # %else96
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 14
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_115
+; RV64-ZVFHMIN-NEXT:  .LBB25_52: # %else98
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 13
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_116
+; RV64-ZVFHMIN-NEXT:  .LBB25_53: # %else100
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 12
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_117
+; RV64-ZVFHMIN-NEXT:  .LBB25_54: # %else102
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 11
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_118
+; RV64-ZVFHMIN-NEXT:  .LBB25_55: # %else104
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 10
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_119
+; RV64-ZVFHMIN-NEXT:  .LBB25_56: # %else106
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 9
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_120
+; RV64-ZVFHMIN-NEXT:  .LBB25_57: # %else108
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 8
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_121
+; RV64-ZVFHMIN-NEXT:  .LBB25_58: # %else110
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 7
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_122
+; RV64-ZVFHMIN-NEXT:  .LBB25_59: # %else112
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 6
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_123
+; RV64-ZVFHMIN-NEXT:  .LBB25_60: # %else114
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 5
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_124
+; RV64-ZVFHMIN-NEXT:  .LBB25_61: # %else116
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 4
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_125
+; RV64-ZVFHMIN-NEXT:  .LBB25_62: # %else118
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 3
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_126
+; RV64-ZVFHMIN-NEXT:  .LBB25_63: # %else120
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 2
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_127
+; RV64-ZVFHMIN-NEXT:  .LBB25_64: # %else122
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 1
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_128
+; RV64-ZVFHMIN-NEXT:  .LBB25_65: # %else124
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB25_67
+; RV64-ZVFHMIN-NEXT:  .LBB25_66: # %cond.store125
+; RV64-ZVFHMIN-NEXT:    li a1, 64
+; RV64-ZVFHMIN-NEXT:    addi a2, sp, 128
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a2)
+; RV64-ZVFHMIN-NEXT:    lh a1, 254(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 126(a0)
+; RV64-ZVFHMIN-NEXT:  .LBB25_67: # %else126
+; RV64-ZVFHMIN-NEXT:    addi sp, s0, -2032
+; RV64-ZVFHMIN-NEXT:    .cfi_def_cfa sp, 2032
+; RV64-ZVFHMIN-NEXT:    ld ra, 2024(sp) # 8-byte Folded Reload
+; RV64-ZVFHMIN-NEXT:    ld s0, 2016(sp) # 8-byte Folded Reload
+; RV64-ZVFHMIN-NEXT:    .cfi_restore ra
+; RV64-ZVFHMIN-NEXT:    .cfi_restore s0
+; RV64-ZVFHMIN-NEXT:    addi sp, sp, 2032
+; RV64-ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
+; RV64-ZVFHMIN-NEXT:    ret
+; RV64-ZVFHMIN-NEXT:  .LBB25_68: # %cond.store
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v8
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 0(a0)
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 2
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB25_2
+; RV64-ZVFHMIN-NEXT:  .LBB25_69: # %cond.store1
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v16, v8, 1
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v16
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 2(a0)
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 4
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB25_3
+; RV64-ZVFHMIN-NEXT:  .LBB25_70: # %cond.store3
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v16, v8, 2
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v16
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 4(a0)
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 8
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB25_4
+; RV64-ZVFHMIN-NEXT:  .LBB25_71: # %cond.store5
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v16, v8, 3
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v16
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 6(a0)
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 16
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB25_5
+; RV64-ZVFHMIN-NEXT:  .LBB25_72: # %cond.store7
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v16, v8, 4
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v16
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 8(a0)
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 32
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB25_6
+; RV64-ZVFHMIN-NEXT:  .LBB25_73: # %cond.store9
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v16, v8, 5
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v16
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 10(a0)
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 64
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB25_7
+; RV64-ZVFHMIN-NEXT:  .LBB25_74: # %cond.store11
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v16, v8, 6
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v16
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 12(a0)
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 128
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB25_8
+; RV64-ZVFHMIN-NEXT:  .LBB25_75: # %cond.store13
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v16, v8, 7
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v16
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 14(a0)
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 256
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB25_9
+; RV64-ZVFHMIN-NEXT:  .LBB25_76: # %cond.store15
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v16, v8, 8
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v16
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 16(a0)
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 512
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB25_10
+; RV64-ZVFHMIN-NEXT:  .LBB25_77: # %cond.store17
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v16, v8, 9
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v16
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 18(a0)
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 1024
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB25_11
+; RV64-ZVFHMIN-NEXT:  .LBB25_78: # %cond.store19
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v16, v8, 10
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v16
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 20(a0)
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 52
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_12
+; RV64-ZVFHMIN-NEXT:  .LBB25_79: # %cond.store21
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v16, v8, 11
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v16
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 22(a0)
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 51
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_13
+; RV64-ZVFHMIN-NEXT:  .LBB25_80: # %cond.store23
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v16, v8, 12
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v16
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 24(a0)
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 50
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_14
+; RV64-ZVFHMIN-NEXT:  .LBB25_81: # %cond.store25
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v16, v8, 13
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v16
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 26(a0)
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 49
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_15
+; RV64-ZVFHMIN-NEXT:  .LBB25_82: # %cond.store27
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v16, v8, 14
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v16
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 28(a0)
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 48
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB25_16
+; RV64-ZVFHMIN-NEXT:    j .LBB25_17
+; RV64-ZVFHMIN-NEXT:  .LBB25_83: # %cond.store31
+; RV64-ZVFHMIN-NEXT:    .cfi_restore_state
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    li a4, 3
+; RV64-ZVFHMIN-NEXT:    slli a4, a4, 11
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 1890(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 32(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 46
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB25_19
+; RV64-ZVFHMIN-NEXT:  .LBB25_84: # %cond.store33
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    lui a4, 1
+; RV64-ZVFHMIN-NEXT:    addiw a4, a4, 1920
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 1764(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 34(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 45
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB25_20
+; RV64-ZVFHMIN-NEXT:  .LBB25_85: # %cond.store35
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    li a4, 23
+; RV64-ZVFHMIN-NEXT:    slli a4, a4, 8
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 1638(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 36(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 44
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB25_21
+; RV64-ZVFHMIN-NEXT:  .LBB25_86: # %cond.store37
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    lui a4, 1
+; RV64-ZVFHMIN-NEXT:    addiw a4, a4, 1664
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 1512(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 38(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 43
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB25_22
+; RV64-ZVFHMIN-NEXT:  .LBB25_87: # %cond.store39
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    li a4, 11
+; RV64-ZVFHMIN-NEXT:    slli a4, a4, 9
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 1386(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 40(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 42
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB25_23
+; RV64-ZVFHMIN-NEXT:  .LBB25_88: # %cond.store41
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    lui a4, 1
+; RV64-ZVFHMIN-NEXT:    addiw a4, a4, 1408
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 1260(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 42(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 41
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB25_24
+; RV64-ZVFHMIN-NEXT:  .LBB25_89: # %cond.store43
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    li a4, 21
+; RV64-ZVFHMIN-NEXT:    slli a4, a4, 8
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 1134(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 44(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 40
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB25_25
+; RV64-ZVFHMIN-NEXT:  .LBB25_90: # %cond.store45
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    lui a4, 1
+; RV64-ZVFHMIN-NEXT:    addiw a4, a4, 1152
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 1008(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 46(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 39
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB25_26
+; RV64-ZVFHMIN-NEXT:  .LBB25_91: # %cond.store47
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    li a4, 5
+; RV64-ZVFHMIN-NEXT:    slli a4, a4, 10
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 882(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 48(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 38
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB25_27
+; RV64-ZVFHMIN-NEXT:  .LBB25_92: # %cond.store49
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    lui a4, 1
+; RV64-ZVFHMIN-NEXT:    addiw a4, a4, 896
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 756(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 50(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 37
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB25_28
+; RV64-ZVFHMIN-NEXT:  .LBB25_93: # %cond.store51
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    li a4, 19
+; RV64-ZVFHMIN-NEXT:    slli a4, a4, 8
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 630(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 52(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 36
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB25_29
+; RV64-ZVFHMIN-NEXT:  .LBB25_94: # %cond.store53
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    lui a4, 1
+; RV64-ZVFHMIN-NEXT:    addiw a4, a4, 640
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 504(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 54(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 35
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB25_30
+; RV64-ZVFHMIN-NEXT:  .LBB25_95: # %cond.store55
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    li a4, 9
+; RV64-ZVFHMIN-NEXT:    slli a4, a4, 9
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 378(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 56(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 34
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB25_31
+; RV64-ZVFHMIN-NEXT:  .LBB25_96: # %cond.store57
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    lui a4, 1
+; RV64-ZVFHMIN-NEXT:    addiw a4, a4, 384
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 252(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 58(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 33
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB25_32
+; RV64-ZVFHMIN-NEXT:  .LBB25_97: # %cond.store59
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    li a4, 17
+; RV64-ZVFHMIN-NEXT:    slli a4, a4, 8
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 126(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 60(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 32
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB25_33
+; RV64-ZVFHMIN-NEXT:    j .LBB25_34
+; RV64-ZVFHMIN-NEXT:  .LBB25_98: # %cond.store63
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    lui a4, 1
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 2016(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 64(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 30
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB25_36
+; RV64-ZVFHMIN-NEXT:  .LBB25_99: # %cond.store65
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV64-ZVFHMIN-NEXT:    addi a4, a4, 1921
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 1890(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 66(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 29
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB25_37
+; RV64-ZVFHMIN-NEXT:  .LBB25_100: # %cond.store67
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV64-ZVFHMIN-NEXT:    addi a4, a4, 1793
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 1764(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 68(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 28
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB25_38
+; RV64-ZVFHMIN-NEXT:  .LBB25_101: # %cond.store69
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV64-ZVFHMIN-NEXT:    addi a4, a4, 1665
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 1638(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 70(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 27
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB25_39
+; RV64-ZVFHMIN-NEXT:  .LBB25_102: # %cond.store71
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV64-ZVFHMIN-NEXT:    addi a4, a4, 1537
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 1512(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 72(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 26
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB25_40
+; RV64-ZVFHMIN-NEXT:  .LBB25_103: # %cond.store73
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV64-ZVFHMIN-NEXT:    addi a4, a4, 1409
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 1386(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 74(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 25
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB25_41
+; RV64-ZVFHMIN-NEXT:  .LBB25_104: # %cond.store75
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV64-ZVFHMIN-NEXT:    addi a4, a4, 1281
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 1260(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 76(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 24
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB25_42
+; RV64-ZVFHMIN-NEXT:  .LBB25_105: # %cond.store77
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV64-ZVFHMIN-NEXT:    addi a4, a4, 1153
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 1134(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 78(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 23
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB25_43
+; RV64-ZVFHMIN-NEXT:  .LBB25_106: # %cond.store79
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV64-ZVFHMIN-NEXT:    addi a4, a4, 1025
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 1008(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 80(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 22
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB25_44
+; RV64-ZVFHMIN-NEXT:  .LBB25_107: # %cond.store81
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV64-ZVFHMIN-NEXT:    addi a4, a4, 897
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 882(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 82(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 21
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB25_45
+; RV64-ZVFHMIN-NEXT:  .LBB25_108: # %cond.store83
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV64-ZVFHMIN-NEXT:    addi a4, a4, 769
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 756(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 84(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 20
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB25_46
+; RV64-ZVFHMIN-NEXT:  .LBB25_109: # %cond.store85
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV64-ZVFHMIN-NEXT:    addi a4, a4, 641
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 630(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 86(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 19
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB25_47
+; RV64-ZVFHMIN-NEXT:  .LBB25_110: # %cond.store87
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV64-ZVFHMIN-NEXT:    addi a4, a4, 513
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 504(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 88(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 18
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB25_48
+; RV64-ZVFHMIN-NEXT:  .LBB25_111: # %cond.store89
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV64-ZVFHMIN-NEXT:    addi a4, a4, 385
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 378(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 90(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 17
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB25_49
+; RV64-ZVFHMIN-NEXT:  .LBB25_112: # %cond.store91
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV64-ZVFHMIN-NEXT:    addi a4, a4, 257
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 252(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 92(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 16
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB25_50
+; RV64-ZVFHMIN-NEXT:  .LBB25_113: # %cond.store93
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV64-ZVFHMIN-NEXT:    addi a4, a4, 129
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 126(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 94(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 15
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB25_51
+; RV64-ZVFHMIN-NEXT:  .LBB25_114: # %cond.store95
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV64-ZVFHMIN-NEXT:    addi a4, a4, 1
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a2, 0(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 96(a0)
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 14
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_52
+; RV64-ZVFHMIN-NEXT:  .LBB25_115: # %cond.store97
+; RV64-ZVFHMIN-NEXT:    li a2, 64
+; RV64-ZVFHMIN-NEXT:    addi a3, sp, 1920
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a3)
+; RV64-ZVFHMIN-NEXT:    lh a2, 2018(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 98(a0)
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 13
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_53
+; RV64-ZVFHMIN-NEXT:  .LBB25_116: # %cond.store99
+; RV64-ZVFHMIN-NEXT:    li a2, 64
+; RV64-ZVFHMIN-NEXT:    addi a3, sp, 1792
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a3)
+; RV64-ZVFHMIN-NEXT:    lh a2, 1892(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 100(a0)
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 12
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_54
+; RV64-ZVFHMIN-NEXT:  .LBB25_117: # %cond.store101
+; RV64-ZVFHMIN-NEXT:    li a2, 64
+; RV64-ZVFHMIN-NEXT:    addi a3, sp, 1664
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a3)
+; RV64-ZVFHMIN-NEXT:    lh a2, 1766(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 102(a0)
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 11
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_55
+; RV64-ZVFHMIN-NEXT:  .LBB25_118: # %cond.store103
+; RV64-ZVFHMIN-NEXT:    li a2, 64
+; RV64-ZVFHMIN-NEXT:    addi a3, sp, 1536
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a3)
+; RV64-ZVFHMIN-NEXT:    lh a2, 1640(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 104(a0)
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 10
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_56
+; RV64-ZVFHMIN-NEXT:  .LBB25_119: # %cond.store105
+; RV64-ZVFHMIN-NEXT:    li a2, 64
+; RV64-ZVFHMIN-NEXT:    addi a3, sp, 1408
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a3)
+; RV64-ZVFHMIN-NEXT:    lh a2, 1514(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 106(a0)
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 9
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_57
+; RV64-ZVFHMIN-NEXT:  .LBB25_120: # %cond.store107
+; RV64-ZVFHMIN-NEXT:    li a2, 64
+; RV64-ZVFHMIN-NEXT:    addi a3, sp, 1280
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a3)
+; RV64-ZVFHMIN-NEXT:    lh a2, 1388(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 108(a0)
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 8
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_58
+; RV64-ZVFHMIN-NEXT:  .LBB25_121: # %cond.store109
+; RV64-ZVFHMIN-NEXT:    li a2, 64
+; RV64-ZVFHMIN-NEXT:    addi a3, sp, 1152
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a3)
+; RV64-ZVFHMIN-NEXT:    lh a2, 1262(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 110(a0)
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 7
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_59
+; RV64-ZVFHMIN-NEXT:  .LBB25_122: # %cond.store111
+; RV64-ZVFHMIN-NEXT:    li a2, 64
+; RV64-ZVFHMIN-NEXT:    addi a3, sp, 1024
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a3)
+; RV64-ZVFHMIN-NEXT:    lh a2, 1136(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 112(a0)
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 6
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_60
+; RV64-ZVFHMIN-NEXT:  .LBB25_123: # %cond.store113
+; RV64-ZVFHMIN-NEXT:    li a2, 64
+; RV64-ZVFHMIN-NEXT:    addi a3, sp, 896
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a3)
+; RV64-ZVFHMIN-NEXT:    lh a2, 1010(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 114(a0)
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 5
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_61
+; RV64-ZVFHMIN-NEXT:  .LBB25_124: # %cond.store115
+; RV64-ZVFHMIN-NEXT:    li a2, 64
+; RV64-ZVFHMIN-NEXT:    addi a3, sp, 768
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a3)
+; RV64-ZVFHMIN-NEXT:    lh a2, 884(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 116(a0)
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 4
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_62
+; RV64-ZVFHMIN-NEXT:  .LBB25_125: # %cond.store117
+; RV64-ZVFHMIN-NEXT:    li a2, 64
+; RV64-ZVFHMIN-NEXT:    addi a3, sp, 640
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a3)
+; RV64-ZVFHMIN-NEXT:    lh a2, 758(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 118(a0)
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 3
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_63
+; RV64-ZVFHMIN-NEXT:  .LBB25_126: # %cond.store119
+; RV64-ZVFHMIN-NEXT:    li a2, 64
+; RV64-ZVFHMIN-NEXT:    addi a3, sp, 512
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a3)
+; RV64-ZVFHMIN-NEXT:    lh a2, 632(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 120(a0)
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 2
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_64
+; RV64-ZVFHMIN-NEXT:  .LBB25_127: # %cond.store121
+; RV64-ZVFHMIN-NEXT:    li a2, 64
+; RV64-ZVFHMIN-NEXT:    addi a3, sp, 384
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a3)
+; RV64-ZVFHMIN-NEXT:    lh a2, 506(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 122(a0)
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 1
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB25_65
+; RV64-ZVFHMIN-NEXT:  .LBB25_128: # %cond.store123
+; RV64-ZVFHMIN-NEXT:    li a2, 64
+; RV64-ZVFHMIN-NEXT:    addi a3, sp, 256
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a3)
+; RV64-ZVFHMIN-NEXT:    lh a2, 380(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 124(a0)
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB25_66
+; RV64-ZVFHMIN-NEXT:    j .LBB25_67
+  call void @llvm.masked.store.v64f16.p0(<64 x half> %val, ptr %a, i32 8, <64 x i1> %mask)
   ret void
 }
-declare void @llvm.masked.store.v64f16.p0v64f16(<64 x half>, ptr, i32, <64 x i1>)
 
-define void @masked_store_v64f32(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
+define void @masked_store_v64f32(<64 x float> %val, ptr %a, <64 x i1> %mask) {
 ; CHECK-LABEL: masked_store_v64f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    csrr a3, vlenb
-; CHECK-NEXT:    slli a3, a3, 4
-; CHECK-NEXT:    sub sp, sp, a3
-; CHECK-NEXT:    li a3, 32
-; CHECK-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a2)
-; CHECK-NEXT:    addi a2, a2, 128
-; CHECK-NEXT:    vle32.v v16, (a2)
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    slli a2, a2, 3
-; CHECK-NEXT:    add a2, sp, a2
-; CHECK-NEXT:    addi a2, a2, 16
-; CHECK-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
-; CHECK-NEXT:    fmv.w.x fa5, zero
-; CHECK-NEXT:    vmfeq.vf v0, v8, fa5
-; CHECK-NEXT:    vle32.v v24, (a0)
-; CHECK-NEXT:    addi a0, a0, 128
-; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vmfeq.vf v8, v16, fa5
-; CHECK-NEXT:    vse32.v v24, (a1), v0.t
-; CHECK-NEXT:    addi a0, a1, 128
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    addi a1, sp, 16
-; CHECK-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT:    li a1, 32
+; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
 ; CHECK-NEXT:    vse32.v v8, (a0), v0.t
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
+; CHECK-NEXT:    vslidedown.vi v0, v0, 4
+; CHECK-NEXT:    addi a0, a0, 128
+; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT:    vse32.v v16, (a0), v0.t
 ; CHECK-NEXT:    ret
-  %m = load <64 x float>, ptr %m_ptr
-  %mask = fcmp oeq <64 x float> %m, zeroinitializer
-  %val = load <64 x float>, ptr %val_ptr
-  call void @llvm.masked.store.v64f16.p0v64f32(<64 x float> %val, ptr %a, i32 8, <64 x i1> %mask)
+  call void @llvm.masked.store.v64f32.p0(<64 x float> %val, ptr %a, i32 8, <64 x i1> %mask)
   ret void
 }
-declare void @llvm.masked.store.v64f16.p0v64f32(<64 x float>, ptr, i32, <64 x i1>)
 
-define void @masked_store_v128f16(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
-; CHECK-LABEL: masked_store_v128f16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    csrr a3, vlenb
-; CHECK-NEXT:    slli a3, a3, 4
-; CHECK-NEXT:    sub sp, sp, a3
-; CHECK-NEXT:    li a3, 64
-; CHECK-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
-; CHECK-NEXT:    vle16.v v8, (a2)
-; CHECK-NEXT:    addi a2, a2, 128
-; CHECK-NEXT:    vle16.v v16, (a2)
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    slli a2, a2, 3
-; CHECK-NEXT:    add a2, sp, a2
-; CHECK-NEXT:    addi a2, a2, 16
-; CHECK-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
-; CHECK-NEXT:    fmv.h.x fa5, zero
-; CHECK-NEXT:    vmfeq.vf v0, v8, fa5
-; CHECK-NEXT:    vle16.v v24, (a0)
-; CHECK-NEXT:    addi a0, a0, 128
-; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vmfeq.vf v8, v16, fa5
-; CHECK-NEXT:    vse16.v v24, (a1), v0.t
-; CHECK-NEXT:    addi a0, a1, 128
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    addi a1, sp, 16
-; CHECK-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
-; CHECK-NEXT:    vse16.v v8, (a0), v0.t
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    addi sp, sp, 16
-; CHECK-NEXT:    ret
-  %m = load <128 x half>, ptr %m_ptr
-  %mask = fcmp oeq <128 x half> %m, zeroinitializer
-  %val = load <128 x half>, ptr %val_ptr
-  call void @llvm.masked.store.v128f16.p0v128f16(<128 x half> %val, ptr %a, i32 8, <128 x i1> %mask)
+define void @masked_store_v128bf16(<128 x bfloat> %val, ptr %a, <128 x i1> %mask) {
+; RV32-LABEL: masked_store_v128bf16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT:    vmv.x.s a3, v0
+; RV32-NEXT:    andi a1, a3, 1
+; RV32-NEXT:    bnez a1, .LBB27_141
+; RV32-NEXT:  # %bb.1: # %else
+; RV32-NEXT:    andi a1, a3, 2
+; RV32-NEXT:    bnez a1, .LBB27_142
+; RV32-NEXT:  .LBB27_2: # %else2
+; RV32-NEXT:    andi a1, a3, 4
+; RV32-NEXT:    bnez a1, .LBB27_143
+; RV32-NEXT:  .LBB27_3: # %else4
+; RV32-NEXT:    andi a1, a3, 8
+; RV32-NEXT:    bnez a1, .LBB27_144
+; RV32-NEXT:  .LBB27_4: # %else6
+; RV32-NEXT:    andi a1, a3, 16
+; RV32-NEXT:    bnez a1, .LBB27_145
+; RV32-NEXT:  .LBB27_5: # %else8
+; RV32-NEXT:    andi a1, a3, 32
+; RV32-NEXT:    bnez a1, .LBB27_146
+; RV32-NEXT:  .LBB27_6: # %else10
+; RV32-NEXT:    andi a1, a3, 64
+; RV32-NEXT:    bnez a1, .LBB27_147
+; RV32-NEXT:  .LBB27_7: # %else12
+; RV32-NEXT:    andi a1, a3, 128
+; RV32-NEXT:    bnez a1, .LBB27_148
+; RV32-NEXT:  .LBB27_8: # %else14
+; RV32-NEXT:    andi a1, a3, 256
+; RV32-NEXT:    bnez a1, .LBB27_149
+; RV32-NEXT:  .LBB27_9: # %else16
+; RV32-NEXT:    andi a1, a3, 512
+; RV32-NEXT:    bnez a1, .LBB27_150
+; RV32-NEXT:  .LBB27_10: # %else18
+; RV32-NEXT:    andi a1, a3, 1024
+; RV32-NEXT:    bnez a1, .LBB27_151
+; RV32-NEXT:  .LBB27_11: # %else20
+; RV32-NEXT:    slli a1, a3, 20
+; RV32-NEXT:    bltz a1, .LBB27_152
+; RV32-NEXT:  .LBB27_12: # %else22
+; RV32-NEXT:    slli a1, a3, 19
+; RV32-NEXT:    bltz a1, .LBB27_153
+; RV32-NEXT:  .LBB27_13: # %else24
+; RV32-NEXT:    slli a1, a3, 18
+; RV32-NEXT:    bltz a1, .LBB27_154
+; RV32-NEXT:  .LBB27_14: # %else26
+; RV32-NEXT:    slli a1, a3, 17
+; RV32-NEXT:    bltz a1, .LBB27_155
+; RV32-NEXT:  .LBB27_15: # %else28
+; RV32-NEXT:    slli a1, a3, 16
+; RV32-NEXT:    bgez a1, .LBB27_17
+; RV32-NEXT:  .LBB27_16: # %cond.store29
+; RV32-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-NEXT:    vslidedown.vi v24, v8, 15
+; RV32-NEXT:    vmv.x.s a1, v24
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 30(a0)
+; RV32-NEXT:  .LBB27_17: # %else30
+; RV32-NEXT:    addi sp, sp, -2032
+; RV32-NEXT:    .cfi_def_cfa_offset 2032
+; RV32-NEXT:    sw ra, 2028(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s0, 2024(sp) # 4-byte Folded Spill
+; RV32-NEXT:    .cfi_offset ra, -4
+; RV32-NEXT:    .cfi_offset s0, -8
+; RV32-NEXT:    addi s0, sp, 2032
+; RV32-NEXT:    .cfi_def_cfa s0, 0
+; RV32-NEXT:    .cfi_remember_state
+; RV32-NEXT:    lui a1, 3
+; RV32-NEXT:    addi a1, a1, -1776
+; RV32-NEXT:    sub sp, sp, a1
+; RV32-NEXT:    andi sp, sp, -128
+; RV32-NEXT:    slli a1, a3, 15
+; RV32-NEXT:    lui a2, 3
+; RV32-NEXT:    addi a2, a2, -1606
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    bltz a1, .LBB27_156
+; RV32-NEXT:  # %bb.18: # %else32
+; RV32-NEXT:    slli a1, a3, 14
+; RV32-NEXT:    bltz a1, .LBB27_157
+; RV32-NEXT:  .LBB27_19: # %else34
+; RV32-NEXT:    slli a1, a3, 13
+; RV32-NEXT:    bltz a1, .LBB27_158
+; RV32-NEXT:  .LBB27_20: # %else36
+; RV32-NEXT:    slli a1, a3, 12
+; RV32-NEXT:    bltz a1, .LBB27_159
+; RV32-NEXT:  .LBB27_21: # %else38
+; RV32-NEXT:    slli a1, a3, 11
+; RV32-NEXT:    bltz a1, .LBB27_160
+; RV32-NEXT:  .LBB27_22: # %else40
+; RV32-NEXT:    slli a1, a3, 10
+; RV32-NEXT:    bltz a1, .LBB27_161
+; RV32-NEXT:  .LBB27_23: # %else42
+; RV32-NEXT:    slli a1, a3, 9
+; RV32-NEXT:    bltz a1, .LBB27_162
+; RV32-NEXT:  .LBB27_24: # %else44
+; RV32-NEXT:    slli a1, a3, 8
+; RV32-NEXT:    bltz a1, .LBB27_163
+; RV32-NEXT:  .LBB27_25: # %else46
+; RV32-NEXT:    slli a1, a3, 7
+; RV32-NEXT:    bltz a1, .LBB27_164
+; RV32-NEXT:  .LBB27_26: # %else48
+; RV32-NEXT:    slli a1, a3, 6
+; RV32-NEXT:    bltz a1, .LBB27_165
+; RV32-NEXT:  .LBB27_27: # %else50
+; RV32-NEXT:    slli a1, a3, 5
+; RV32-NEXT:    bltz a1, .LBB27_166
+; RV32-NEXT:  .LBB27_28: # %else52
+; RV32-NEXT:    slli a1, a3, 4
+; RV32-NEXT:    bltz a1, .LBB27_167
+; RV32-NEXT:  .LBB27_29: # %else54
+; RV32-NEXT:    slli a1, a3, 3
+; RV32-NEXT:    bgez a1, .LBB27_31
+; RV32-NEXT:  .LBB27_30: # %cond.store55
+; RV32-NEXT:    li a1, 64
+; RV32-NEXT:    li a4, 21
+; RV32-NEXT:    slli a4, a4, 9
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a4)
+; RV32-NEXT:    lh a1, 126(a2)
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 56(a0)
+; RV32-NEXT:  .LBB27_31: # %else56
+; RV32-NEXT:    slli a4, a3, 2
+; RV32-NEXT:    li a1, 32
+; RV32-NEXT:    bgez a4, .LBB27_33
+; RV32-NEXT:  # %bb.32: # %cond.store57
+; RV32-NEXT:    li a4, 64
+; RV32-NEXT:    lui a5, 3
+; RV32-NEXT:    addi a5, a5, -1664
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    vsetvli zero, a4, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a5)
+; RV32-NEXT:    lh a2, 0(a2)
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 58(a0)
+; RV32-NEXT:  .LBB27_33: # %else58
+; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT:    vsrl.vx v24, v0, a1
+; RV32-NEXT:    slli a2, a3, 1
+; RV32-NEXT:    lui a4, 2
+; RV32-NEXT:    addi a4, a4, 348
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    bgez a2, .LBB27_35
+; RV32-NEXT:  # %bb.34: # %cond.store59
+; RV32-NEXT:    li a2, 64
+; RV32-NEXT:    lui a5, 3
+; RV32-NEXT:    addi a5, a5, -1792
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a5)
+; RV32-NEXT:    lh a2, 2016(a4)
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 60(a0)
+; RV32-NEXT:  .LBB27_35: # %else60
+; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT:    vmv.x.s a2, v24
+; RV32-NEXT:    bltz a3, .LBB27_168
+; RV32-NEXT:  # %bb.36: # %else62
+; RV32-NEXT:    andi a3, a2, 1
+; RV32-NEXT:    bnez a3, .LBB27_169
+; RV32-NEXT:  .LBB27_37: # %else64
+; RV32-NEXT:    andi a3, a2, 2
+; RV32-NEXT:    bnez a3, .LBB27_170
+; RV32-NEXT:  .LBB27_38: # %else66
+; RV32-NEXT:    andi a3, a2, 4
+; RV32-NEXT:    bnez a3, .LBB27_171
+; RV32-NEXT:  .LBB27_39: # %else68
+; RV32-NEXT:    andi a3, a2, 8
+; RV32-NEXT:    bnez a3, .LBB27_172
+; RV32-NEXT:  .LBB27_40: # %else70
+; RV32-NEXT:    andi a3, a2, 16
+; RV32-NEXT:    bnez a3, .LBB27_173
+; RV32-NEXT:  .LBB27_41: # %else72
+; RV32-NEXT:    andi a3, a2, 32
+; RV32-NEXT:    bnez a3, .LBB27_174
+; RV32-NEXT:  .LBB27_42: # %else74
+; RV32-NEXT:    andi a3, a2, 64
+; RV32-NEXT:    bnez a3, .LBB27_175
+; RV32-NEXT:  .LBB27_43: # %else76
+; RV32-NEXT:    andi a3, a2, 128
+; RV32-NEXT:    bnez a3, .LBB27_176
+; RV32-NEXT:  .LBB27_44: # %else78
+; RV32-NEXT:    andi a3, a2, 256
+; RV32-NEXT:    bnez a3, .LBB27_177
+; RV32-NEXT:  .LBB27_45: # %else80
+; RV32-NEXT:    andi a3, a2, 512
+; RV32-NEXT:    bnez a3, .LBB27_178
+; RV32-NEXT:  .LBB27_46: # %else82
+; RV32-NEXT:    andi a3, a2, 1024
+; RV32-NEXT:    bnez a3, .LBB27_179
+; RV32-NEXT:  .LBB27_47: # %else84
+; RV32-NEXT:    slli a3, a2, 20
+; RV32-NEXT:    bltz a3, .LBB27_180
+; RV32-NEXT:  .LBB27_48: # %else86
+; RV32-NEXT:    slli a3, a2, 19
+; RV32-NEXT:    bltz a3, .LBB27_181
+; RV32-NEXT:  .LBB27_49: # %else88
+; RV32-NEXT:    slli a3, a2, 18
+; RV32-NEXT:    bltz a3, .LBB27_182
+; RV32-NEXT:  .LBB27_50: # %else90
+; RV32-NEXT:    slli a3, a2, 17
+; RV32-NEXT:    bgez a3, .LBB27_52
+; RV32-NEXT:  .LBB27_51: # %cond.store91
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    lui a5, 2
+; RV32-NEXT:    addi a5, a5, 256
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a5)
+; RV32-NEXT:    lh a3, 0(a4)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 92(a0)
+; RV32-NEXT:  .LBB27_52: # %else92
+; RV32-NEXT:    slli a3, a2, 16
+; RV32-NEXT:    lui a4, 2
+; RV32-NEXT:    addi a4, a4, -1794
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    bltz a3, .LBB27_183
+; RV32-NEXT:  # %bb.53: # %else94
+; RV32-NEXT:    slli a3, a2, 15
+; RV32-NEXT:    bltz a3, .LBB27_184
+; RV32-NEXT:  .LBB27_54: # %else96
+; RV32-NEXT:    slli a3, a2, 14
+; RV32-NEXT:    bltz a3, .LBB27_185
+; RV32-NEXT:  .LBB27_55: # %else98
+; RV32-NEXT:    slli a3, a2, 13
+; RV32-NEXT:    bltz a3, .LBB27_186
+; RV32-NEXT:  .LBB27_56: # %else100
+; RV32-NEXT:    slli a3, a2, 12
+; RV32-NEXT:    bltz a3, .LBB27_187
+; RV32-NEXT:  .LBB27_57: # %else102
+; RV32-NEXT:    slli a3, a2, 11
+; RV32-NEXT:    bltz a3, .LBB27_188
+; RV32-NEXT:  .LBB27_58: # %else104
+; RV32-NEXT:    slli a3, a2, 10
+; RV32-NEXT:    bltz a3, .LBB27_189
+; RV32-NEXT:  .LBB27_59: # %else106
+; RV32-NEXT:    slli a3, a2, 9
+; RV32-NEXT:    bltz a3, .LBB27_190
+; RV32-NEXT:  .LBB27_60: # %else108
+; RV32-NEXT:    slli a3, a2, 8
+; RV32-NEXT:    bltz a3, .LBB27_191
+; RV32-NEXT:  .LBB27_61: # %else110
+; RV32-NEXT:    slli a3, a2, 7
+; RV32-NEXT:    bltz a3, .LBB27_192
+; RV32-NEXT:  .LBB27_62: # %else112
+; RV32-NEXT:    slli a3, a2, 6
+; RV32-NEXT:    bltz a3, .LBB27_193
+; RV32-NEXT:  .LBB27_63: # %else114
+; RV32-NEXT:    slli a3, a2, 5
+; RV32-NEXT:    bltz a3, .LBB27_194
+; RV32-NEXT:  .LBB27_64: # %else116
+; RV32-NEXT:    slli a3, a2, 4
+; RV32-NEXT:    bltz a3, .LBB27_195
+; RV32-NEXT:  .LBB27_65: # %else118
+; RV32-NEXT:    slli a3, a2, 3
+; RV32-NEXT:    bltz a3, .LBB27_196
+; RV32-NEXT:  .LBB27_66: # %else120
+; RV32-NEXT:    slli a3, a2, 2
+; RV32-NEXT:    bgez a3, .LBB27_68
+; RV32-NEXT:  .LBB27_67: # %cond.store121
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    lui a5, 2
+; RV32-NEXT:    addi a5, a5, -1664
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a5)
+; RV32-NEXT:    lh a3, 252(a4)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 122(a0)
+; RV32-NEXT:  .LBB27_68: # %else122
+; RV32-NEXT:    slli a3, a2, 1
+; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT:    vslidedown.vi v24, v0, 1
+; RV32-NEXT:    bgez a3, .LBB27_70
+; RV32-NEXT:  # %bb.69: # %cond.store123
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    li a5, 25
+; RV32-NEXT:    slli a5, a5, 8
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a5)
+; RV32-NEXT:    lh a3, 126(a4)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 124(a0)
+; RV32-NEXT:  .LBB27_70: # %else124
+; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT:    vmv.x.s a3, v24
+; RV32-NEXT:    bltz a2, .LBB27_197
+; RV32-NEXT:  # %bb.71: # %else126
+; RV32-NEXT:    andi a2, a3, 1
+; RV32-NEXT:    bnez a2, .LBB27_198
+; RV32-NEXT:  .LBB27_72: # %else128
+; RV32-NEXT:    andi a2, a3, 2
+; RV32-NEXT:    bnez a2, .LBB27_199
+; RV32-NEXT:  .LBB27_73: # %else130
+; RV32-NEXT:    andi a2, a3, 4
+; RV32-NEXT:    bnez a2, .LBB27_200
+; RV32-NEXT:  .LBB27_74: # %else132
+; RV32-NEXT:    andi a2, a3, 8
+; RV32-NEXT:    bnez a2, .LBB27_201
+; RV32-NEXT:  .LBB27_75: # %else134
+; RV32-NEXT:    andi a2, a3, 16
+; RV32-NEXT:    bnez a2, .LBB27_202
+; RV32-NEXT:  .LBB27_76: # %else136
+; RV32-NEXT:    andi a2, a3, 32
+; RV32-NEXT:    bnez a2, .LBB27_203
+; RV32-NEXT:  .LBB27_77: # %else138
+; RV32-NEXT:    andi a2, a3, 64
+; RV32-NEXT:    bnez a2, .LBB27_204
+; RV32-NEXT:  .LBB27_78: # %else140
+; RV32-NEXT:    andi a2, a3, 128
+; RV32-NEXT:    bnez a2, .LBB27_205
+; RV32-NEXT:  .LBB27_79: # %else142
+; RV32-NEXT:    andi a2, a3, 256
+; RV32-NEXT:    bnez a2, .LBB27_206
+; RV32-NEXT:  .LBB27_80: # %else144
+; RV32-NEXT:    andi a2, a3, 512
+; RV32-NEXT:    bnez a2, .LBB27_207
+; RV32-NEXT:  .LBB27_81: # %else146
+; RV32-NEXT:    andi a2, a3, 1024
+; RV32-NEXT:    bnez a2, .LBB27_208
+; RV32-NEXT:  .LBB27_82: # %else148
+; RV32-NEXT:    slli a2, a3, 20
+; RV32-NEXT:    bltz a2, .LBB27_209
+; RV32-NEXT:  .LBB27_83: # %else150
+; RV32-NEXT:    slli a2, a3, 19
+; RV32-NEXT:    bltz a2, .LBB27_210
+; RV32-NEXT:  .LBB27_84: # %else152
+; RV32-NEXT:    slli a2, a3, 18
+; RV32-NEXT:    bltz a2, .LBB27_211
+; RV32-NEXT:  .LBB27_85: # %else154
+; RV32-NEXT:    slli a2, a3, 17
+; RV32-NEXT:    bltz a2, .LBB27_212
+; RV32-NEXT:  .LBB27_86: # %else156
+; RV32-NEXT:    slli a2, a3, 16
+; RV32-NEXT:    bgez a2, .LBB27_88
+; RV32-NEXT:  .LBB27_87: # %cond.store157
+; RV32-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-NEXT:    vslidedown.vi v8, v16, 15
+; RV32-NEXT:    vmv.x.s a2, v8
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 158(a0)
+; RV32-NEXT:  .LBB27_88: # %else158
+; RV32-NEXT:    slli a4, a3, 15
+; RV32-NEXT:    lui a2, 1
+; RV32-NEXT:    addi a2, a2, 190
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    bltz a4, .LBB27_213
+; RV32-NEXT:  # %bb.89: # %else160
+; RV32-NEXT:    slli a4, a3, 14
+; RV32-NEXT:    bltz a4, .LBB27_214
+; RV32-NEXT:  .LBB27_90: # %else162
+; RV32-NEXT:    slli a4, a3, 13
+; RV32-NEXT:    bltz a4, .LBB27_215
+; RV32-NEXT:  .LBB27_91: # %else164
+; RV32-NEXT:    slli a4, a3, 12
+; RV32-NEXT:    bltz a4, .LBB27_216
+; RV32-NEXT:  .LBB27_92: # %else166
+; RV32-NEXT:    slli a4, a3, 11
+; RV32-NEXT:    bltz a4, .LBB27_217
+; RV32-NEXT:  .LBB27_93: # %else168
+; RV32-NEXT:    slli a4, a3, 10
+; RV32-NEXT:    bltz a4, .LBB27_218
+; RV32-NEXT:  .LBB27_94: # %else170
+; RV32-NEXT:    slli a4, a3, 9
+; RV32-NEXT:    bltz a4, .LBB27_219
+; RV32-NEXT:  .LBB27_95: # %else172
+; RV32-NEXT:    slli a4, a3, 8
+; RV32-NEXT:    bltz a4, .LBB27_220
+; RV32-NEXT:  .LBB27_96: # %else174
+; RV32-NEXT:    slli a4, a3, 7
+; RV32-NEXT:    bltz a4, .LBB27_221
+; RV32-NEXT:  .LBB27_97: # %else176
+; RV32-NEXT:    slli a4, a3, 6
+; RV32-NEXT:    bltz a4, .LBB27_222
+; RV32-NEXT:  .LBB27_98: # %else178
+; RV32-NEXT:    slli a4, a3, 5
+; RV32-NEXT:    bltz a4, .LBB27_223
+; RV32-NEXT:  .LBB27_99: # %else180
+; RV32-NEXT:    slli a4, a3, 4
+; RV32-NEXT:    bltz a4, .LBB27_224
+; RV32-NEXT:  .LBB27_100: # %else182
+; RV32-NEXT:    slli a4, a3, 3
+; RV32-NEXT:    bltz a4, .LBB27_225
+; RV32-NEXT:  .LBB27_101: # %else184
+; RV32-NEXT:    slli a4, a3, 2
+; RV32-NEXT:    bgez a4, .LBB27_103
+; RV32-NEXT:  .LBB27_102: # %cond.store185
+; RV32-NEXT:    li a4, 64
+; RV32-NEXT:    lui a5, 1
+; RV32-NEXT:    addi a5, a5, 384
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    vsetvli zero, a4, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v16, (a5)
+; RV32-NEXT:    lh a4, 252(a2)
+; RV32-NEXT:    fmv.h.x fa5, a4
+; RV32-NEXT:    fsh fa5, 186(a0)
+; RV32-NEXT:  .LBB27_103: # %else186
+; RV32-NEXT:    slli a4, a3, 1
+; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT:    vsrl.vx v8, v24, a1
+; RV32-NEXT:    bgez a4, .LBB27_105
+; RV32-NEXT:  # %bb.104: # %cond.store187
+; RV32-NEXT:    li a1, 64
+; RV32-NEXT:    li a4, 17
+; RV32-NEXT:    slli a4, a4, 8
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v16, (a4)
+; RV32-NEXT:    lh a1, 126(a2)
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 188(a0)
+; RV32-NEXT:  .LBB27_105: # %else188
+; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT:    vmv.x.s a1, v8
+; RV32-NEXT:    bgez a3, .LBB27_107
+; RV32-NEXT:  # %bb.106: # %cond.store189
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    lui a4, 1
+; RV32-NEXT:    addi a4, a4, 128
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v16, (a4)
+; RV32-NEXT:    lh a2, 0(a2)
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 190(a0)
+; RV32-NEXT:  .LBB27_107: # %else190
+; RV32-NEXT:    andi a3, a1, 1
+; RV32-NEXT:    addi a2, sp, 2047
+; RV32-NEXT:    addi a2, a2, 97
+; RV32-NEXT:    bnez a3, .LBB27_226
+; RV32-NEXT:  # %bb.108: # %else192
+; RV32-NEXT:    andi a3, a1, 2
+; RV32-NEXT:    bnez a3, .LBB27_227
+; RV32-NEXT:  .LBB27_109: # %else194
+; RV32-NEXT:    andi a3, a1, 4
+; RV32-NEXT:    bnez a3, .LBB27_228
+; RV32-NEXT:  .LBB27_110: # %else196
+; RV32-NEXT:    andi a3, a1, 8
+; RV32-NEXT:    bnez a3, .LBB27_229
+; RV32-NEXT:  .LBB27_111: # %else198
+; RV32-NEXT:    andi a3, a1, 16
+; RV32-NEXT:    bnez a3, .LBB27_230
+; RV32-NEXT:  .LBB27_112: # %else200
+; RV32-NEXT:    andi a3, a1, 32
+; RV32-NEXT:    bnez a3, .LBB27_231
+; RV32-NEXT:  .LBB27_113: # %else202
+; RV32-NEXT:    andi a3, a1, 64
+; RV32-NEXT:    bnez a3, .LBB27_232
+; RV32-NEXT:  .LBB27_114: # %else204
+; RV32-NEXT:    andi a3, a1, 128
+; RV32-NEXT:    bnez a3, .LBB27_233
+; RV32-NEXT:  .LBB27_115: # %else206
+; RV32-NEXT:    andi a3, a1, 256
+; RV32-NEXT:    bnez a3, .LBB27_234
+; RV32-NEXT:  .LBB27_116: # %else208
+; RV32-NEXT:    andi a3, a1, 512
+; RV32-NEXT:    bnez a3, .LBB27_235
+; RV32-NEXT:  .LBB27_117: # %else210
+; RV32-NEXT:    andi a3, a1, 1024
+; RV32-NEXT:    bnez a3, .LBB27_236
+; RV32-NEXT:  .LBB27_118: # %else212
+; RV32-NEXT:    slli a3, a1, 20
+; RV32-NEXT:    bltz a3, .LBB27_237
+; RV32-NEXT:  .LBB27_119: # %else214
+; RV32-NEXT:    slli a3, a1, 19
+; RV32-NEXT:    bltz a3, .LBB27_238
+; RV32-NEXT:  .LBB27_120: # %else216
+; RV32-NEXT:    slli a3, a1, 18
+; RV32-NEXT:    bltz a3, .LBB27_239
+; RV32-NEXT:  .LBB27_121: # %else218
+; RV32-NEXT:    slli a3, a1, 17
+; RV32-NEXT:    bltz a3, .LBB27_240
+; RV32-NEXT:  .LBB27_122: # %else220
+; RV32-NEXT:    slli a3, a1, 16
+; RV32-NEXT:    bgez a3, .LBB27_123
+; RV32-NEXT:    j .LBB27_241
+; RV32-NEXT:  .LBB27_123: # %else222
+; RV32-NEXT:    slli a3, a1, 15
+; RV32-NEXT:    bgez a3, .LBB27_124
+; RV32-NEXT:    j .LBB27_242
+; RV32-NEXT:  .LBB27_124: # %else224
+; RV32-NEXT:    slli a2, a1, 14
+; RV32-NEXT:    bgez a2, .LBB27_125
+; RV32-NEXT:    j .LBB27_243
+; RV32-NEXT:  .LBB27_125: # %else226
+; RV32-NEXT:    slli a2, a1, 13
+; RV32-NEXT:    bgez a2, .LBB27_126
+; RV32-NEXT:    j .LBB27_244
+; RV32-NEXT:  .LBB27_126: # %else228
+; RV32-NEXT:    slli a2, a1, 12
+; RV32-NEXT:    bgez a2, .LBB27_127
+; RV32-NEXT:    j .LBB27_245
+; RV32-NEXT:  .LBB27_127: # %else230
+; RV32-NEXT:    slli a2, a1, 11
+; RV32-NEXT:    bgez a2, .LBB27_128
+; RV32-NEXT:    j .LBB27_246
+; RV32-NEXT:  .LBB27_128: # %else232
+; RV32-NEXT:    slli a2, a1, 10
+; RV32-NEXT:    bgez a2, .LBB27_129
+; RV32-NEXT:    j .LBB27_247
+; RV32-NEXT:  .LBB27_129: # %else234
+; RV32-NEXT:    slli a2, a1, 9
+; RV32-NEXT:    bgez a2, .LBB27_130
+; RV32-NEXT:    j .LBB27_248
+; RV32-NEXT:  .LBB27_130: # %else236
+; RV32-NEXT:    slli a2, a1, 8
+; RV32-NEXT:    bgez a2, .LBB27_131
+; RV32-NEXT:    j .LBB27_249
+; RV32-NEXT:  .LBB27_131: # %else238
+; RV32-NEXT:    slli a2, a1, 7
+; RV32-NEXT:    bgez a2, .LBB27_132
+; RV32-NEXT:    j .LBB27_250
+; RV32-NEXT:  .LBB27_132: # %else240
+; RV32-NEXT:    slli a2, a1, 6
+; RV32-NEXT:    bgez a2, .LBB27_133
+; RV32-NEXT:    j .LBB27_251
+; RV32-NEXT:  .LBB27_133: # %else242
+; RV32-NEXT:    slli a2, a1, 5
+; RV32-NEXT:    bgez a2, .LBB27_134
+; RV32-NEXT:    j .LBB27_252
+; RV32-NEXT:  .LBB27_134: # %else244
+; RV32-NEXT:    slli a2, a1, 4
+; RV32-NEXT:    bgez a2, .LBB27_135
+; RV32-NEXT:    j .LBB27_253
+; RV32-NEXT:  .LBB27_135: # %else246
+; RV32-NEXT:    slli a2, a1, 3
+; RV32-NEXT:    bgez a2, .LBB27_136
+; RV32-NEXT:    j .LBB27_254
+; RV32-NEXT:  .LBB27_136: # %else248
+; RV32-NEXT:    slli a2, a1, 2
+; RV32-NEXT:    bgez a2, .LBB27_137
+; RV32-NEXT:    j .LBB27_255
+; RV32-NEXT:  .LBB27_137: # %else250
+; RV32-NEXT:    slli a2, a1, 1
+; RV32-NEXT:    bgez a2, .LBB27_138
+; RV32-NEXT:    j .LBB27_256
+; RV32-NEXT:  .LBB27_138: # %else252
+; RV32-NEXT:    bgez a1, .LBB27_140
+; RV32-NEXT:  .LBB27_139: # %cond.store253
+; RV32-NEXT:    li a1, 64
+; RV32-NEXT:    addi a2, sp, 128
+; RV32-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v16, (a2)
+; RV32-NEXT:    lh a1, 254(sp)
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 254(a0)
+; RV32-NEXT:  .LBB27_140: # %else254
+; RV32-NEXT:    addi sp, s0, -2032
+; RV32-NEXT:    .cfi_def_cfa sp, 2032
+; RV32-NEXT:    lw ra, 2028(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s0, 2024(sp) # 4-byte Folded Reload
+; RV32-NEXT:    .cfi_restore ra
+; RV32-NEXT:    .cfi_restore s0
+; RV32-NEXT:    addi sp, sp, 2032
+; RV32-NEXT:    .cfi_def_cfa_offset 0
+; RV32-NEXT:    ret
+; RV32-NEXT:  .LBB27_141: # %cond.store
+; RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; RV32-NEXT:    vmv.x.s a1, v8
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 0(a0)
+; RV32-NEXT:    andi a1, a3, 2
+; RV32-NEXT:    beqz a1, .LBB27_2
+; RV32-NEXT:  .LBB27_142: # %cond.store1
+; RV32-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT:    vslidedown.vi v24, v8, 1
+; RV32-NEXT:    vmv.x.s a1, v24
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 2(a0)
+; RV32-NEXT:    andi a1, a3, 4
+; RV32-NEXT:    beqz a1, .LBB27_3
+; RV32-NEXT:  .LBB27_143: # %cond.store3
+; RV32-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT:    vslidedown.vi v24, v8, 2
+; RV32-NEXT:    vmv.x.s a1, v24
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 4(a0)
+; RV32-NEXT:    andi a1, a3, 8
+; RV32-NEXT:    beqz a1, .LBB27_4
+; RV32-NEXT:  .LBB27_144: # %cond.store5
+; RV32-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT:    vslidedown.vi v24, v8, 3
+; RV32-NEXT:    vmv.x.s a1, v24
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 6(a0)
+; RV32-NEXT:    andi a1, a3, 16
+; RV32-NEXT:    beqz a1, .LBB27_5
+; RV32-NEXT:  .LBB27_145: # %cond.store7
+; RV32-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT:    vslidedown.vi v24, v8, 4
+; RV32-NEXT:    vmv.x.s a1, v24
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 8(a0)
+; RV32-NEXT:    andi a1, a3, 32
+; RV32-NEXT:    beqz a1, .LBB27_6
+; RV32-NEXT:  .LBB27_146: # %cond.store9
+; RV32-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT:    vslidedown.vi v24, v8, 5
+; RV32-NEXT:    vmv.x.s a1, v24
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 10(a0)
+; RV32-NEXT:    andi a1, a3, 64
+; RV32-NEXT:    beqz a1, .LBB27_7
+; RV32-NEXT:  .LBB27_147: # %cond.store11
+; RV32-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT:    vslidedown.vi v24, v8, 6
+; RV32-NEXT:    vmv.x.s a1, v24
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 12(a0)
+; RV32-NEXT:    andi a1, a3, 128
+; RV32-NEXT:    beqz a1, .LBB27_8
+; RV32-NEXT:  .LBB27_148: # %cond.store13
+; RV32-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT:    vslidedown.vi v24, v8, 7
+; RV32-NEXT:    vmv.x.s a1, v24
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 14(a0)
+; RV32-NEXT:    andi a1, a3, 256
+; RV32-NEXT:    beqz a1, .LBB27_9
+; RV32-NEXT:  .LBB27_149: # %cond.store15
+; RV32-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-NEXT:    vslidedown.vi v24, v8, 8
+; RV32-NEXT:    vmv.x.s a1, v24
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 16(a0)
+; RV32-NEXT:    andi a1, a3, 512
+; RV32-NEXT:    beqz a1, .LBB27_10
+; RV32-NEXT:  .LBB27_150: # %cond.store17
+; RV32-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-NEXT:    vslidedown.vi v24, v8, 9
+; RV32-NEXT:    vmv.x.s a1, v24
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 18(a0)
+; RV32-NEXT:    andi a1, a3, 1024
+; RV32-NEXT:    beqz a1, .LBB27_11
+; RV32-NEXT:  .LBB27_151: # %cond.store19
+; RV32-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-NEXT:    vslidedown.vi v24, v8, 10
+; RV32-NEXT:    vmv.x.s a1, v24
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 20(a0)
+; RV32-NEXT:    slli a1, a3, 20
+; RV32-NEXT:    bgez a1, .LBB27_12
+; RV32-NEXT:  .LBB27_152: # %cond.store21
+; RV32-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-NEXT:    vslidedown.vi v24, v8, 11
+; RV32-NEXT:    vmv.x.s a1, v24
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 22(a0)
+; RV32-NEXT:    slli a1, a3, 19
+; RV32-NEXT:    bgez a1, .LBB27_13
+; RV32-NEXT:  .LBB27_153: # %cond.store23
+; RV32-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-NEXT:    vslidedown.vi v24, v8, 12
+; RV32-NEXT:    vmv.x.s a1, v24
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 24(a0)
+; RV32-NEXT:    slli a1, a3, 18
+; RV32-NEXT:    bgez a1, .LBB27_14
+; RV32-NEXT:  .LBB27_154: # %cond.store25
+; RV32-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-NEXT:    vslidedown.vi v24, v8, 13
+; RV32-NEXT:    vmv.x.s a1, v24
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 26(a0)
+; RV32-NEXT:    slli a1, a3, 17
+; RV32-NEXT:    bgez a1, .LBB27_15
+; RV32-NEXT:  .LBB27_155: # %cond.store27
+; RV32-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-NEXT:    vslidedown.vi v24, v8, 14
+; RV32-NEXT:    vmv.x.s a1, v24
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 28(a0)
+; RV32-NEXT:    slli a1, a3, 16
+; RV32-NEXT:    bltz a1, .LBB27_16
+; RV32-NEXT:    j .LBB27_17
+; RV32-NEXT:  .LBB27_156: # %cond.store31
+; RV32-NEXT:    .cfi_restore_state
+; RV32-NEXT:    li a1, 64
+; RV32-NEXT:    lui a4, 3
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a4)
+; RV32-NEXT:    lh a1, 1638(a2)
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 32(a0)
+; RV32-NEXT:    slli a1, a3, 14
+; RV32-NEXT:    bgez a1, .LBB27_19
+; RV32-NEXT:  .LBB27_157: # %cond.store33
+; RV32-NEXT:    li a1, 64
+; RV32-NEXT:    lui a4, 3
+; RV32-NEXT:    addi a4, a4, -128
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a4)
+; RV32-NEXT:    lh a1, 1512(a2)
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 34(a0)
+; RV32-NEXT:    slli a1, a3, 13
+; RV32-NEXT:    bgez a1, .LBB27_20
+; RV32-NEXT:  .LBB27_158: # %cond.store35
+; RV32-NEXT:    li a1, 64
+; RV32-NEXT:    lui a4, 3
+; RV32-NEXT:    addi a4, a4, -256
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a4)
+; RV32-NEXT:    lh a1, 1386(a2)
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 36(a0)
+; RV32-NEXT:    slli a1, a3, 12
+; RV32-NEXT:    bgez a1, .LBB27_21
+; RV32-NEXT:  .LBB27_159: # %cond.store37
+; RV32-NEXT:    li a1, 64
+; RV32-NEXT:    lui a4, 3
+; RV32-NEXT:    addi a4, a4, -384
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a4)
+; RV32-NEXT:    lh a1, 1260(a2)
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 38(a0)
+; RV32-NEXT:    slli a1, a3, 11
+; RV32-NEXT:    bgez a1, .LBB27_22
+; RV32-NEXT:  .LBB27_160: # %cond.store39
+; RV32-NEXT:    li a1, 64
+; RV32-NEXT:    li a4, 23
+; RV32-NEXT:    slli a4, a4, 9
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a4)
+; RV32-NEXT:    lh a1, 1134(a2)
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 40(a0)
+; RV32-NEXT:    slli a1, a3, 10
+; RV32-NEXT:    bgez a1, .LBB27_23
+; RV32-NEXT:  .LBB27_161: # %cond.store41
+; RV32-NEXT:    li a1, 64
+; RV32-NEXT:    lui a4, 3
+; RV32-NEXT:    addi a4, a4, -640
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a4)
+; RV32-NEXT:    lh a1, 1008(a2)
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 42(a0)
+; RV32-NEXT:    slli a1, a3, 9
+; RV32-NEXT:    bgez a1, .LBB27_24
+; RV32-NEXT:  .LBB27_162: # %cond.store43
+; RV32-NEXT:    li a1, 64
+; RV32-NEXT:    lui a4, 3
+; RV32-NEXT:    addi a4, a4, -768
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a4)
+; RV32-NEXT:    lh a1, 882(a2)
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 44(a0)
+; RV32-NEXT:    slli a1, a3, 8
+; RV32-NEXT:    bgez a1, .LBB27_25
+; RV32-NEXT:  .LBB27_163: # %cond.store45
+; RV32-NEXT:    li a1, 64
+; RV32-NEXT:    lui a4, 3
+; RV32-NEXT:    addi a4, a4, -896
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a4)
+; RV32-NEXT:    lh a1, 756(a2)
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 46(a0)
+; RV32-NEXT:    slli a1, a3, 7
+; RV32-NEXT:    bgez a1, .LBB27_26
+; RV32-NEXT:  .LBB27_164: # %cond.store47
+; RV32-NEXT:    li a1, 64
+; RV32-NEXT:    li a4, 11
+; RV32-NEXT:    slli a4, a4, 10
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a4)
+; RV32-NEXT:    lh a1, 630(a2)
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 48(a0)
+; RV32-NEXT:    slli a1, a3, 6
+; RV32-NEXT:    bgez a1, .LBB27_27
+; RV32-NEXT:  .LBB27_165: # %cond.store49
+; RV32-NEXT:    li a1, 64
+; RV32-NEXT:    lui a4, 3
+; RV32-NEXT:    addi a4, a4, -1152
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a4)
+; RV32-NEXT:    lh a1, 504(a2)
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 50(a0)
+; RV32-NEXT:    slli a1, a3, 5
+; RV32-NEXT:    bgez a1, .LBB27_28
+; RV32-NEXT:  .LBB27_166: # %cond.store51
+; RV32-NEXT:    li a1, 64
+; RV32-NEXT:    lui a4, 3
+; RV32-NEXT:    addi a4, a4, -1280
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a4)
+; RV32-NEXT:    lh a1, 378(a2)
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 52(a0)
+; RV32-NEXT:    slli a1, a3, 4
+; RV32-NEXT:    bgez a1, .LBB27_29
+; RV32-NEXT:  .LBB27_167: # %cond.store53
+; RV32-NEXT:    li a1, 64
+; RV32-NEXT:    lui a4, 3
+; RV32-NEXT:    addi a4, a4, -1408
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a4)
+; RV32-NEXT:    lh a1, 252(a2)
+; RV32-NEXT:    fmv.h.x fa5, a1
+; RV32-NEXT:    fsh fa5, 54(a0)
+; RV32-NEXT:    slli a1, a3, 3
+; RV32-NEXT:    bltz a1, .LBB27_30
+; RV32-NEXT:    j .LBB27_31
+; RV32-NEXT:  .LBB27_168: # %cond.store61
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    lui a5, 3
+; RV32-NEXT:    addi a5, a5, -1920
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a5)
+; RV32-NEXT:    lh a3, 1890(a4)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 62(a0)
+; RV32-NEXT:    andi a3, a2, 1
+; RV32-NEXT:    beqz a3, .LBB27_37
+; RV32-NEXT:  .LBB27_169: # %cond.store63
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    li a5, 5
+; RV32-NEXT:    slli a5, a5, 11
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a5)
+; RV32-NEXT:    lh a3, 1764(a4)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 64(a0)
+; RV32-NEXT:    andi a3, a2, 2
+; RV32-NEXT:    beqz a3, .LBB27_38
+; RV32-NEXT:  .LBB27_170: # %cond.store65
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    lui a5, 2
+; RV32-NEXT:    addi a5, a5, 1920
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a5)
+; RV32-NEXT:    lh a3, 1638(a4)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 66(a0)
+; RV32-NEXT:    andi a3, a2, 4
+; RV32-NEXT:    beqz a3, .LBB27_39
+; RV32-NEXT:  .LBB27_171: # %cond.store67
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    lui a5, 2
+; RV32-NEXT:    addi a5, a5, 1792
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a5)
+; RV32-NEXT:    lh a3, 1512(a4)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 68(a0)
+; RV32-NEXT:    andi a3, a2, 8
+; RV32-NEXT:    beqz a3, .LBB27_40
+; RV32-NEXT:  .LBB27_172: # %cond.store69
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    lui a5, 2
+; RV32-NEXT:    addi a5, a5, 1664
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a5)
+; RV32-NEXT:    lh a3, 1386(a4)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 70(a0)
+; RV32-NEXT:    andi a3, a2, 16
+; RV32-NEXT:    beqz a3, .LBB27_41
+; RV32-NEXT:  .LBB27_173: # %cond.store71
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    li a5, 19
+; RV32-NEXT:    slli a5, a5, 9
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a5)
+; RV32-NEXT:    lh a3, 1260(a4)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 72(a0)
+; RV32-NEXT:    andi a3, a2, 32
+; RV32-NEXT:    beqz a3, .LBB27_42
+; RV32-NEXT:  .LBB27_174: # %cond.store73
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    lui a5, 2
+; RV32-NEXT:    addi a5, a5, 1408
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a5)
+; RV32-NEXT:    lh a3, 1134(a4)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 74(a0)
+; RV32-NEXT:    andi a3, a2, 64
+; RV32-NEXT:    beqz a3, .LBB27_43
+; RV32-NEXT:  .LBB27_175: # %cond.store75
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    lui a5, 2
+; RV32-NEXT:    addi a5, a5, 1280
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a5)
+; RV32-NEXT:    lh a3, 1008(a4)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 76(a0)
+; RV32-NEXT:    andi a3, a2, 128
+; RV32-NEXT:    beqz a3, .LBB27_44
+; RV32-NEXT:  .LBB27_176: # %cond.store77
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    lui a5, 2
+; RV32-NEXT:    addi a5, a5, 1152
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a5)
+; RV32-NEXT:    lh a3, 882(a4)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 78(a0)
+; RV32-NEXT:    andi a3, a2, 256
+; RV32-NEXT:    beqz a3, .LBB27_45
+; RV32-NEXT:  .LBB27_177: # %cond.store79
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    li a5, 9
+; RV32-NEXT:    slli a5, a5, 10
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a5)
+; RV32-NEXT:    lh a3, 756(a4)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 80(a0)
+; RV32-NEXT:    andi a3, a2, 512
+; RV32-NEXT:    beqz a3, .LBB27_46
+; RV32-NEXT:  .LBB27_178: # %cond.store81
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    lui a5, 2
+; RV32-NEXT:    addi a5, a5, 896
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a5)
+; RV32-NEXT:    lh a3, 630(a4)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 82(a0)
+; RV32-NEXT:    andi a3, a2, 1024
+; RV32-NEXT:    beqz a3, .LBB27_47
+; RV32-NEXT:  .LBB27_179: # %cond.store83
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    lui a5, 2
+; RV32-NEXT:    addi a5, a5, 768
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a5)
+; RV32-NEXT:    lh a3, 504(a4)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 84(a0)
+; RV32-NEXT:    slli a3, a2, 20
+; RV32-NEXT:    bgez a3, .LBB27_48
+; RV32-NEXT:  .LBB27_180: # %cond.store85
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    lui a5, 2
+; RV32-NEXT:    addi a5, a5, 640
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a5)
+; RV32-NEXT:    lh a3, 378(a4)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 86(a0)
+; RV32-NEXT:    slli a3, a2, 19
+; RV32-NEXT:    bgez a3, .LBB27_49
+; RV32-NEXT:  .LBB27_181: # %cond.store87
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    li a5, 17
+; RV32-NEXT:    slli a5, a5, 9
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a5)
+; RV32-NEXT:    lh a3, 252(a4)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 88(a0)
+; RV32-NEXT:    slli a3, a2, 18
+; RV32-NEXT:    bgez a3, .LBB27_50
+; RV32-NEXT:  .LBB27_182: # %cond.store89
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    lui a5, 2
+; RV32-NEXT:    addi a5, a5, 384
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a5)
+; RV32-NEXT:    lh a3, 126(a4)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 90(a0)
+; RV32-NEXT:    slli a3, a2, 17
+; RV32-NEXT:    bltz a3, .LBB27_51
+; RV32-NEXT:    j .LBB27_52
+; RV32-NEXT:  .LBB27_183: # %cond.store93
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    lui a5, 2
+; RV32-NEXT:    addi a5, a5, 128
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a5)
+; RV32-NEXT:    lh a3, 2016(a4)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 94(a0)
+; RV32-NEXT:    slli a3, a2, 15
+; RV32-NEXT:    bgez a3, .LBB27_54
+; RV32-NEXT:  .LBB27_184: # %cond.store95
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    lui a5, 2
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a5)
+; RV32-NEXT:    lh a3, 1890(a4)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 96(a0)
+; RV32-NEXT:    slli a3, a2, 14
+; RV32-NEXT:    bgez a3, .LBB27_55
+; RV32-NEXT:  .LBB27_185: # %cond.store97
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    lui a5, 2
+; RV32-NEXT:    addi a5, a5, -128
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a5)
+; RV32-NEXT:    lh a3, 1764(a4)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 98(a0)
+; RV32-NEXT:    slli a3, a2, 13
+; RV32-NEXT:    bgez a3, .LBB27_56
+; RV32-NEXT:  .LBB27_186: # %cond.store99
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    li a5, 31
+; RV32-NEXT:    slli a5, a5, 8
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a5)
+; RV32-NEXT:    lh a3, 1638(a4)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 100(a0)
+; RV32-NEXT:    slli a3, a2, 12
+; RV32-NEXT:    bgez a3, .LBB27_57
+; RV32-NEXT:  .LBB27_187: # %cond.store101
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    lui a5, 2
+; RV32-NEXT:    addi a5, a5, -384
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a5)
+; RV32-NEXT:    lh a3, 1512(a4)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 102(a0)
+; RV32-NEXT:    slli a3, a2, 11
+; RV32-NEXT:    bgez a3, .LBB27_58
+; RV32-NEXT:  .LBB27_188: # %cond.store103
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    li a5, 15
+; RV32-NEXT:    slli a5, a5, 9
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a5)
+; RV32-NEXT:    lh a3, 1386(a4)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 104(a0)
+; RV32-NEXT:    slli a3, a2, 10
+; RV32-NEXT:    bgez a3, .LBB27_59
+; RV32-NEXT:  .LBB27_189: # %cond.store105
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    lui a5, 2
+; RV32-NEXT:    addi a5, a5, -640
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a5)
+; RV32-NEXT:    lh a3, 1260(a4)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 106(a0)
+; RV32-NEXT:    slli a3, a2, 9
+; RV32-NEXT:    bgez a3, .LBB27_60
+; RV32-NEXT:  .LBB27_190: # %cond.store107
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    li a5, 29
+; RV32-NEXT:    slli a5, a5, 8
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a5)
+; RV32-NEXT:    lh a3, 1134(a4)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 108(a0)
+; RV32-NEXT:    slli a3, a2, 8
+; RV32-NEXT:    bgez a3, .LBB27_61
+; RV32-NEXT:  .LBB27_191: # %cond.store109
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    lui a5, 2
+; RV32-NEXT:    addi a5, a5, -896
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a5)
+; RV32-NEXT:    lh a3, 1008(a4)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 110(a0)
+; RV32-NEXT:    slli a3, a2, 7
+; RV32-NEXT:    bgez a3, .LBB27_62
+; RV32-NEXT:  .LBB27_192: # %cond.store111
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    li a5, 7
+; RV32-NEXT:    slli a5, a5, 10
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a5)
+; RV32-NEXT:    lh a3, 882(a4)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 112(a0)
+; RV32-NEXT:    slli a3, a2, 6
+; RV32-NEXT:    bgez a3, .LBB27_63
+; RV32-NEXT:  .LBB27_193: # %cond.store113
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    lui a5, 2
+; RV32-NEXT:    addi a5, a5, -1152
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a5)
+; RV32-NEXT:    lh a3, 756(a4)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 114(a0)
+; RV32-NEXT:    slli a3, a2, 5
+; RV32-NEXT:    bgez a3, .LBB27_64
+; RV32-NEXT:  .LBB27_194: # %cond.store115
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    li a5, 27
+; RV32-NEXT:    slli a5, a5, 8
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a5)
+; RV32-NEXT:    lh a3, 630(a4)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 116(a0)
+; RV32-NEXT:    slli a3, a2, 4
+; RV32-NEXT:    bgez a3, .LBB27_65
+; RV32-NEXT:  .LBB27_195: # %cond.store117
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    lui a5, 2
+; RV32-NEXT:    addi a5, a5, -1408
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a5)
+; RV32-NEXT:    lh a3, 504(a4)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 118(a0)
+; RV32-NEXT:    slli a3, a2, 3
+; RV32-NEXT:    bgez a3, .LBB27_66
+; RV32-NEXT:  .LBB27_196: # %cond.store119
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    li a5, 13
+; RV32-NEXT:    slli a5, a5, 9
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a5)
+; RV32-NEXT:    lh a3, 378(a4)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 120(a0)
+; RV32-NEXT:    slli a3, a2, 2
+; RV32-NEXT:    bltz a3, .LBB27_67
+; RV32-NEXT:    j .LBB27_68
+; RV32-NEXT:  .LBB27_197: # %cond.store125
+; RV32-NEXT:    li a2, 64
+; RV32-NEXT:    lui a5, 2
+; RV32-NEXT:    addi a5, a5, -1920
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v8, (a5)
+; RV32-NEXT:    lh a2, 0(a4)
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 126(a0)
+; RV32-NEXT:    andi a2, a3, 1
+; RV32-NEXT:    beqz a2, .LBB27_72
+; RV32-NEXT:  .LBB27_198: # %cond.store127
+; RV32-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT:    vmv.x.s a2, v16
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 128(a0)
+; RV32-NEXT:    andi a2, a3, 2
+; RV32-NEXT:    beqz a2, .LBB27_73
+; RV32-NEXT:  .LBB27_199: # %cond.store129
+; RV32-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT:    vslidedown.vi v8, v16, 1
+; RV32-NEXT:    vmv.x.s a2, v8
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 130(a0)
+; RV32-NEXT:    andi a2, a3, 4
+; RV32-NEXT:    beqz a2, .LBB27_74
+; RV32-NEXT:  .LBB27_200: # %cond.store131
+; RV32-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT:    vslidedown.vi v8, v16, 2
+; RV32-NEXT:    vmv.x.s a2, v8
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 132(a0)
+; RV32-NEXT:    andi a2, a3, 8
+; RV32-NEXT:    beqz a2, .LBB27_75
+; RV32-NEXT:  .LBB27_201: # %cond.store133
+; RV32-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT:    vslidedown.vi v8, v16, 3
+; RV32-NEXT:    vmv.x.s a2, v8
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 134(a0)
+; RV32-NEXT:    andi a2, a3, 16
+; RV32-NEXT:    beqz a2, .LBB27_76
+; RV32-NEXT:  .LBB27_202: # %cond.store135
+; RV32-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT:    vslidedown.vi v8, v16, 4
+; RV32-NEXT:    vmv.x.s a2, v8
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 136(a0)
+; RV32-NEXT:    andi a2, a3, 32
+; RV32-NEXT:    beqz a2, .LBB27_77
+; RV32-NEXT:  .LBB27_203: # %cond.store137
+; RV32-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT:    vslidedown.vi v8, v16, 5
+; RV32-NEXT:    vmv.x.s a2, v8
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 138(a0)
+; RV32-NEXT:    andi a2, a3, 64
+; RV32-NEXT:    beqz a2, .LBB27_78
+; RV32-NEXT:  .LBB27_204: # %cond.store139
+; RV32-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT:    vslidedown.vi v8, v16, 6
+; RV32-NEXT:    vmv.x.s a2, v8
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 140(a0)
+; RV32-NEXT:    andi a2, a3, 128
+; RV32-NEXT:    beqz a2, .LBB27_79
+; RV32-NEXT:  .LBB27_205: # %cond.store141
+; RV32-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT:    vslidedown.vi v8, v16, 7
+; RV32-NEXT:    vmv.x.s a2, v8
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 142(a0)
+; RV32-NEXT:    andi a2, a3, 256
+; RV32-NEXT:    beqz a2, .LBB27_80
+; RV32-NEXT:  .LBB27_206: # %cond.store143
+; RV32-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-NEXT:    vslidedown.vi v8, v16, 8
+; RV32-NEXT:    vmv.x.s a2, v8
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 144(a0)
+; RV32-NEXT:    andi a2, a3, 512
+; RV32-NEXT:    beqz a2, .LBB27_81
+; RV32-NEXT:  .LBB27_207: # %cond.store145
+; RV32-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-NEXT:    vslidedown.vi v8, v16, 9
+; RV32-NEXT:    vmv.x.s a2, v8
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 146(a0)
+; RV32-NEXT:    andi a2, a3, 1024
+; RV32-NEXT:    beqz a2, .LBB27_82
+; RV32-NEXT:  .LBB27_208: # %cond.store147
+; RV32-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-NEXT:    vslidedown.vi v8, v16, 10
+; RV32-NEXT:    vmv.x.s a2, v8
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 148(a0)
+; RV32-NEXT:    slli a2, a3, 20
+; RV32-NEXT:    bgez a2, .LBB27_83
+; RV32-NEXT:  .LBB27_209: # %cond.store149
+; RV32-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-NEXT:    vslidedown.vi v8, v16, 11
+; RV32-NEXT:    vmv.x.s a2, v8
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 150(a0)
+; RV32-NEXT:    slli a2, a3, 19
+; RV32-NEXT:    bgez a2, .LBB27_84
+; RV32-NEXT:  .LBB27_210: # %cond.store151
+; RV32-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-NEXT:    vslidedown.vi v8, v16, 12
+; RV32-NEXT:    vmv.x.s a2, v8
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 152(a0)
+; RV32-NEXT:    slli a2, a3, 18
+; RV32-NEXT:    bgez a2, .LBB27_85
+; RV32-NEXT:  .LBB27_211: # %cond.store153
+; RV32-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-NEXT:    vslidedown.vi v8, v16, 13
+; RV32-NEXT:    vmv.x.s a2, v8
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 154(a0)
+; RV32-NEXT:    slli a2, a3, 17
+; RV32-NEXT:    bgez a2, .LBB27_86
+; RV32-NEXT:  .LBB27_212: # %cond.store155
+; RV32-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-NEXT:    vslidedown.vi v8, v16, 14
+; RV32-NEXT:    vmv.x.s a2, v8
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 156(a0)
+; RV32-NEXT:    slli a2, a3, 16
+; RV32-NEXT:    bltz a2, .LBB27_87
+; RV32-NEXT:    j .LBB27_88
+; RV32-NEXT:  .LBB27_213: # %cond.store159
+; RV32-NEXT:    li a4, 64
+; RV32-NEXT:    li a5, 3
+; RV32-NEXT:    slli a5, a5, 11
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    vsetvli zero, a4, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v16, (a5)
+; RV32-NEXT:    lh a4, 1890(a2)
+; RV32-NEXT:    fmv.h.x fa5, a4
+; RV32-NEXT:    fsh fa5, 160(a0)
+; RV32-NEXT:    slli a4, a3, 14
+; RV32-NEXT:    bgez a4, .LBB27_90
+; RV32-NEXT:  .LBB27_214: # %cond.store161
+; RV32-NEXT:    li a4, 64
+; RV32-NEXT:    lui a5, 1
+; RV32-NEXT:    addi a5, a5, 1920
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    vsetvli zero, a4, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v16, (a5)
+; RV32-NEXT:    lh a4, 1764(a2)
+; RV32-NEXT:    fmv.h.x fa5, a4
+; RV32-NEXT:    fsh fa5, 162(a0)
+; RV32-NEXT:    slli a4, a3, 13
+; RV32-NEXT:    bgez a4, .LBB27_91
+; RV32-NEXT:  .LBB27_215: # %cond.store163
+; RV32-NEXT:    li a4, 64
+; RV32-NEXT:    li a5, 23
+; RV32-NEXT:    slli a5, a5, 8
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    vsetvli zero, a4, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v16, (a5)
+; RV32-NEXT:    lh a4, 1638(a2)
+; RV32-NEXT:    fmv.h.x fa5, a4
+; RV32-NEXT:    fsh fa5, 164(a0)
+; RV32-NEXT:    slli a4, a3, 12
+; RV32-NEXT:    bgez a4, .LBB27_92
+; RV32-NEXT:  .LBB27_216: # %cond.store165
+; RV32-NEXT:    li a4, 64
+; RV32-NEXT:    lui a5, 1
+; RV32-NEXT:    addi a5, a5, 1664
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    vsetvli zero, a4, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v16, (a5)
+; RV32-NEXT:    lh a4, 1512(a2)
+; RV32-NEXT:    fmv.h.x fa5, a4
+; RV32-NEXT:    fsh fa5, 166(a0)
+; RV32-NEXT:    slli a4, a3, 11
+; RV32-NEXT:    bgez a4, .LBB27_93
+; RV32-NEXT:  .LBB27_217: # %cond.store167
+; RV32-NEXT:    li a4, 64
+; RV32-NEXT:    li a5, 11
+; RV32-NEXT:    slli a5, a5, 9
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    vsetvli zero, a4, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v16, (a5)
+; RV32-NEXT:    lh a4, 1386(a2)
+; RV32-NEXT:    fmv.h.x fa5, a4
+; RV32-NEXT:    fsh fa5, 168(a0)
+; RV32-NEXT:    slli a4, a3, 10
+; RV32-NEXT:    bgez a4, .LBB27_94
+; RV32-NEXT:  .LBB27_218: # %cond.store169
+; RV32-NEXT:    li a4, 64
+; RV32-NEXT:    lui a5, 1
+; RV32-NEXT:    addi a5, a5, 1408
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    vsetvli zero, a4, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v16, (a5)
+; RV32-NEXT:    lh a4, 1260(a2)
+; RV32-NEXT:    fmv.h.x fa5, a4
+; RV32-NEXT:    fsh fa5, 170(a0)
+; RV32-NEXT:    slli a4, a3, 9
+; RV32-NEXT:    bgez a4, .LBB27_95
+; RV32-NEXT:  .LBB27_219: # %cond.store171
+; RV32-NEXT:    li a4, 64
+; RV32-NEXT:    li a5, 21
+; RV32-NEXT:    slli a5, a5, 8
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    vsetvli zero, a4, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v16, (a5)
+; RV32-NEXT:    lh a4, 1134(a2)
+; RV32-NEXT:    fmv.h.x fa5, a4
+; RV32-NEXT:    fsh fa5, 172(a0)
+; RV32-NEXT:    slli a4, a3, 8
+; RV32-NEXT:    bgez a4, .LBB27_96
+; RV32-NEXT:  .LBB27_220: # %cond.store173
+; RV32-NEXT:    li a4, 64
+; RV32-NEXT:    lui a5, 1
+; RV32-NEXT:    addi a5, a5, 1152
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    vsetvli zero, a4, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v16, (a5)
+; RV32-NEXT:    lh a4, 1008(a2)
+; RV32-NEXT:    fmv.h.x fa5, a4
+; RV32-NEXT:    fsh fa5, 174(a0)
+; RV32-NEXT:    slli a4, a3, 7
+; RV32-NEXT:    bgez a4, .LBB27_97
+; RV32-NEXT:  .LBB27_221: # %cond.store175
+; RV32-NEXT:    li a4, 64
+; RV32-NEXT:    li a5, 5
+; RV32-NEXT:    slli a5, a5, 10
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    vsetvli zero, a4, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v16, (a5)
+; RV32-NEXT:    lh a4, 882(a2)
+; RV32-NEXT:    fmv.h.x fa5, a4
+; RV32-NEXT:    fsh fa5, 176(a0)
+; RV32-NEXT:    slli a4, a3, 6
+; RV32-NEXT:    bgez a4, .LBB27_98
+; RV32-NEXT:  .LBB27_222: # %cond.store177
+; RV32-NEXT:    li a4, 64
+; RV32-NEXT:    lui a5, 1
+; RV32-NEXT:    addi a5, a5, 896
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    vsetvli zero, a4, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v16, (a5)
+; RV32-NEXT:    lh a4, 756(a2)
+; RV32-NEXT:    fmv.h.x fa5, a4
+; RV32-NEXT:    fsh fa5, 178(a0)
+; RV32-NEXT:    slli a4, a3, 5
+; RV32-NEXT:    bgez a4, .LBB27_99
+; RV32-NEXT:  .LBB27_223: # %cond.store179
+; RV32-NEXT:    li a4, 64
+; RV32-NEXT:    li a5, 19
+; RV32-NEXT:    slli a5, a5, 8
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    vsetvli zero, a4, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v16, (a5)
+; RV32-NEXT:    lh a4, 630(a2)
+; RV32-NEXT:    fmv.h.x fa5, a4
+; RV32-NEXT:    fsh fa5, 180(a0)
+; RV32-NEXT:    slli a4, a3, 4
+; RV32-NEXT:    bgez a4, .LBB27_100
+; RV32-NEXT:  .LBB27_224: # %cond.store181
+; RV32-NEXT:    li a4, 64
+; RV32-NEXT:    lui a5, 1
+; RV32-NEXT:    addi a5, a5, 640
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    vsetvli zero, a4, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v16, (a5)
+; RV32-NEXT:    lh a4, 504(a2)
+; RV32-NEXT:    fmv.h.x fa5, a4
+; RV32-NEXT:    fsh fa5, 182(a0)
+; RV32-NEXT:    slli a4, a3, 3
+; RV32-NEXT:    bgez a4, .LBB27_101
+; RV32-NEXT:  .LBB27_225: # %cond.store183
+; RV32-NEXT:    li a4, 64
+; RV32-NEXT:    li a5, 9
+; RV32-NEXT:    slli a5, a5, 9
+; RV32-NEXT:    add a5, sp, a5
+; RV32-NEXT:    vsetvli zero, a4, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v16, (a5)
+; RV32-NEXT:    lh a4, 378(a2)
+; RV32-NEXT:    fmv.h.x fa5, a4
+; RV32-NEXT:    fsh fa5, 184(a0)
+; RV32-NEXT:    slli a4, a3, 2
+; RV32-NEXT:    bltz a4, .LBB27_102
+; RV32-NEXT:    j .LBB27_103
+; RV32-NEXT:  .LBB27_226: # %cond.store191
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    lui a4, 1
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v16, (a4)
+; RV32-NEXT:    lh a3, 2016(a2)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 192(a0)
+; RV32-NEXT:    andi a3, a1, 2
+; RV32-NEXT:    beqz a3, .LBB27_109
+; RV32-NEXT:  .LBB27_227: # %cond.store193
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    addi a4, sp, 2047
+; RV32-NEXT:    addi a4, a4, 1921
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v16, (a4)
+; RV32-NEXT:    lh a3, 1890(a2)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 194(a0)
+; RV32-NEXT:    andi a3, a1, 4
+; RV32-NEXT:    beqz a3, .LBB27_110
+; RV32-NEXT:  .LBB27_228: # %cond.store195
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    addi a4, sp, 2047
+; RV32-NEXT:    addi a4, a4, 1793
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v16, (a4)
+; RV32-NEXT:    lh a3, 1764(a2)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 196(a0)
+; RV32-NEXT:    andi a3, a1, 8
+; RV32-NEXT:    beqz a3, .LBB27_111
+; RV32-NEXT:  .LBB27_229: # %cond.store197
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    addi a4, sp, 2047
+; RV32-NEXT:    addi a4, a4, 1665
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v16, (a4)
+; RV32-NEXT:    lh a3, 1638(a2)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 198(a0)
+; RV32-NEXT:    andi a3, a1, 16
+; RV32-NEXT:    beqz a3, .LBB27_112
+; RV32-NEXT:  .LBB27_230: # %cond.store199
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    addi a4, sp, 2047
+; RV32-NEXT:    addi a4, a4, 1537
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v16, (a4)
+; RV32-NEXT:    lh a3, 1512(a2)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 200(a0)
+; RV32-NEXT:    andi a3, a1, 32
+; RV32-NEXT:    beqz a3, .LBB27_113
+; RV32-NEXT:  .LBB27_231: # %cond.store201
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    addi a4, sp, 2047
+; RV32-NEXT:    addi a4, a4, 1409
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v16, (a4)
+; RV32-NEXT:    lh a3, 1386(a2)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 202(a0)
+; RV32-NEXT:    andi a3, a1, 64
+; RV32-NEXT:    beqz a3, .LBB27_114
+; RV32-NEXT:  .LBB27_232: # %cond.store203
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    addi a4, sp, 2047
+; RV32-NEXT:    addi a4, a4, 1281
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v16, (a4)
+; RV32-NEXT:    lh a3, 1260(a2)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 204(a0)
+; RV32-NEXT:    andi a3, a1, 128
+; RV32-NEXT:    beqz a3, .LBB27_115
+; RV32-NEXT:  .LBB27_233: # %cond.store205
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    addi a4, sp, 2047
+; RV32-NEXT:    addi a4, a4, 1153
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v16, (a4)
+; RV32-NEXT:    lh a3, 1134(a2)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 206(a0)
+; RV32-NEXT:    andi a3, a1, 256
+; RV32-NEXT:    beqz a3, .LBB27_116
+; RV32-NEXT:  .LBB27_234: # %cond.store207
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    addi a4, sp, 2047
+; RV32-NEXT:    addi a4, a4, 1025
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v16, (a4)
+; RV32-NEXT:    lh a3, 1008(a2)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 208(a0)
+; RV32-NEXT:    andi a3, a1, 512
+; RV32-NEXT:    beqz a3, .LBB27_117
+; RV32-NEXT:  .LBB27_235: # %cond.store209
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    addi a4, sp, 2047
+; RV32-NEXT:    addi a4, a4, 897
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v16, (a4)
+; RV32-NEXT:    lh a3, 882(a2)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 210(a0)
+; RV32-NEXT:    andi a3, a1, 1024
+; RV32-NEXT:    beqz a3, .LBB27_118
+; RV32-NEXT:  .LBB27_236: # %cond.store211
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    addi a4, sp, 2047
+; RV32-NEXT:    addi a4, a4, 769
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v16, (a4)
+; RV32-NEXT:    lh a3, 756(a2)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 212(a0)
+; RV32-NEXT:    slli a3, a1, 20
+; RV32-NEXT:    bgez a3, .LBB27_119
+; RV32-NEXT:  .LBB27_237: # %cond.store213
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    addi a4, sp, 2047
+; RV32-NEXT:    addi a4, a4, 641
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v16, (a4)
+; RV32-NEXT:    lh a3, 630(a2)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 214(a0)
+; RV32-NEXT:    slli a3, a1, 19
+; RV32-NEXT:    bgez a3, .LBB27_120
+; RV32-NEXT:  .LBB27_238: # %cond.store215
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    addi a4, sp, 2047
+; RV32-NEXT:    addi a4, a4, 513
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v16, (a4)
+; RV32-NEXT:    lh a3, 504(a2)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 216(a0)
+; RV32-NEXT:    slli a3, a1, 18
+; RV32-NEXT:    bgez a3, .LBB27_121
+; RV32-NEXT:  .LBB27_239: # %cond.store217
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    addi a4, sp, 2047
+; RV32-NEXT:    addi a4, a4, 385
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v16, (a4)
+; RV32-NEXT:    lh a3, 378(a2)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 218(a0)
+; RV32-NEXT:    slli a3, a1, 17
+; RV32-NEXT:    bgez a3, .LBB27_122
+; RV32-NEXT:  .LBB27_240: # %cond.store219
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    addi a4, sp, 2047
+; RV32-NEXT:    addi a4, a4, 257
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v16, (a4)
+; RV32-NEXT:    lh a3, 252(a2)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 220(a0)
+; RV32-NEXT:    slli a3, a1, 16
+; RV32-NEXT:    bgez a3, .LBB27_123
+; RV32-NEXT:  .LBB27_241: # %cond.store221
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    addi a4, sp, 2047
+; RV32-NEXT:    addi a4, a4, 129
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v16, (a4)
+; RV32-NEXT:    lh a3, 126(a2)
+; RV32-NEXT:    fmv.h.x fa5, a3
+; RV32-NEXT:    fsh fa5, 222(a0)
+; RV32-NEXT:    slli a3, a1, 15
+; RV32-NEXT:    bltz a3, .LBB27_242
+; RV32-NEXT:    j .LBB27_124
+; RV32-NEXT:  .LBB27_242: # %cond.store223
+; RV32-NEXT:    li a3, 64
+; RV32-NEXT:    addi a4, sp, 2047
+; RV32-NEXT:    addi a4, a4, 1
+; RV32-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v16, (a4)
+; RV32-NEXT:    lh a2, 0(a2)
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 224(a0)
+; RV32-NEXT:    slli a2, a1, 14
+; RV32-NEXT:    bltz a2, .LBB27_243
+; RV32-NEXT:    j .LBB27_125
+; RV32-NEXT:  .LBB27_243: # %cond.store225
+; RV32-NEXT:    li a2, 64
+; RV32-NEXT:    addi a3, sp, 1920
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v16, (a3)
+; RV32-NEXT:    lh a2, 2018(sp)
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 226(a0)
+; RV32-NEXT:    slli a2, a1, 13
+; RV32-NEXT:    bltz a2, .LBB27_244
+; RV32-NEXT:    j .LBB27_126
+; RV32-NEXT:  .LBB27_244: # %cond.store227
+; RV32-NEXT:    li a2, 64
+; RV32-NEXT:    addi a3, sp, 1792
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v16, (a3)
+; RV32-NEXT:    lh a2, 1892(sp)
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 228(a0)
+; RV32-NEXT:    slli a2, a1, 12
+; RV32-NEXT:    bltz a2, .LBB27_245
+; RV32-NEXT:    j .LBB27_127
+; RV32-NEXT:  .LBB27_245: # %cond.store229
+; RV32-NEXT:    li a2, 64
+; RV32-NEXT:    addi a3, sp, 1664
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v16, (a3)
+; RV32-NEXT:    lh a2, 1766(sp)
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 230(a0)
+; RV32-NEXT:    slli a2, a1, 11
+; RV32-NEXT:    bltz a2, .LBB27_246
+; RV32-NEXT:    j .LBB27_128
+; RV32-NEXT:  .LBB27_246: # %cond.store231
+; RV32-NEXT:    li a2, 64
+; RV32-NEXT:    addi a3, sp, 1536
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v16, (a3)
+; RV32-NEXT:    lh a2, 1640(sp)
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 232(a0)
+; RV32-NEXT:    slli a2, a1, 10
+; RV32-NEXT:    bltz a2, .LBB27_247
+; RV32-NEXT:    j .LBB27_129
+; RV32-NEXT:  .LBB27_247: # %cond.store233
+; RV32-NEXT:    li a2, 64
+; RV32-NEXT:    addi a3, sp, 1408
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v16, (a3)
+; RV32-NEXT:    lh a2, 1514(sp)
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 234(a0)
+; RV32-NEXT:    slli a2, a1, 9
+; RV32-NEXT:    bltz a2, .LBB27_248
+; RV32-NEXT:    j .LBB27_130
+; RV32-NEXT:  .LBB27_248: # %cond.store235
+; RV32-NEXT:    li a2, 64
+; RV32-NEXT:    addi a3, sp, 1280
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v16, (a3)
+; RV32-NEXT:    lh a2, 1388(sp)
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 236(a0)
+; RV32-NEXT:    slli a2, a1, 8
+; RV32-NEXT:    bltz a2, .LBB27_249
+; RV32-NEXT:    j .LBB27_131
+; RV32-NEXT:  .LBB27_249: # %cond.store237
+; RV32-NEXT:    li a2, 64
+; RV32-NEXT:    addi a3, sp, 1152
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v16, (a3)
+; RV32-NEXT:    lh a2, 1262(sp)
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 238(a0)
+; RV32-NEXT:    slli a2, a1, 7
+; RV32-NEXT:    bltz a2, .LBB27_250
+; RV32-NEXT:    j .LBB27_132
+; RV32-NEXT:  .LBB27_250: # %cond.store239
+; RV32-NEXT:    li a2, 64
+; RV32-NEXT:    addi a3, sp, 1024
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v16, (a3)
+; RV32-NEXT:    lh a2, 1136(sp)
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 240(a0)
+; RV32-NEXT:    slli a2, a1, 6
+; RV32-NEXT:    bltz a2, .LBB27_251
+; RV32-NEXT:    j .LBB27_133
+; RV32-NEXT:  .LBB27_251: # %cond.store241
+; RV32-NEXT:    li a2, 64
+; RV32-NEXT:    addi a3, sp, 896
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v16, (a3)
+; RV32-NEXT:    lh a2, 1010(sp)
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 242(a0)
+; RV32-NEXT:    slli a2, a1, 5
+; RV32-NEXT:    bltz a2, .LBB27_252
+; RV32-NEXT:    j .LBB27_134
+; RV32-NEXT:  .LBB27_252: # %cond.store243
+; RV32-NEXT:    li a2, 64
+; RV32-NEXT:    addi a3, sp, 768
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v16, (a3)
+; RV32-NEXT:    lh a2, 884(sp)
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 244(a0)
+; RV32-NEXT:    slli a2, a1, 4
+; RV32-NEXT:    bltz a2, .LBB27_253
+; RV32-NEXT:    j .LBB27_135
+; RV32-NEXT:  .LBB27_253: # %cond.store245
+; RV32-NEXT:    li a2, 64
+; RV32-NEXT:    addi a3, sp, 640
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v16, (a3)
+; RV32-NEXT:    lh a2, 758(sp)
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 246(a0)
+; RV32-NEXT:    slli a2, a1, 3
+; RV32-NEXT:    bltz a2, .LBB27_254
+; RV32-NEXT:    j .LBB27_136
+; RV32-NEXT:  .LBB27_254: # %cond.store247
+; RV32-NEXT:    li a2, 64
+; RV32-NEXT:    addi a3, sp, 512
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v16, (a3)
+; RV32-NEXT:    lh a2, 632(sp)
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 248(a0)
+; RV32-NEXT:    slli a2, a1, 2
+; RV32-NEXT:    bltz a2, .LBB27_255
+; RV32-NEXT:    j .LBB27_137
+; RV32-NEXT:  .LBB27_255: # %cond.store249
+; RV32-NEXT:    li a2, 64
+; RV32-NEXT:    addi a3, sp, 384
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v16, (a3)
+; RV32-NEXT:    lh a2, 506(sp)
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 250(a0)
+; RV32-NEXT:    slli a2, a1, 1
+; RV32-NEXT:    bltz a2, .LBB27_256
+; RV32-NEXT:    j .LBB27_138
+; RV32-NEXT:  .LBB27_256: # %cond.store251
+; RV32-NEXT:    li a2, 64
+; RV32-NEXT:    addi a3, sp, 256
+; RV32-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV32-NEXT:    vse16.v v16, (a3)
+; RV32-NEXT:    lh a2, 380(sp)
+; RV32-NEXT:    fmv.h.x fa5, a2
+; RV32-NEXT:    fsh fa5, 252(a0)
+; RV32-NEXT:    bgez a1, .LBB27_257
+; RV32-NEXT:    j .LBB27_139
+; RV32-NEXT:  .LBB27_257: # %cond.store251
+; RV32-NEXT:    j .LBB27_140
+;
+; RV64-LABEL: masked_store_v128bf16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT:    vmv.x.s a2, v0
+; RV64-NEXT:    andi a1, a2, 1
+; RV64-NEXT:    bnez a1, .LBB27_137
+; RV64-NEXT:  # %bb.1: # %else
+; RV64-NEXT:    andi a1, a2, 2
+; RV64-NEXT:    bnez a1, .LBB27_138
+; RV64-NEXT:  .LBB27_2: # %else2
+; RV64-NEXT:    andi a1, a2, 4
+; RV64-NEXT:    bnez a1, .LBB27_139
+; RV64-NEXT:  .LBB27_3: # %else4
+; RV64-NEXT:    andi a1, a2, 8
+; RV64-NEXT:    bnez a1, .LBB27_140
+; RV64-NEXT:  .LBB27_4: # %else6
+; RV64-NEXT:    andi a1, a2, 16
+; RV64-NEXT:    bnez a1, .LBB27_141
+; RV64-NEXT:  .LBB27_5: # %else8
+; RV64-NEXT:    andi a1, a2, 32
+; RV64-NEXT:    bnez a1, .LBB27_142
+; RV64-NEXT:  .LBB27_6: # %else10
+; RV64-NEXT:    andi a1, a2, 64
+; RV64-NEXT:    bnez a1, .LBB27_143
+; RV64-NEXT:  .LBB27_7: # %else12
+; RV64-NEXT:    andi a1, a2, 128
+; RV64-NEXT:    bnez a1, .LBB27_144
+; RV64-NEXT:  .LBB27_8: # %else14
+; RV64-NEXT:    andi a1, a2, 256
+; RV64-NEXT:    bnez a1, .LBB27_145
+; RV64-NEXT:  .LBB27_9: # %else16
+; RV64-NEXT:    andi a1, a2, 512
+; RV64-NEXT:    bnez a1, .LBB27_146
+; RV64-NEXT:  .LBB27_10: # %else18
+; RV64-NEXT:    andi a1, a2, 1024
+; RV64-NEXT:    bnez a1, .LBB27_147
+; RV64-NEXT:  .LBB27_11: # %else20
+; RV64-NEXT:    slli a1, a2, 52
+; RV64-NEXT:    bltz a1, .LBB27_148
+; RV64-NEXT:  .LBB27_12: # %else22
+; RV64-NEXT:    slli a1, a2, 51
+; RV64-NEXT:    bltz a1, .LBB27_149
+; RV64-NEXT:  .LBB27_13: # %else24
+; RV64-NEXT:    slli a1, a2, 50
+; RV64-NEXT:    bltz a1, .LBB27_150
+; RV64-NEXT:  .LBB27_14: # %else26
+; RV64-NEXT:    slli a1, a2, 49
+; RV64-NEXT:    bltz a1, .LBB27_151
+; RV64-NEXT:  .LBB27_15: # %else28
+; RV64-NEXT:    slli a1, a2, 48
+; RV64-NEXT:    bgez a1, .LBB27_17
+; RV64-NEXT:  .LBB27_16: # %cond.store29
+; RV64-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-NEXT:    vslidedown.vi v24, v8, 15
+; RV64-NEXT:    vmv.x.s a1, v24
+; RV64-NEXT:    fmv.h.x fa5, a1
+; RV64-NEXT:    fsh fa5, 30(a0)
+; RV64-NEXT:  .LBB27_17: # %else30
+; RV64-NEXT:    addi sp, sp, -2032
+; RV64-NEXT:    .cfi_def_cfa_offset 2032
+; RV64-NEXT:    sd ra, 2024(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s0, 2016(sp) # 8-byte Folded Spill
+; RV64-NEXT:    .cfi_offset ra, -8
+; RV64-NEXT:    .cfi_offset s0, -16
+; RV64-NEXT:    addi s0, sp, 2032
+; RV64-NEXT:    .cfi_def_cfa s0, 0
+; RV64-NEXT:    .cfi_remember_state
+; RV64-NEXT:    lui a1, 3
+; RV64-NEXT:    addiw a1, a1, -1776
+; RV64-NEXT:    sub sp, sp, a1
+; RV64-NEXT:    andi sp, sp, -128
+; RV64-NEXT:    slli a3, a2, 47
+; RV64-NEXT:    lui a1, 3
+; RV64-NEXT:    addiw a1, a1, -1606
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    bltz a3, .LBB27_152
+; RV64-NEXT:  # %bb.18: # %else32
+; RV64-NEXT:    slli a3, a2, 46
+; RV64-NEXT:    bltz a3, .LBB27_153
+; RV64-NEXT:  .LBB27_19: # %else34
+; RV64-NEXT:    slli a3, a2, 45
+; RV64-NEXT:    bltz a3, .LBB27_154
+; RV64-NEXT:  .LBB27_20: # %else36
+; RV64-NEXT:    slli a3, a2, 44
+; RV64-NEXT:    bltz a3, .LBB27_155
+; RV64-NEXT:  .LBB27_21: # %else38
+; RV64-NEXT:    slli a3, a2, 43
+; RV64-NEXT:    bltz a3, .LBB27_156
+; RV64-NEXT:  .LBB27_22: # %else40
+; RV64-NEXT:    slli a3, a2, 42
+; RV64-NEXT:    bltz a3, .LBB27_157
+; RV64-NEXT:  .LBB27_23: # %else42
+; RV64-NEXT:    slli a3, a2, 41
+; RV64-NEXT:    bltz a3, .LBB27_158
+; RV64-NEXT:  .LBB27_24: # %else44
+; RV64-NEXT:    slli a3, a2, 40
+; RV64-NEXT:    bltz a3, .LBB27_159
+; RV64-NEXT:  .LBB27_25: # %else46
+; RV64-NEXT:    slli a3, a2, 39
+; RV64-NEXT:    bltz a3, .LBB27_160
+; RV64-NEXT:  .LBB27_26: # %else48
+; RV64-NEXT:    slli a3, a2, 38
+; RV64-NEXT:    bltz a3, .LBB27_161
+; RV64-NEXT:  .LBB27_27: # %else50
+; RV64-NEXT:    slli a3, a2, 37
+; RV64-NEXT:    bltz a3, .LBB27_162
+; RV64-NEXT:  .LBB27_28: # %else52
+; RV64-NEXT:    slli a3, a2, 36
+; RV64-NEXT:    bltz a3, .LBB27_163
+; RV64-NEXT:  .LBB27_29: # %else54
+; RV64-NEXT:    slli a3, a2, 35
+; RV64-NEXT:    bltz a3, .LBB27_164
+; RV64-NEXT:  .LBB27_30: # %else56
+; RV64-NEXT:    slli a3, a2, 34
+; RV64-NEXT:    bgez a3, .LBB27_32
+; RV64-NEXT:  .LBB27_31: # %cond.store57
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    lui a4, 3
+; RV64-NEXT:    addiw a4, a4, -1664
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a1, 0(a1)
+; RV64-NEXT:    fmv.h.x fa5, a1
+; RV64-NEXT:    fsh fa5, 58(a0)
+; RV64-NEXT:  .LBB27_32: # %else58
+; RV64-NEXT:    slli a3, a2, 33
+; RV64-NEXT:    lui a1, 2
+; RV64-NEXT:    addiw a1, a1, 348
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    bltz a3, .LBB27_165
+; RV64-NEXT:  # %bb.33: # %else60
+; RV64-NEXT:    slli a3, a2, 32
+; RV64-NEXT:    bltz a3, .LBB27_166
+; RV64-NEXT:  .LBB27_34: # %else62
+; RV64-NEXT:    slli a3, a2, 31
+; RV64-NEXT:    bltz a3, .LBB27_167
+; RV64-NEXT:  .LBB27_35: # %else64
+; RV64-NEXT:    slli a3, a2, 30
+; RV64-NEXT:    bltz a3, .LBB27_168
+; RV64-NEXT:  .LBB27_36: # %else66
+; RV64-NEXT:    slli a3, a2, 29
+; RV64-NEXT:    bltz a3, .LBB27_169
+; RV64-NEXT:  .LBB27_37: # %else68
+; RV64-NEXT:    slli a3, a2, 28
+; RV64-NEXT:    bltz a3, .LBB27_170
+; RV64-NEXT:  .LBB27_38: # %else70
+; RV64-NEXT:    slli a3, a2, 27
+; RV64-NEXT:    bltz a3, .LBB27_171
+; RV64-NEXT:  .LBB27_39: # %else72
+; RV64-NEXT:    slli a3, a2, 26
+; RV64-NEXT:    bltz a3, .LBB27_172
+; RV64-NEXT:  .LBB27_40: # %else74
+; RV64-NEXT:    slli a3, a2, 25
+; RV64-NEXT:    bltz a3, .LBB27_173
+; RV64-NEXT:  .LBB27_41: # %else76
+; RV64-NEXT:    slli a3, a2, 24
+; RV64-NEXT:    bltz a3, .LBB27_174
+; RV64-NEXT:  .LBB27_42: # %else78
+; RV64-NEXT:    slli a3, a2, 23
+; RV64-NEXT:    bltz a3, .LBB27_175
+; RV64-NEXT:  .LBB27_43: # %else80
+; RV64-NEXT:    slli a3, a2, 22
+; RV64-NEXT:    bltz a3, .LBB27_176
+; RV64-NEXT:  .LBB27_44: # %else82
+; RV64-NEXT:    slli a3, a2, 21
+; RV64-NEXT:    bltz a3, .LBB27_177
+; RV64-NEXT:  .LBB27_45: # %else84
+; RV64-NEXT:    slli a3, a2, 20
+; RV64-NEXT:    bltz a3, .LBB27_178
+; RV64-NEXT:  .LBB27_46: # %else86
+; RV64-NEXT:    slli a3, a2, 19
+; RV64-NEXT:    bltz a3, .LBB27_179
+; RV64-NEXT:  .LBB27_47: # %else88
+; RV64-NEXT:    slli a3, a2, 18
+; RV64-NEXT:    bltz a3, .LBB27_180
+; RV64-NEXT:  .LBB27_48: # %else90
+; RV64-NEXT:    slli a3, a2, 17
+; RV64-NEXT:    bgez a3, .LBB27_50
+; RV64-NEXT:  .LBB27_49: # %cond.store91
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    lui a4, 2
+; RV64-NEXT:    addiw a4, a4, 256
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a1, 0(a1)
+; RV64-NEXT:    fmv.h.x fa5, a1
+; RV64-NEXT:    fsh fa5, 92(a0)
+; RV64-NEXT:  .LBB27_50: # %else92
+; RV64-NEXT:    slli a1, a2, 16
+; RV64-NEXT:    lui a3, 2
+; RV64-NEXT:    addiw a3, a3, -1794
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    bltz a1, .LBB27_181
+; RV64-NEXT:  # %bb.51: # %else94
+; RV64-NEXT:    slli a1, a2, 15
+; RV64-NEXT:    bltz a1, .LBB27_182
+; RV64-NEXT:  .LBB27_52: # %else96
+; RV64-NEXT:    slli a1, a2, 14
+; RV64-NEXT:    bltz a1, .LBB27_183
+; RV64-NEXT:  .LBB27_53: # %else98
+; RV64-NEXT:    slli a1, a2, 13
+; RV64-NEXT:    bltz a1, .LBB27_184
+; RV64-NEXT:  .LBB27_54: # %else100
+; RV64-NEXT:    slli a1, a2, 12
+; RV64-NEXT:    bltz a1, .LBB27_185
+; RV64-NEXT:  .LBB27_55: # %else102
+; RV64-NEXT:    slli a1, a2, 11
+; RV64-NEXT:    bltz a1, .LBB27_186
+; RV64-NEXT:  .LBB27_56: # %else104
+; RV64-NEXT:    slli a1, a2, 10
+; RV64-NEXT:    bltz a1, .LBB27_187
+; RV64-NEXT:  .LBB27_57: # %else106
+; RV64-NEXT:    slli a1, a2, 9
+; RV64-NEXT:    bltz a1, .LBB27_188
+; RV64-NEXT:  .LBB27_58: # %else108
+; RV64-NEXT:    slli a1, a2, 8
+; RV64-NEXT:    bltz a1, .LBB27_189
+; RV64-NEXT:  .LBB27_59: # %else110
+; RV64-NEXT:    slli a1, a2, 7
+; RV64-NEXT:    bltz a1, .LBB27_190
+; RV64-NEXT:  .LBB27_60: # %else112
+; RV64-NEXT:    slli a1, a2, 6
+; RV64-NEXT:    bltz a1, .LBB27_191
+; RV64-NEXT:  .LBB27_61: # %else114
+; RV64-NEXT:    slli a1, a2, 5
+; RV64-NEXT:    bltz a1, .LBB27_192
+; RV64-NEXT:  .LBB27_62: # %else116
+; RV64-NEXT:    slli a1, a2, 4
+; RV64-NEXT:    bltz a1, .LBB27_193
+; RV64-NEXT:  .LBB27_63: # %else118
+; RV64-NEXT:    slli a1, a2, 3
+; RV64-NEXT:    bltz a1, .LBB27_194
+; RV64-NEXT:  .LBB27_64: # %else120
+; RV64-NEXT:    slli a1, a2, 2
+; RV64-NEXT:    bgez a1, .LBB27_66
+; RV64-NEXT:  .LBB27_65: # %cond.store121
+; RV64-NEXT:    li a1, 64
+; RV64-NEXT:    lui a4, 2
+; RV64-NEXT:    addiw a4, a4, -1664
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a1, 252(a3)
+; RV64-NEXT:    fmv.h.x fa5, a1
+; RV64-NEXT:    fsh fa5, 122(a0)
+; RV64-NEXT:  .LBB27_66: # %else122
+; RV64-NEXT:    slli a1, a2, 1
+; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT:    vslidedown.vi v24, v0, 1
+; RV64-NEXT:    bgez a1, .LBB27_68
+; RV64-NEXT:  # %bb.67: # %cond.store123
+; RV64-NEXT:    li a1, 64
+; RV64-NEXT:    li a4, 25
+; RV64-NEXT:    slli a4, a4, 8
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a1, 126(a3)
+; RV64-NEXT:    fmv.h.x fa5, a1
+; RV64-NEXT:    fsh fa5, 124(a0)
+; RV64-NEXT:  .LBB27_68: # %else124
+; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT:    vmv.x.s a1, v24
+; RV64-NEXT:    bltz a2, .LBB27_195
+; RV64-NEXT:  # %bb.69: # %else126
+; RV64-NEXT:    andi a2, a1, 1
+; RV64-NEXT:    bnez a2, .LBB27_196
+; RV64-NEXT:  .LBB27_70: # %else128
+; RV64-NEXT:    andi a2, a1, 2
+; RV64-NEXT:    bnez a2, .LBB27_197
+; RV64-NEXT:  .LBB27_71: # %else130
+; RV64-NEXT:    andi a2, a1, 4
+; RV64-NEXT:    bnez a2, .LBB27_198
+; RV64-NEXT:  .LBB27_72: # %else132
+; RV64-NEXT:    andi a2, a1, 8
+; RV64-NEXT:    bnez a2, .LBB27_199
+; RV64-NEXT:  .LBB27_73: # %else134
+; RV64-NEXT:    andi a2, a1, 16
+; RV64-NEXT:    bnez a2, .LBB27_200
+; RV64-NEXT:  .LBB27_74: # %else136
+; RV64-NEXT:    andi a2, a1, 32
+; RV64-NEXT:    bnez a2, .LBB27_201
+; RV64-NEXT:  .LBB27_75: # %else138
+; RV64-NEXT:    andi a2, a1, 64
+; RV64-NEXT:    bnez a2, .LBB27_202
+; RV64-NEXT:  .LBB27_76: # %else140
+; RV64-NEXT:    andi a2, a1, 128
+; RV64-NEXT:    bnez a2, .LBB27_203
+; RV64-NEXT:  .LBB27_77: # %else142
+; RV64-NEXT:    andi a2, a1, 256
+; RV64-NEXT:    bnez a2, .LBB27_204
+; RV64-NEXT:  .LBB27_78: # %else144
+; RV64-NEXT:    andi a2, a1, 512
+; RV64-NEXT:    bnez a2, .LBB27_205
+; RV64-NEXT:  .LBB27_79: # %else146
+; RV64-NEXT:    andi a2, a1, 1024
+; RV64-NEXT:    bnez a2, .LBB27_206
+; RV64-NEXT:  .LBB27_80: # %else148
+; RV64-NEXT:    slli a2, a1, 52
+; RV64-NEXT:    bltz a2, .LBB27_207
+; RV64-NEXT:  .LBB27_81: # %else150
+; RV64-NEXT:    slli a2, a1, 51
+; RV64-NEXT:    bltz a2, .LBB27_208
+; RV64-NEXT:  .LBB27_82: # %else152
+; RV64-NEXT:    slli a2, a1, 50
+; RV64-NEXT:    bltz a2, .LBB27_209
+; RV64-NEXT:  .LBB27_83: # %else154
+; RV64-NEXT:    slli a2, a1, 49
+; RV64-NEXT:    bltz a2, .LBB27_210
+; RV64-NEXT:  .LBB27_84: # %else156
+; RV64-NEXT:    slli a2, a1, 48
+; RV64-NEXT:    bgez a2, .LBB27_86
+; RV64-NEXT:  .LBB27_85: # %cond.store157
+; RV64-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-NEXT:    vslidedown.vi v8, v16, 15
+; RV64-NEXT:    vmv.x.s a2, v8
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 158(a0)
+; RV64-NEXT:  .LBB27_86: # %else158
+; RV64-NEXT:    slli a3, a1, 47
+; RV64-NEXT:    lui a2, 1
+; RV64-NEXT:    addiw a2, a2, 190
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    bltz a3, .LBB27_211
+; RV64-NEXT:  # %bb.87: # %else160
+; RV64-NEXT:    slli a3, a1, 46
+; RV64-NEXT:    bltz a3, .LBB27_212
+; RV64-NEXT:  .LBB27_88: # %else162
+; RV64-NEXT:    slli a3, a1, 45
+; RV64-NEXT:    bltz a3, .LBB27_213
+; RV64-NEXT:  .LBB27_89: # %else164
+; RV64-NEXT:    slli a3, a1, 44
+; RV64-NEXT:    bltz a3, .LBB27_214
+; RV64-NEXT:  .LBB27_90: # %else166
+; RV64-NEXT:    slli a3, a1, 43
+; RV64-NEXT:    bltz a3, .LBB27_215
+; RV64-NEXT:  .LBB27_91: # %else168
+; RV64-NEXT:    slli a3, a1, 42
+; RV64-NEXT:    bltz a3, .LBB27_216
+; RV64-NEXT:  .LBB27_92: # %else170
+; RV64-NEXT:    slli a3, a1, 41
+; RV64-NEXT:    bltz a3, .LBB27_217
+; RV64-NEXT:  .LBB27_93: # %else172
+; RV64-NEXT:    slli a3, a1, 40
+; RV64-NEXT:    bltz a3, .LBB27_218
+; RV64-NEXT:  .LBB27_94: # %else174
+; RV64-NEXT:    slli a3, a1, 39
+; RV64-NEXT:    bltz a3, .LBB27_219
+; RV64-NEXT:  .LBB27_95: # %else176
+; RV64-NEXT:    slli a3, a1, 38
+; RV64-NEXT:    bltz a3, .LBB27_220
+; RV64-NEXT:  .LBB27_96: # %else178
+; RV64-NEXT:    slli a3, a1, 37
+; RV64-NEXT:    bltz a3, .LBB27_221
+; RV64-NEXT:  .LBB27_97: # %else180
+; RV64-NEXT:    slli a3, a1, 36
+; RV64-NEXT:    bltz a3, .LBB27_222
+; RV64-NEXT:  .LBB27_98: # %else182
+; RV64-NEXT:    slli a3, a1, 35
+; RV64-NEXT:    bltz a3, .LBB27_223
+; RV64-NEXT:  .LBB27_99: # %else184
+; RV64-NEXT:    slli a3, a1, 34
+; RV64-NEXT:    bltz a3, .LBB27_224
+; RV64-NEXT:  .LBB27_100: # %else186
+; RV64-NEXT:    slli a3, a1, 33
+; RV64-NEXT:    bltz a3, .LBB27_225
+; RV64-NEXT:  .LBB27_101: # %else188
+; RV64-NEXT:    slli a3, a1, 32
+; RV64-NEXT:    bgez a3, .LBB27_103
+; RV64-NEXT:  .LBB27_102: # %cond.store189
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    lui a4, 1
+; RV64-NEXT:    addiw a4, a4, 128
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v16, (a4)
+; RV64-NEXT:    lh a2, 0(a2)
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 190(a0)
+; RV64-NEXT:  .LBB27_103: # %else190
+; RV64-NEXT:    slli a3, a1, 31
+; RV64-NEXT:    addi a2, sp, 2047
+; RV64-NEXT:    addi a2, a2, 97
+; RV64-NEXT:    bltz a3, .LBB27_226
+; RV64-NEXT:  # %bb.104: # %else192
+; RV64-NEXT:    slli a3, a1, 30
+; RV64-NEXT:    bltz a3, .LBB27_227
+; RV64-NEXT:  .LBB27_105: # %else194
+; RV64-NEXT:    slli a3, a1, 29
+; RV64-NEXT:    bltz a3, .LBB27_228
+; RV64-NEXT:  .LBB27_106: # %else196
+; RV64-NEXT:    slli a3, a1, 28
+; RV64-NEXT:    bltz a3, .LBB27_229
+; RV64-NEXT:  .LBB27_107: # %else198
+; RV64-NEXT:    slli a3, a1, 27
+; RV64-NEXT:    bltz a3, .LBB27_230
+; RV64-NEXT:  .LBB27_108: # %else200
+; RV64-NEXT:    slli a3, a1, 26
+; RV64-NEXT:    bltz a3, .LBB27_231
+; RV64-NEXT:  .LBB27_109: # %else202
+; RV64-NEXT:    slli a3, a1, 25
+; RV64-NEXT:    bltz a3, .LBB27_232
+; RV64-NEXT:  .LBB27_110: # %else204
+; RV64-NEXT:    slli a3, a1, 24
+; RV64-NEXT:    bltz a3, .LBB27_233
+; RV64-NEXT:  .LBB27_111: # %else206
+; RV64-NEXT:    slli a3, a1, 23
+; RV64-NEXT:    bltz a3, .LBB27_234
+; RV64-NEXT:  .LBB27_112: # %else208
+; RV64-NEXT:    slli a3, a1, 22
+; RV64-NEXT:    bgez a3, .LBB27_113
+; RV64-NEXT:    j .LBB27_235
+; RV64-NEXT:  .LBB27_113: # %else210
+; RV64-NEXT:    slli a3, a1, 21
+; RV64-NEXT:    bgez a3, .LBB27_114
+; RV64-NEXT:    j .LBB27_236
+; RV64-NEXT:  .LBB27_114: # %else212
+; RV64-NEXT:    slli a3, a1, 20
+; RV64-NEXT:    bgez a3, .LBB27_115
+; RV64-NEXT:    j .LBB27_237
+; RV64-NEXT:  .LBB27_115: # %else214
+; RV64-NEXT:    slli a3, a1, 19
+; RV64-NEXT:    bgez a3, .LBB27_116
+; RV64-NEXT:    j .LBB27_238
+; RV64-NEXT:  .LBB27_116: # %else216
+; RV64-NEXT:    slli a3, a1, 18
+; RV64-NEXT:    bgez a3, .LBB27_117
+; RV64-NEXT:    j .LBB27_239
+; RV64-NEXT:  .LBB27_117: # %else218
+; RV64-NEXT:    slli a3, a1, 17
+; RV64-NEXT:    bgez a3, .LBB27_118
+; RV64-NEXT:    j .LBB27_240
+; RV64-NEXT:  .LBB27_118: # %else220
+; RV64-NEXT:    slli a3, a1, 16
+; RV64-NEXT:    bgez a3, .LBB27_119
+; RV64-NEXT:    j .LBB27_241
+; RV64-NEXT:  .LBB27_119: # %else222
+; RV64-NEXT:    slli a3, a1, 15
+; RV64-NEXT:    bgez a3, .LBB27_120
+; RV64-NEXT:    j .LBB27_242
+; RV64-NEXT:  .LBB27_120: # %else224
+; RV64-NEXT:    slli a2, a1, 14
+; RV64-NEXT:    bgez a2, .LBB27_121
+; RV64-NEXT:    j .LBB27_243
+; RV64-NEXT:  .LBB27_121: # %else226
+; RV64-NEXT:    slli a2, a1, 13
+; RV64-NEXT:    bgez a2, .LBB27_122
+; RV64-NEXT:    j .LBB27_244
+; RV64-NEXT:  .LBB27_122: # %else228
+; RV64-NEXT:    slli a2, a1, 12
+; RV64-NEXT:    bgez a2, .LBB27_123
+; RV64-NEXT:    j .LBB27_245
+; RV64-NEXT:  .LBB27_123: # %else230
+; RV64-NEXT:    slli a2, a1, 11
+; RV64-NEXT:    bgez a2, .LBB27_124
+; RV64-NEXT:    j .LBB27_246
+; RV64-NEXT:  .LBB27_124: # %else232
+; RV64-NEXT:    slli a2, a1, 10
+; RV64-NEXT:    bgez a2, .LBB27_125
+; RV64-NEXT:    j .LBB27_247
+; RV64-NEXT:  .LBB27_125: # %else234
+; RV64-NEXT:    slli a2, a1, 9
+; RV64-NEXT:    bgez a2, .LBB27_126
+; RV64-NEXT:    j .LBB27_248
+; RV64-NEXT:  .LBB27_126: # %else236
+; RV64-NEXT:    slli a2, a1, 8
+; RV64-NEXT:    bgez a2, .LBB27_127
+; RV64-NEXT:    j .LBB27_249
+; RV64-NEXT:  .LBB27_127: # %else238
+; RV64-NEXT:    slli a2, a1, 7
+; RV64-NEXT:    bgez a2, .LBB27_128
+; RV64-NEXT:    j .LBB27_250
+; RV64-NEXT:  .LBB27_128: # %else240
+; RV64-NEXT:    slli a2, a1, 6
+; RV64-NEXT:    bgez a2, .LBB27_129
+; RV64-NEXT:    j .LBB27_251
+; RV64-NEXT:  .LBB27_129: # %else242
+; RV64-NEXT:    slli a2, a1, 5
+; RV64-NEXT:    bgez a2, .LBB27_130
+; RV64-NEXT:    j .LBB27_252
+; RV64-NEXT:  .LBB27_130: # %else244
+; RV64-NEXT:    slli a2, a1, 4
+; RV64-NEXT:    bgez a2, .LBB27_131
+; RV64-NEXT:    j .LBB27_253
+; RV64-NEXT:  .LBB27_131: # %else246
+; RV64-NEXT:    slli a2, a1, 3
+; RV64-NEXT:    bgez a2, .LBB27_132
+; RV64-NEXT:    j .LBB27_254
+; RV64-NEXT:  .LBB27_132: # %else248
+; RV64-NEXT:    slli a2, a1, 2
+; RV64-NEXT:    bgez a2, .LBB27_133
+; RV64-NEXT:    j .LBB27_255
+; RV64-NEXT:  .LBB27_133: # %else250
+; RV64-NEXT:    slli a2, a1, 1
+; RV64-NEXT:    bgez a2, .LBB27_134
+; RV64-NEXT:    j .LBB27_256
+; RV64-NEXT:  .LBB27_134: # %else252
+; RV64-NEXT:    bgez a1, .LBB27_136
+; RV64-NEXT:  .LBB27_135: # %cond.store253
+; RV64-NEXT:    li a1, 64
+; RV64-NEXT:    addi a2, sp, 128
+; RV64-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v16, (a2)
+; RV64-NEXT:    lh a1, 254(sp)
+; RV64-NEXT:    fmv.h.x fa5, a1
+; RV64-NEXT:    fsh fa5, 254(a0)
+; RV64-NEXT:  .LBB27_136: # %else254
+; RV64-NEXT:    addi sp, s0, -2032
+; RV64-NEXT:    .cfi_def_cfa sp, 2032
+; RV64-NEXT:    ld ra, 2024(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s0, 2016(sp) # 8-byte Folded Reload
+; RV64-NEXT:    .cfi_restore ra
+; RV64-NEXT:    .cfi_restore s0
+; RV64-NEXT:    addi sp, sp, 2032
+; RV64-NEXT:    .cfi_def_cfa_offset 0
+; RV64-NEXT:    ret
+; RV64-NEXT:  .LBB27_137: # %cond.store
+; RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; RV64-NEXT:    vmv.x.s a1, v8
+; RV64-NEXT:    fmv.h.x fa5, a1
+; RV64-NEXT:    fsh fa5, 0(a0)
+; RV64-NEXT:    andi a1, a2, 2
+; RV64-NEXT:    beqz a1, .LBB27_2
+; RV64-NEXT:  .LBB27_138: # %cond.store1
+; RV64-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT:    vslidedown.vi v24, v8, 1
+; RV64-NEXT:    vmv.x.s a1, v24
+; RV64-NEXT:    fmv.h.x fa5, a1
+; RV64-NEXT:    fsh fa5, 2(a0)
+; RV64-NEXT:    andi a1, a2, 4
+; RV64-NEXT:    beqz a1, .LBB27_3
+; RV64-NEXT:  .LBB27_139: # %cond.store3
+; RV64-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT:    vslidedown.vi v24, v8, 2
+; RV64-NEXT:    vmv.x.s a1, v24
+; RV64-NEXT:    fmv.h.x fa5, a1
+; RV64-NEXT:    fsh fa5, 4(a0)
+; RV64-NEXT:    andi a1, a2, 8
+; RV64-NEXT:    beqz a1, .LBB27_4
+; RV64-NEXT:  .LBB27_140: # %cond.store5
+; RV64-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT:    vslidedown.vi v24, v8, 3
+; RV64-NEXT:    vmv.x.s a1, v24
+; RV64-NEXT:    fmv.h.x fa5, a1
+; RV64-NEXT:    fsh fa5, 6(a0)
+; RV64-NEXT:    andi a1, a2, 16
+; RV64-NEXT:    beqz a1, .LBB27_5
+; RV64-NEXT:  .LBB27_141: # %cond.store7
+; RV64-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT:    vslidedown.vi v24, v8, 4
+; RV64-NEXT:    vmv.x.s a1, v24
+; RV64-NEXT:    fmv.h.x fa5, a1
+; RV64-NEXT:    fsh fa5, 8(a0)
+; RV64-NEXT:    andi a1, a2, 32
+; RV64-NEXT:    beqz a1, .LBB27_6
+; RV64-NEXT:  .LBB27_142: # %cond.store9
+; RV64-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT:    vslidedown.vi v24, v8, 5
+; RV64-NEXT:    vmv.x.s a1, v24
+; RV64-NEXT:    fmv.h.x fa5, a1
+; RV64-NEXT:    fsh fa5, 10(a0)
+; RV64-NEXT:    andi a1, a2, 64
+; RV64-NEXT:    beqz a1, .LBB27_7
+; RV64-NEXT:  .LBB27_143: # %cond.store11
+; RV64-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT:    vslidedown.vi v24, v8, 6
+; RV64-NEXT:    vmv.x.s a1, v24
+; RV64-NEXT:    fmv.h.x fa5, a1
+; RV64-NEXT:    fsh fa5, 12(a0)
+; RV64-NEXT:    andi a1, a2, 128
+; RV64-NEXT:    beqz a1, .LBB27_8
+; RV64-NEXT:  .LBB27_144: # %cond.store13
+; RV64-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT:    vslidedown.vi v24, v8, 7
+; RV64-NEXT:    vmv.x.s a1, v24
+; RV64-NEXT:    fmv.h.x fa5, a1
+; RV64-NEXT:    fsh fa5, 14(a0)
+; RV64-NEXT:    andi a1, a2, 256
+; RV64-NEXT:    beqz a1, .LBB27_9
+; RV64-NEXT:  .LBB27_145: # %cond.store15
+; RV64-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-NEXT:    vslidedown.vi v24, v8, 8
+; RV64-NEXT:    vmv.x.s a1, v24
+; RV64-NEXT:    fmv.h.x fa5, a1
+; RV64-NEXT:    fsh fa5, 16(a0)
+; RV64-NEXT:    andi a1, a2, 512
+; RV64-NEXT:    beqz a1, .LBB27_10
+; RV64-NEXT:  .LBB27_146: # %cond.store17
+; RV64-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-NEXT:    vslidedown.vi v24, v8, 9
+; RV64-NEXT:    vmv.x.s a1, v24
+; RV64-NEXT:    fmv.h.x fa5, a1
+; RV64-NEXT:    fsh fa5, 18(a0)
+; RV64-NEXT:    andi a1, a2, 1024
+; RV64-NEXT:    beqz a1, .LBB27_11
+; RV64-NEXT:  .LBB27_147: # %cond.store19
+; RV64-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-NEXT:    vslidedown.vi v24, v8, 10
+; RV64-NEXT:    vmv.x.s a1, v24
+; RV64-NEXT:    fmv.h.x fa5, a1
+; RV64-NEXT:    fsh fa5, 20(a0)
+; RV64-NEXT:    slli a1, a2, 52
+; RV64-NEXT:    bgez a1, .LBB27_12
+; RV64-NEXT:  .LBB27_148: # %cond.store21
+; RV64-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-NEXT:    vslidedown.vi v24, v8, 11
+; RV64-NEXT:    vmv.x.s a1, v24
+; RV64-NEXT:    fmv.h.x fa5, a1
+; RV64-NEXT:    fsh fa5, 22(a0)
+; RV64-NEXT:    slli a1, a2, 51
+; RV64-NEXT:    bgez a1, .LBB27_13
+; RV64-NEXT:  .LBB27_149: # %cond.store23
+; RV64-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-NEXT:    vslidedown.vi v24, v8, 12
+; RV64-NEXT:    vmv.x.s a1, v24
+; RV64-NEXT:    fmv.h.x fa5, a1
+; RV64-NEXT:    fsh fa5, 24(a0)
+; RV64-NEXT:    slli a1, a2, 50
+; RV64-NEXT:    bgez a1, .LBB27_14
+; RV64-NEXT:  .LBB27_150: # %cond.store25
+; RV64-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-NEXT:    vslidedown.vi v24, v8, 13
+; RV64-NEXT:    vmv.x.s a1, v24
+; RV64-NEXT:    fmv.h.x fa5, a1
+; RV64-NEXT:    fsh fa5, 26(a0)
+; RV64-NEXT:    slli a1, a2, 49
+; RV64-NEXT:    bgez a1, .LBB27_15
+; RV64-NEXT:  .LBB27_151: # %cond.store27
+; RV64-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-NEXT:    vslidedown.vi v24, v8, 14
+; RV64-NEXT:    vmv.x.s a1, v24
+; RV64-NEXT:    fmv.h.x fa5, a1
+; RV64-NEXT:    fsh fa5, 28(a0)
+; RV64-NEXT:    slli a1, a2, 48
+; RV64-NEXT:    bltz a1, .LBB27_16
+; RV64-NEXT:    j .LBB27_17
+; RV64-NEXT:  .LBB27_152: # %cond.store31
+; RV64-NEXT:    .cfi_restore_state
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    lui a4, 3
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a3, 1638(a1)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 32(a0)
+; RV64-NEXT:    slli a3, a2, 46
+; RV64-NEXT:    bgez a3, .LBB27_19
+; RV64-NEXT:  .LBB27_153: # %cond.store33
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    lui a4, 3
+; RV64-NEXT:    addiw a4, a4, -128
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a3, 1512(a1)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 34(a0)
+; RV64-NEXT:    slli a3, a2, 45
+; RV64-NEXT:    bgez a3, .LBB27_20
+; RV64-NEXT:  .LBB27_154: # %cond.store35
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    lui a4, 3
+; RV64-NEXT:    addiw a4, a4, -256
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a3, 1386(a1)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 36(a0)
+; RV64-NEXT:    slli a3, a2, 44
+; RV64-NEXT:    bgez a3, .LBB27_21
+; RV64-NEXT:  .LBB27_155: # %cond.store37
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    lui a4, 3
+; RV64-NEXT:    addiw a4, a4, -384
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a3, 1260(a1)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 38(a0)
+; RV64-NEXT:    slli a3, a2, 43
+; RV64-NEXT:    bgez a3, .LBB27_22
+; RV64-NEXT:  .LBB27_156: # %cond.store39
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    li a4, 23
+; RV64-NEXT:    slli a4, a4, 9
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a3, 1134(a1)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 40(a0)
+; RV64-NEXT:    slli a3, a2, 42
+; RV64-NEXT:    bgez a3, .LBB27_23
+; RV64-NEXT:  .LBB27_157: # %cond.store41
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    lui a4, 3
+; RV64-NEXT:    addiw a4, a4, -640
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a3, 1008(a1)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 42(a0)
+; RV64-NEXT:    slli a3, a2, 41
+; RV64-NEXT:    bgez a3, .LBB27_24
+; RV64-NEXT:  .LBB27_158: # %cond.store43
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    lui a4, 3
+; RV64-NEXT:    addiw a4, a4, -768
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a3, 882(a1)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 44(a0)
+; RV64-NEXT:    slli a3, a2, 40
+; RV64-NEXT:    bgez a3, .LBB27_25
+; RV64-NEXT:  .LBB27_159: # %cond.store45
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    lui a4, 3
+; RV64-NEXT:    addiw a4, a4, -896
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a3, 756(a1)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 46(a0)
+; RV64-NEXT:    slli a3, a2, 39
+; RV64-NEXT:    bgez a3, .LBB27_26
+; RV64-NEXT:  .LBB27_160: # %cond.store47
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    li a4, 11
+; RV64-NEXT:    slli a4, a4, 10
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a3, 630(a1)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 48(a0)
+; RV64-NEXT:    slli a3, a2, 38
+; RV64-NEXT:    bgez a3, .LBB27_27
+; RV64-NEXT:  .LBB27_161: # %cond.store49
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    lui a4, 3
+; RV64-NEXT:    addiw a4, a4, -1152
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a3, 504(a1)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 50(a0)
+; RV64-NEXT:    slli a3, a2, 37
+; RV64-NEXT:    bgez a3, .LBB27_28
+; RV64-NEXT:  .LBB27_162: # %cond.store51
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    lui a4, 3
+; RV64-NEXT:    addiw a4, a4, -1280
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a3, 378(a1)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 52(a0)
+; RV64-NEXT:    slli a3, a2, 36
+; RV64-NEXT:    bgez a3, .LBB27_29
+; RV64-NEXT:  .LBB27_163: # %cond.store53
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    lui a4, 3
+; RV64-NEXT:    addiw a4, a4, -1408
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a3, 252(a1)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 54(a0)
+; RV64-NEXT:    slli a3, a2, 35
+; RV64-NEXT:    bgez a3, .LBB27_30
+; RV64-NEXT:  .LBB27_164: # %cond.store55
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    li a4, 21
+; RV64-NEXT:    slli a4, a4, 9
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a3, 126(a1)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 56(a0)
+; RV64-NEXT:    slli a3, a2, 34
+; RV64-NEXT:    bltz a3, .LBB27_31
+; RV64-NEXT:    j .LBB27_32
+; RV64-NEXT:  .LBB27_165: # %cond.store59
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    lui a4, 3
+; RV64-NEXT:    addiw a4, a4, -1792
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a3, 2016(a1)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 60(a0)
+; RV64-NEXT:    slli a3, a2, 32
+; RV64-NEXT:    bgez a3, .LBB27_34
+; RV64-NEXT:  .LBB27_166: # %cond.store61
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    lui a4, 3
+; RV64-NEXT:    addiw a4, a4, -1920
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a3, 1890(a1)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 62(a0)
+; RV64-NEXT:    slli a3, a2, 31
+; RV64-NEXT:    bgez a3, .LBB27_35
+; RV64-NEXT:  .LBB27_167: # %cond.store63
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    li a4, 5
+; RV64-NEXT:    slli a4, a4, 11
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a3, 1764(a1)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 64(a0)
+; RV64-NEXT:    slli a3, a2, 30
+; RV64-NEXT:    bgez a3, .LBB27_36
+; RV64-NEXT:  .LBB27_168: # %cond.store65
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    lui a4, 2
+; RV64-NEXT:    addiw a4, a4, 1920
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a3, 1638(a1)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 66(a0)
+; RV64-NEXT:    slli a3, a2, 29
+; RV64-NEXT:    bgez a3, .LBB27_37
+; RV64-NEXT:  .LBB27_169: # %cond.store67
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    lui a4, 2
+; RV64-NEXT:    addiw a4, a4, 1792
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a3, 1512(a1)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 68(a0)
+; RV64-NEXT:    slli a3, a2, 28
+; RV64-NEXT:    bgez a3, .LBB27_38
+; RV64-NEXT:  .LBB27_170: # %cond.store69
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    lui a4, 2
+; RV64-NEXT:    addiw a4, a4, 1664
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a3, 1386(a1)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 70(a0)
+; RV64-NEXT:    slli a3, a2, 27
+; RV64-NEXT:    bgez a3, .LBB27_39
+; RV64-NEXT:  .LBB27_171: # %cond.store71
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    li a4, 19
+; RV64-NEXT:    slli a4, a4, 9
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a3, 1260(a1)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 72(a0)
+; RV64-NEXT:    slli a3, a2, 26
+; RV64-NEXT:    bgez a3, .LBB27_40
+; RV64-NEXT:  .LBB27_172: # %cond.store73
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    lui a4, 2
+; RV64-NEXT:    addiw a4, a4, 1408
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a3, 1134(a1)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 74(a0)
+; RV64-NEXT:    slli a3, a2, 25
+; RV64-NEXT:    bgez a3, .LBB27_41
+; RV64-NEXT:  .LBB27_173: # %cond.store75
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    lui a4, 2
+; RV64-NEXT:    addiw a4, a4, 1280
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a3, 1008(a1)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 76(a0)
+; RV64-NEXT:    slli a3, a2, 24
+; RV64-NEXT:    bgez a3, .LBB27_42
+; RV64-NEXT:  .LBB27_174: # %cond.store77
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    lui a4, 2
+; RV64-NEXT:    addiw a4, a4, 1152
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a3, 882(a1)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 78(a0)
+; RV64-NEXT:    slli a3, a2, 23
+; RV64-NEXT:    bgez a3, .LBB27_43
+; RV64-NEXT:  .LBB27_175: # %cond.store79
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    li a4, 9
+; RV64-NEXT:    slli a4, a4, 10
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a3, 756(a1)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 80(a0)
+; RV64-NEXT:    slli a3, a2, 22
+; RV64-NEXT:    bgez a3, .LBB27_44
+; RV64-NEXT:  .LBB27_176: # %cond.store81
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    lui a4, 2
+; RV64-NEXT:    addiw a4, a4, 896
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a3, 630(a1)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 82(a0)
+; RV64-NEXT:    slli a3, a2, 21
+; RV64-NEXT:    bgez a3, .LBB27_45
+; RV64-NEXT:  .LBB27_177: # %cond.store83
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    lui a4, 2
+; RV64-NEXT:    addiw a4, a4, 768
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a3, 504(a1)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 84(a0)
+; RV64-NEXT:    slli a3, a2, 20
+; RV64-NEXT:    bgez a3, .LBB27_46
+; RV64-NEXT:  .LBB27_178: # %cond.store85
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    lui a4, 2
+; RV64-NEXT:    addiw a4, a4, 640
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a3, 378(a1)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 86(a0)
+; RV64-NEXT:    slli a3, a2, 19
+; RV64-NEXT:    bgez a3, .LBB27_47
+; RV64-NEXT:  .LBB27_179: # %cond.store87
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    li a4, 17
+; RV64-NEXT:    slli a4, a4, 9
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a3, 252(a1)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 88(a0)
+; RV64-NEXT:    slli a3, a2, 18
+; RV64-NEXT:    bgez a3, .LBB27_48
+; RV64-NEXT:  .LBB27_180: # %cond.store89
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    lui a4, 2
+; RV64-NEXT:    addiw a4, a4, 384
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a3, 126(a1)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 90(a0)
+; RV64-NEXT:    slli a3, a2, 17
+; RV64-NEXT:    bltz a3, .LBB27_49
+; RV64-NEXT:    j .LBB27_50
+; RV64-NEXT:  .LBB27_181: # %cond.store93
+; RV64-NEXT:    li a1, 64
+; RV64-NEXT:    lui a4, 2
+; RV64-NEXT:    addiw a4, a4, 128
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a1, 2016(a3)
+; RV64-NEXT:    fmv.h.x fa5, a1
+; RV64-NEXT:    fsh fa5, 94(a0)
+; RV64-NEXT:    slli a1, a2, 15
+; RV64-NEXT:    bgez a1, .LBB27_52
+; RV64-NEXT:  .LBB27_182: # %cond.store95
+; RV64-NEXT:    li a1, 64
+; RV64-NEXT:    lui a4, 2
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a1, 1890(a3)
+; RV64-NEXT:    fmv.h.x fa5, a1
+; RV64-NEXT:    fsh fa5, 96(a0)
+; RV64-NEXT:    slli a1, a2, 14
+; RV64-NEXT:    bgez a1, .LBB27_53
+; RV64-NEXT:  .LBB27_183: # %cond.store97
+; RV64-NEXT:    li a1, 64
+; RV64-NEXT:    lui a4, 2
+; RV64-NEXT:    addiw a4, a4, -128
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a1, 1764(a3)
+; RV64-NEXT:    fmv.h.x fa5, a1
+; RV64-NEXT:    fsh fa5, 98(a0)
+; RV64-NEXT:    slli a1, a2, 13
+; RV64-NEXT:    bgez a1, .LBB27_54
+; RV64-NEXT:  .LBB27_184: # %cond.store99
+; RV64-NEXT:    li a1, 64
+; RV64-NEXT:    li a4, 31
+; RV64-NEXT:    slli a4, a4, 8
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a1, 1638(a3)
+; RV64-NEXT:    fmv.h.x fa5, a1
+; RV64-NEXT:    fsh fa5, 100(a0)
+; RV64-NEXT:    slli a1, a2, 12
+; RV64-NEXT:    bgez a1, .LBB27_55
+; RV64-NEXT:  .LBB27_185: # %cond.store101
+; RV64-NEXT:    li a1, 64
+; RV64-NEXT:    lui a4, 2
+; RV64-NEXT:    addiw a4, a4, -384
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a1, 1512(a3)
+; RV64-NEXT:    fmv.h.x fa5, a1
+; RV64-NEXT:    fsh fa5, 102(a0)
+; RV64-NEXT:    slli a1, a2, 11
+; RV64-NEXT:    bgez a1, .LBB27_56
+; RV64-NEXT:  .LBB27_186: # %cond.store103
+; RV64-NEXT:    li a1, 64
+; RV64-NEXT:    li a4, 15
+; RV64-NEXT:    slli a4, a4, 9
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a1, 1386(a3)
+; RV64-NEXT:    fmv.h.x fa5, a1
+; RV64-NEXT:    fsh fa5, 104(a0)
+; RV64-NEXT:    slli a1, a2, 10
+; RV64-NEXT:    bgez a1, .LBB27_57
+; RV64-NEXT:  .LBB27_187: # %cond.store105
+; RV64-NEXT:    li a1, 64
+; RV64-NEXT:    lui a4, 2
+; RV64-NEXT:    addiw a4, a4, -640
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a1, 1260(a3)
+; RV64-NEXT:    fmv.h.x fa5, a1
+; RV64-NEXT:    fsh fa5, 106(a0)
+; RV64-NEXT:    slli a1, a2, 9
+; RV64-NEXT:    bgez a1, .LBB27_58
+; RV64-NEXT:  .LBB27_188: # %cond.store107
+; RV64-NEXT:    li a1, 64
+; RV64-NEXT:    li a4, 29
+; RV64-NEXT:    slli a4, a4, 8
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a1, 1134(a3)
+; RV64-NEXT:    fmv.h.x fa5, a1
+; RV64-NEXT:    fsh fa5, 108(a0)
+; RV64-NEXT:    slli a1, a2, 8
+; RV64-NEXT:    bgez a1, .LBB27_59
+; RV64-NEXT:  .LBB27_189: # %cond.store109
+; RV64-NEXT:    li a1, 64
+; RV64-NEXT:    lui a4, 2
+; RV64-NEXT:    addiw a4, a4, -896
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a1, 1008(a3)
+; RV64-NEXT:    fmv.h.x fa5, a1
+; RV64-NEXT:    fsh fa5, 110(a0)
+; RV64-NEXT:    slli a1, a2, 7
+; RV64-NEXT:    bgez a1, .LBB27_60
+; RV64-NEXT:  .LBB27_190: # %cond.store111
+; RV64-NEXT:    li a1, 64
+; RV64-NEXT:    li a4, 7
+; RV64-NEXT:    slli a4, a4, 10
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a1, 882(a3)
+; RV64-NEXT:    fmv.h.x fa5, a1
+; RV64-NEXT:    fsh fa5, 112(a0)
+; RV64-NEXT:    slli a1, a2, 6
+; RV64-NEXT:    bgez a1, .LBB27_61
+; RV64-NEXT:  .LBB27_191: # %cond.store113
+; RV64-NEXT:    li a1, 64
+; RV64-NEXT:    lui a4, 2
+; RV64-NEXT:    addiw a4, a4, -1152
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a1, 756(a3)
+; RV64-NEXT:    fmv.h.x fa5, a1
+; RV64-NEXT:    fsh fa5, 114(a0)
+; RV64-NEXT:    slli a1, a2, 5
+; RV64-NEXT:    bgez a1, .LBB27_62
+; RV64-NEXT:  .LBB27_192: # %cond.store115
+; RV64-NEXT:    li a1, 64
+; RV64-NEXT:    li a4, 27
+; RV64-NEXT:    slli a4, a4, 8
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a1, 630(a3)
+; RV64-NEXT:    fmv.h.x fa5, a1
+; RV64-NEXT:    fsh fa5, 116(a0)
+; RV64-NEXT:    slli a1, a2, 4
+; RV64-NEXT:    bgez a1, .LBB27_63
+; RV64-NEXT:  .LBB27_193: # %cond.store117
+; RV64-NEXT:    li a1, 64
+; RV64-NEXT:    lui a4, 2
+; RV64-NEXT:    addiw a4, a4, -1408
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a1, 504(a3)
+; RV64-NEXT:    fmv.h.x fa5, a1
+; RV64-NEXT:    fsh fa5, 118(a0)
+; RV64-NEXT:    slli a1, a2, 3
+; RV64-NEXT:    bgez a1, .LBB27_64
+; RV64-NEXT:  .LBB27_194: # %cond.store119
+; RV64-NEXT:    li a1, 64
+; RV64-NEXT:    li a4, 13
+; RV64-NEXT:    slli a4, a4, 9
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a1, 378(a3)
+; RV64-NEXT:    fmv.h.x fa5, a1
+; RV64-NEXT:    fsh fa5, 120(a0)
+; RV64-NEXT:    slli a1, a2, 2
+; RV64-NEXT:    bltz a1, .LBB27_65
+; RV64-NEXT:    j .LBB27_66
+; RV64-NEXT:  .LBB27_195: # %cond.store125
+; RV64-NEXT:    li a2, 64
+; RV64-NEXT:    lui a4, 2
+; RV64-NEXT:    addiw a4, a4, -1920
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v8, (a4)
+; RV64-NEXT:    lh a2, 0(a3)
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 126(a0)
+; RV64-NEXT:    andi a2, a1, 1
+; RV64-NEXT:    beqz a2, .LBB27_70
+; RV64-NEXT:  .LBB27_196: # %cond.store127
+; RV64-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT:    vmv.x.s a2, v16
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 128(a0)
+; RV64-NEXT:    andi a2, a1, 2
+; RV64-NEXT:    beqz a2, .LBB27_71
+; RV64-NEXT:  .LBB27_197: # %cond.store129
+; RV64-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT:    vslidedown.vi v8, v16, 1
+; RV64-NEXT:    vmv.x.s a2, v8
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 130(a0)
+; RV64-NEXT:    andi a2, a1, 4
+; RV64-NEXT:    beqz a2, .LBB27_72
+; RV64-NEXT:  .LBB27_198: # %cond.store131
+; RV64-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT:    vslidedown.vi v8, v16, 2
+; RV64-NEXT:    vmv.x.s a2, v8
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 132(a0)
+; RV64-NEXT:    andi a2, a1, 8
+; RV64-NEXT:    beqz a2, .LBB27_73
+; RV64-NEXT:  .LBB27_199: # %cond.store133
+; RV64-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT:    vslidedown.vi v8, v16, 3
+; RV64-NEXT:    vmv.x.s a2, v8
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 134(a0)
+; RV64-NEXT:    andi a2, a1, 16
+; RV64-NEXT:    beqz a2, .LBB27_74
+; RV64-NEXT:  .LBB27_200: # %cond.store135
+; RV64-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT:    vslidedown.vi v8, v16, 4
+; RV64-NEXT:    vmv.x.s a2, v8
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 136(a0)
+; RV64-NEXT:    andi a2, a1, 32
+; RV64-NEXT:    beqz a2, .LBB27_75
+; RV64-NEXT:  .LBB27_201: # %cond.store137
+; RV64-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT:    vslidedown.vi v8, v16, 5
+; RV64-NEXT:    vmv.x.s a2, v8
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 138(a0)
+; RV64-NEXT:    andi a2, a1, 64
+; RV64-NEXT:    beqz a2, .LBB27_76
+; RV64-NEXT:  .LBB27_202: # %cond.store139
+; RV64-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT:    vslidedown.vi v8, v16, 6
+; RV64-NEXT:    vmv.x.s a2, v8
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 140(a0)
+; RV64-NEXT:    andi a2, a1, 128
+; RV64-NEXT:    beqz a2, .LBB27_77
+; RV64-NEXT:  .LBB27_203: # %cond.store141
+; RV64-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-NEXT:    vslidedown.vi v8, v16, 7
+; RV64-NEXT:    vmv.x.s a2, v8
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 142(a0)
+; RV64-NEXT:    andi a2, a1, 256
+; RV64-NEXT:    beqz a2, .LBB27_78
+; RV64-NEXT:  .LBB27_204: # %cond.store143
+; RV64-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-NEXT:    vslidedown.vi v8, v16, 8
+; RV64-NEXT:    vmv.x.s a2, v8
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 144(a0)
+; RV64-NEXT:    andi a2, a1, 512
+; RV64-NEXT:    beqz a2, .LBB27_79
+; RV64-NEXT:  .LBB27_205: # %cond.store145
+; RV64-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-NEXT:    vslidedown.vi v8, v16, 9
+; RV64-NEXT:    vmv.x.s a2, v8
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 146(a0)
+; RV64-NEXT:    andi a2, a1, 1024
+; RV64-NEXT:    beqz a2, .LBB27_80
+; RV64-NEXT:  .LBB27_206: # %cond.store147
+; RV64-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-NEXT:    vslidedown.vi v8, v16, 10
+; RV64-NEXT:    vmv.x.s a2, v8
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 148(a0)
+; RV64-NEXT:    slli a2, a1, 52
+; RV64-NEXT:    bgez a2, .LBB27_81
+; RV64-NEXT:  .LBB27_207: # %cond.store149
+; RV64-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-NEXT:    vslidedown.vi v8, v16, 11
+; RV64-NEXT:    vmv.x.s a2, v8
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 150(a0)
+; RV64-NEXT:    slli a2, a1, 51
+; RV64-NEXT:    bgez a2, .LBB27_82
+; RV64-NEXT:  .LBB27_208: # %cond.store151
+; RV64-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-NEXT:    vslidedown.vi v8, v16, 12
+; RV64-NEXT:    vmv.x.s a2, v8
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 152(a0)
+; RV64-NEXT:    slli a2, a1, 50
+; RV64-NEXT:    bgez a2, .LBB27_83
+; RV64-NEXT:  .LBB27_209: # %cond.store153
+; RV64-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-NEXT:    vslidedown.vi v8, v16, 13
+; RV64-NEXT:    vmv.x.s a2, v8
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 154(a0)
+; RV64-NEXT:    slli a2, a1, 49
+; RV64-NEXT:    bgez a2, .LBB27_84
+; RV64-NEXT:  .LBB27_210: # %cond.store155
+; RV64-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-NEXT:    vslidedown.vi v8, v16, 14
+; RV64-NEXT:    vmv.x.s a2, v8
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 156(a0)
+; RV64-NEXT:    slli a2, a1, 48
+; RV64-NEXT:    bltz a2, .LBB27_85
+; RV64-NEXT:    j .LBB27_86
+; RV64-NEXT:  .LBB27_211: # %cond.store159
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    li a4, 3
+; RV64-NEXT:    slli a4, a4, 11
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v16, (a4)
+; RV64-NEXT:    lh a3, 1890(a2)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 160(a0)
+; RV64-NEXT:    slli a3, a1, 46
+; RV64-NEXT:    bgez a3, .LBB27_88
+; RV64-NEXT:  .LBB27_212: # %cond.store161
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    lui a4, 1
+; RV64-NEXT:    addiw a4, a4, 1920
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v16, (a4)
+; RV64-NEXT:    lh a3, 1764(a2)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 162(a0)
+; RV64-NEXT:    slli a3, a1, 45
+; RV64-NEXT:    bgez a3, .LBB27_89
+; RV64-NEXT:  .LBB27_213: # %cond.store163
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    li a4, 23
+; RV64-NEXT:    slli a4, a4, 8
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v16, (a4)
+; RV64-NEXT:    lh a3, 1638(a2)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 164(a0)
+; RV64-NEXT:    slli a3, a1, 44
+; RV64-NEXT:    bgez a3, .LBB27_90
+; RV64-NEXT:  .LBB27_214: # %cond.store165
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    lui a4, 1
+; RV64-NEXT:    addiw a4, a4, 1664
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v16, (a4)
+; RV64-NEXT:    lh a3, 1512(a2)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 166(a0)
+; RV64-NEXT:    slli a3, a1, 43
+; RV64-NEXT:    bgez a3, .LBB27_91
+; RV64-NEXT:  .LBB27_215: # %cond.store167
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    li a4, 11
+; RV64-NEXT:    slli a4, a4, 9
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v16, (a4)
+; RV64-NEXT:    lh a3, 1386(a2)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 168(a0)
+; RV64-NEXT:    slli a3, a1, 42
+; RV64-NEXT:    bgez a3, .LBB27_92
+; RV64-NEXT:  .LBB27_216: # %cond.store169
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    lui a4, 1
+; RV64-NEXT:    addiw a4, a4, 1408
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v16, (a4)
+; RV64-NEXT:    lh a3, 1260(a2)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 170(a0)
+; RV64-NEXT:    slli a3, a1, 41
+; RV64-NEXT:    bgez a3, .LBB27_93
+; RV64-NEXT:  .LBB27_217: # %cond.store171
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    li a4, 21
+; RV64-NEXT:    slli a4, a4, 8
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v16, (a4)
+; RV64-NEXT:    lh a3, 1134(a2)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 172(a0)
+; RV64-NEXT:    slli a3, a1, 40
+; RV64-NEXT:    bgez a3, .LBB27_94
+; RV64-NEXT:  .LBB27_218: # %cond.store173
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    lui a4, 1
+; RV64-NEXT:    addiw a4, a4, 1152
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v16, (a4)
+; RV64-NEXT:    lh a3, 1008(a2)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 174(a0)
+; RV64-NEXT:    slli a3, a1, 39
+; RV64-NEXT:    bgez a3, .LBB27_95
+; RV64-NEXT:  .LBB27_219: # %cond.store175
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    li a4, 5
+; RV64-NEXT:    slli a4, a4, 10
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v16, (a4)
+; RV64-NEXT:    lh a3, 882(a2)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 176(a0)
+; RV64-NEXT:    slli a3, a1, 38
+; RV64-NEXT:    bgez a3, .LBB27_96
+; RV64-NEXT:  .LBB27_220: # %cond.store177
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    lui a4, 1
+; RV64-NEXT:    addiw a4, a4, 896
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v16, (a4)
+; RV64-NEXT:    lh a3, 756(a2)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 178(a0)
+; RV64-NEXT:    slli a3, a1, 37
+; RV64-NEXT:    bgez a3, .LBB27_97
+; RV64-NEXT:  .LBB27_221: # %cond.store179
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    li a4, 19
+; RV64-NEXT:    slli a4, a4, 8
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v16, (a4)
+; RV64-NEXT:    lh a3, 630(a2)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 180(a0)
+; RV64-NEXT:    slli a3, a1, 36
+; RV64-NEXT:    bgez a3, .LBB27_98
+; RV64-NEXT:  .LBB27_222: # %cond.store181
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    lui a4, 1
+; RV64-NEXT:    addiw a4, a4, 640
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v16, (a4)
+; RV64-NEXT:    lh a3, 504(a2)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 182(a0)
+; RV64-NEXT:    slli a3, a1, 35
+; RV64-NEXT:    bgez a3, .LBB27_99
+; RV64-NEXT:  .LBB27_223: # %cond.store183
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    li a4, 9
+; RV64-NEXT:    slli a4, a4, 9
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v16, (a4)
+; RV64-NEXT:    lh a3, 378(a2)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 184(a0)
+; RV64-NEXT:    slli a3, a1, 34
+; RV64-NEXT:    bgez a3, .LBB27_100
+; RV64-NEXT:  .LBB27_224: # %cond.store185
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    lui a4, 1
+; RV64-NEXT:    addiw a4, a4, 384
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v16, (a4)
+; RV64-NEXT:    lh a3, 252(a2)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 186(a0)
+; RV64-NEXT:    slli a3, a1, 33
+; RV64-NEXT:    bgez a3, .LBB27_101
+; RV64-NEXT:  .LBB27_225: # %cond.store187
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    li a4, 17
+; RV64-NEXT:    slli a4, a4, 8
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v16, (a4)
+; RV64-NEXT:    lh a3, 126(a2)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 188(a0)
+; RV64-NEXT:    slli a3, a1, 32
+; RV64-NEXT:    bltz a3, .LBB27_102
+; RV64-NEXT:    j .LBB27_103
+; RV64-NEXT:  .LBB27_226: # %cond.store191
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    lui a4, 1
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v16, (a4)
+; RV64-NEXT:    lh a3, 2016(a2)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 192(a0)
+; RV64-NEXT:    slli a3, a1, 30
+; RV64-NEXT:    bgez a3, .LBB27_105
+; RV64-NEXT:  .LBB27_227: # %cond.store193
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    addi a4, sp, 2047
+; RV64-NEXT:    addi a4, a4, 1921
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v16, (a4)
+; RV64-NEXT:    lh a3, 1890(a2)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 194(a0)
+; RV64-NEXT:    slli a3, a1, 29
+; RV64-NEXT:    bgez a3, .LBB27_106
+; RV64-NEXT:  .LBB27_228: # %cond.store195
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    addi a4, sp, 2047
+; RV64-NEXT:    addi a4, a4, 1793
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v16, (a4)
+; RV64-NEXT:    lh a3, 1764(a2)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 196(a0)
+; RV64-NEXT:    slli a3, a1, 28
+; RV64-NEXT:    bgez a3, .LBB27_107
+; RV64-NEXT:  .LBB27_229: # %cond.store197
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    addi a4, sp, 2047
+; RV64-NEXT:    addi a4, a4, 1665
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v16, (a4)
+; RV64-NEXT:    lh a3, 1638(a2)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 198(a0)
+; RV64-NEXT:    slli a3, a1, 27
+; RV64-NEXT:    bgez a3, .LBB27_108
+; RV64-NEXT:  .LBB27_230: # %cond.store199
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    addi a4, sp, 2047
+; RV64-NEXT:    addi a4, a4, 1537
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v16, (a4)
+; RV64-NEXT:    lh a3, 1512(a2)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 200(a0)
+; RV64-NEXT:    slli a3, a1, 26
+; RV64-NEXT:    bgez a3, .LBB27_109
+; RV64-NEXT:  .LBB27_231: # %cond.store201
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    addi a4, sp, 2047
+; RV64-NEXT:    addi a4, a4, 1409
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v16, (a4)
+; RV64-NEXT:    lh a3, 1386(a2)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 202(a0)
+; RV64-NEXT:    slli a3, a1, 25
+; RV64-NEXT:    bgez a3, .LBB27_110
+; RV64-NEXT:  .LBB27_232: # %cond.store203
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    addi a4, sp, 2047
+; RV64-NEXT:    addi a4, a4, 1281
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v16, (a4)
+; RV64-NEXT:    lh a3, 1260(a2)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 204(a0)
+; RV64-NEXT:    slli a3, a1, 24
+; RV64-NEXT:    bgez a3, .LBB27_111
+; RV64-NEXT:  .LBB27_233: # %cond.store205
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    addi a4, sp, 2047
+; RV64-NEXT:    addi a4, a4, 1153
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v16, (a4)
+; RV64-NEXT:    lh a3, 1134(a2)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 206(a0)
+; RV64-NEXT:    slli a3, a1, 23
+; RV64-NEXT:    bgez a3, .LBB27_112
+; RV64-NEXT:  .LBB27_234: # %cond.store207
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    addi a4, sp, 2047
+; RV64-NEXT:    addi a4, a4, 1025
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v16, (a4)
+; RV64-NEXT:    lh a3, 1008(a2)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 208(a0)
+; RV64-NEXT:    slli a3, a1, 22
+; RV64-NEXT:    bltz a3, .LBB27_235
+; RV64-NEXT:    j .LBB27_113
+; RV64-NEXT:  .LBB27_235: # %cond.store209
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    addi a4, sp, 2047
+; RV64-NEXT:    addi a4, a4, 897
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v16, (a4)
+; RV64-NEXT:    lh a3, 882(a2)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 210(a0)
+; RV64-NEXT:    slli a3, a1, 21
+; RV64-NEXT:    bltz a3, .LBB27_236
+; RV64-NEXT:    j .LBB27_114
+; RV64-NEXT:  .LBB27_236: # %cond.store211
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    addi a4, sp, 2047
+; RV64-NEXT:    addi a4, a4, 769
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v16, (a4)
+; RV64-NEXT:    lh a3, 756(a2)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 212(a0)
+; RV64-NEXT:    slli a3, a1, 20
+; RV64-NEXT:    bltz a3, .LBB27_237
+; RV64-NEXT:    j .LBB27_115
+; RV64-NEXT:  .LBB27_237: # %cond.store213
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    addi a4, sp, 2047
+; RV64-NEXT:    addi a4, a4, 641
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v16, (a4)
+; RV64-NEXT:    lh a3, 630(a2)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 214(a0)
+; RV64-NEXT:    slli a3, a1, 19
+; RV64-NEXT:    bltz a3, .LBB27_238
+; RV64-NEXT:    j .LBB27_116
+; RV64-NEXT:  .LBB27_238: # %cond.store215
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    addi a4, sp, 2047
+; RV64-NEXT:    addi a4, a4, 513
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v16, (a4)
+; RV64-NEXT:    lh a3, 504(a2)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 216(a0)
+; RV64-NEXT:    slli a3, a1, 18
+; RV64-NEXT:    bltz a3, .LBB27_239
+; RV64-NEXT:    j .LBB27_117
+; RV64-NEXT:  .LBB27_239: # %cond.store217
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    addi a4, sp, 2047
+; RV64-NEXT:    addi a4, a4, 385
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v16, (a4)
+; RV64-NEXT:    lh a3, 378(a2)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 218(a0)
+; RV64-NEXT:    slli a3, a1, 17
+; RV64-NEXT:    bltz a3, .LBB27_240
+; RV64-NEXT:    j .LBB27_118
+; RV64-NEXT:  .LBB27_240: # %cond.store219
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    addi a4, sp, 2047
+; RV64-NEXT:    addi a4, a4, 257
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v16, (a4)
+; RV64-NEXT:    lh a3, 252(a2)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 220(a0)
+; RV64-NEXT:    slli a3, a1, 16
+; RV64-NEXT:    bltz a3, .LBB27_241
+; RV64-NEXT:    j .LBB27_119
+; RV64-NEXT:  .LBB27_241: # %cond.store221
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    addi a4, sp, 2047
+; RV64-NEXT:    addi a4, a4, 129
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v16, (a4)
+; RV64-NEXT:    lh a3, 126(a2)
+; RV64-NEXT:    fmv.h.x fa5, a3
+; RV64-NEXT:    fsh fa5, 222(a0)
+; RV64-NEXT:    slli a3, a1, 15
+; RV64-NEXT:    bltz a3, .LBB27_242
+; RV64-NEXT:    j .LBB27_120
+; RV64-NEXT:  .LBB27_242: # %cond.store223
+; RV64-NEXT:    li a3, 64
+; RV64-NEXT:    addi a4, sp, 2047
+; RV64-NEXT:    addi a4, a4, 1
+; RV64-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v16, (a4)
+; RV64-NEXT:    lh a2, 0(a2)
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 224(a0)
+; RV64-NEXT:    slli a2, a1, 14
+; RV64-NEXT:    bltz a2, .LBB27_243
+; RV64-NEXT:    j .LBB27_121
+; RV64-NEXT:  .LBB27_243: # %cond.store225
+; RV64-NEXT:    li a2, 64
+; RV64-NEXT:    addi a3, sp, 1920
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v16, (a3)
+; RV64-NEXT:    lh a2, 2018(sp)
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 226(a0)
+; RV64-NEXT:    slli a2, a1, 13
+; RV64-NEXT:    bltz a2, .LBB27_244
+; RV64-NEXT:    j .LBB27_122
+; RV64-NEXT:  .LBB27_244: # %cond.store227
+; RV64-NEXT:    li a2, 64
+; RV64-NEXT:    addi a3, sp, 1792
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v16, (a3)
+; RV64-NEXT:    lh a2, 1892(sp)
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 228(a0)
+; RV64-NEXT:    slli a2, a1, 12
+; RV64-NEXT:    bltz a2, .LBB27_245
+; RV64-NEXT:    j .LBB27_123
+; RV64-NEXT:  .LBB27_245: # %cond.store229
+; RV64-NEXT:    li a2, 64
+; RV64-NEXT:    addi a3, sp, 1664
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v16, (a3)
+; RV64-NEXT:    lh a2, 1766(sp)
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 230(a0)
+; RV64-NEXT:    slli a2, a1, 11
+; RV64-NEXT:    bltz a2, .LBB27_246
+; RV64-NEXT:    j .LBB27_124
+; RV64-NEXT:  .LBB27_246: # %cond.store231
+; RV64-NEXT:    li a2, 64
+; RV64-NEXT:    addi a3, sp, 1536
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v16, (a3)
+; RV64-NEXT:    lh a2, 1640(sp)
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 232(a0)
+; RV64-NEXT:    slli a2, a1, 10
+; RV64-NEXT:    bltz a2, .LBB27_247
+; RV64-NEXT:    j .LBB27_125
+; RV64-NEXT:  .LBB27_247: # %cond.store233
+; RV64-NEXT:    li a2, 64
+; RV64-NEXT:    addi a3, sp, 1408
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v16, (a3)
+; RV64-NEXT:    lh a2, 1514(sp)
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 234(a0)
+; RV64-NEXT:    slli a2, a1, 9
+; RV64-NEXT:    bltz a2, .LBB27_248
+; RV64-NEXT:    j .LBB27_126
+; RV64-NEXT:  .LBB27_248: # %cond.store235
+; RV64-NEXT:    li a2, 64
+; RV64-NEXT:    addi a3, sp, 1280
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v16, (a3)
+; RV64-NEXT:    lh a2, 1388(sp)
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 236(a0)
+; RV64-NEXT:    slli a2, a1, 8
+; RV64-NEXT:    bltz a2, .LBB27_249
+; RV64-NEXT:    j .LBB27_127
+; RV64-NEXT:  .LBB27_249: # %cond.store237
+; RV64-NEXT:    li a2, 64
+; RV64-NEXT:    addi a3, sp, 1152
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v16, (a3)
+; RV64-NEXT:    lh a2, 1262(sp)
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 238(a0)
+; RV64-NEXT:    slli a2, a1, 7
+; RV64-NEXT:    bltz a2, .LBB27_250
+; RV64-NEXT:    j .LBB27_128
+; RV64-NEXT:  .LBB27_250: # %cond.store239
+; RV64-NEXT:    li a2, 64
+; RV64-NEXT:    addi a3, sp, 1024
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v16, (a3)
+; RV64-NEXT:    lh a2, 1136(sp)
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 240(a0)
+; RV64-NEXT:    slli a2, a1, 6
+; RV64-NEXT:    bltz a2, .LBB27_251
+; RV64-NEXT:    j .LBB27_129
+; RV64-NEXT:  .LBB27_251: # %cond.store241
+; RV64-NEXT:    li a2, 64
+; RV64-NEXT:    addi a3, sp, 896
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v16, (a3)
+; RV64-NEXT:    lh a2, 1010(sp)
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 242(a0)
+; RV64-NEXT:    slli a2, a1, 5
+; RV64-NEXT:    bltz a2, .LBB27_252
+; RV64-NEXT:    j .LBB27_130
+; RV64-NEXT:  .LBB27_252: # %cond.store243
+; RV64-NEXT:    li a2, 64
+; RV64-NEXT:    addi a3, sp, 768
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v16, (a3)
+; RV64-NEXT:    lh a2, 884(sp)
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 244(a0)
+; RV64-NEXT:    slli a2, a1, 4
+; RV64-NEXT:    bltz a2, .LBB27_253
+; RV64-NEXT:    j .LBB27_131
+; RV64-NEXT:  .LBB27_253: # %cond.store245
+; RV64-NEXT:    li a2, 64
+; RV64-NEXT:    addi a3, sp, 640
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v16, (a3)
+; RV64-NEXT:    lh a2, 758(sp)
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 246(a0)
+; RV64-NEXT:    slli a2, a1, 3
+; RV64-NEXT:    bltz a2, .LBB27_254
+; RV64-NEXT:    j .LBB27_132
+; RV64-NEXT:  .LBB27_254: # %cond.store247
+; RV64-NEXT:    li a2, 64
+; RV64-NEXT:    addi a3, sp, 512
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v16, (a3)
+; RV64-NEXT:    lh a2, 632(sp)
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 248(a0)
+; RV64-NEXT:    slli a2, a1, 2
+; RV64-NEXT:    bltz a2, .LBB27_255
+; RV64-NEXT:    j .LBB27_133
+; RV64-NEXT:  .LBB27_255: # %cond.store249
+; RV64-NEXT:    li a2, 64
+; RV64-NEXT:    addi a3, sp, 384
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v16, (a3)
+; RV64-NEXT:    lh a2, 506(sp)
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 250(a0)
+; RV64-NEXT:    slli a2, a1, 1
+; RV64-NEXT:    bltz a2, .LBB27_256
+; RV64-NEXT:    j .LBB27_134
+; RV64-NEXT:  .LBB27_256: # %cond.store251
+; RV64-NEXT:    li a2, 64
+; RV64-NEXT:    addi a3, sp, 256
+; RV64-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV64-NEXT:    vse16.v v16, (a3)
+; RV64-NEXT:    lh a2, 380(sp)
+; RV64-NEXT:    fmv.h.x fa5, a2
+; RV64-NEXT:    fsh fa5, 252(a0)
+; RV64-NEXT:    bgez a1, .LBB27_257
+; RV64-NEXT:    j .LBB27_135
+; RV64-NEXT:  .LBB27_257: # %cond.store251
+; RV64-NEXT:    j .LBB27_136
+  call void @llvm.masked.store.v128bf16.p0(<128 x bfloat> %val, ptr %a, i32 8, <128 x i1> %mask)
   ret void
 }
-declare void @llvm.masked.store.v128f16.p0v128f16(<128 x half>, ptr, i32, <128 x i1>)
+
+define void @masked_store_v128f16(<128 x half> %val, ptr %a, <128 x i1> %mask) {
+; ZVFH-LABEL: masked_store_v128f16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    li a1, 64
+; ZVFH-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; ZVFH-NEXT:    vse16.v v8, (a0), v0.t
+; ZVFH-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
+; ZVFH-NEXT:    vslidedown.vi v0, v0, 8
+; ZVFH-NEXT:    addi a0, a0, 128
+; ZVFH-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; ZVFH-NEXT:    vse16.v v16, (a0), v0.t
+; ZVFH-NEXT:    ret
+;
+; RV32-ZVFHMIN-LABEL: masked_store_v128f16:
+; RV32-ZVFHMIN:       # %bb.0:
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a3, v0
+; RV32-ZVFHMIN-NEXT:    andi a1, a3, 1
+; RV32-ZVFHMIN-NEXT:    bnez a1, .LBB28_141
+; RV32-ZVFHMIN-NEXT:  # %bb.1: # %else
+; RV32-ZVFHMIN-NEXT:    andi a1, a3, 2
+; RV32-ZVFHMIN-NEXT:    bnez a1, .LBB28_142
+; RV32-ZVFHMIN-NEXT:  .LBB28_2: # %else2
+; RV32-ZVFHMIN-NEXT:    andi a1, a3, 4
+; RV32-ZVFHMIN-NEXT:    bnez a1, .LBB28_143
+; RV32-ZVFHMIN-NEXT:  .LBB28_3: # %else4
+; RV32-ZVFHMIN-NEXT:    andi a1, a3, 8
+; RV32-ZVFHMIN-NEXT:    bnez a1, .LBB28_144
+; RV32-ZVFHMIN-NEXT:  .LBB28_4: # %else6
+; RV32-ZVFHMIN-NEXT:    andi a1, a3, 16
+; RV32-ZVFHMIN-NEXT:    bnez a1, .LBB28_145
+; RV32-ZVFHMIN-NEXT:  .LBB28_5: # %else8
+; RV32-ZVFHMIN-NEXT:    andi a1, a3, 32
+; RV32-ZVFHMIN-NEXT:    bnez a1, .LBB28_146
+; RV32-ZVFHMIN-NEXT:  .LBB28_6: # %else10
+; RV32-ZVFHMIN-NEXT:    andi a1, a3, 64
+; RV32-ZVFHMIN-NEXT:    bnez a1, .LBB28_147
+; RV32-ZVFHMIN-NEXT:  .LBB28_7: # %else12
+; RV32-ZVFHMIN-NEXT:    andi a1, a3, 128
+; RV32-ZVFHMIN-NEXT:    bnez a1, .LBB28_148
+; RV32-ZVFHMIN-NEXT:  .LBB28_8: # %else14
+; RV32-ZVFHMIN-NEXT:    andi a1, a3, 256
+; RV32-ZVFHMIN-NEXT:    bnez a1, .LBB28_149
+; RV32-ZVFHMIN-NEXT:  .LBB28_9: # %else16
+; RV32-ZVFHMIN-NEXT:    andi a1, a3, 512
+; RV32-ZVFHMIN-NEXT:    bnez a1, .LBB28_150
+; RV32-ZVFHMIN-NEXT:  .LBB28_10: # %else18
+; RV32-ZVFHMIN-NEXT:    andi a1, a3, 1024
+; RV32-ZVFHMIN-NEXT:    bnez a1, .LBB28_151
+; RV32-ZVFHMIN-NEXT:  .LBB28_11: # %else20
+; RV32-ZVFHMIN-NEXT:    slli a1, a3, 20
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB28_152
+; RV32-ZVFHMIN-NEXT:  .LBB28_12: # %else22
+; RV32-ZVFHMIN-NEXT:    slli a1, a3, 19
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB28_153
+; RV32-ZVFHMIN-NEXT:  .LBB28_13: # %else24
+; RV32-ZVFHMIN-NEXT:    slli a1, a3, 18
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB28_154
+; RV32-ZVFHMIN-NEXT:  .LBB28_14: # %else26
+; RV32-ZVFHMIN-NEXT:    slli a1, a3, 17
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB28_155
+; RV32-ZVFHMIN-NEXT:  .LBB28_15: # %else28
+; RV32-ZVFHMIN-NEXT:    slli a1, a3, 16
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB28_17
+; RV32-ZVFHMIN-NEXT:  .LBB28_16: # %cond.store29
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v24, v8, 15
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a1, v24
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 30(a0)
+; RV32-ZVFHMIN-NEXT:  .LBB28_17: # %else30
+; RV32-ZVFHMIN-NEXT:    addi sp, sp, -2032
+; RV32-ZVFHMIN-NEXT:    .cfi_def_cfa_offset 2032
+; RV32-ZVFHMIN-NEXT:    sw ra, 2028(sp) # 4-byte Folded Spill
+; RV32-ZVFHMIN-NEXT:    sw s0, 2024(sp) # 4-byte Folded Spill
+; RV32-ZVFHMIN-NEXT:    .cfi_offset ra, -4
+; RV32-ZVFHMIN-NEXT:    .cfi_offset s0, -8
+; RV32-ZVFHMIN-NEXT:    addi s0, sp, 2032
+; RV32-ZVFHMIN-NEXT:    .cfi_def_cfa s0, 0
+; RV32-ZVFHMIN-NEXT:    .cfi_remember_state
+; RV32-ZVFHMIN-NEXT:    lui a1, 3
+; RV32-ZVFHMIN-NEXT:    addi a1, a1, -1776
+; RV32-ZVFHMIN-NEXT:    sub sp, sp, a1
+; RV32-ZVFHMIN-NEXT:    andi sp, sp, -128
+; RV32-ZVFHMIN-NEXT:    slli a1, a3, 15
+; RV32-ZVFHMIN-NEXT:    lui a2, 3
+; RV32-ZVFHMIN-NEXT:    addi a2, a2, -1606
+; RV32-ZVFHMIN-NEXT:    add a2, sp, a2
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB28_156
+; RV32-ZVFHMIN-NEXT:  # %bb.18: # %else32
+; RV32-ZVFHMIN-NEXT:    slli a1, a3, 14
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB28_157
+; RV32-ZVFHMIN-NEXT:  .LBB28_19: # %else34
+; RV32-ZVFHMIN-NEXT:    slli a1, a3, 13
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB28_158
+; RV32-ZVFHMIN-NEXT:  .LBB28_20: # %else36
+; RV32-ZVFHMIN-NEXT:    slli a1, a3, 12
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB28_159
+; RV32-ZVFHMIN-NEXT:  .LBB28_21: # %else38
+; RV32-ZVFHMIN-NEXT:    slli a1, a3, 11
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB28_160
+; RV32-ZVFHMIN-NEXT:  .LBB28_22: # %else40
+; RV32-ZVFHMIN-NEXT:    slli a1, a3, 10
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB28_161
+; RV32-ZVFHMIN-NEXT:  .LBB28_23: # %else42
+; RV32-ZVFHMIN-NEXT:    slli a1, a3, 9
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB28_162
+; RV32-ZVFHMIN-NEXT:  .LBB28_24: # %else44
+; RV32-ZVFHMIN-NEXT:    slli a1, a3, 8
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB28_163
+; RV32-ZVFHMIN-NEXT:  .LBB28_25: # %else46
+; RV32-ZVFHMIN-NEXT:    slli a1, a3, 7
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB28_164
+; RV32-ZVFHMIN-NEXT:  .LBB28_26: # %else48
+; RV32-ZVFHMIN-NEXT:    slli a1, a3, 6
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB28_165
+; RV32-ZVFHMIN-NEXT:  .LBB28_27: # %else50
+; RV32-ZVFHMIN-NEXT:    slli a1, a3, 5
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB28_166
+; RV32-ZVFHMIN-NEXT:  .LBB28_28: # %else52
+; RV32-ZVFHMIN-NEXT:    slli a1, a3, 4
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB28_167
+; RV32-ZVFHMIN-NEXT:  .LBB28_29: # %else54
+; RV32-ZVFHMIN-NEXT:    slli a1, a3, 3
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB28_31
+; RV32-ZVFHMIN-NEXT:  .LBB28_30: # %cond.store55
+; RV32-ZVFHMIN-NEXT:    li a1, 64
+; RV32-ZVFHMIN-NEXT:    li a4, 21
+; RV32-ZVFHMIN-NEXT:    slli a4, a4, 9
+; RV32-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a1, 126(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 56(a0)
+; RV32-ZVFHMIN-NEXT:  .LBB28_31: # %else56
+; RV32-ZVFHMIN-NEXT:    slli a4, a3, 2
+; RV32-ZVFHMIN-NEXT:    li a1, 32
+; RV32-ZVFHMIN-NEXT:    bgez a4, .LBB28_33
+; RV32-ZVFHMIN-NEXT:  # %bb.32: # %cond.store57
+; RV32-ZVFHMIN-NEXT:    li a4, 64
+; RV32-ZVFHMIN-NEXT:    lui a5, 3
+; RV32-ZVFHMIN-NEXT:    addi a5, a5, -1664
+; RV32-ZVFHMIN-NEXT:    add a5, sp, a5
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a4, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a5)
+; RV32-ZVFHMIN-NEXT:    lh a2, 0(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 58(a0)
+; RV32-ZVFHMIN-NEXT:  .LBB28_33: # %else58
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vsrl.vx v24, v0, a1
+; RV32-ZVFHMIN-NEXT:    slli a2, a3, 1
+; RV32-ZVFHMIN-NEXT:    lui a4, 2
+; RV32-ZVFHMIN-NEXT:    addi a4, a4, 348
+; RV32-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB28_35
+; RV32-ZVFHMIN-NEXT:  # %bb.34: # %cond.store59
+; RV32-ZVFHMIN-NEXT:    li a2, 64
+; RV32-ZVFHMIN-NEXT:    lui a5, 3
+; RV32-ZVFHMIN-NEXT:    addi a5, a5, -1792
+; RV32-ZVFHMIN-NEXT:    add a5, sp, a5
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a5)
+; RV32-ZVFHMIN-NEXT:    lh a2, 2016(a4)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 60(a0)
+; RV32-ZVFHMIN-NEXT:  .LBB28_35: # %else60
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a2, v24
+; RV32-ZVFHMIN-NEXT:    bltz a3, .LBB28_168
+; RV32-ZVFHMIN-NEXT:  # %bb.36: # %else62
+; RV32-ZVFHMIN-NEXT:    andi a3, a2, 1
+; RV32-ZVFHMIN-NEXT:    bnez a3, .LBB28_169
+; RV32-ZVFHMIN-NEXT:  .LBB28_37: # %else64
+; RV32-ZVFHMIN-NEXT:    andi a3, a2, 2
+; RV32-ZVFHMIN-NEXT:    bnez a3, .LBB28_170
+; RV32-ZVFHMIN-NEXT:  .LBB28_38: # %else66
+; RV32-ZVFHMIN-NEXT:    andi a3, a2, 4
+; RV32-ZVFHMIN-NEXT:    bnez a3, .LBB28_171
+; RV32-ZVFHMIN-NEXT:  .LBB28_39: # %else68
+; RV32-ZVFHMIN-NEXT:    andi a3, a2, 8
+; RV32-ZVFHMIN-NEXT:    bnez a3, .LBB28_172
+; RV32-ZVFHMIN-NEXT:  .LBB28_40: # %else70
+; RV32-ZVFHMIN-NEXT:    andi a3, a2, 16
+; RV32-ZVFHMIN-NEXT:    bnez a3, .LBB28_173
+; RV32-ZVFHMIN-NEXT:  .LBB28_41: # %else72
+; RV32-ZVFHMIN-NEXT:    andi a3, a2, 32
+; RV32-ZVFHMIN-NEXT:    bnez a3, .LBB28_174
+; RV32-ZVFHMIN-NEXT:  .LBB28_42: # %else74
+; RV32-ZVFHMIN-NEXT:    andi a3, a2, 64
+; RV32-ZVFHMIN-NEXT:    bnez a3, .LBB28_175
+; RV32-ZVFHMIN-NEXT:  .LBB28_43: # %else76
+; RV32-ZVFHMIN-NEXT:    andi a3, a2, 128
+; RV32-ZVFHMIN-NEXT:    bnez a3, .LBB28_176
+; RV32-ZVFHMIN-NEXT:  .LBB28_44: # %else78
+; RV32-ZVFHMIN-NEXT:    andi a3, a2, 256
+; RV32-ZVFHMIN-NEXT:    bnez a3, .LBB28_177
+; RV32-ZVFHMIN-NEXT:  .LBB28_45: # %else80
+; RV32-ZVFHMIN-NEXT:    andi a3, a2, 512
+; RV32-ZVFHMIN-NEXT:    bnez a3, .LBB28_178
+; RV32-ZVFHMIN-NEXT:  .LBB28_46: # %else82
+; RV32-ZVFHMIN-NEXT:    andi a3, a2, 1024
+; RV32-ZVFHMIN-NEXT:    bnez a3, .LBB28_179
+; RV32-ZVFHMIN-NEXT:  .LBB28_47: # %else84
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 20
+; RV32-ZVFHMIN-NEXT:    bltz a3, .LBB28_180
+; RV32-ZVFHMIN-NEXT:  .LBB28_48: # %else86
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 19
+; RV32-ZVFHMIN-NEXT:    bltz a3, .LBB28_181
+; RV32-ZVFHMIN-NEXT:  .LBB28_49: # %else88
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 18
+; RV32-ZVFHMIN-NEXT:    bltz a3, .LBB28_182
+; RV32-ZVFHMIN-NEXT:  .LBB28_50: # %else90
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 17
+; RV32-ZVFHMIN-NEXT:    bgez a3, .LBB28_52
+; RV32-ZVFHMIN-NEXT:  .LBB28_51: # %cond.store91
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    lui a5, 2
+; RV32-ZVFHMIN-NEXT:    addi a5, a5, 256
+; RV32-ZVFHMIN-NEXT:    add a5, sp, a5
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a5)
+; RV32-ZVFHMIN-NEXT:    lh a3, 0(a4)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 92(a0)
+; RV32-ZVFHMIN-NEXT:  .LBB28_52: # %else92
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 16
+; RV32-ZVFHMIN-NEXT:    lui a4, 2
+; RV32-ZVFHMIN-NEXT:    addi a4, a4, -1794
+; RV32-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV32-ZVFHMIN-NEXT:    bltz a3, .LBB28_183
+; RV32-ZVFHMIN-NEXT:  # %bb.53: # %else94
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 15
+; RV32-ZVFHMIN-NEXT:    bltz a3, .LBB28_184
+; RV32-ZVFHMIN-NEXT:  .LBB28_54: # %else96
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 14
+; RV32-ZVFHMIN-NEXT:    bltz a3, .LBB28_185
+; RV32-ZVFHMIN-NEXT:  .LBB28_55: # %else98
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 13
+; RV32-ZVFHMIN-NEXT:    bltz a3, .LBB28_186
+; RV32-ZVFHMIN-NEXT:  .LBB28_56: # %else100
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 12
+; RV32-ZVFHMIN-NEXT:    bltz a3, .LBB28_187
+; RV32-ZVFHMIN-NEXT:  .LBB28_57: # %else102
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 11
+; RV32-ZVFHMIN-NEXT:    bltz a3, .LBB28_188
+; RV32-ZVFHMIN-NEXT:  .LBB28_58: # %else104
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 10
+; RV32-ZVFHMIN-NEXT:    bltz a3, .LBB28_189
+; RV32-ZVFHMIN-NEXT:  .LBB28_59: # %else106
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 9
+; RV32-ZVFHMIN-NEXT:    bltz a3, .LBB28_190
+; RV32-ZVFHMIN-NEXT:  .LBB28_60: # %else108
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 8
+; RV32-ZVFHMIN-NEXT:    bltz a3, .LBB28_191
+; RV32-ZVFHMIN-NEXT:  .LBB28_61: # %else110
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 7
+; RV32-ZVFHMIN-NEXT:    bltz a3, .LBB28_192
+; RV32-ZVFHMIN-NEXT:  .LBB28_62: # %else112
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 6
+; RV32-ZVFHMIN-NEXT:    bltz a3, .LBB28_193
+; RV32-ZVFHMIN-NEXT:  .LBB28_63: # %else114
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 5
+; RV32-ZVFHMIN-NEXT:    bltz a3, .LBB28_194
+; RV32-ZVFHMIN-NEXT:  .LBB28_64: # %else116
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 4
+; RV32-ZVFHMIN-NEXT:    bltz a3, .LBB28_195
+; RV32-ZVFHMIN-NEXT:  .LBB28_65: # %else118
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 3
+; RV32-ZVFHMIN-NEXT:    bltz a3, .LBB28_196
+; RV32-ZVFHMIN-NEXT:  .LBB28_66: # %else120
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 2
+; RV32-ZVFHMIN-NEXT:    bgez a3, .LBB28_68
+; RV32-ZVFHMIN-NEXT:  .LBB28_67: # %cond.store121
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    lui a5, 2
+; RV32-ZVFHMIN-NEXT:    addi a5, a5, -1664
+; RV32-ZVFHMIN-NEXT:    add a5, sp, a5
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a5)
+; RV32-ZVFHMIN-NEXT:    lh a3, 252(a4)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 122(a0)
+; RV32-ZVFHMIN-NEXT:  .LBB28_68: # %else122
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 1
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v24, v0, 1
+; RV32-ZVFHMIN-NEXT:    bgez a3, .LBB28_70
+; RV32-ZVFHMIN-NEXT:  # %bb.69: # %cond.store123
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    li a5, 25
+; RV32-ZVFHMIN-NEXT:    slli a5, a5, 8
+; RV32-ZVFHMIN-NEXT:    add a5, sp, a5
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a5)
+; RV32-ZVFHMIN-NEXT:    lh a3, 126(a4)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 124(a0)
+; RV32-ZVFHMIN-NEXT:  .LBB28_70: # %else124
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a3, v24
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB28_197
+; RV32-ZVFHMIN-NEXT:  # %bb.71: # %else126
+; RV32-ZVFHMIN-NEXT:    andi a2, a3, 1
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB28_198
+; RV32-ZVFHMIN-NEXT:  .LBB28_72: # %else128
+; RV32-ZVFHMIN-NEXT:    andi a2, a3, 2
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB28_199
+; RV32-ZVFHMIN-NEXT:  .LBB28_73: # %else130
+; RV32-ZVFHMIN-NEXT:    andi a2, a3, 4
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB28_200
+; RV32-ZVFHMIN-NEXT:  .LBB28_74: # %else132
+; RV32-ZVFHMIN-NEXT:    andi a2, a3, 8
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB28_201
+; RV32-ZVFHMIN-NEXT:  .LBB28_75: # %else134
+; RV32-ZVFHMIN-NEXT:    andi a2, a3, 16
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB28_202
+; RV32-ZVFHMIN-NEXT:  .LBB28_76: # %else136
+; RV32-ZVFHMIN-NEXT:    andi a2, a3, 32
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB28_203
+; RV32-ZVFHMIN-NEXT:  .LBB28_77: # %else138
+; RV32-ZVFHMIN-NEXT:    andi a2, a3, 64
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB28_204
+; RV32-ZVFHMIN-NEXT:  .LBB28_78: # %else140
+; RV32-ZVFHMIN-NEXT:    andi a2, a3, 128
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB28_205
+; RV32-ZVFHMIN-NEXT:  .LBB28_79: # %else142
+; RV32-ZVFHMIN-NEXT:    andi a2, a3, 256
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB28_206
+; RV32-ZVFHMIN-NEXT:  .LBB28_80: # %else144
+; RV32-ZVFHMIN-NEXT:    andi a2, a3, 512
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB28_207
+; RV32-ZVFHMIN-NEXT:  .LBB28_81: # %else146
+; RV32-ZVFHMIN-NEXT:    andi a2, a3, 1024
+; RV32-ZVFHMIN-NEXT:    bnez a2, .LBB28_208
+; RV32-ZVFHMIN-NEXT:  .LBB28_82: # %else148
+; RV32-ZVFHMIN-NEXT:    slli a2, a3, 20
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB28_209
+; RV32-ZVFHMIN-NEXT:  .LBB28_83: # %else150
+; RV32-ZVFHMIN-NEXT:    slli a2, a3, 19
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB28_210
+; RV32-ZVFHMIN-NEXT:  .LBB28_84: # %else152
+; RV32-ZVFHMIN-NEXT:    slli a2, a3, 18
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB28_211
+; RV32-ZVFHMIN-NEXT:  .LBB28_85: # %else154
+; RV32-ZVFHMIN-NEXT:    slli a2, a3, 17
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB28_212
+; RV32-ZVFHMIN-NEXT:  .LBB28_86: # %else156
+; RV32-ZVFHMIN-NEXT:    slli a2, a3, 16
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB28_88
+; RV32-ZVFHMIN-NEXT:  .LBB28_87: # %cond.store157
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v8, v16, 15
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a2, v8
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 158(a0)
+; RV32-ZVFHMIN-NEXT:  .LBB28_88: # %else158
+; RV32-ZVFHMIN-NEXT:    slli a4, a3, 15
+; RV32-ZVFHMIN-NEXT:    lui a2, 1
+; RV32-ZVFHMIN-NEXT:    addi a2, a2, 190
+; RV32-ZVFHMIN-NEXT:    add a2, sp, a2
+; RV32-ZVFHMIN-NEXT:    bltz a4, .LBB28_213
+; RV32-ZVFHMIN-NEXT:  # %bb.89: # %else160
+; RV32-ZVFHMIN-NEXT:    slli a4, a3, 14
+; RV32-ZVFHMIN-NEXT:    bltz a4, .LBB28_214
+; RV32-ZVFHMIN-NEXT:  .LBB28_90: # %else162
+; RV32-ZVFHMIN-NEXT:    slli a4, a3, 13
+; RV32-ZVFHMIN-NEXT:    bltz a4, .LBB28_215
+; RV32-ZVFHMIN-NEXT:  .LBB28_91: # %else164
+; RV32-ZVFHMIN-NEXT:    slli a4, a3, 12
+; RV32-ZVFHMIN-NEXT:    bltz a4, .LBB28_216
+; RV32-ZVFHMIN-NEXT:  .LBB28_92: # %else166
+; RV32-ZVFHMIN-NEXT:    slli a4, a3, 11
+; RV32-ZVFHMIN-NEXT:    bltz a4, .LBB28_217
+; RV32-ZVFHMIN-NEXT:  .LBB28_93: # %else168
+; RV32-ZVFHMIN-NEXT:    slli a4, a3, 10
+; RV32-ZVFHMIN-NEXT:    bltz a4, .LBB28_218
+; RV32-ZVFHMIN-NEXT:  .LBB28_94: # %else170
+; RV32-ZVFHMIN-NEXT:    slli a4, a3, 9
+; RV32-ZVFHMIN-NEXT:    bltz a4, .LBB28_219
+; RV32-ZVFHMIN-NEXT:  .LBB28_95: # %else172
+; RV32-ZVFHMIN-NEXT:    slli a4, a3, 8
+; RV32-ZVFHMIN-NEXT:    bltz a4, .LBB28_220
+; RV32-ZVFHMIN-NEXT:  .LBB28_96: # %else174
+; RV32-ZVFHMIN-NEXT:    slli a4, a3, 7
+; RV32-ZVFHMIN-NEXT:    bltz a4, .LBB28_221
+; RV32-ZVFHMIN-NEXT:  .LBB28_97: # %else176
+; RV32-ZVFHMIN-NEXT:    slli a4, a3, 6
+; RV32-ZVFHMIN-NEXT:    bltz a4, .LBB28_222
+; RV32-ZVFHMIN-NEXT:  .LBB28_98: # %else178
+; RV32-ZVFHMIN-NEXT:    slli a4, a3, 5
+; RV32-ZVFHMIN-NEXT:    bltz a4, .LBB28_223
+; RV32-ZVFHMIN-NEXT:  .LBB28_99: # %else180
+; RV32-ZVFHMIN-NEXT:    slli a4, a3, 4
+; RV32-ZVFHMIN-NEXT:    bltz a4, .LBB28_224
+; RV32-ZVFHMIN-NEXT:  .LBB28_100: # %else182
+; RV32-ZVFHMIN-NEXT:    slli a4, a3, 3
+; RV32-ZVFHMIN-NEXT:    bltz a4, .LBB28_225
+; RV32-ZVFHMIN-NEXT:  .LBB28_101: # %else184
+; RV32-ZVFHMIN-NEXT:    slli a4, a3, 2
+; RV32-ZVFHMIN-NEXT:    bgez a4, .LBB28_103
+; RV32-ZVFHMIN-NEXT:  .LBB28_102: # %cond.store185
+; RV32-ZVFHMIN-NEXT:    li a4, 64
+; RV32-ZVFHMIN-NEXT:    lui a5, 1
+; RV32-ZVFHMIN-NEXT:    addi a5, a5, 384
+; RV32-ZVFHMIN-NEXT:    add a5, sp, a5
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a4, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v16, (a5)
+; RV32-ZVFHMIN-NEXT:    lh a4, 252(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a4
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 186(a0)
+; RV32-ZVFHMIN-NEXT:  .LBB28_103: # %else186
+; RV32-ZVFHMIN-NEXT:    slli a4, a3, 1
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vsrl.vx v8, v24, a1
+; RV32-ZVFHMIN-NEXT:    bgez a4, .LBB28_105
+; RV32-ZVFHMIN-NEXT:  # %bb.104: # %cond.store187
+; RV32-ZVFHMIN-NEXT:    li a1, 64
+; RV32-ZVFHMIN-NEXT:    li a4, 17
+; RV32-ZVFHMIN-NEXT:    slli a4, a4, 8
+; RV32-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v16, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a1, 126(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 188(a0)
+; RV32-ZVFHMIN-NEXT:  .LBB28_105: # %else188
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a1, v8
+; RV32-ZVFHMIN-NEXT:    bgez a3, .LBB28_107
+; RV32-ZVFHMIN-NEXT:  # %bb.106: # %cond.store189
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    lui a4, 1
+; RV32-ZVFHMIN-NEXT:    addi a4, a4, 128
+; RV32-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v16, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a2, 0(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 190(a0)
+; RV32-ZVFHMIN-NEXT:  .LBB28_107: # %else190
+; RV32-ZVFHMIN-NEXT:    andi a3, a1, 1
+; RV32-ZVFHMIN-NEXT:    addi a2, sp, 2047
+; RV32-ZVFHMIN-NEXT:    addi a2, a2, 97
+; RV32-ZVFHMIN-NEXT:    bnez a3, .LBB28_226
+; RV32-ZVFHMIN-NEXT:  # %bb.108: # %else192
+; RV32-ZVFHMIN-NEXT:    andi a3, a1, 2
+; RV32-ZVFHMIN-NEXT:    bnez a3, .LBB28_227
+; RV32-ZVFHMIN-NEXT:  .LBB28_109: # %else194
+; RV32-ZVFHMIN-NEXT:    andi a3, a1, 4
+; RV32-ZVFHMIN-NEXT:    bnez a3, .LBB28_228
+; RV32-ZVFHMIN-NEXT:  .LBB28_110: # %else196
+; RV32-ZVFHMIN-NEXT:    andi a3, a1, 8
+; RV32-ZVFHMIN-NEXT:    bnez a3, .LBB28_229
+; RV32-ZVFHMIN-NEXT:  .LBB28_111: # %else198
+; RV32-ZVFHMIN-NEXT:    andi a3, a1, 16
+; RV32-ZVFHMIN-NEXT:    bnez a3, .LBB28_230
+; RV32-ZVFHMIN-NEXT:  .LBB28_112: # %else200
+; RV32-ZVFHMIN-NEXT:    andi a3, a1, 32
+; RV32-ZVFHMIN-NEXT:    bnez a3, .LBB28_231
+; RV32-ZVFHMIN-NEXT:  .LBB28_113: # %else202
+; RV32-ZVFHMIN-NEXT:    andi a3, a1, 64
+; RV32-ZVFHMIN-NEXT:    bnez a3, .LBB28_232
+; RV32-ZVFHMIN-NEXT:  .LBB28_114: # %else204
+; RV32-ZVFHMIN-NEXT:    andi a3, a1, 128
+; RV32-ZVFHMIN-NEXT:    bnez a3, .LBB28_233
+; RV32-ZVFHMIN-NEXT:  .LBB28_115: # %else206
+; RV32-ZVFHMIN-NEXT:    andi a3, a1, 256
+; RV32-ZVFHMIN-NEXT:    bnez a3, .LBB28_234
+; RV32-ZVFHMIN-NEXT:  .LBB28_116: # %else208
+; RV32-ZVFHMIN-NEXT:    andi a3, a1, 512
+; RV32-ZVFHMIN-NEXT:    bnez a3, .LBB28_235
+; RV32-ZVFHMIN-NEXT:  .LBB28_117: # %else210
+; RV32-ZVFHMIN-NEXT:    andi a3, a1, 1024
+; RV32-ZVFHMIN-NEXT:    bnez a3, .LBB28_236
+; RV32-ZVFHMIN-NEXT:  .LBB28_118: # %else212
+; RV32-ZVFHMIN-NEXT:    slli a3, a1, 20
+; RV32-ZVFHMIN-NEXT:    bltz a3, .LBB28_237
+; RV32-ZVFHMIN-NEXT:  .LBB28_119: # %else214
+; RV32-ZVFHMIN-NEXT:    slli a3, a1, 19
+; RV32-ZVFHMIN-NEXT:    bltz a3, .LBB28_238
+; RV32-ZVFHMIN-NEXT:  .LBB28_120: # %else216
+; RV32-ZVFHMIN-NEXT:    slli a3, a1, 18
+; RV32-ZVFHMIN-NEXT:    bltz a3, .LBB28_239
+; RV32-ZVFHMIN-NEXT:  .LBB28_121: # %else218
+; RV32-ZVFHMIN-NEXT:    slli a3, a1, 17
+; RV32-ZVFHMIN-NEXT:    bltz a3, .LBB28_240
+; RV32-ZVFHMIN-NEXT:  .LBB28_122: # %else220
+; RV32-ZVFHMIN-NEXT:    slli a3, a1, 16
+; RV32-ZVFHMIN-NEXT:    bgez a3, .LBB28_123
+; RV32-ZVFHMIN-NEXT:    j .LBB28_241
+; RV32-ZVFHMIN-NEXT:  .LBB28_123: # %else222
+; RV32-ZVFHMIN-NEXT:    slli a3, a1, 15
+; RV32-ZVFHMIN-NEXT:    bgez a3, .LBB28_124
+; RV32-ZVFHMIN-NEXT:    j .LBB28_242
+; RV32-ZVFHMIN-NEXT:  .LBB28_124: # %else224
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 14
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB28_125
+; RV32-ZVFHMIN-NEXT:    j .LBB28_243
+; RV32-ZVFHMIN-NEXT:  .LBB28_125: # %else226
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 13
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB28_126
+; RV32-ZVFHMIN-NEXT:    j .LBB28_244
+; RV32-ZVFHMIN-NEXT:  .LBB28_126: # %else228
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 12
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB28_127
+; RV32-ZVFHMIN-NEXT:    j .LBB28_245
+; RV32-ZVFHMIN-NEXT:  .LBB28_127: # %else230
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 11
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB28_128
+; RV32-ZVFHMIN-NEXT:    j .LBB28_246
+; RV32-ZVFHMIN-NEXT:  .LBB28_128: # %else232
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 10
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB28_129
+; RV32-ZVFHMIN-NEXT:    j .LBB28_247
+; RV32-ZVFHMIN-NEXT:  .LBB28_129: # %else234
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 9
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB28_130
+; RV32-ZVFHMIN-NEXT:    j .LBB28_248
+; RV32-ZVFHMIN-NEXT:  .LBB28_130: # %else236
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 8
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB28_131
+; RV32-ZVFHMIN-NEXT:    j .LBB28_249
+; RV32-ZVFHMIN-NEXT:  .LBB28_131: # %else238
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 7
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB28_132
+; RV32-ZVFHMIN-NEXT:    j .LBB28_250
+; RV32-ZVFHMIN-NEXT:  .LBB28_132: # %else240
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 6
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB28_133
+; RV32-ZVFHMIN-NEXT:    j .LBB28_251
+; RV32-ZVFHMIN-NEXT:  .LBB28_133: # %else242
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 5
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB28_134
+; RV32-ZVFHMIN-NEXT:    j .LBB28_252
+; RV32-ZVFHMIN-NEXT:  .LBB28_134: # %else244
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 4
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB28_135
+; RV32-ZVFHMIN-NEXT:    j .LBB28_253
+; RV32-ZVFHMIN-NEXT:  .LBB28_135: # %else246
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 3
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB28_136
+; RV32-ZVFHMIN-NEXT:    j .LBB28_254
+; RV32-ZVFHMIN-NEXT:  .LBB28_136: # %else248
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 2
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB28_137
+; RV32-ZVFHMIN-NEXT:    j .LBB28_255
+; RV32-ZVFHMIN-NEXT:  .LBB28_137: # %else250
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 1
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB28_138
+; RV32-ZVFHMIN-NEXT:    j .LBB28_256
+; RV32-ZVFHMIN-NEXT:  .LBB28_138: # %else252
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB28_140
+; RV32-ZVFHMIN-NEXT:  .LBB28_139: # %cond.store253
+; RV32-ZVFHMIN-NEXT:    li a1, 64
+; RV32-ZVFHMIN-NEXT:    addi a2, sp, 128
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v16, (a2)
+; RV32-ZVFHMIN-NEXT:    lh a1, 254(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 254(a0)
+; RV32-ZVFHMIN-NEXT:  .LBB28_140: # %else254
+; RV32-ZVFHMIN-NEXT:    addi sp, s0, -2032
+; RV32-ZVFHMIN-NEXT:    .cfi_def_cfa sp, 2032
+; RV32-ZVFHMIN-NEXT:    lw ra, 2028(sp) # 4-byte Folded Reload
+; RV32-ZVFHMIN-NEXT:    lw s0, 2024(sp) # 4-byte Folded Reload
+; RV32-ZVFHMIN-NEXT:    .cfi_restore ra
+; RV32-ZVFHMIN-NEXT:    .cfi_restore s0
+; RV32-ZVFHMIN-NEXT:    addi sp, sp, 2032
+; RV32-ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
+; RV32-ZVFHMIN-NEXT:    ret
+; RV32-ZVFHMIN-NEXT:  .LBB28_141: # %cond.store
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a1, v8
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 0(a0)
+; RV32-ZVFHMIN-NEXT:    andi a1, a3, 2
+; RV32-ZVFHMIN-NEXT:    beqz a1, .LBB28_2
+; RV32-ZVFHMIN-NEXT:  .LBB28_142: # %cond.store1
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v24, v8, 1
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a1, v24
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 2(a0)
+; RV32-ZVFHMIN-NEXT:    andi a1, a3, 4
+; RV32-ZVFHMIN-NEXT:    beqz a1, .LBB28_3
+; RV32-ZVFHMIN-NEXT:  .LBB28_143: # %cond.store3
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v24, v8, 2
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a1, v24
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 4(a0)
+; RV32-ZVFHMIN-NEXT:    andi a1, a3, 8
+; RV32-ZVFHMIN-NEXT:    beqz a1, .LBB28_4
+; RV32-ZVFHMIN-NEXT:  .LBB28_144: # %cond.store5
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v24, v8, 3
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a1, v24
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 6(a0)
+; RV32-ZVFHMIN-NEXT:    andi a1, a3, 16
+; RV32-ZVFHMIN-NEXT:    beqz a1, .LBB28_5
+; RV32-ZVFHMIN-NEXT:  .LBB28_145: # %cond.store7
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v24, v8, 4
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a1, v24
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 8(a0)
+; RV32-ZVFHMIN-NEXT:    andi a1, a3, 32
+; RV32-ZVFHMIN-NEXT:    beqz a1, .LBB28_6
+; RV32-ZVFHMIN-NEXT:  .LBB28_146: # %cond.store9
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v24, v8, 5
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a1, v24
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 10(a0)
+; RV32-ZVFHMIN-NEXT:    andi a1, a3, 64
+; RV32-ZVFHMIN-NEXT:    beqz a1, .LBB28_7
+; RV32-ZVFHMIN-NEXT:  .LBB28_147: # %cond.store11
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v24, v8, 6
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a1, v24
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 12(a0)
+; RV32-ZVFHMIN-NEXT:    andi a1, a3, 128
+; RV32-ZVFHMIN-NEXT:    beqz a1, .LBB28_8
+; RV32-ZVFHMIN-NEXT:  .LBB28_148: # %cond.store13
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v24, v8, 7
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a1, v24
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 14(a0)
+; RV32-ZVFHMIN-NEXT:    andi a1, a3, 256
+; RV32-ZVFHMIN-NEXT:    beqz a1, .LBB28_9
+; RV32-ZVFHMIN-NEXT:  .LBB28_149: # %cond.store15
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v24, v8, 8
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a1, v24
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 16(a0)
+; RV32-ZVFHMIN-NEXT:    andi a1, a3, 512
+; RV32-ZVFHMIN-NEXT:    beqz a1, .LBB28_10
+; RV32-ZVFHMIN-NEXT:  .LBB28_150: # %cond.store17
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v24, v8, 9
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a1, v24
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 18(a0)
+; RV32-ZVFHMIN-NEXT:    andi a1, a3, 1024
+; RV32-ZVFHMIN-NEXT:    beqz a1, .LBB28_11
+; RV32-ZVFHMIN-NEXT:  .LBB28_151: # %cond.store19
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v24, v8, 10
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a1, v24
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 20(a0)
+; RV32-ZVFHMIN-NEXT:    slli a1, a3, 20
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB28_12
+; RV32-ZVFHMIN-NEXT:  .LBB28_152: # %cond.store21
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v24, v8, 11
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a1, v24
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 22(a0)
+; RV32-ZVFHMIN-NEXT:    slli a1, a3, 19
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB28_13
+; RV32-ZVFHMIN-NEXT:  .LBB28_153: # %cond.store23
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v24, v8, 12
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a1, v24
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 24(a0)
+; RV32-ZVFHMIN-NEXT:    slli a1, a3, 18
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB28_14
+; RV32-ZVFHMIN-NEXT:  .LBB28_154: # %cond.store25
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v24, v8, 13
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a1, v24
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 26(a0)
+; RV32-ZVFHMIN-NEXT:    slli a1, a3, 17
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB28_15
+; RV32-ZVFHMIN-NEXT:  .LBB28_155: # %cond.store27
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v24, v8, 14
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a1, v24
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 28(a0)
+; RV32-ZVFHMIN-NEXT:    slli a1, a3, 16
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB28_16
+; RV32-ZVFHMIN-NEXT:    j .LBB28_17
+; RV32-ZVFHMIN-NEXT:  .LBB28_156: # %cond.store31
+; RV32-ZVFHMIN-NEXT:    .cfi_restore_state
+; RV32-ZVFHMIN-NEXT:    li a1, 64
+; RV32-ZVFHMIN-NEXT:    lui a4, 3
+; RV32-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a1, 1638(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 32(a0)
+; RV32-ZVFHMIN-NEXT:    slli a1, a3, 14
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB28_19
+; RV32-ZVFHMIN-NEXT:  .LBB28_157: # %cond.store33
+; RV32-ZVFHMIN-NEXT:    li a1, 64
+; RV32-ZVFHMIN-NEXT:    lui a4, 3
+; RV32-ZVFHMIN-NEXT:    addi a4, a4, -128
+; RV32-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a1, 1512(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 34(a0)
+; RV32-ZVFHMIN-NEXT:    slli a1, a3, 13
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB28_20
+; RV32-ZVFHMIN-NEXT:  .LBB28_158: # %cond.store35
+; RV32-ZVFHMIN-NEXT:    li a1, 64
+; RV32-ZVFHMIN-NEXT:    lui a4, 3
+; RV32-ZVFHMIN-NEXT:    addi a4, a4, -256
+; RV32-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a1, 1386(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 36(a0)
+; RV32-ZVFHMIN-NEXT:    slli a1, a3, 12
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB28_21
+; RV32-ZVFHMIN-NEXT:  .LBB28_159: # %cond.store37
+; RV32-ZVFHMIN-NEXT:    li a1, 64
+; RV32-ZVFHMIN-NEXT:    lui a4, 3
+; RV32-ZVFHMIN-NEXT:    addi a4, a4, -384
+; RV32-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a1, 1260(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 38(a0)
+; RV32-ZVFHMIN-NEXT:    slli a1, a3, 11
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB28_22
+; RV32-ZVFHMIN-NEXT:  .LBB28_160: # %cond.store39
+; RV32-ZVFHMIN-NEXT:    li a1, 64
+; RV32-ZVFHMIN-NEXT:    li a4, 23
+; RV32-ZVFHMIN-NEXT:    slli a4, a4, 9
+; RV32-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a1, 1134(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 40(a0)
+; RV32-ZVFHMIN-NEXT:    slli a1, a3, 10
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB28_23
+; RV32-ZVFHMIN-NEXT:  .LBB28_161: # %cond.store41
+; RV32-ZVFHMIN-NEXT:    li a1, 64
+; RV32-ZVFHMIN-NEXT:    lui a4, 3
+; RV32-ZVFHMIN-NEXT:    addi a4, a4, -640
+; RV32-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a1, 1008(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 42(a0)
+; RV32-ZVFHMIN-NEXT:    slli a1, a3, 9
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB28_24
+; RV32-ZVFHMIN-NEXT:  .LBB28_162: # %cond.store43
+; RV32-ZVFHMIN-NEXT:    li a1, 64
+; RV32-ZVFHMIN-NEXT:    lui a4, 3
+; RV32-ZVFHMIN-NEXT:    addi a4, a4, -768
+; RV32-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a1, 882(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 44(a0)
+; RV32-ZVFHMIN-NEXT:    slli a1, a3, 8
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB28_25
+; RV32-ZVFHMIN-NEXT:  .LBB28_163: # %cond.store45
+; RV32-ZVFHMIN-NEXT:    li a1, 64
+; RV32-ZVFHMIN-NEXT:    lui a4, 3
+; RV32-ZVFHMIN-NEXT:    addi a4, a4, -896
+; RV32-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a1, 756(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 46(a0)
+; RV32-ZVFHMIN-NEXT:    slli a1, a3, 7
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB28_26
+; RV32-ZVFHMIN-NEXT:  .LBB28_164: # %cond.store47
+; RV32-ZVFHMIN-NEXT:    li a1, 64
+; RV32-ZVFHMIN-NEXT:    li a4, 11
+; RV32-ZVFHMIN-NEXT:    slli a4, a4, 10
+; RV32-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a1, 630(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 48(a0)
+; RV32-ZVFHMIN-NEXT:    slli a1, a3, 6
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB28_27
+; RV32-ZVFHMIN-NEXT:  .LBB28_165: # %cond.store49
+; RV32-ZVFHMIN-NEXT:    li a1, 64
+; RV32-ZVFHMIN-NEXT:    lui a4, 3
+; RV32-ZVFHMIN-NEXT:    addi a4, a4, -1152
+; RV32-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a1, 504(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 50(a0)
+; RV32-ZVFHMIN-NEXT:    slli a1, a3, 5
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB28_28
+; RV32-ZVFHMIN-NEXT:  .LBB28_166: # %cond.store51
+; RV32-ZVFHMIN-NEXT:    li a1, 64
+; RV32-ZVFHMIN-NEXT:    lui a4, 3
+; RV32-ZVFHMIN-NEXT:    addi a4, a4, -1280
+; RV32-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a1, 378(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 52(a0)
+; RV32-ZVFHMIN-NEXT:    slli a1, a3, 4
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB28_29
+; RV32-ZVFHMIN-NEXT:  .LBB28_167: # %cond.store53
+; RV32-ZVFHMIN-NEXT:    li a1, 64
+; RV32-ZVFHMIN-NEXT:    lui a4, 3
+; RV32-ZVFHMIN-NEXT:    addi a4, a4, -1408
+; RV32-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a1, 252(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 54(a0)
+; RV32-ZVFHMIN-NEXT:    slli a1, a3, 3
+; RV32-ZVFHMIN-NEXT:    bltz a1, .LBB28_30
+; RV32-ZVFHMIN-NEXT:    j .LBB28_31
+; RV32-ZVFHMIN-NEXT:  .LBB28_168: # %cond.store61
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    lui a5, 3
+; RV32-ZVFHMIN-NEXT:    addi a5, a5, -1920
+; RV32-ZVFHMIN-NEXT:    add a5, sp, a5
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a5)
+; RV32-ZVFHMIN-NEXT:    lh a3, 1890(a4)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 62(a0)
+; RV32-ZVFHMIN-NEXT:    andi a3, a2, 1
+; RV32-ZVFHMIN-NEXT:    beqz a3, .LBB28_37
+; RV32-ZVFHMIN-NEXT:  .LBB28_169: # %cond.store63
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    li a5, 5
+; RV32-ZVFHMIN-NEXT:    slli a5, a5, 11
+; RV32-ZVFHMIN-NEXT:    add a5, sp, a5
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a5)
+; RV32-ZVFHMIN-NEXT:    lh a3, 1764(a4)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 64(a0)
+; RV32-ZVFHMIN-NEXT:    andi a3, a2, 2
+; RV32-ZVFHMIN-NEXT:    beqz a3, .LBB28_38
+; RV32-ZVFHMIN-NEXT:  .LBB28_170: # %cond.store65
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    lui a5, 2
+; RV32-ZVFHMIN-NEXT:    addi a5, a5, 1920
+; RV32-ZVFHMIN-NEXT:    add a5, sp, a5
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a5)
+; RV32-ZVFHMIN-NEXT:    lh a3, 1638(a4)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 66(a0)
+; RV32-ZVFHMIN-NEXT:    andi a3, a2, 4
+; RV32-ZVFHMIN-NEXT:    beqz a3, .LBB28_39
+; RV32-ZVFHMIN-NEXT:  .LBB28_171: # %cond.store67
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    lui a5, 2
+; RV32-ZVFHMIN-NEXT:    addi a5, a5, 1792
+; RV32-ZVFHMIN-NEXT:    add a5, sp, a5
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a5)
+; RV32-ZVFHMIN-NEXT:    lh a3, 1512(a4)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 68(a0)
+; RV32-ZVFHMIN-NEXT:    andi a3, a2, 8
+; RV32-ZVFHMIN-NEXT:    beqz a3, .LBB28_40
+; RV32-ZVFHMIN-NEXT:  .LBB28_172: # %cond.store69
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    lui a5, 2
+; RV32-ZVFHMIN-NEXT:    addi a5, a5, 1664
+; RV32-ZVFHMIN-NEXT:    add a5, sp, a5
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a5)
+; RV32-ZVFHMIN-NEXT:    lh a3, 1386(a4)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 70(a0)
+; RV32-ZVFHMIN-NEXT:    andi a3, a2, 16
+; RV32-ZVFHMIN-NEXT:    beqz a3, .LBB28_41
+; RV32-ZVFHMIN-NEXT:  .LBB28_173: # %cond.store71
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    li a5, 19
+; RV32-ZVFHMIN-NEXT:    slli a5, a5, 9
+; RV32-ZVFHMIN-NEXT:    add a5, sp, a5
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a5)
+; RV32-ZVFHMIN-NEXT:    lh a3, 1260(a4)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 72(a0)
+; RV32-ZVFHMIN-NEXT:    andi a3, a2, 32
+; RV32-ZVFHMIN-NEXT:    beqz a3, .LBB28_42
+; RV32-ZVFHMIN-NEXT:  .LBB28_174: # %cond.store73
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    lui a5, 2
+; RV32-ZVFHMIN-NEXT:    addi a5, a5, 1408
+; RV32-ZVFHMIN-NEXT:    add a5, sp, a5
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a5)
+; RV32-ZVFHMIN-NEXT:    lh a3, 1134(a4)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 74(a0)
+; RV32-ZVFHMIN-NEXT:    andi a3, a2, 64
+; RV32-ZVFHMIN-NEXT:    beqz a3, .LBB28_43
+; RV32-ZVFHMIN-NEXT:  .LBB28_175: # %cond.store75
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    lui a5, 2
+; RV32-ZVFHMIN-NEXT:    addi a5, a5, 1280
+; RV32-ZVFHMIN-NEXT:    add a5, sp, a5
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a5)
+; RV32-ZVFHMIN-NEXT:    lh a3, 1008(a4)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 76(a0)
+; RV32-ZVFHMIN-NEXT:    andi a3, a2, 128
+; RV32-ZVFHMIN-NEXT:    beqz a3, .LBB28_44
+; RV32-ZVFHMIN-NEXT:  .LBB28_176: # %cond.store77
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    lui a5, 2
+; RV32-ZVFHMIN-NEXT:    addi a5, a5, 1152
+; RV32-ZVFHMIN-NEXT:    add a5, sp, a5
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a5)
+; RV32-ZVFHMIN-NEXT:    lh a3, 882(a4)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 78(a0)
+; RV32-ZVFHMIN-NEXT:    andi a3, a2, 256
+; RV32-ZVFHMIN-NEXT:    beqz a3, .LBB28_45
+; RV32-ZVFHMIN-NEXT:  .LBB28_177: # %cond.store79
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    li a5, 9
+; RV32-ZVFHMIN-NEXT:    slli a5, a5, 10
+; RV32-ZVFHMIN-NEXT:    add a5, sp, a5
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a5)
+; RV32-ZVFHMIN-NEXT:    lh a3, 756(a4)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 80(a0)
+; RV32-ZVFHMIN-NEXT:    andi a3, a2, 512
+; RV32-ZVFHMIN-NEXT:    beqz a3, .LBB28_46
+; RV32-ZVFHMIN-NEXT:  .LBB28_178: # %cond.store81
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    lui a5, 2
+; RV32-ZVFHMIN-NEXT:    addi a5, a5, 896
+; RV32-ZVFHMIN-NEXT:    add a5, sp, a5
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a5)
+; RV32-ZVFHMIN-NEXT:    lh a3, 630(a4)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 82(a0)
+; RV32-ZVFHMIN-NEXT:    andi a3, a2, 1024
+; RV32-ZVFHMIN-NEXT:    beqz a3, .LBB28_47
+; RV32-ZVFHMIN-NEXT:  .LBB28_179: # %cond.store83
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    lui a5, 2
+; RV32-ZVFHMIN-NEXT:    addi a5, a5, 768
+; RV32-ZVFHMIN-NEXT:    add a5, sp, a5
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a5)
+; RV32-ZVFHMIN-NEXT:    lh a3, 504(a4)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 84(a0)
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 20
+; RV32-ZVFHMIN-NEXT:    bgez a3, .LBB28_48
+; RV32-ZVFHMIN-NEXT:  .LBB28_180: # %cond.store85
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    lui a5, 2
+; RV32-ZVFHMIN-NEXT:    addi a5, a5, 640
+; RV32-ZVFHMIN-NEXT:    add a5, sp, a5
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a5)
+; RV32-ZVFHMIN-NEXT:    lh a3, 378(a4)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 86(a0)
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 19
+; RV32-ZVFHMIN-NEXT:    bgez a3, .LBB28_49
+; RV32-ZVFHMIN-NEXT:  .LBB28_181: # %cond.store87
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    li a5, 17
+; RV32-ZVFHMIN-NEXT:    slli a5, a5, 9
+; RV32-ZVFHMIN-NEXT:    add a5, sp, a5
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a5)
+; RV32-ZVFHMIN-NEXT:    lh a3, 252(a4)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 88(a0)
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 18
+; RV32-ZVFHMIN-NEXT:    bgez a3, .LBB28_50
+; RV32-ZVFHMIN-NEXT:  .LBB28_182: # %cond.store89
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    lui a5, 2
+; RV32-ZVFHMIN-NEXT:    addi a5, a5, 384
+; RV32-ZVFHMIN-NEXT:    add a5, sp, a5
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a5)
+; RV32-ZVFHMIN-NEXT:    lh a3, 126(a4)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 90(a0)
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 17
+; RV32-ZVFHMIN-NEXT:    bltz a3, .LBB28_51
+; RV32-ZVFHMIN-NEXT:    j .LBB28_52
+; RV32-ZVFHMIN-NEXT:  .LBB28_183: # %cond.store93
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    lui a5, 2
+; RV32-ZVFHMIN-NEXT:    addi a5, a5, 128
+; RV32-ZVFHMIN-NEXT:    add a5, sp, a5
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a5)
+; RV32-ZVFHMIN-NEXT:    lh a3, 2016(a4)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 94(a0)
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 15
+; RV32-ZVFHMIN-NEXT:    bgez a3, .LBB28_54
+; RV32-ZVFHMIN-NEXT:  .LBB28_184: # %cond.store95
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    lui a5, 2
+; RV32-ZVFHMIN-NEXT:    add a5, sp, a5
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a5)
+; RV32-ZVFHMIN-NEXT:    lh a3, 1890(a4)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 96(a0)
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 14
+; RV32-ZVFHMIN-NEXT:    bgez a3, .LBB28_55
+; RV32-ZVFHMIN-NEXT:  .LBB28_185: # %cond.store97
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    lui a5, 2
+; RV32-ZVFHMIN-NEXT:    addi a5, a5, -128
+; RV32-ZVFHMIN-NEXT:    add a5, sp, a5
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a5)
+; RV32-ZVFHMIN-NEXT:    lh a3, 1764(a4)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 98(a0)
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 13
+; RV32-ZVFHMIN-NEXT:    bgez a3, .LBB28_56
+; RV32-ZVFHMIN-NEXT:  .LBB28_186: # %cond.store99
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    li a5, 31
+; RV32-ZVFHMIN-NEXT:    slli a5, a5, 8
+; RV32-ZVFHMIN-NEXT:    add a5, sp, a5
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a5)
+; RV32-ZVFHMIN-NEXT:    lh a3, 1638(a4)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 100(a0)
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 12
+; RV32-ZVFHMIN-NEXT:    bgez a3, .LBB28_57
+; RV32-ZVFHMIN-NEXT:  .LBB28_187: # %cond.store101
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    lui a5, 2
+; RV32-ZVFHMIN-NEXT:    addi a5, a5, -384
+; RV32-ZVFHMIN-NEXT:    add a5, sp, a5
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a5)
+; RV32-ZVFHMIN-NEXT:    lh a3, 1512(a4)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 102(a0)
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 11
+; RV32-ZVFHMIN-NEXT:    bgez a3, .LBB28_58
+; RV32-ZVFHMIN-NEXT:  .LBB28_188: # %cond.store103
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    li a5, 15
+; RV32-ZVFHMIN-NEXT:    slli a5, a5, 9
+; RV32-ZVFHMIN-NEXT:    add a5, sp, a5
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a5)
+; RV32-ZVFHMIN-NEXT:    lh a3, 1386(a4)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 104(a0)
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 10
+; RV32-ZVFHMIN-NEXT:    bgez a3, .LBB28_59
+; RV32-ZVFHMIN-NEXT:  .LBB28_189: # %cond.store105
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    lui a5, 2
+; RV32-ZVFHMIN-NEXT:    addi a5, a5, -640
+; RV32-ZVFHMIN-NEXT:    add a5, sp, a5
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a5)
+; RV32-ZVFHMIN-NEXT:    lh a3, 1260(a4)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 106(a0)
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 9
+; RV32-ZVFHMIN-NEXT:    bgez a3, .LBB28_60
+; RV32-ZVFHMIN-NEXT:  .LBB28_190: # %cond.store107
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    li a5, 29
+; RV32-ZVFHMIN-NEXT:    slli a5, a5, 8
+; RV32-ZVFHMIN-NEXT:    add a5, sp, a5
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a5)
+; RV32-ZVFHMIN-NEXT:    lh a3, 1134(a4)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 108(a0)
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 8
+; RV32-ZVFHMIN-NEXT:    bgez a3, .LBB28_61
+; RV32-ZVFHMIN-NEXT:  .LBB28_191: # %cond.store109
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    lui a5, 2
+; RV32-ZVFHMIN-NEXT:    addi a5, a5, -896
+; RV32-ZVFHMIN-NEXT:    add a5, sp, a5
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a5)
+; RV32-ZVFHMIN-NEXT:    lh a3, 1008(a4)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 110(a0)
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 7
+; RV32-ZVFHMIN-NEXT:    bgez a3, .LBB28_62
+; RV32-ZVFHMIN-NEXT:  .LBB28_192: # %cond.store111
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    li a5, 7
+; RV32-ZVFHMIN-NEXT:    slli a5, a5, 10
+; RV32-ZVFHMIN-NEXT:    add a5, sp, a5
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a5)
+; RV32-ZVFHMIN-NEXT:    lh a3, 882(a4)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 112(a0)
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 6
+; RV32-ZVFHMIN-NEXT:    bgez a3, .LBB28_63
+; RV32-ZVFHMIN-NEXT:  .LBB28_193: # %cond.store113
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    lui a5, 2
+; RV32-ZVFHMIN-NEXT:    addi a5, a5, -1152
+; RV32-ZVFHMIN-NEXT:    add a5, sp, a5
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a5)
+; RV32-ZVFHMIN-NEXT:    lh a3, 756(a4)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 114(a0)
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 5
+; RV32-ZVFHMIN-NEXT:    bgez a3, .LBB28_64
+; RV32-ZVFHMIN-NEXT:  .LBB28_194: # %cond.store115
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    li a5, 27
+; RV32-ZVFHMIN-NEXT:    slli a5, a5, 8
+; RV32-ZVFHMIN-NEXT:    add a5, sp, a5
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a5)
+; RV32-ZVFHMIN-NEXT:    lh a3, 630(a4)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 116(a0)
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 4
+; RV32-ZVFHMIN-NEXT:    bgez a3, .LBB28_65
+; RV32-ZVFHMIN-NEXT:  .LBB28_195: # %cond.store117
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    lui a5, 2
+; RV32-ZVFHMIN-NEXT:    addi a5, a5, -1408
+; RV32-ZVFHMIN-NEXT:    add a5, sp, a5
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a5)
+; RV32-ZVFHMIN-NEXT:    lh a3, 504(a4)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 118(a0)
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 3
+; RV32-ZVFHMIN-NEXT:    bgez a3, .LBB28_66
+; RV32-ZVFHMIN-NEXT:  .LBB28_196: # %cond.store119
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    li a5, 13
+; RV32-ZVFHMIN-NEXT:    slli a5, a5, 9
+; RV32-ZVFHMIN-NEXT:    add a5, sp, a5
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a5)
+; RV32-ZVFHMIN-NEXT:    lh a3, 378(a4)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 120(a0)
+; RV32-ZVFHMIN-NEXT:    slli a3, a2, 2
+; RV32-ZVFHMIN-NEXT:    bltz a3, .LBB28_67
+; RV32-ZVFHMIN-NEXT:    j .LBB28_68
+; RV32-ZVFHMIN-NEXT:  .LBB28_197: # %cond.store125
+; RV32-ZVFHMIN-NEXT:    li a2, 64
+; RV32-ZVFHMIN-NEXT:    lui a5, 2
+; RV32-ZVFHMIN-NEXT:    addi a5, a5, -1920
+; RV32-ZVFHMIN-NEXT:    add a5, sp, a5
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v8, (a5)
+; RV32-ZVFHMIN-NEXT:    lh a2, 0(a4)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 126(a0)
+; RV32-ZVFHMIN-NEXT:    andi a2, a3, 1
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB28_72
+; RV32-ZVFHMIN-NEXT:  .LBB28_198: # %cond.store127
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a2, v16
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 128(a0)
+; RV32-ZVFHMIN-NEXT:    andi a2, a3, 2
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB28_73
+; RV32-ZVFHMIN-NEXT:  .LBB28_199: # %cond.store129
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v8, v16, 1
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a2, v8
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 130(a0)
+; RV32-ZVFHMIN-NEXT:    andi a2, a3, 4
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB28_74
+; RV32-ZVFHMIN-NEXT:  .LBB28_200: # %cond.store131
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v8, v16, 2
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a2, v8
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 132(a0)
+; RV32-ZVFHMIN-NEXT:    andi a2, a3, 8
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB28_75
+; RV32-ZVFHMIN-NEXT:  .LBB28_201: # %cond.store133
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v8, v16, 3
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a2, v8
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 134(a0)
+; RV32-ZVFHMIN-NEXT:    andi a2, a3, 16
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB28_76
+; RV32-ZVFHMIN-NEXT:  .LBB28_202: # %cond.store135
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v8, v16, 4
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a2, v8
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 136(a0)
+; RV32-ZVFHMIN-NEXT:    andi a2, a3, 32
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB28_77
+; RV32-ZVFHMIN-NEXT:  .LBB28_203: # %cond.store137
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v8, v16, 5
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a2, v8
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 138(a0)
+; RV32-ZVFHMIN-NEXT:    andi a2, a3, 64
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB28_78
+; RV32-ZVFHMIN-NEXT:  .LBB28_204: # %cond.store139
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v8, v16, 6
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a2, v8
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 140(a0)
+; RV32-ZVFHMIN-NEXT:    andi a2, a3, 128
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB28_79
+; RV32-ZVFHMIN-NEXT:  .LBB28_205: # %cond.store141
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v8, v16, 7
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a2, v8
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 142(a0)
+; RV32-ZVFHMIN-NEXT:    andi a2, a3, 256
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB28_80
+; RV32-ZVFHMIN-NEXT:  .LBB28_206: # %cond.store143
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v8, v16, 8
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a2, v8
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 144(a0)
+; RV32-ZVFHMIN-NEXT:    andi a2, a3, 512
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB28_81
+; RV32-ZVFHMIN-NEXT:  .LBB28_207: # %cond.store145
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v8, v16, 9
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a2, v8
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 146(a0)
+; RV32-ZVFHMIN-NEXT:    andi a2, a3, 1024
+; RV32-ZVFHMIN-NEXT:    beqz a2, .LBB28_82
+; RV32-ZVFHMIN-NEXT:  .LBB28_208: # %cond.store147
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v8, v16, 10
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a2, v8
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 148(a0)
+; RV32-ZVFHMIN-NEXT:    slli a2, a3, 20
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB28_83
+; RV32-ZVFHMIN-NEXT:  .LBB28_209: # %cond.store149
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v8, v16, 11
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a2, v8
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 150(a0)
+; RV32-ZVFHMIN-NEXT:    slli a2, a3, 19
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB28_84
+; RV32-ZVFHMIN-NEXT:  .LBB28_210: # %cond.store151
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v8, v16, 12
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a2, v8
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 152(a0)
+; RV32-ZVFHMIN-NEXT:    slli a2, a3, 18
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB28_85
+; RV32-ZVFHMIN-NEXT:  .LBB28_211: # %cond.store153
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v8, v16, 13
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a2, v8
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 154(a0)
+; RV32-ZVFHMIN-NEXT:    slli a2, a3, 17
+; RV32-ZVFHMIN-NEXT:    bgez a2, .LBB28_86
+; RV32-ZVFHMIN-NEXT:  .LBB28_212: # %cond.store155
+; RV32-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV32-ZVFHMIN-NEXT:    vslidedown.vi v8, v16, 14
+; RV32-ZVFHMIN-NEXT:    vmv.x.s a2, v8
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 156(a0)
+; RV32-ZVFHMIN-NEXT:    slli a2, a3, 16
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB28_87
+; RV32-ZVFHMIN-NEXT:    j .LBB28_88
+; RV32-ZVFHMIN-NEXT:  .LBB28_213: # %cond.store159
+; RV32-ZVFHMIN-NEXT:    li a4, 64
+; RV32-ZVFHMIN-NEXT:    li a5, 3
+; RV32-ZVFHMIN-NEXT:    slli a5, a5, 11
+; RV32-ZVFHMIN-NEXT:    add a5, sp, a5
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a4, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v16, (a5)
+; RV32-ZVFHMIN-NEXT:    lh a4, 1890(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a4
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 160(a0)
+; RV32-ZVFHMIN-NEXT:    slli a4, a3, 14
+; RV32-ZVFHMIN-NEXT:    bgez a4, .LBB28_90
+; RV32-ZVFHMIN-NEXT:  .LBB28_214: # %cond.store161
+; RV32-ZVFHMIN-NEXT:    li a4, 64
+; RV32-ZVFHMIN-NEXT:    lui a5, 1
+; RV32-ZVFHMIN-NEXT:    addi a5, a5, 1920
+; RV32-ZVFHMIN-NEXT:    add a5, sp, a5
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a4, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v16, (a5)
+; RV32-ZVFHMIN-NEXT:    lh a4, 1764(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a4
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 162(a0)
+; RV32-ZVFHMIN-NEXT:    slli a4, a3, 13
+; RV32-ZVFHMIN-NEXT:    bgez a4, .LBB28_91
+; RV32-ZVFHMIN-NEXT:  .LBB28_215: # %cond.store163
+; RV32-ZVFHMIN-NEXT:    li a4, 64
+; RV32-ZVFHMIN-NEXT:    li a5, 23
+; RV32-ZVFHMIN-NEXT:    slli a5, a5, 8
+; RV32-ZVFHMIN-NEXT:    add a5, sp, a5
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a4, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v16, (a5)
+; RV32-ZVFHMIN-NEXT:    lh a4, 1638(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a4
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 164(a0)
+; RV32-ZVFHMIN-NEXT:    slli a4, a3, 12
+; RV32-ZVFHMIN-NEXT:    bgez a4, .LBB28_92
+; RV32-ZVFHMIN-NEXT:  .LBB28_216: # %cond.store165
+; RV32-ZVFHMIN-NEXT:    li a4, 64
+; RV32-ZVFHMIN-NEXT:    lui a5, 1
+; RV32-ZVFHMIN-NEXT:    addi a5, a5, 1664
+; RV32-ZVFHMIN-NEXT:    add a5, sp, a5
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a4, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v16, (a5)
+; RV32-ZVFHMIN-NEXT:    lh a4, 1512(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a4
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 166(a0)
+; RV32-ZVFHMIN-NEXT:    slli a4, a3, 11
+; RV32-ZVFHMIN-NEXT:    bgez a4, .LBB28_93
+; RV32-ZVFHMIN-NEXT:  .LBB28_217: # %cond.store167
+; RV32-ZVFHMIN-NEXT:    li a4, 64
+; RV32-ZVFHMIN-NEXT:    li a5, 11
+; RV32-ZVFHMIN-NEXT:    slli a5, a5, 9
+; RV32-ZVFHMIN-NEXT:    add a5, sp, a5
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a4, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v16, (a5)
+; RV32-ZVFHMIN-NEXT:    lh a4, 1386(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a4
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 168(a0)
+; RV32-ZVFHMIN-NEXT:    slli a4, a3, 10
+; RV32-ZVFHMIN-NEXT:    bgez a4, .LBB28_94
+; RV32-ZVFHMIN-NEXT:  .LBB28_218: # %cond.store169
+; RV32-ZVFHMIN-NEXT:    li a4, 64
+; RV32-ZVFHMIN-NEXT:    lui a5, 1
+; RV32-ZVFHMIN-NEXT:    addi a5, a5, 1408
+; RV32-ZVFHMIN-NEXT:    add a5, sp, a5
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a4, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v16, (a5)
+; RV32-ZVFHMIN-NEXT:    lh a4, 1260(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a4
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 170(a0)
+; RV32-ZVFHMIN-NEXT:    slli a4, a3, 9
+; RV32-ZVFHMIN-NEXT:    bgez a4, .LBB28_95
+; RV32-ZVFHMIN-NEXT:  .LBB28_219: # %cond.store171
+; RV32-ZVFHMIN-NEXT:    li a4, 64
+; RV32-ZVFHMIN-NEXT:    li a5, 21
+; RV32-ZVFHMIN-NEXT:    slli a5, a5, 8
+; RV32-ZVFHMIN-NEXT:    add a5, sp, a5
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a4, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v16, (a5)
+; RV32-ZVFHMIN-NEXT:    lh a4, 1134(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a4
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 172(a0)
+; RV32-ZVFHMIN-NEXT:    slli a4, a3, 8
+; RV32-ZVFHMIN-NEXT:    bgez a4, .LBB28_96
+; RV32-ZVFHMIN-NEXT:  .LBB28_220: # %cond.store173
+; RV32-ZVFHMIN-NEXT:    li a4, 64
+; RV32-ZVFHMIN-NEXT:    lui a5, 1
+; RV32-ZVFHMIN-NEXT:    addi a5, a5, 1152
+; RV32-ZVFHMIN-NEXT:    add a5, sp, a5
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a4, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v16, (a5)
+; RV32-ZVFHMIN-NEXT:    lh a4, 1008(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a4
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 174(a0)
+; RV32-ZVFHMIN-NEXT:    slli a4, a3, 7
+; RV32-ZVFHMIN-NEXT:    bgez a4, .LBB28_97
+; RV32-ZVFHMIN-NEXT:  .LBB28_221: # %cond.store175
+; RV32-ZVFHMIN-NEXT:    li a4, 64
+; RV32-ZVFHMIN-NEXT:    li a5, 5
+; RV32-ZVFHMIN-NEXT:    slli a5, a5, 10
+; RV32-ZVFHMIN-NEXT:    add a5, sp, a5
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a4, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v16, (a5)
+; RV32-ZVFHMIN-NEXT:    lh a4, 882(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a4
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 176(a0)
+; RV32-ZVFHMIN-NEXT:    slli a4, a3, 6
+; RV32-ZVFHMIN-NEXT:    bgez a4, .LBB28_98
+; RV32-ZVFHMIN-NEXT:  .LBB28_222: # %cond.store177
+; RV32-ZVFHMIN-NEXT:    li a4, 64
+; RV32-ZVFHMIN-NEXT:    lui a5, 1
+; RV32-ZVFHMIN-NEXT:    addi a5, a5, 896
+; RV32-ZVFHMIN-NEXT:    add a5, sp, a5
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a4, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v16, (a5)
+; RV32-ZVFHMIN-NEXT:    lh a4, 756(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a4
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 178(a0)
+; RV32-ZVFHMIN-NEXT:    slli a4, a3, 5
+; RV32-ZVFHMIN-NEXT:    bgez a4, .LBB28_99
+; RV32-ZVFHMIN-NEXT:  .LBB28_223: # %cond.store179
+; RV32-ZVFHMIN-NEXT:    li a4, 64
+; RV32-ZVFHMIN-NEXT:    li a5, 19
+; RV32-ZVFHMIN-NEXT:    slli a5, a5, 8
+; RV32-ZVFHMIN-NEXT:    add a5, sp, a5
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a4, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v16, (a5)
+; RV32-ZVFHMIN-NEXT:    lh a4, 630(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a4
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 180(a0)
+; RV32-ZVFHMIN-NEXT:    slli a4, a3, 4
+; RV32-ZVFHMIN-NEXT:    bgez a4, .LBB28_100
+; RV32-ZVFHMIN-NEXT:  .LBB28_224: # %cond.store181
+; RV32-ZVFHMIN-NEXT:    li a4, 64
+; RV32-ZVFHMIN-NEXT:    lui a5, 1
+; RV32-ZVFHMIN-NEXT:    addi a5, a5, 640
+; RV32-ZVFHMIN-NEXT:    add a5, sp, a5
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a4, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v16, (a5)
+; RV32-ZVFHMIN-NEXT:    lh a4, 504(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a4
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 182(a0)
+; RV32-ZVFHMIN-NEXT:    slli a4, a3, 3
+; RV32-ZVFHMIN-NEXT:    bgez a4, .LBB28_101
+; RV32-ZVFHMIN-NEXT:  .LBB28_225: # %cond.store183
+; RV32-ZVFHMIN-NEXT:    li a4, 64
+; RV32-ZVFHMIN-NEXT:    li a5, 9
+; RV32-ZVFHMIN-NEXT:    slli a5, a5, 9
+; RV32-ZVFHMIN-NEXT:    add a5, sp, a5
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a4, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v16, (a5)
+; RV32-ZVFHMIN-NEXT:    lh a4, 378(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a4
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 184(a0)
+; RV32-ZVFHMIN-NEXT:    slli a4, a3, 2
+; RV32-ZVFHMIN-NEXT:    bltz a4, .LBB28_102
+; RV32-ZVFHMIN-NEXT:    j .LBB28_103
+; RV32-ZVFHMIN-NEXT:  .LBB28_226: # %cond.store191
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    lui a4, 1
+; RV32-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v16, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a3, 2016(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 192(a0)
+; RV32-ZVFHMIN-NEXT:    andi a3, a1, 2
+; RV32-ZVFHMIN-NEXT:    beqz a3, .LBB28_109
+; RV32-ZVFHMIN-NEXT:  .LBB28_227: # %cond.store193
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV32-ZVFHMIN-NEXT:    addi a4, a4, 1921
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v16, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a3, 1890(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 194(a0)
+; RV32-ZVFHMIN-NEXT:    andi a3, a1, 4
+; RV32-ZVFHMIN-NEXT:    beqz a3, .LBB28_110
+; RV32-ZVFHMIN-NEXT:  .LBB28_228: # %cond.store195
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV32-ZVFHMIN-NEXT:    addi a4, a4, 1793
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v16, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a3, 1764(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 196(a0)
+; RV32-ZVFHMIN-NEXT:    andi a3, a1, 8
+; RV32-ZVFHMIN-NEXT:    beqz a3, .LBB28_111
+; RV32-ZVFHMIN-NEXT:  .LBB28_229: # %cond.store197
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV32-ZVFHMIN-NEXT:    addi a4, a4, 1665
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v16, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a3, 1638(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 198(a0)
+; RV32-ZVFHMIN-NEXT:    andi a3, a1, 16
+; RV32-ZVFHMIN-NEXT:    beqz a3, .LBB28_112
+; RV32-ZVFHMIN-NEXT:  .LBB28_230: # %cond.store199
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV32-ZVFHMIN-NEXT:    addi a4, a4, 1537
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v16, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a3, 1512(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 200(a0)
+; RV32-ZVFHMIN-NEXT:    andi a3, a1, 32
+; RV32-ZVFHMIN-NEXT:    beqz a3, .LBB28_113
+; RV32-ZVFHMIN-NEXT:  .LBB28_231: # %cond.store201
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV32-ZVFHMIN-NEXT:    addi a4, a4, 1409
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v16, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a3, 1386(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 202(a0)
+; RV32-ZVFHMIN-NEXT:    andi a3, a1, 64
+; RV32-ZVFHMIN-NEXT:    beqz a3, .LBB28_114
+; RV32-ZVFHMIN-NEXT:  .LBB28_232: # %cond.store203
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV32-ZVFHMIN-NEXT:    addi a4, a4, 1281
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v16, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a3, 1260(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 204(a0)
+; RV32-ZVFHMIN-NEXT:    andi a3, a1, 128
+; RV32-ZVFHMIN-NEXT:    beqz a3, .LBB28_115
+; RV32-ZVFHMIN-NEXT:  .LBB28_233: # %cond.store205
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV32-ZVFHMIN-NEXT:    addi a4, a4, 1153
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v16, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a3, 1134(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 206(a0)
+; RV32-ZVFHMIN-NEXT:    andi a3, a1, 256
+; RV32-ZVFHMIN-NEXT:    beqz a3, .LBB28_116
+; RV32-ZVFHMIN-NEXT:  .LBB28_234: # %cond.store207
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV32-ZVFHMIN-NEXT:    addi a4, a4, 1025
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v16, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a3, 1008(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 208(a0)
+; RV32-ZVFHMIN-NEXT:    andi a3, a1, 512
+; RV32-ZVFHMIN-NEXT:    beqz a3, .LBB28_117
+; RV32-ZVFHMIN-NEXT:  .LBB28_235: # %cond.store209
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV32-ZVFHMIN-NEXT:    addi a4, a4, 897
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v16, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a3, 882(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 210(a0)
+; RV32-ZVFHMIN-NEXT:    andi a3, a1, 1024
+; RV32-ZVFHMIN-NEXT:    beqz a3, .LBB28_118
+; RV32-ZVFHMIN-NEXT:  .LBB28_236: # %cond.store211
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV32-ZVFHMIN-NEXT:    addi a4, a4, 769
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v16, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a3, 756(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 212(a0)
+; RV32-ZVFHMIN-NEXT:    slli a3, a1, 20
+; RV32-ZVFHMIN-NEXT:    bgez a3, .LBB28_119
+; RV32-ZVFHMIN-NEXT:  .LBB28_237: # %cond.store213
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV32-ZVFHMIN-NEXT:    addi a4, a4, 641
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v16, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a3, 630(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 214(a0)
+; RV32-ZVFHMIN-NEXT:    slli a3, a1, 19
+; RV32-ZVFHMIN-NEXT:    bgez a3, .LBB28_120
+; RV32-ZVFHMIN-NEXT:  .LBB28_238: # %cond.store215
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV32-ZVFHMIN-NEXT:    addi a4, a4, 513
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v16, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a3, 504(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 216(a0)
+; RV32-ZVFHMIN-NEXT:    slli a3, a1, 18
+; RV32-ZVFHMIN-NEXT:    bgez a3, .LBB28_121
+; RV32-ZVFHMIN-NEXT:  .LBB28_239: # %cond.store217
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV32-ZVFHMIN-NEXT:    addi a4, a4, 385
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v16, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a3, 378(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 218(a0)
+; RV32-ZVFHMIN-NEXT:    slli a3, a1, 17
+; RV32-ZVFHMIN-NEXT:    bgez a3, .LBB28_122
+; RV32-ZVFHMIN-NEXT:  .LBB28_240: # %cond.store219
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV32-ZVFHMIN-NEXT:    addi a4, a4, 257
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v16, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a3, 252(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 220(a0)
+; RV32-ZVFHMIN-NEXT:    slli a3, a1, 16
+; RV32-ZVFHMIN-NEXT:    bgez a3, .LBB28_123
+; RV32-ZVFHMIN-NEXT:  .LBB28_241: # %cond.store221
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV32-ZVFHMIN-NEXT:    addi a4, a4, 129
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v16, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a3, 126(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 222(a0)
+; RV32-ZVFHMIN-NEXT:    slli a3, a1, 15
+; RV32-ZVFHMIN-NEXT:    bltz a3, .LBB28_242
+; RV32-ZVFHMIN-NEXT:    j .LBB28_124
+; RV32-ZVFHMIN-NEXT:  .LBB28_242: # %cond.store223
+; RV32-ZVFHMIN-NEXT:    li a3, 64
+; RV32-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV32-ZVFHMIN-NEXT:    addi a4, a4, 1
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v16, (a4)
+; RV32-ZVFHMIN-NEXT:    lh a2, 0(a2)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 224(a0)
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 14
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB28_243
+; RV32-ZVFHMIN-NEXT:    j .LBB28_125
+; RV32-ZVFHMIN-NEXT:  .LBB28_243: # %cond.store225
+; RV32-ZVFHMIN-NEXT:    li a2, 64
+; RV32-ZVFHMIN-NEXT:    addi a3, sp, 1920
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v16, (a3)
+; RV32-ZVFHMIN-NEXT:    lh a2, 2018(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 226(a0)
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 13
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB28_244
+; RV32-ZVFHMIN-NEXT:    j .LBB28_126
+; RV32-ZVFHMIN-NEXT:  .LBB28_244: # %cond.store227
+; RV32-ZVFHMIN-NEXT:    li a2, 64
+; RV32-ZVFHMIN-NEXT:    addi a3, sp, 1792
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v16, (a3)
+; RV32-ZVFHMIN-NEXT:    lh a2, 1892(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 228(a0)
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 12
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB28_245
+; RV32-ZVFHMIN-NEXT:    j .LBB28_127
+; RV32-ZVFHMIN-NEXT:  .LBB28_245: # %cond.store229
+; RV32-ZVFHMIN-NEXT:    li a2, 64
+; RV32-ZVFHMIN-NEXT:    addi a3, sp, 1664
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v16, (a3)
+; RV32-ZVFHMIN-NEXT:    lh a2, 1766(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 230(a0)
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 11
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB28_246
+; RV32-ZVFHMIN-NEXT:    j .LBB28_128
+; RV32-ZVFHMIN-NEXT:  .LBB28_246: # %cond.store231
+; RV32-ZVFHMIN-NEXT:    li a2, 64
+; RV32-ZVFHMIN-NEXT:    addi a3, sp, 1536
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v16, (a3)
+; RV32-ZVFHMIN-NEXT:    lh a2, 1640(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 232(a0)
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 10
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB28_247
+; RV32-ZVFHMIN-NEXT:    j .LBB28_129
+; RV32-ZVFHMIN-NEXT:  .LBB28_247: # %cond.store233
+; RV32-ZVFHMIN-NEXT:    li a2, 64
+; RV32-ZVFHMIN-NEXT:    addi a3, sp, 1408
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v16, (a3)
+; RV32-ZVFHMIN-NEXT:    lh a2, 1514(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 234(a0)
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 9
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB28_248
+; RV32-ZVFHMIN-NEXT:    j .LBB28_130
+; RV32-ZVFHMIN-NEXT:  .LBB28_248: # %cond.store235
+; RV32-ZVFHMIN-NEXT:    li a2, 64
+; RV32-ZVFHMIN-NEXT:    addi a3, sp, 1280
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v16, (a3)
+; RV32-ZVFHMIN-NEXT:    lh a2, 1388(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 236(a0)
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 8
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB28_249
+; RV32-ZVFHMIN-NEXT:    j .LBB28_131
+; RV32-ZVFHMIN-NEXT:  .LBB28_249: # %cond.store237
+; RV32-ZVFHMIN-NEXT:    li a2, 64
+; RV32-ZVFHMIN-NEXT:    addi a3, sp, 1152
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v16, (a3)
+; RV32-ZVFHMIN-NEXT:    lh a2, 1262(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 238(a0)
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 7
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB28_250
+; RV32-ZVFHMIN-NEXT:    j .LBB28_132
+; RV32-ZVFHMIN-NEXT:  .LBB28_250: # %cond.store239
+; RV32-ZVFHMIN-NEXT:    li a2, 64
+; RV32-ZVFHMIN-NEXT:    addi a3, sp, 1024
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v16, (a3)
+; RV32-ZVFHMIN-NEXT:    lh a2, 1136(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 240(a0)
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 6
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB28_251
+; RV32-ZVFHMIN-NEXT:    j .LBB28_133
+; RV32-ZVFHMIN-NEXT:  .LBB28_251: # %cond.store241
+; RV32-ZVFHMIN-NEXT:    li a2, 64
+; RV32-ZVFHMIN-NEXT:    addi a3, sp, 896
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v16, (a3)
+; RV32-ZVFHMIN-NEXT:    lh a2, 1010(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 242(a0)
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 5
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB28_252
+; RV32-ZVFHMIN-NEXT:    j .LBB28_134
+; RV32-ZVFHMIN-NEXT:  .LBB28_252: # %cond.store243
+; RV32-ZVFHMIN-NEXT:    li a2, 64
+; RV32-ZVFHMIN-NEXT:    addi a3, sp, 768
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v16, (a3)
+; RV32-ZVFHMIN-NEXT:    lh a2, 884(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 244(a0)
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 4
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB28_253
+; RV32-ZVFHMIN-NEXT:    j .LBB28_135
+; RV32-ZVFHMIN-NEXT:  .LBB28_253: # %cond.store245
+; RV32-ZVFHMIN-NEXT:    li a2, 64
+; RV32-ZVFHMIN-NEXT:    addi a3, sp, 640
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v16, (a3)
+; RV32-ZVFHMIN-NEXT:    lh a2, 758(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 246(a0)
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 3
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB28_254
+; RV32-ZVFHMIN-NEXT:    j .LBB28_136
+; RV32-ZVFHMIN-NEXT:  .LBB28_254: # %cond.store247
+; RV32-ZVFHMIN-NEXT:    li a2, 64
+; RV32-ZVFHMIN-NEXT:    addi a3, sp, 512
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v16, (a3)
+; RV32-ZVFHMIN-NEXT:    lh a2, 632(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 248(a0)
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 2
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB28_255
+; RV32-ZVFHMIN-NEXT:    j .LBB28_137
+; RV32-ZVFHMIN-NEXT:  .LBB28_255: # %cond.store249
+; RV32-ZVFHMIN-NEXT:    li a2, 64
+; RV32-ZVFHMIN-NEXT:    addi a3, sp, 384
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v16, (a3)
+; RV32-ZVFHMIN-NEXT:    lh a2, 506(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 250(a0)
+; RV32-ZVFHMIN-NEXT:    slli a2, a1, 1
+; RV32-ZVFHMIN-NEXT:    bltz a2, .LBB28_256
+; RV32-ZVFHMIN-NEXT:    j .LBB28_138
+; RV32-ZVFHMIN-NEXT:  .LBB28_256: # %cond.store251
+; RV32-ZVFHMIN-NEXT:    li a2, 64
+; RV32-ZVFHMIN-NEXT:    addi a3, sp, 256
+; RV32-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV32-ZVFHMIN-NEXT:    vse16.v v16, (a3)
+; RV32-ZVFHMIN-NEXT:    lh a2, 380(sp)
+; RV32-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV32-ZVFHMIN-NEXT:    fsh fa5, 252(a0)
+; RV32-ZVFHMIN-NEXT:    bgez a1, .LBB28_257
+; RV32-ZVFHMIN-NEXT:    j .LBB28_139
+; RV32-ZVFHMIN-NEXT:  .LBB28_257: # %cond.store251
+; RV32-ZVFHMIN-NEXT:    j .LBB28_140
+;
+; RV64-ZVFHMIN-LABEL: masked_store_v128f16:
+; RV64-ZVFHMIN:       # %bb.0:
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v0
+; RV64-ZVFHMIN-NEXT:    andi a1, a2, 1
+; RV64-ZVFHMIN-NEXT:    bnez a1, .LBB28_137
+; RV64-ZVFHMIN-NEXT:  # %bb.1: # %else
+; RV64-ZVFHMIN-NEXT:    andi a1, a2, 2
+; RV64-ZVFHMIN-NEXT:    bnez a1, .LBB28_138
+; RV64-ZVFHMIN-NEXT:  .LBB28_2: # %else2
+; RV64-ZVFHMIN-NEXT:    andi a1, a2, 4
+; RV64-ZVFHMIN-NEXT:    bnez a1, .LBB28_139
+; RV64-ZVFHMIN-NEXT:  .LBB28_3: # %else4
+; RV64-ZVFHMIN-NEXT:    andi a1, a2, 8
+; RV64-ZVFHMIN-NEXT:    bnez a1, .LBB28_140
+; RV64-ZVFHMIN-NEXT:  .LBB28_4: # %else6
+; RV64-ZVFHMIN-NEXT:    andi a1, a2, 16
+; RV64-ZVFHMIN-NEXT:    bnez a1, .LBB28_141
+; RV64-ZVFHMIN-NEXT:  .LBB28_5: # %else8
+; RV64-ZVFHMIN-NEXT:    andi a1, a2, 32
+; RV64-ZVFHMIN-NEXT:    bnez a1, .LBB28_142
+; RV64-ZVFHMIN-NEXT:  .LBB28_6: # %else10
+; RV64-ZVFHMIN-NEXT:    andi a1, a2, 64
+; RV64-ZVFHMIN-NEXT:    bnez a1, .LBB28_143
+; RV64-ZVFHMIN-NEXT:  .LBB28_7: # %else12
+; RV64-ZVFHMIN-NEXT:    andi a1, a2, 128
+; RV64-ZVFHMIN-NEXT:    bnez a1, .LBB28_144
+; RV64-ZVFHMIN-NEXT:  .LBB28_8: # %else14
+; RV64-ZVFHMIN-NEXT:    andi a1, a2, 256
+; RV64-ZVFHMIN-NEXT:    bnez a1, .LBB28_145
+; RV64-ZVFHMIN-NEXT:  .LBB28_9: # %else16
+; RV64-ZVFHMIN-NEXT:    andi a1, a2, 512
+; RV64-ZVFHMIN-NEXT:    bnez a1, .LBB28_146
+; RV64-ZVFHMIN-NEXT:  .LBB28_10: # %else18
+; RV64-ZVFHMIN-NEXT:    andi a1, a2, 1024
+; RV64-ZVFHMIN-NEXT:    bnez a1, .LBB28_147
+; RV64-ZVFHMIN-NEXT:  .LBB28_11: # %else20
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 52
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_148
+; RV64-ZVFHMIN-NEXT:  .LBB28_12: # %else22
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 51
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_149
+; RV64-ZVFHMIN-NEXT:  .LBB28_13: # %else24
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 50
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_150
+; RV64-ZVFHMIN-NEXT:  .LBB28_14: # %else26
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 49
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_151
+; RV64-ZVFHMIN-NEXT:  .LBB28_15: # %else28
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 48
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_17
+; RV64-ZVFHMIN-NEXT:  .LBB28_16: # %cond.store29
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v24, v8, 15
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a1, v24
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 30(a0)
+; RV64-ZVFHMIN-NEXT:  .LBB28_17: # %else30
+; RV64-ZVFHMIN-NEXT:    addi sp, sp, -2032
+; RV64-ZVFHMIN-NEXT:    .cfi_def_cfa_offset 2032
+; RV64-ZVFHMIN-NEXT:    sd ra, 2024(sp) # 8-byte Folded Spill
+; RV64-ZVFHMIN-NEXT:    sd s0, 2016(sp) # 8-byte Folded Spill
+; RV64-ZVFHMIN-NEXT:    .cfi_offset ra, -8
+; RV64-ZVFHMIN-NEXT:    .cfi_offset s0, -16
+; RV64-ZVFHMIN-NEXT:    addi s0, sp, 2032
+; RV64-ZVFHMIN-NEXT:    .cfi_def_cfa s0, 0
+; RV64-ZVFHMIN-NEXT:    .cfi_remember_state
+; RV64-ZVFHMIN-NEXT:    lui a1, 3
+; RV64-ZVFHMIN-NEXT:    addiw a1, a1, -1776
+; RV64-ZVFHMIN-NEXT:    sub sp, sp, a1
+; RV64-ZVFHMIN-NEXT:    andi sp, sp, -128
+; RV64-ZVFHMIN-NEXT:    slli a3, a2, 47
+; RV64-ZVFHMIN-NEXT:    lui a1, 3
+; RV64-ZVFHMIN-NEXT:    addiw a1, a1, -1606
+; RV64-ZVFHMIN-NEXT:    add a1, sp, a1
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_152
+; RV64-ZVFHMIN-NEXT:  # %bb.18: # %else32
+; RV64-ZVFHMIN-NEXT:    slli a3, a2, 46
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_153
+; RV64-ZVFHMIN-NEXT:  .LBB28_19: # %else34
+; RV64-ZVFHMIN-NEXT:    slli a3, a2, 45
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_154
+; RV64-ZVFHMIN-NEXT:  .LBB28_20: # %else36
+; RV64-ZVFHMIN-NEXT:    slli a3, a2, 44
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_155
+; RV64-ZVFHMIN-NEXT:  .LBB28_21: # %else38
+; RV64-ZVFHMIN-NEXT:    slli a3, a2, 43
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_156
+; RV64-ZVFHMIN-NEXT:  .LBB28_22: # %else40
+; RV64-ZVFHMIN-NEXT:    slli a3, a2, 42
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_157
+; RV64-ZVFHMIN-NEXT:  .LBB28_23: # %else42
+; RV64-ZVFHMIN-NEXT:    slli a3, a2, 41
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_158
+; RV64-ZVFHMIN-NEXT:  .LBB28_24: # %else44
+; RV64-ZVFHMIN-NEXT:    slli a3, a2, 40
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_159
+; RV64-ZVFHMIN-NEXT:  .LBB28_25: # %else46
+; RV64-ZVFHMIN-NEXT:    slli a3, a2, 39
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_160
+; RV64-ZVFHMIN-NEXT:  .LBB28_26: # %else48
+; RV64-ZVFHMIN-NEXT:    slli a3, a2, 38
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_161
+; RV64-ZVFHMIN-NEXT:  .LBB28_27: # %else50
+; RV64-ZVFHMIN-NEXT:    slli a3, a2, 37
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_162
+; RV64-ZVFHMIN-NEXT:  .LBB28_28: # %else52
+; RV64-ZVFHMIN-NEXT:    slli a3, a2, 36
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_163
+; RV64-ZVFHMIN-NEXT:  .LBB28_29: # %else54
+; RV64-ZVFHMIN-NEXT:    slli a3, a2, 35
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_164
+; RV64-ZVFHMIN-NEXT:  .LBB28_30: # %else56
+; RV64-ZVFHMIN-NEXT:    slli a3, a2, 34
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB28_32
+; RV64-ZVFHMIN-NEXT:  .LBB28_31: # %cond.store57
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    lui a4, 3
+; RV64-ZVFHMIN-NEXT:    addiw a4, a4, -1664
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a1, 0(a1)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 58(a0)
+; RV64-ZVFHMIN-NEXT:  .LBB28_32: # %else58
+; RV64-ZVFHMIN-NEXT:    slli a3, a2, 33
+; RV64-ZVFHMIN-NEXT:    lui a1, 2
+; RV64-ZVFHMIN-NEXT:    addiw a1, a1, 348
+; RV64-ZVFHMIN-NEXT:    add a1, sp, a1
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_165
+; RV64-ZVFHMIN-NEXT:  # %bb.33: # %else60
+; RV64-ZVFHMIN-NEXT:    slli a3, a2, 32
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_166
+; RV64-ZVFHMIN-NEXT:  .LBB28_34: # %else62
+; RV64-ZVFHMIN-NEXT:    slli a3, a2, 31
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_167
+; RV64-ZVFHMIN-NEXT:  .LBB28_35: # %else64
+; RV64-ZVFHMIN-NEXT:    slli a3, a2, 30
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_168
+; RV64-ZVFHMIN-NEXT:  .LBB28_36: # %else66
+; RV64-ZVFHMIN-NEXT:    slli a3, a2, 29
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_169
+; RV64-ZVFHMIN-NEXT:  .LBB28_37: # %else68
+; RV64-ZVFHMIN-NEXT:    slli a3, a2, 28
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_170
+; RV64-ZVFHMIN-NEXT:  .LBB28_38: # %else70
+; RV64-ZVFHMIN-NEXT:    slli a3, a2, 27
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_171
+; RV64-ZVFHMIN-NEXT:  .LBB28_39: # %else72
+; RV64-ZVFHMIN-NEXT:    slli a3, a2, 26
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_172
+; RV64-ZVFHMIN-NEXT:  .LBB28_40: # %else74
+; RV64-ZVFHMIN-NEXT:    slli a3, a2, 25
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_173
+; RV64-ZVFHMIN-NEXT:  .LBB28_41: # %else76
+; RV64-ZVFHMIN-NEXT:    slli a3, a2, 24
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_174
+; RV64-ZVFHMIN-NEXT:  .LBB28_42: # %else78
+; RV64-ZVFHMIN-NEXT:    slli a3, a2, 23
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_175
+; RV64-ZVFHMIN-NEXT:  .LBB28_43: # %else80
+; RV64-ZVFHMIN-NEXT:    slli a3, a2, 22
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_176
+; RV64-ZVFHMIN-NEXT:  .LBB28_44: # %else82
+; RV64-ZVFHMIN-NEXT:    slli a3, a2, 21
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_177
+; RV64-ZVFHMIN-NEXT:  .LBB28_45: # %else84
+; RV64-ZVFHMIN-NEXT:    slli a3, a2, 20
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_178
+; RV64-ZVFHMIN-NEXT:  .LBB28_46: # %else86
+; RV64-ZVFHMIN-NEXT:    slli a3, a2, 19
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_179
+; RV64-ZVFHMIN-NEXT:  .LBB28_47: # %else88
+; RV64-ZVFHMIN-NEXT:    slli a3, a2, 18
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_180
+; RV64-ZVFHMIN-NEXT:  .LBB28_48: # %else90
+; RV64-ZVFHMIN-NEXT:    slli a3, a2, 17
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB28_50
+; RV64-ZVFHMIN-NEXT:  .LBB28_49: # %cond.store91
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    lui a4, 2
+; RV64-ZVFHMIN-NEXT:    addiw a4, a4, 256
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a1, 0(a1)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 92(a0)
+; RV64-ZVFHMIN-NEXT:  .LBB28_50: # %else92
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 16
+; RV64-ZVFHMIN-NEXT:    lui a3, 2
+; RV64-ZVFHMIN-NEXT:    addiw a3, a3, -1794
+; RV64-ZVFHMIN-NEXT:    add a3, sp, a3
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_181
+; RV64-ZVFHMIN-NEXT:  # %bb.51: # %else94
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 15
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_182
+; RV64-ZVFHMIN-NEXT:  .LBB28_52: # %else96
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 14
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_183
+; RV64-ZVFHMIN-NEXT:  .LBB28_53: # %else98
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 13
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_184
+; RV64-ZVFHMIN-NEXT:  .LBB28_54: # %else100
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 12
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_185
+; RV64-ZVFHMIN-NEXT:  .LBB28_55: # %else102
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 11
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_186
+; RV64-ZVFHMIN-NEXT:  .LBB28_56: # %else104
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 10
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_187
+; RV64-ZVFHMIN-NEXT:  .LBB28_57: # %else106
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 9
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_188
+; RV64-ZVFHMIN-NEXT:  .LBB28_58: # %else108
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 8
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_189
+; RV64-ZVFHMIN-NEXT:  .LBB28_59: # %else110
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 7
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_190
+; RV64-ZVFHMIN-NEXT:  .LBB28_60: # %else112
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 6
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_191
+; RV64-ZVFHMIN-NEXT:  .LBB28_61: # %else114
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 5
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_192
+; RV64-ZVFHMIN-NEXT:  .LBB28_62: # %else116
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 4
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_193
+; RV64-ZVFHMIN-NEXT:  .LBB28_63: # %else118
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 3
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_194
+; RV64-ZVFHMIN-NEXT:  .LBB28_64: # %else120
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 2
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_66
+; RV64-ZVFHMIN-NEXT:  .LBB28_65: # %cond.store121
+; RV64-ZVFHMIN-NEXT:    li a1, 64
+; RV64-ZVFHMIN-NEXT:    lui a4, 2
+; RV64-ZVFHMIN-NEXT:    addiw a4, a4, -1664
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a1, 252(a3)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 122(a0)
+; RV64-ZVFHMIN-NEXT:  .LBB28_66: # %else122
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 1
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v24, v0, 1
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_68
+; RV64-ZVFHMIN-NEXT:  # %bb.67: # %cond.store123
+; RV64-ZVFHMIN-NEXT:    li a1, 64
+; RV64-ZVFHMIN-NEXT:    li a4, 25
+; RV64-ZVFHMIN-NEXT:    slli a4, a4, 8
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a1, 126(a3)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 124(a0)
+; RV64-ZVFHMIN-NEXT:  .LBB28_68: # %else124
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a1, v24
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_195
+; RV64-ZVFHMIN-NEXT:  # %bb.69: # %else126
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 1
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB28_196
+; RV64-ZVFHMIN-NEXT:  .LBB28_70: # %else128
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 2
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB28_197
+; RV64-ZVFHMIN-NEXT:  .LBB28_71: # %else130
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 4
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB28_198
+; RV64-ZVFHMIN-NEXT:  .LBB28_72: # %else132
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 8
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB28_199
+; RV64-ZVFHMIN-NEXT:  .LBB28_73: # %else134
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 16
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB28_200
+; RV64-ZVFHMIN-NEXT:  .LBB28_74: # %else136
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 32
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB28_201
+; RV64-ZVFHMIN-NEXT:  .LBB28_75: # %else138
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 64
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB28_202
+; RV64-ZVFHMIN-NEXT:  .LBB28_76: # %else140
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 128
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB28_203
+; RV64-ZVFHMIN-NEXT:  .LBB28_77: # %else142
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 256
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB28_204
+; RV64-ZVFHMIN-NEXT:  .LBB28_78: # %else144
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 512
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB28_205
+; RV64-ZVFHMIN-NEXT:  .LBB28_79: # %else146
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 1024
+; RV64-ZVFHMIN-NEXT:    bnez a2, .LBB28_206
+; RV64-ZVFHMIN-NEXT:  .LBB28_80: # %else148
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 52
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_207
+; RV64-ZVFHMIN-NEXT:  .LBB28_81: # %else150
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 51
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_208
+; RV64-ZVFHMIN-NEXT:  .LBB28_82: # %else152
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 50
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_209
+; RV64-ZVFHMIN-NEXT:  .LBB28_83: # %else154
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 49
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_210
+; RV64-ZVFHMIN-NEXT:  .LBB28_84: # %else156
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 48
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_86
+; RV64-ZVFHMIN-NEXT:  .LBB28_85: # %cond.store157
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v8, v16, 15
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v8
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 158(a0)
+; RV64-ZVFHMIN-NEXT:  .LBB28_86: # %else158
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 47
+; RV64-ZVFHMIN-NEXT:    lui a2, 1
+; RV64-ZVFHMIN-NEXT:    addiw a2, a2, 190
+; RV64-ZVFHMIN-NEXT:    add a2, sp, a2
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_211
+; RV64-ZVFHMIN-NEXT:  # %bb.87: # %else160
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 46
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_212
+; RV64-ZVFHMIN-NEXT:  .LBB28_88: # %else162
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 45
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_213
+; RV64-ZVFHMIN-NEXT:  .LBB28_89: # %else164
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 44
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_214
+; RV64-ZVFHMIN-NEXT:  .LBB28_90: # %else166
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 43
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_215
+; RV64-ZVFHMIN-NEXT:  .LBB28_91: # %else168
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 42
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_216
+; RV64-ZVFHMIN-NEXT:  .LBB28_92: # %else170
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 41
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_217
+; RV64-ZVFHMIN-NEXT:  .LBB28_93: # %else172
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 40
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_218
+; RV64-ZVFHMIN-NEXT:  .LBB28_94: # %else174
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 39
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_219
+; RV64-ZVFHMIN-NEXT:  .LBB28_95: # %else176
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 38
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_220
+; RV64-ZVFHMIN-NEXT:  .LBB28_96: # %else178
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 37
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_221
+; RV64-ZVFHMIN-NEXT:  .LBB28_97: # %else180
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 36
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_222
+; RV64-ZVFHMIN-NEXT:  .LBB28_98: # %else182
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 35
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_223
+; RV64-ZVFHMIN-NEXT:  .LBB28_99: # %else184
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 34
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_224
+; RV64-ZVFHMIN-NEXT:  .LBB28_100: # %else186
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 33
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_225
+; RV64-ZVFHMIN-NEXT:  .LBB28_101: # %else188
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 32
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB28_103
+; RV64-ZVFHMIN-NEXT:  .LBB28_102: # %cond.store189
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    lui a4, 1
+; RV64-ZVFHMIN-NEXT:    addiw a4, a4, 128
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v16, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a2, 0(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 190(a0)
+; RV64-ZVFHMIN-NEXT:  .LBB28_103: # %else190
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 31
+; RV64-ZVFHMIN-NEXT:    addi a2, sp, 2047
+; RV64-ZVFHMIN-NEXT:    addi a2, a2, 97
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_226
+; RV64-ZVFHMIN-NEXT:  # %bb.104: # %else192
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 30
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_227
+; RV64-ZVFHMIN-NEXT:  .LBB28_105: # %else194
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 29
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_228
+; RV64-ZVFHMIN-NEXT:  .LBB28_106: # %else196
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 28
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_229
+; RV64-ZVFHMIN-NEXT:  .LBB28_107: # %else198
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 27
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_230
+; RV64-ZVFHMIN-NEXT:  .LBB28_108: # %else200
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 26
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_231
+; RV64-ZVFHMIN-NEXT:  .LBB28_109: # %else202
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 25
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_232
+; RV64-ZVFHMIN-NEXT:  .LBB28_110: # %else204
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 24
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_233
+; RV64-ZVFHMIN-NEXT:  .LBB28_111: # %else206
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 23
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_234
+; RV64-ZVFHMIN-NEXT:  .LBB28_112: # %else208
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 22
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB28_113
+; RV64-ZVFHMIN-NEXT:    j .LBB28_235
+; RV64-ZVFHMIN-NEXT:  .LBB28_113: # %else210
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 21
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB28_114
+; RV64-ZVFHMIN-NEXT:    j .LBB28_236
+; RV64-ZVFHMIN-NEXT:  .LBB28_114: # %else212
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 20
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB28_115
+; RV64-ZVFHMIN-NEXT:    j .LBB28_237
+; RV64-ZVFHMIN-NEXT:  .LBB28_115: # %else214
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 19
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB28_116
+; RV64-ZVFHMIN-NEXT:    j .LBB28_238
+; RV64-ZVFHMIN-NEXT:  .LBB28_116: # %else216
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 18
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB28_117
+; RV64-ZVFHMIN-NEXT:    j .LBB28_239
+; RV64-ZVFHMIN-NEXT:  .LBB28_117: # %else218
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 17
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB28_118
+; RV64-ZVFHMIN-NEXT:    j .LBB28_240
+; RV64-ZVFHMIN-NEXT:  .LBB28_118: # %else220
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 16
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB28_119
+; RV64-ZVFHMIN-NEXT:    j .LBB28_241
+; RV64-ZVFHMIN-NEXT:  .LBB28_119: # %else222
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 15
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB28_120
+; RV64-ZVFHMIN-NEXT:    j .LBB28_242
+; RV64-ZVFHMIN-NEXT:  .LBB28_120: # %else224
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 14
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_121
+; RV64-ZVFHMIN-NEXT:    j .LBB28_243
+; RV64-ZVFHMIN-NEXT:  .LBB28_121: # %else226
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 13
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_122
+; RV64-ZVFHMIN-NEXT:    j .LBB28_244
+; RV64-ZVFHMIN-NEXT:  .LBB28_122: # %else228
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 12
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_123
+; RV64-ZVFHMIN-NEXT:    j .LBB28_245
+; RV64-ZVFHMIN-NEXT:  .LBB28_123: # %else230
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 11
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_124
+; RV64-ZVFHMIN-NEXT:    j .LBB28_246
+; RV64-ZVFHMIN-NEXT:  .LBB28_124: # %else232
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 10
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_125
+; RV64-ZVFHMIN-NEXT:    j .LBB28_247
+; RV64-ZVFHMIN-NEXT:  .LBB28_125: # %else234
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 9
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_126
+; RV64-ZVFHMIN-NEXT:    j .LBB28_248
+; RV64-ZVFHMIN-NEXT:  .LBB28_126: # %else236
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 8
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_127
+; RV64-ZVFHMIN-NEXT:    j .LBB28_249
+; RV64-ZVFHMIN-NEXT:  .LBB28_127: # %else238
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 7
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_128
+; RV64-ZVFHMIN-NEXT:    j .LBB28_250
+; RV64-ZVFHMIN-NEXT:  .LBB28_128: # %else240
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 6
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_129
+; RV64-ZVFHMIN-NEXT:    j .LBB28_251
+; RV64-ZVFHMIN-NEXT:  .LBB28_129: # %else242
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 5
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_130
+; RV64-ZVFHMIN-NEXT:    j .LBB28_252
+; RV64-ZVFHMIN-NEXT:  .LBB28_130: # %else244
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 4
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_131
+; RV64-ZVFHMIN-NEXT:    j .LBB28_253
+; RV64-ZVFHMIN-NEXT:  .LBB28_131: # %else246
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 3
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_132
+; RV64-ZVFHMIN-NEXT:    j .LBB28_254
+; RV64-ZVFHMIN-NEXT:  .LBB28_132: # %else248
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 2
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_133
+; RV64-ZVFHMIN-NEXT:    j .LBB28_255
+; RV64-ZVFHMIN-NEXT:  .LBB28_133: # %else250
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 1
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_134
+; RV64-ZVFHMIN-NEXT:    j .LBB28_256
+; RV64-ZVFHMIN-NEXT:  .LBB28_134: # %else252
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_136
+; RV64-ZVFHMIN-NEXT:  .LBB28_135: # %cond.store253
+; RV64-ZVFHMIN-NEXT:    li a1, 64
+; RV64-ZVFHMIN-NEXT:    addi a2, sp, 128
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v16, (a2)
+; RV64-ZVFHMIN-NEXT:    lh a1, 254(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 254(a0)
+; RV64-ZVFHMIN-NEXT:  .LBB28_136: # %else254
+; RV64-ZVFHMIN-NEXT:    addi sp, s0, -2032
+; RV64-ZVFHMIN-NEXT:    .cfi_def_cfa sp, 2032
+; RV64-ZVFHMIN-NEXT:    ld ra, 2024(sp) # 8-byte Folded Reload
+; RV64-ZVFHMIN-NEXT:    ld s0, 2016(sp) # 8-byte Folded Reload
+; RV64-ZVFHMIN-NEXT:    .cfi_restore ra
+; RV64-ZVFHMIN-NEXT:    .cfi_restore s0
+; RV64-ZVFHMIN-NEXT:    addi sp, sp, 2032
+; RV64-ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
+; RV64-ZVFHMIN-NEXT:    ret
+; RV64-ZVFHMIN-NEXT:  .LBB28_137: # %cond.store
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a1, v8
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 0(a0)
+; RV64-ZVFHMIN-NEXT:    andi a1, a2, 2
+; RV64-ZVFHMIN-NEXT:    beqz a1, .LBB28_2
+; RV64-ZVFHMIN-NEXT:  .LBB28_138: # %cond.store1
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v24, v8, 1
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a1, v24
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 2(a0)
+; RV64-ZVFHMIN-NEXT:    andi a1, a2, 4
+; RV64-ZVFHMIN-NEXT:    beqz a1, .LBB28_3
+; RV64-ZVFHMIN-NEXT:  .LBB28_139: # %cond.store3
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v24, v8, 2
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a1, v24
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 4(a0)
+; RV64-ZVFHMIN-NEXT:    andi a1, a2, 8
+; RV64-ZVFHMIN-NEXT:    beqz a1, .LBB28_4
+; RV64-ZVFHMIN-NEXT:  .LBB28_140: # %cond.store5
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v24, v8, 3
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a1, v24
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 6(a0)
+; RV64-ZVFHMIN-NEXT:    andi a1, a2, 16
+; RV64-ZVFHMIN-NEXT:    beqz a1, .LBB28_5
+; RV64-ZVFHMIN-NEXT:  .LBB28_141: # %cond.store7
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v24, v8, 4
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a1, v24
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 8(a0)
+; RV64-ZVFHMIN-NEXT:    andi a1, a2, 32
+; RV64-ZVFHMIN-NEXT:    beqz a1, .LBB28_6
+; RV64-ZVFHMIN-NEXT:  .LBB28_142: # %cond.store9
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v24, v8, 5
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a1, v24
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 10(a0)
+; RV64-ZVFHMIN-NEXT:    andi a1, a2, 64
+; RV64-ZVFHMIN-NEXT:    beqz a1, .LBB28_7
+; RV64-ZVFHMIN-NEXT:  .LBB28_143: # %cond.store11
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v24, v8, 6
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a1, v24
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 12(a0)
+; RV64-ZVFHMIN-NEXT:    andi a1, a2, 128
+; RV64-ZVFHMIN-NEXT:    beqz a1, .LBB28_8
+; RV64-ZVFHMIN-NEXT:  .LBB28_144: # %cond.store13
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v24, v8, 7
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a1, v24
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 14(a0)
+; RV64-ZVFHMIN-NEXT:    andi a1, a2, 256
+; RV64-ZVFHMIN-NEXT:    beqz a1, .LBB28_9
+; RV64-ZVFHMIN-NEXT:  .LBB28_145: # %cond.store15
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v24, v8, 8
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a1, v24
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 16(a0)
+; RV64-ZVFHMIN-NEXT:    andi a1, a2, 512
+; RV64-ZVFHMIN-NEXT:    beqz a1, .LBB28_10
+; RV64-ZVFHMIN-NEXT:  .LBB28_146: # %cond.store17
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v24, v8, 9
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a1, v24
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 18(a0)
+; RV64-ZVFHMIN-NEXT:    andi a1, a2, 1024
+; RV64-ZVFHMIN-NEXT:    beqz a1, .LBB28_11
+; RV64-ZVFHMIN-NEXT:  .LBB28_147: # %cond.store19
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v24, v8, 10
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a1, v24
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 20(a0)
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 52
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_12
+; RV64-ZVFHMIN-NEXT:  .LBB28_148: # %cond.store21
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v24, v8, 11
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a1, v24
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 22(a0)
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 51
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_13
+; RV64-ZVFHMIN-NEXT:  .LBB28_149: # %cond.store23
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v24, v8, 12
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a1, v24
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 24(a0)
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 50
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_14
+; RV64-ZVFHMIN-NEXT:  .LBB28_150: # %cond.store25
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v24, v8, 13
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a1, v24
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 26(a0)
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 49
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_15
+; RV64-ZVFHMIN-NEXT:  .LBB28_151: # %cond.store27
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v24, v8, 14
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a1, v24
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 28(a0)
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 48
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_16
+; RV64-ZVFHMIN-NEXT:    j .LBB28_17
+; RV64-ZVFHMIN-NEXT:  .LBB28_152: # %cond.store31
+; RV64-ZVFHMIN-NEXT:    .cfi_restore_state
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    lui a4, 3
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 1638(a1)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 32(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a2, 46
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB28_19
+; RV64-ZVFHMIN-NEXT:  .LBB28_153: # %cond.store33
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    lui a4, 3
+; RV64-ZVFHMIN-NEXT:    addiw a4, a4, -128
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 1512(a1)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 34(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a2, 45
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB28_20
+; RV64-ZVFHMIN-NEXT:  .LBB28_154: # %cond.store35
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    lui a4, 3
+; RV64-ZVFHMIN-NEXT:    addiw a4, a4, -256
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 1386(a1)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 36(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a2, 44
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB28_21
+; RV64-ZVFHMIN-NEXT:  .LBB28_155: # %cond.store37
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    lui a4, 3
+; RV64-ZVFHMIN-NEXT:    addiw a4, a4, -384
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 1260(a1)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 38(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a2, 43
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB28_22
+; RV64-ZVFHMIN-NEXT:  .LBB28_156: # %cond.store39
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    li a4, 23
+; RV64-ZVFHMIN-NEXT:    slli a4, a4, 9
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 1134(a1)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 40(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a2, 42
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB28_23
+; RV64-ZVFHMIN-NEXT:  .LBB28_157: # %cond.store41
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    lui a4, 3
+; RV64-ZVFHMIN-NEXT:    addiw a4, a4, -640
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 1008(a1)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 42(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a2, 41
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB28_24
+; RV64-ZVFHMIN-NEXT:  .LBB28_158: # %cond.store43
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    lui a4, 3
+; RV64-ZVFHMIN-NEXT:    addiw a4, a4, -768
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 882(a1)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 44(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a2, 40
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB28_25
+; RV64-ZVFHMIN-NEXT:  .LBB28_159: # %cond.store45
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    lui a4, 3
+; RV64-ZVFHMIN-NEXT:    addiw a4, a4, -896
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 756(a1)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 46(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a2, 39
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB28_26
+; RV64-ZVFHMIN-NEXT:  .LBB28_160: # %cond.store47
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    li a4, 11
+; RV64-ZVFHMIN-NEXT:    slli a4, a4, 10
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 630(a1)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 48(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a2, 38
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB28_27
+; RV64-ZVFHMIN-NEXT:  .LBB28_161: # %cond.store49
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    lui a4, 3
+; RV64-ZVFHMIN-NEXT:    addiw a4, a4, -1152
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 504(a1)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 50(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a2, 37
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB28_28
+; RV64-ZVFHMIN-NEXT:  .LBB28_162: # %cond.store51
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    lui a4, 3
+; RV64-ZVFHMIN-NEXT:    addiw a4, a4, -1280
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 378(a1)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 52(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a2, 36
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB28_29
+; RV64-ZVFHMIN-NEXT:  .LBB28_163: # %cond.store53
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    lui a4, 3
+; RV64-ZVFHMIN-NEXT:    addiw a4, a4, -1408
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 252(a1)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 54(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a2, 35
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB28_30
+; RV64-ZVFHMIN-NEXT:  .LBB28_164: # %cond.store55
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    li a4, 21
+; RV64-ZVFHMIN-NEXT:    slli a4, a4, 9
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 126(a1)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 56(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a2, 34
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_31
+; RV64-ZVFHMIN-NEXT:    j .LBB28_32
+; RV64-ZVFHMIN-NEXT:  .LBB28_165: # %cond.store59
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    lui a4, 3
+; RV64-ZVFHMIN-NEXT:    addiw a4, a4, -1792
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 2016(a1)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 60(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a2, 32
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB28_34
+; RV64-ZVFHMIN-NEXT:  .LBB28_166: # %cond.store61
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    lui a4, 3
+; RV64-ZVFHMIN-NEXT:    addiw a4, a4, -1920
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 1890(a1)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 62(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a2, 31
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB28_35
+; RV64-ZVFHMIN-NEXT:  .LBB28_167: # %cond.store63
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    li a4, 5
+; RV64-ZVFHMIN-NEXT:    slli a4, a4, 11
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 1764(a1)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 64(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a2, 30
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB28_36
+; RV64-ZVFHMIN-NEXT:  .LBB28_168: # %cond.store65
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    lui a4, 2
+; RV64-ZVFHMIN-NEXT:    addiw a4, a4, 1920
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 1638(a1)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 66(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a2, 29
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB28_37
+; RV64-ZVFHMIN-NEXT:  .LBB28_169: # %cond.store67
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    lui a4, 2
+; RV64-ZVFHMIN-NEXT:    addiw a4, a4, 1792
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 1512(a1)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 68(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a2, 28
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB28_38
+; RV64-ZVFHMIN-NEXT:  .LBB28_170: # %cond.store69
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    lui a4, 2
+; RV64-ZVFHMIN-NEXT:    addiw a4, a4, 1664
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 1386(a1)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 70(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a2, 27
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB28_39
+; RV64-ZVFHMIN-NEXT:  .LBB28_171: # %cond.store71
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    li a4, 19
+; RV64-ZVFHMIN-NEXT:    slli a4, a4, 9
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 1260(a1)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 72(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a2, 26
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB28_40
+; RV64-ZVFHMIN-NEXT:  .LBB28_172: # %cond.store73
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    lui a4, 2
+; RV64-ZVFHMIN-NEXT:    addiw a4, a4, 1408
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 1134(a1)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 74(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a2, 25
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB28_41
+; RV64-ZVFHMIN-NEXT:  .LBB28_173: # %cond.store75
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    lui a4, 2
+; RV64-ZVFHMIN-NEXT:    addiw a4, a4, 1280
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 1008(a1)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 76(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a2, 24
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB28_42
+; RV64-ZVFHMIN-NEXT:  .LBB28_174: # %cond.store77
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    lui a4, 2
+; RV64-ZVFHMIN-NEXT:    addiw a4, a4, 1152
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 882(a1)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 78(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a2, 23
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB28_43
+; RV64-ZVFHMIN-NEXT:  .LBB28_175: # %cond.store79
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    li a4, 9
+; RV64-ZVFHMIN-NEXT:    slli a4, a4, 10
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 756(a1)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 80(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a2, 22
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB28_44
+; RV64-ZVFHMIN-NEXT:  .LBB28_176: # %cond.store81
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    lui a4, 2
+; RV64-ZVFHMIN-NEXT:    addiw a4, a4, 896
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 630(a1)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 82(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a2, 21
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB28_45
+; RV64-ZVFHMIN-NEXT:  .LBB28_177: # %cond.store83
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    lui a4, 2
+; RV64-ZVFHMIN-NEXT:    addiw a4, a4, 768
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 504(a1)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 84(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a2, 20
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB28_46
+; RV64-ZVFHMIN-NEXT:  .LBB28_178: # %cond.store85
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    lui a4, 2
+; RV64-ZVFHMIN-NEXT:    addiw a4, a4, 640
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 378(a1)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 86(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a2, 19
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB28_47
+; RV64-ZVFHMIN-NEXT:  .LBB28_179: # %cond.store87
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    li a4, 17
+; RV64-ZVFHMIN-NEXT:    slli a4, a4, 9
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 252(a1)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 88(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a2, 18
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB28_48
+; RV64-ZVFHMIN-NEXT:  .LBB28_180: # %cond.store89
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    lui a4, 2
+; RV64-ZVFHMIN-NEXT:    addiw a4, a4, 384
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 126(a1)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 90(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a2, 17
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_49
+; RV64-ZVFHMIN-NEXT:    j .LBB28_50
+; RV64-ZVFHMIN-NEXT:  .LBB28_181: # %cond.store93
+; RV64-ZVFHMIN-NEXT:    li a1, 64
+; RV64-ZVFHMIN-NEXT:    lui a4, 2
+; RV64-ZVFHMIN-NEXT:    addiw a4, a4, 128
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a1, 2016(a3)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 94(a0)
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 15
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_52
+; RV64-ZVFHMIN-NEXT:  .LBB28_182: # %cond.store95
+; RV64-ZVFHMIN-NEXT:    li a1, 64
+; RV64-ZVFHMIN-NEXT:    lui a4, 2
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a1, 1890(a3)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 96(a0)
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 14
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_53
+; RV64-ZVFHMIN-NEXT:  .LBB28_183: # %cond.store97
+; RV64-ZVFHMIN-NEXT:    li a1, 64
+; RV64-ZVFHMIN-NEXT:    lui a4, 2
+; RV64-ZVFHMIN-NEXT:    addiw a4, a4, -128
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a1, 1764(a3)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 98(a0)
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 13
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_54
+; RV64-ZVFHMIN-NEXT:  .LBB28_184: # %cond.store99
+; RV64-ZVFHMIN-NEXT:    li a1, 64
+; RV64-ZVFHMIN-NEXT:    li a4, 31
+; RV64-ZVFHMIN-NEXT:    slli a4, a4, 8
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a1, 1638(a3)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 100(a0)
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 12
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_55
+; RV64-ZVFHMIN-NEXT:  .LBB28_185: # %cond.store101
+; RV64-ZVFHMIN-NEXT:    li a1, 64
+; RV64-ZVFHMIN-NEXT:    lui a4, 2
+; RV64-ZVFHMIN-NEXT:    addiw a4, a4, -384
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a1, 1512(a3)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 102(a0)
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 11
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_56
+; RV64-ZVFHMIN-NEXT:  .LBB28_186: # %cond.store103
+; RV64-ZVFHMIN-NEXT:    li a1, 64
+; RV64-ZVFHMIN-NEXT:    li a4, 15
+; RV64-ZVFHMIN-NEXT:    slli a4, a4, 9
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a1, 1386(a3)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 104(a0)
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 10
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_57
+; RV64-ZVFHMIN-NEXT:  .LBB28_187: # %cond.store105
+; RV64-ZVFHMIN-NEXT:    li a1, 64
+; RV64-ZVFHMIN-NEXT:    lui a4, 2
+; RV64-ZVFHMIN-NEXT:    addiw a4, a4, -640
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a1, 1260(a3)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 106(a0)
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 9
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_58
+; RV64-ZVFHMIN-NEXT:  .LBB28_188: # %cond.store107
+; RV64-ZVFHMIN-NEXT:    li a1, 64
+; RV64-ZVFHMIN-NEXT:    li a4, 29
+; RV64-ZVFHMIN-NEXT:    slli a4, a4, 8
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a1, 1134(a3)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 108(a0)
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 8
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_59
+; RV64-ZVFHMIN-NEXT:  .LBB28_189: # %cond.store109
+; RV64-ZVFHMIN-NEXT:    li a1, 64
+; RV64-ZVFHMIN-NEXT:    lui a4, 2
+; RV64-ZVFHMIN-NEXT:    addiw a4, a4, -896
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a1, 1008(a3)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 110(a0)
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 7
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_60
+; RV64-ZVFHMIN-NEXT:  .LBB28_190: # %cond.store111
+; RV64-ZVFHMIN-NEXT:    li a1, 64
+; RV64-ZVFHMIN-NEXT:    li a4, 7
+; RV64-ZVFHMIN-NEXT:    slli a4, a4, 10
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a1, 882(a3)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 112(a0)
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 6
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_61
+; RV64-ZVFHMIN-NEXT:  .LBB28_191: # %cond.store113
+; RV64-ZVFHMIN-NEXT:    li a1, 64
+; RV64-ZVFHMIN-NEXT:    lui a4, 2
+; RV64-ZVFHMIN-NEXT:    addiw a4, a4, -1152
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a1, 756(a3)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 114(a0)
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 5
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_62
+; RV64-ZVFHMIN-NEXT:  .LBB28_192: # %cond.store115
+; RV64-ZVFHMIN-NEXT:    li a1, 64
+; RV64-ZVFHMIN-NEXT:    li a4, 27
+; RV64-ZVFHMIN-NEXT:    slli a4, a4, 8
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a1, 630(a3)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 116(a0)
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 4
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_63
+; RV64-ZVFHMIN-NEXT:  .LBB28_193: # %cond.store117
+; RV64-ZVFHMIN-NEXT:    li a1, 64
+; RV64-ZVFHMIN-NEXT:    lui a4, 2
+; RV64-ZVFHMIN-NEXT:    addiw a4, a4, -1408
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a1, 504(a3)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 118(a0)
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 3
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_64
+; RV64-ZVFHMIN-NEXT:  .LBB28_194: # %cond.store119
+; RV64-ZVFHMIN-NEXT:    li a1, 64
+; RV64-ZVFHMIN-NEXT:    li a4, 13
+; RV64-ZVFHMIN-NEXT:    slli a4, a4, 9
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a1, 378(a3)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a1
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 120(a0)
+; RV64-ZVFHMIN-NEXT:    slli a1, a2, 2
+; RV64-ZVFHMIN-NEXT:    bltz a1, .LBB28_65
+; RV64-ZVFHMIN-NEXT:    j .LBB28_66
+; RV64-ZVFHMIN-NEXT:  .LBB28_195: # %cond.store125
+; RV64-ZVFHMIN-NEXT:    li a2, 64
+; RV64-ZVFHMIN-NEXT:    lui a4, 2
+; RV64-ZVFHMIN-NEXT:    addiw a4, a4, -1920
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v8, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a2, 0(a3)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 126(a0)
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 1
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB28_70
+; RV64-ZVFHMIN-NEXT:  .LBB28_196: # %cond.store127
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v16
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 128(a0)
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 2
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB28_71
+; RV64-ZVFHMIN-NEXT:  .LBB28_197: # %cond.store129
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v8, v16, 1
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v8
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 130(a0)
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 4
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB28_72
+; RV64-ZVFHMIN-NEXT:  .LBB28_198: # %cond.store131
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v8, v16, 2
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v8
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 132(a0)
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 8
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB28_73
+; RV64-ZVFHMIN-NEXT:  .LBB28_199: # %cond.store133
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v8, v16, 3
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v8
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 134(a0)
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 16
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB28_74
+; RV64-ZVFHMIN-NEXT:  .LBB28_200: # %cond.store135
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v8, v16, 4
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v8
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 136(a0)
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 32
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB28_75
+; RV64-ZVFHMIN-NEXT:  .LBB28_201: # %cond.store137
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v8, v16, 5
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v8
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 138(a0)
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 64
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB28_76
+; RV64-ZVFHMIN-NEXT:  .LBB28_202: # %cond.store139
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v8, v16, 6
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v8
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 140(a0)
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 128
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB28_77
+; RV64-ZVFHMIN-NEXT:  .LBB28_203: # %cond.store141
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v8, v16, 7
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v8
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 142(a0)
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 256
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB28_78
+; RV64-ZVFHMIN-NEXT:  .LBB28_204: # %cond.store143
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v8, v16, 8
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v8
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 144(a0)
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 512
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB28_79
+; RV64-ZVFHMIN-NEXT:  .LBB28_205: # %cond.store145
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v8, v16, 9
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v8
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 146(a0)
+; RV64-ZVFHMIN-NEXT:    andi a2, a1, 1024
+; RV64-ZVFHMIN-NEXT:    beqz a2, .LBB28_80
+; RV64-ZVFHMIN-NEXT:  .LBB28_206: # %cond.store147
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v8, v16, 10
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v8
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 148(a0)
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 52
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_81
+; RV64-ZVFHMIN-NEXT:  .LBB28_207: # %cond.store149
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v8, v16, 11
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v8
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 150(a0)
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 51
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_82
+; RV64-ZVFHMIN-NEXT:  .LBB28_208: # %cond.store151
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v8, v16, 12
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v8
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 152(a0)
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 50
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_83
+; RV64-ZVFHMIN-NEXT:  .LBB28_209: # %cond.store153
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v8, v16, 13
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v8
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 154(a0)
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 49
+; RV64-ZVFHMIN-NEXT:    bgez a2, .LBB28_84
+; RV64-ZVFHMIN-NEXT:  .LBB28_210: # %cond.store155
+; RV64-ZVFHMIN-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
+; RV64-ZVFHMIN-NEXT:    vslidedown.vi v8, v16, 14
+; RV64-ZVFHMIN-NEXT:    vmv.x.s a2, v8
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 156(a0)
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 48
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_85
+; RV64-ZVFHMIN-NEXT:    j .LBB28_86
+; RV64-ZVFHMIN-NEXT:  .LBB28_211: # %cond.store159
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    li a4, 3
+; RV64-ZVFHMIN-NEXT:    slli a4, a4, 11
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v16, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 1890(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 160(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 46
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB28_88
+; RV64-ZVFHMIN-NEXT:  .LBB28_212: # %cond.store161
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    lui a4, 1
+; RV64-ZVFHMIN-NEXT:    addiw a4, a4, 1920
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v16, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 1764(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 162(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 45
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB28_89
+; RV64-ZVFHMIN-NEXT:  .LBB28_213: # %cond.store163
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    li a4, 23
+; RV64-ZVFHMIN-NEXT:    slli a4, a4, 8
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v16, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 1638(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 164(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 44
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB28_90
+; RV64-ZVFHMIN-NEXT:  .LBB28_214: # %cond.store165
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    lui a4, 1
+; RV64-ZVFHMIN-NEXT:    addiw a4, a4, 1664
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v16, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 1512(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 166(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 43
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB28_91
+; RV64-ZVFHMIN-NEXT:  .LBB28_215: # %cond.store167
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    li a4, 11
+; RV64-ZVFHMIN-NEXT:    slli a4, a4, 9
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v16, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 1386(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 168(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 42
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB28_92
+; RV64-ZVFHMIN-NEXT:  .LBB28_216: # %cond.store169
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    lui a4, 1
+; RV64-ZVFHMIN-NEXT:    addiw a4, a4, 1408
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v16, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 1260(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 170(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 41
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB28_93
+; RV64-ZVFHMIN-NEXT:  .LBB28_217: # %cond.store171
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    li a4, 21
+; RV64-ZVFHMIN-NEXT:    slli a4, a4, 8
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v16, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 1134(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 172(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 40
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB28_94
+; RV64-ZVFHMIN-NEXT:  .LBB28_218: # %cond.store173
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    lui a4, 1
+; RV64-ZVFHMIN-NEXT:    addiw a4, a4, 1152
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v16, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 1008(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 174(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 39
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB28_95
+; RV64-ZVFHMIN-NEXT:  .LBB28_219: # %cond.store175
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    li a4, 5
+; RV64-ZVFHMIN-NEXT:    slli a4, a4, 10
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v16, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 882(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 176(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 38
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB28_96
+; RV64-ZVFHMIN-NEXT:  .LBB28_220: # %cond.store177
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    lui a4, 1
+; RV64-ZVFHMIN-NEXT:    addiw a4, a4, 896
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v16, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 756(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 178(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 37
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB28_97
+; RV64-ZVFHMIN-NEXT:  .LBB28_221: # %cond.store179
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    li a4, 19
+; RV64-ZVFHMIN-NEXT:    slli a4, a4, 8
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v16, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 630(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 180(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 36
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB28_98
+; RV64-ZVFHMIN-NEXT:  .LBB28_222: # %cond.store181
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    lui a4, 1
+; RV64-ZVFHMIN-NEXT:    addiw a4, a4, 640
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v16, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 504(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 182(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 35
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB28_99
+; RV64-ZVFHMIN-NEXT:  .LBB28_223: # %cond.store183
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    li a4, 9
+; RV64-ZVFHMIN-NEXT:    slli a4, a4, 9
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v16, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 378(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 184(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 34
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB28_100
+; RV64-ZVFHMIN-NEXT:  .LBB28_224: # %cond.store185
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    lui a4, 1
+; RV64-ZVFHMIN-NEXT:    addiw a4, a4, 384
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v16, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 252(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 186(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 33
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB28_101
+; RV64-ZVFHMIN-NEXT:  .LBB28_225: # %cond.store187
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    li a4, 17
+; RV64-ZVFHMIN-NEXT:    slli a4, a4, 8
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v16, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 126(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 188(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 32
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_102
+; RV64-ZVFHMIN-NEXT:    j .LBB28_103
+; RV64-ZVFHMIN-NEXT:  .LBB28_226: # %cond.store191
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    lui a4, 1
+; RV64-ZVFHMIN-NEXT:    add a4, sp, a4
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v16, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 2016(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 192(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 30
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB28_105
+; RV64-ZVFHMIN-NEXT:  .LBB28_227: # %cond.store193
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV64-ZVFHMIN-NEXT:    addi a4, a4, 1921
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v16, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 1890(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 194(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 29
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB28_106
+; RV64-ZVFHMIN-NEXT:  .LBB28_228: # %cond.store195
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV64-ZVFHMIN-NEXT:    addi a4, a4, 1793
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v16, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 1764(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 196(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 28
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB28_107
+; RV64-ZVFHMIN-NEXT:  .LBB28_229: # %cond.store197
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV64-ZVFHMIN-NEXT:    addi a4, a4, 1665
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v16, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 1638(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 198(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 27
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB28_108
+; RV64-ZVFHMIN-NEXT:  .LBB28_230: # %cond.store199
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV64-ZVFHMIN-NEXT:    addi a4, a4, 1537
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v16, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 1512(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 200(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 26
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB28_109
+; RV64-ZVFHMIN-NEXT:  .LBB28_231: # %cond.store201
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV64-ZVFHMIN-NEXT:    addi a4, a4, 1409
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v16, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 1386(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 202(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 25
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB28_110
+; RV64-ZVFHMIN-NEXT:  .LBB28_232: # %cond.store203
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV64-ZVFHMIN-NEXT:    addi a4, a4, 1281
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v16, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 1260(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 204(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 24
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB28_111
+; RV64-ZVFHMIN-NEXT:  .LBB28_233: # %cond.store205
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV64-ZVFHMIN-NEXT:    addi a4, a4, 1153
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v16, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 1134(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 206(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 23
+; RV64-ZVFHMIN-NEXT:    bgez a3, .LBB28_112
+; RV64-ZVFHMIN-NEXT:  .LBB28_234: # %cond.store207
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV64-ZVFHMIN-NEXT:    addi a4, a4, 1025
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v16, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 1008(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 208(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 22
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_235
+; RV64-ZVFHMIN-NEXT:    j .LBB28_113
+; RV64-ZVFHMIN-NEXT:  .LBB28_235: # %cond.store209
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV64-ZVFHMIN-NEXT:    addi a4, a4, 897
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v16, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 882(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 210(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 21
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_236
+; RV64-ZVFHMIN-NEXT:    j .LBB28_114
+; RV64-ZVFHMIN-NEXT:  .LBB28_236: # %cond.store211
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV64-ZVFHMIN-NEXT:    addi a4, a4, 769
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v16, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 756(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 212(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 20
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_237
+; RV64-ZVFHMIN-NEXT:    j .LBB28_115
+; RV64-ZVFHMIN-NEXT:  .LBB28_237: # %cond.store213
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV64-ZVFHMIN-NEXT:    addi a4, a4, 641
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v16, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 630(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 214(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 19
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_238
+; RV64-ZVFHMIN-NEXT:    j .LBB28_116
+; RV64-ZVFHMIN-NEXT:  .LBB28_238: # %cond.store215
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV64-ZVFHMIN-NEXT:    addi a4, a4, 513
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v16, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 504(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 216(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 18
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_239
+; RV64-ZVFHMIN-NEXT:    j .LBB28_117
+; RV64-ZVFHMIN-NEXT:  .LBB28_239: # %cond.store217
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV64-ZVFHMIN-NEXT:    addi a4, a4, 385
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v16, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 378(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 218(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 17
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_240
+; RV64-ZVFHMIN-NEXT:    j .LBB28_118
+; RV64-ZVFHMIN-NEXT:  .LBB28_240: # %cond.store219
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV64-ZVFHMIN-NEXT:    addi a4, a4, 257
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v16, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 252(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 220(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 16
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_241
+; RV64-ZVFHMIN-NEXT:    j .LBB28_119
+; RV64-ZVFHMIN-NEXT:  .LBB28_241: # %cond.store221
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV64-ZVFHMIN-NEXT:    addi a4, a4, 129
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v16, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a3, 126(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a3
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 222(a0)
+; RV64-ZVFHMIN-NEXT:    slli a3, a1, 15
+; RV64-ZVFHMIN-NEXT:    bltz a3, .LBB28_242
+; RV64-ZVFHMIN-NEXT:    j .LBB28_120
+; RV64-ZVFHMIN-NEXT:  .LBB28_242: # %cond.store223
+; RV64-ZVFHMIN-NEXT:    li a3, 64
+; RV64-ZVFHMIN-NEXT:    addi a4, sp, 2047
+; RV64-ZVFHMIN-NEXT:    addi a4, a4, 1
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v16, (a4)
+; RV64-ZVFHMIN-NEXT:    lh a2, 0(a2)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 224(a0)
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 14
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_243
+; RV64-ZVFHMIN-NEXT:    j .LBB28_121
+; RV64-ZVFHMIN-NEXT:  .LBB28_243: # %cond.store225
+; RV64-ZVFHMIN-NEXT:    li a2, 64
+; RV64-ZVFHMIN-NEXT:    addi a3, sp, 1920
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v16, (a3)
+; RV64-ZVFHMIN-NEXT:    lh a2, 2018(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 226(a0)
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 13
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_244
+; RV64-ZVFHMIN-NEXT:    j .LBB28_122
+; RV64-ZVFHMIN-NEXT:  .LBB28_244: # %cond.store227
+; RV64-ZVFHMIN-NEXT:    li a2, 64
+; RV64-ZVFHMIN-NEXT:    addi a3, sp, 1792
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v16, (a3)
+; RV64-ZVFHMIN-NEXT:    lh a2, 1892(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 228(a0)
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 12
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_245
+; RV64-ZVFHMIN-NEXT:    j .LBB28_123
+; RV64-ZVFHMIN-NEXT:  .LBB28_245: # %cond.store229
+; RV64-ZVFHMIN-NEXT:    li a2, 64
+; RV64-ZVFHMIN-NEXT:    addi a3, sp, 1664
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v16, (a3)
+; RV64-ZVFHMIN-NEXT:    lh a2, 1766(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 230(a0)
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 11
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_246
+; RV64-ZVFHMIN-NEXT:    j .LBB28_124
+; RV64-ZVFHMIN-NEXT:  .LBB28_246: # %cond.store231
+; RV64-ZVFHMIN-NEXT:    li a2, 64
+; RV64-ZVFHMIN-NEXT:    addi a3, sp, 1536
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v16, (a3)
+; RV64-ZVFHMIN-NEXT:    lh a2, 1640(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 232(a0)
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 10
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_247
+; RV64-ZVFHMIN-NEXT:    j .LBB28_125
+; RV64-ZVFHMIN-NEXT:  .LBB28_247: # %cond.store233
+; RV64-ZVFHMIN-NEXT:    li a2, 64
+; RV64-ZVFHMIN-NEXT:    addi a3, sp, 1408
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v16, (a3)
+; RV64-ZVFHMIN-NEXT:    lh a2, 1514(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 234(a0)
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 9
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_248
+; RV64-ZVFHMIN-NEXT:    j .LBB28_126
+; RV64-ZVFHMIN-NEXT:  .LBB28_248: # %cond.store235
+; RV64-ZVFHMIN-NEXT:    li a2, 64
+; RV64-ZVFHMIN-NEXT:    addi a3, sp, 1280
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v16, (a3)
+; RV64-ZVFHMIN-NEXT:    lh a2, 1388(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 236(a0)
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 8
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_249
+; RV64-ZVFHMIN-NEXT:    j .LBB28_127
+; RV64-ZVFHMIN-NEXT:  .LBB28_249: # %cond.store237
+; RV64-ZVFHMIN-NEXT:    li a2, 64
+; RV64-ZVFHMIN-NEXT:    addi a3, sp, 1152
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v16, (a3)
+; RV64-ZVFHMIN-NEXT:    lh a2, 1262(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 238(a0)
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 7
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_250
+; RV64-ZVFHMIN-NEXT:    j .LBB28_128
+; RV64-ZVFHMIN-NEXT:  .LBB28_250: # %cond.store239
+; RV64-ZVFHMIN-NEXT:    li a2, 64
+; RV64-ZVFHMIN-NEXT:    addi a3, sp, 1024
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v16, (a3)
+; RV64-ZVFHMIN-NEXT:    lh a2, 1136(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 240(a0)
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 6
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_251
+; RV64-ZVFHMIN-NEXT:    j .LBB28_129
+; RV64-ZVFHMIN-NEXT:  .LBB28_251: # %cond.store241
+; RV64-ZVFHMIN-NEXT:    li a2, 64
+; RV64-ZVFHMIN-NEXT:    addi a3, sp, 896
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v16, (a3)
+; RV64-ZVFHMIN-NEXT:    lh a2, 1010(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 242(a0)
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 5
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_252
+; RV64-ZVFHMIN-NEXT:    j .LBB28_130
+; RV64-ZVFHMIN-NEXT:  .LBB28_252: # %cond.store243
+; RV64-ZVFHMIN-NEXT:    li a2, 64
+; RV64-ZVFHMIN-NEXT:    addi a3, sp, 768
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v16, (a3)
+; RV64-ZVFHMIN-NEXT:    lh a2, 884(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 244(a0)
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 4
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_253
+; RV64-ZVFHMIN-NEXT:    j .LBB28_131
+; RV64-ZVFHMIN-NEXT:  .LBB28_253: # %cond.store245
+; RV64-ZVFHMIN-NEXT:    li a2, 64
+; RV64-ZVFHMIN-NEXT:    addi a3, sp, 640
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v16, (a3)
+; RV64-ZVFHMIN-NEXT:    lh a2, 758(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 246(a0)
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 3
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_254
+; RV64-ZVFHMIN-NEXT:    j .LBB28_132
+; RV64-ZVFHMIN-NEXT:  .LBB28_254: # %cond.store247
+; RV64-ZVFHMIN-NEXT:    li a2, 64
+; RV64-ZVFHMIN-NEXT:    addi a3, sp, 512
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v16, (a3)
+; RV64-ZVFHMIN-NEXT:    lh a2, 632(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 248(a0)
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 2
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_255
+; RV64-ZVFHMIN-NEXT:    j .LBB28_133
+; RV64-ZVFHMIN-NEXT:  .LBB28_255: # %cond.store249
+; RV64-ZVFHMIN-NEXT:    li a2, 64
+; RV64-ZVFHMIN-NEXT:    addi a3, sp, 384
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v16, (a3)
+; RV64-ZVFHMIN-NEXT:    lh a2, 506(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 250(a0)
+; RV64-ZVFHMIN-NEXT:    slli a2, a1, 1
+; RV64-ZVFHMIN-NEXT:    bltz a2, .LBB28_256
+; RV64-ZVFHMIN-NEXT:    j .LBB28_134
+; RV64-ZVFHMIN-NEXT:  .LBB28_256: # %cond.store251
+; RV64-ZVFHMIN-NEXT:    li a2, 64
+; RV64-ZVFHMIN-NEXT:    addi a3, sp, 256
+; RV64-ZVFHMIN-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; RV64-ZVFHMIN-NEXT:    vse16.v v16, (a3)
+; RV64-ZVFHMIN-NEXT:    lh a2, 380(sp)
+; RV64-ZVFHMIN-NEXT:    fmv.h.x fa5, a2
+; RV64-ZVFHMIN-NEXT:    fsh fa5, 252(a0)
+; RV64-ZVFHMIN-NEXT:    bgez a1, .LBB28_257
+; RV64-ZVFHMIN-NEXT:    j .LBB28_135
+; RV64-ZVFHMIN-NEXT:  .LBB28_257: # %cond.store251
+; RV64-ZVFHMIN-NEXT:    j .LBB28_136
+  call void @llvm.masked.store.v128f16.p0(<128 x half> %val, ptr %a, i32 8, <128 x i1> %mask)
+  ret void
+}
+
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV32-ZVFH: {{.*}}
+; RV64-ZVFH: {{.*}}

diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll
index 90690bbc8e2085..0c9bf9a09fd6d3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll
@@ -1,645 +1,332 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
 
-define void @masked_store_v1i8(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
+define void @masked_store_v1i8(<1 x i8> %val, ptr %a, <1 x i1> %mask) {
 ; CHECK-LABEL: masked_store_v1i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT:    vle8.v v8, (a2)
-; CHECK-NEXT:    vle8.v v9, (a0)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
-; CHECK-NEXT:    vse8.v v9, (a1), v0.t
+; CHECK-NEXT:    vse8.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
-  %m = load <1 x i8>, ptr %m_ptr
-  %mask = icmp eq <1 x i8> %m, zeroinitializer
-  %val = load <1 x i8>, ptr %val_ptr
   call void @llvm.masked.store.v1i8.p0(<1 x i8> %val, ptr %a, i32 8, <1 x i1> %mask)
   ret void
 }
-declare void @llvm.masked.store.v1i8.p0(<1 x i8>, ptr, i32, <1 x i1>)
 
-define void @masked_store_v1i16(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
+define void @masked_store_v1i16(<1 x i16> %val, ptr %a, <1 x i1> %mask) {
 ; CHECK-LABEL: masked_store_v1i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-NEXT:    vle16.v v8, (a2)
-; CHECK-NEXT:    vle16.v v9, (a0)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
-; CHECK-NEXT:    vse16.v v9, (a1), v0.t
+; CHECK-NEXT:    vse16.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
-  %m = load <1 x i16>, ptr %m_ptr
-  %mask = icmp eq <1 x i16> %m, zeroinitializer
-  %val = load <1 x i16>, ptr %val_ptr
   call void @llvm.masked.store.v1i16.p0(<1 x i16> %val, ptr %a, i32 8, <1 x i1> %mask)
   ret void
 }
-declare void @llvm.masked.store.v1i16.p0(<1 x i16>, ptr, i32, <1 x i1>)
 
-define void @masked_store_v1i32(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
+define void @masked_store_v1i32(<1 x i32> %val, ptr %a, <1 x i1> %mask) {
 ; CHECK-LABEL: masked_store_v1i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a2)
-; CHECK-NEXT:    vle32.v v9, (a0)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
-; CHECK-NEXT:    vse32.v v9, (a1), v0.t
+; CHECK-NEXT:    vse32.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
-  %m = load <1 x i32>, ptr %m_ptr
-  %mask = icmp eq <1 x i32> %m, zeroinitializer
-  %val = load <1 x i32>, ptr %val_ptr
   call void @llvm.masked.store.v1i32.p0(<1 x i32> %val, ptr %a, i32 8, <1 x i1> %mask)
   ret void
 }
-declare void @llvm.masked.store.v1i32.p0(<1 x i32>, ptr, i32, <1 x i1>)
 
-define void @masked_store_v1i64(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
+define void @masked_store_v1i64(<1 x i64> %val, ptr %a, <1 x i1> %mask) {
 ; CHECK-LABEL: masked_store_v1i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a2)
-; CHECK-NEXT:    vle64.v v9, (a0)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
-; CHECK-NEXT:    vse64.v v9, (a1), v0.t
+; CHECK-NEXT:    vse64.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
-  %m = load <1 x i64>, ptr %m_ptr
-  %mask = icmp eq <1 x i64> %m, zeroinitializer
-  %val = load <1 x i64>, ptr %val_ptr
   call void @llvm.masked.store.v1i64.p0(<1 x i64> %val, ptr %a, i32 8, <1 x i1> %mask)
   ret void
 }
-declare void @llvm.masked.store.v1i64.p0(<1 x i64>, ptr, i32, <1 x i1>)
 
-define void @masked_store_v2i8(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
+define void @masked_store_v2i8(<2 x i8> %val, ptr %a, <2 x i1> %mask) {
 ; CHECK-LABEL: masked_store_v2i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
-; CHECK-NEXT:    vle8.v v8, (a2)
-; CHECK-NEXT:    vle8.v v9, (a0)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
-; CHECK-NEXT:    vse8.v v9, (a1), v0.t
+; CHECK-NEXT:    vse8.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
-  %m = load <2 x i8>, ptr %m_ptr
-  %mask = icmp eq <2 x i8> %m, zeroinitializer
-  %val = load <2 x i8>, ptr %val_ptr
   call void @llvm.masked.store.v2i8.p0(<2 x i8> %val, ptr %a, i32 8, <2 x i1> %mask)
   ret void
 }
-declare void @llvm.masked.store.v2i8.p0(<2 x i8>, ptr, i32, <2 x i1>)
 
-define void @masked_store_v2i16(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
+define void @masked_store_v2i16(<2 x i16> %val, ptr %a, <2 x i1> %mask) {
 ; CHECK-LABEL: masked_store_v2i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT:    vle16.v v8, (a2)
-; CHECK-NEXT:    vle16.v v9, (a0)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
-; CHECK-NEXT:    vse16.v v9, (a1), v0.t
+; CHECK-NEXT:    vse16.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
-  %m = load <2 x i16>, ptr %m_ptr
-  %mask = icmp eq <2 x i16> %m, zeroinitializer
-  %val = load <2 x i16>, ptr %val_ptr
   call void @llvm.masked.store.v2i16.p0(<2 x i16> %val, ptr %a, i32 8, <2 x i1> %mask)
   ret void
 }
-declare void @llvm.masked.store.v2i16.p0(<2 x i16>, ptr, i32, <2 x i1>)
 
-define void @masked_store_v2i32(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
+define void @masked_store_v2i32(<2 x i32> %val, ptr %a, <2 x i1> %mask) {
 ; CHECK-LABEL: masked_store_v2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a2)
-; CHECK-NEXT:    vle32.v v9, (a0)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
-; CHECK-NEXT:    vse32.v v9, (a1), v0.t
+; CHECK-NEXT:    vse32.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
-  %m = load <2 x i32>, ptr %m_ptr
-  %mask = icmp eq <2 x i32> %m, zeroinitializer
-  %val = load <2 x i32>, ptr %val_ptr
   call void @llvm.masked.store.v2i32.p0(<2 x i32> %val, ptr %a, i32 8, <2 x i1> %mask)
   ret void
 }
-declare void @llvm.masked.store.v2i32.p0(<2 x i32>, ptr, i32, <2 x i1>)
 
-define void @masked_store_v2i64(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
+define void @masked_store_v2i64(<2 x i64> %val, ptr %a, <2 x i1> %mask) {
 ; CHECK-LABEL: masked_store_v2i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a2)
-; CHECK-NEXT:    vle64.v v9, (a0)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
-; CHECK-NEXT:    vse64.v v9, (a1), v0.t
+; CHECK-NEXT:    vse64.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
-  %m = load <2 x i64>, ptr %m_ptr
-  %mask = icmp eq <2 x i64> %m, zeroinitializer
-  %val = load <2 x i64>, ptr %val_ptr
   call void @llvm.masked.store.v2i64.p0(<2 x i64> %val, ptr %a, i32 8, <2 x i1> %mask)
   ret void
 }
-declare void @llvm.masked.store.v2i64.p0(<2 x i64>, ptr, i32, <2 x i1>)
 
-define void @masked_store_v4i8(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
+define void @masked_store_v4i8(<4 x i8> %val, ptr %a, <4 x i1> %mask) {
 ; CHECK-LABEL: masked_store_v4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT:    vle8.v v8, (a2)
-; CHECK-NEXT:    vle8.v v9, (a0)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
-; CHECK-NEXT:    vse8.v v9, (a1), v0.t
+; CHECK-NEXT:    vse8.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
-  %m = load <4 x i8>, ptr %m_ptr
-  %mask = icmp eq <4 x i8> %m, zeroinitializer
-  %val = load <4 x i8>, ptr %val_ptr
   call void @llvm.masked.store.v4i8.p0(<4 x i8> %val, ptr %a, i32 8, <4 x i1> %mask)
   ret void
 }
-declare void @llvm.masked.store.v4i8.p0(<4 x i8>, ptr, i32, <4 x i1>)
 
-define void @masked_store_v4i16(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
+define void @masked_store_v4i16(<4 x i16> %val, ptr %a, <4 x i1> %mask) {
 ; CHECK-LABEL: masked_store_v4i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT:    vle16.v v8, (a2)
-; CHECK-NEXT:    vle16.v v9, (a0)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
-; CHECK-NEXT:    vse16.v v9, (a1), v0.t
+; CHECK-NEXT:    vse16.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
-  %m = load <4 x i16>, ptr %m_ptr
-  %mask = icmp eq <4 x i16> %m, zeroinitializer
-  %val = load <4 x i16>, ptr %val_ptr
   call void @llvm.masked.store.v4i16.p0(<4 x i16> %val, ptr %a, i32 8, <4 x i1> %mask)
   ret void
 }
-declare void @llvm.masked.store.v4i16.p0(<4 x i16>, ptr, i32, <4 x i1>)
 
-define void @masked_store_v4i32(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
+define void @masked_store_v4i32(<4 x i32> %val, ptr %a, <4 x i1> %mask) {
 ; CHECK-LABEL: masked_store_v4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a2)
-; CHECK-NEXT:    vle32.v v9, (a0)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
-; CHECK-NEXT:    vse32.v v9, (a1), v0.t
+; CHECK-NEXT:    vse32.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
-  %m = load <4 x i32>, ptr %m_ptr
-  %mask = icmp eq <4 x i32> %m, zeroinitializer
-  %val = load <4 x i32>, ptr %val_ptr
   call void @llvm.masked.store.v4i32.p0(<4 x i32> %val, ptr %a, i32 8, <4 x i1> %mask)
   ret void
 }
-declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32, <4 x i1>)
 
-define void @masked_store_v4i64(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
+define void @masked_store_v4i64(<4 x i64> %val, ptr %a, <4 x i1> %mask) {
 ; CHECK-LABEL: masked_store_v4i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a2)
-; CHECK-NEXT:    vle64.v v10, (a0)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
-; CHECK-NEXT:    vse64.v v10, (a1), v0.t
+; CHECK-NEXT:    vse64.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
-  %m = load <4 x i64>, ptr %m_ptr
-  %mask = icmp eq <4 x i64> %m, zeroinitializer
-  %val = load <4 x i64>, ptr %val_ptr
   call void @llvm.masked.store.v4i64.p0(<4 x i64> %val, ptr %a, i32 8, <4 x i1> %mask)
   ret void
 }
-declare void @llvm.masked.store.v4i64.p0(<4 x i64>, ptr, i32, <4 x i1>)
 
-define void @masked_store_v8i8(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
+define void @masked_store_v8i8(<8 x i8> %val, ptr %a, <8 x i1> %mask) {
 ; CHECK-LABEL: masked_store_v8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT:    vle8.v v8, (a2)
-; CHECK-NEXT:    vle8.v v9, (a0)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
-; CHECK-NEXT:    vse8.v v9, (a1), v0.t
+; CHECK-NEXT:    vse8.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
-  %m = load <8 x i8>, ptr %m_ptr
-  %mask = icmp eq <8 x i8> %m, zeroinitializer
-  %val = load <8 x i8>, ptr %val_ptr
   call void @llvm.masked.store.v8i8.p0(<8 x i8> %val, ptr %a, i32 8, <8 x i1> %mask)
   ret void
 }
-declare void @llvm.masked.store.v8i8.p0(<8 x i8>, ptr, i32, <8 x i1>)
 
-define void @masked_store_v8i16(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
+define void @masked_store_v8i16(<8 x i16> %val, ptr %a, <8 x i1> %mask) {
 ; CHECK-LABEL: masked_store_v8i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v8, (a2)
-; CHECK-NEXT:    vle16.v v9, (a0)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
-; CHECK-NEXT:    vse16.v v9, (a1), v0.t
+; CHECK-NEXT:    vse16.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
-  %m = load <8 x i16>, ptr %m_ptr
-  %mask = icmp eq <8 x i16> %m, zeroinitializer
-  %val = load <8 x i16>, ptr %val_ptr
   call void @llvm.masked.store.v8i16.p0(<8 x i16> %val, ptr %a, i32 8, <8 x i1> %mask)
   ret void
 }
-declare void @llvm.masked.store.v8i16.p0(<8 x i16>, ptr, i32, <8 x i1>)
 
-define void @masked_store_v8i32(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
+define void @masked_store_v8i32(<8 x i32> %val, ptr %a, <8 x i1> %mask) {
 ; CHECK-LABEL: masked_store_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a2)
-; CHECK-NEXT:    vle32.v v10, (a0)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
-; CHECK-NEXT:    vse32.v v10, (a1), v0.t
+; CHECK-NEXT:    vse32.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
-  %m = load <8 x i32>, ptr %m_ptr
-  %mask = icmp eq <8 x i32> %m, zeroinitializer
-  %val = load <8 x i32>, ptr %val_ptr
   call void @llvm.masked.store.v8i32.p0(<8 x i32> %val, ptr %a, i32 8, <8 x i1> %mask)
   ret void
 }
-declare void @llvm.masked.store.v8i32.p0(<8 x i32>, ptr, i32, <8 x i1>)
 
-define void @masked_store_v8i64(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
+define void @masked_store_v8i64(<8 x i64> %val, ptr %a, <8 x i1> %mask) {
 ; CHECK-LABEL: masked_store_v8i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a2)
-; CHECK-NEXT:    vle64.v v12, (a0)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
-; CHECK-NEXT:    vse64.v v12, (a1), v0.t
+; CHECK-NEXT:    vse64.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
-  %m = load <8 x i64>, ptr %m_ptr
-  %mask = icmp eq <8 x i64> %m, zeroinitializer
-  %val = load <8 x i64>, ptr %val_ptr
   call void @llvm.masked.store.v8i64.p0(<8 x i64> %val, ptr %a, i32 8, <8 x i1> %mask)
   ret void
 }
-declare void @llvm.masked.store.v8i64.p0(<8 x i64>, ptr, i32, <8 x i1>)
 
-define void @masked_store_v16i8(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
+define void @masked_store_v16i8(<16 x i8> %val, ptr %a, <16 x i1> %mask) {
 ; CHECK-LABEL: masked_store_v16i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT:    vle8.v v8, (a2)
-; CHECK-NEXT:    vle8.v v9, (a0)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
-; CHECK-NEXT:    vse8.v v9, (a1), v0.t
+; CHECK-NEXT:    vse8.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
-  %m = load <16 x i8>, ptr %m_ptr
-  %mask = icmp eq <16 x i8> %m, zeroinitializer
-  %val = load <16 x i8>, ptr %val_ptr
   call void @llvm.masked.store.v16i8.p0(<16 x i8> %val, ptr %a, i32 8, <16 x i1> %mask)
   ret void
 }
-declare void @llvm.masked.store.v16i8.p0(<16 x i8>, ptr, i32, <16 x i1>)
 
-define void @masked_store_v16i16(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
+define void @masked_store_v16i16(<16 x i16> %val, ptr %a, <16 x i1> %mask) {
 ; CHECK-LABEL: masked_store_v16i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT:    vle16.v v8, (a2)
-; CHECK-NEXT:    vle16.v v10, (a0)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
-; CHECK-NEXT:    vse16.v v10, (a1), v0.t
+; CHECK-NEXT:    vse16.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
-  %m = load <16 x i16>, ptr %m_ptr
-  %mask = icmp eq <16 x i16> %m, zeroinitializer
-  %val = load <16 x i16>, ptr %val_ptr
   call void @llvm.masked.store.v16i16.p0(<16 x i16> %val, ptr %a, i32 8, <16 x i1> %mask)
   ret void
 }
-declare void @llvm.masked.store.v16i16.p0(<16 x i16>, ptr, i32, <16 x i1>)
 
-define void @masked_store_v16i32(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
+define void @masked_store_v16i32(<16 x i32> %val, ptr %a, <16 x i1> %mask) {
 ; CHECK-LABEL: masked_store_v16i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a2)
-; CHECK-NEXT:    vle32.v v12, (a0)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
-; CHECK-NEXT:    vse32.v v12, (a1), v0.t
+; CHECK-NEXT:    vse32.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
-  %m = load <16 x i32>, ptr %m_ptr
-  %mask = icmp eq <16 x i32> %m, zeroinitializer
-  %val = load <16 x i32>, ptr %val_ptr
   call void @llvm.masked.store.v16i32.p0(<16 x i32> %val, ptr %a, i32 8, <16 x i1> %mask)
   ret void
 }
-declare void @llvm.masked.store.v16i32.p0(<16 x i32>, ptr, i32, <16 x i1>)
 
-define void @masked_store_v16i64(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
+define void @masked_store_v16i64(<16 x i64> %val, ptr %a, <16 x i1> %mask) {
 ; CHECK-LABEL: masked_store_v16i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a2)
-; CHECK-NEXT:    vle64.v v16, (a0)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
-; CHECK-NEXT:    vse64.v v16, (a1), v0.t
+; CHECK-NEXT:    vse64.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
-  %m = load <16 x i64>, ptr %m_ptr
-  %mask = icmp eq <16 x i64> %m, zeroinitializer
-  %val = load <16 x i64>, ptr %val_ptr
   call void @llvm.masked.store.v16i64.p0(<16 x i64> %val, ptr %a, i32 8, <16 x i1> %mask)
   ret void
 }
-declare void @llvm.masked.store.v16i64.p0(<16 x i64>, ptr, i32, <16 x i1>)
 
-define void @masked_store_v32i8(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
+define void @masked_store_v32i8(<32 x i8> %val, ptr %a, <32 x i1> %mask) {
 ; CHECK-LABEL: masked_store_v32i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a3, 32
-; CHECK-NEXT:    vsetvli zero, a3, e8, m2, ta, ma
-; CHECK-NEXT:    vle8.v v8, (a2)
-; CHECK-NEXT:    vle8.v v10, (a0)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
-; CHECK-NEXT:    vse8.v v10, (a1), v0.t
+; CHECK-NEXT:    li a1, 32
+; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-NEXT:    vse8.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
-  %m = load <32 x i8>, ptr %m_ptr
-  %mask = icmp eq <32 x i8> %m, zeroinitializer
-  %val = load <32 x i8>, ptr %val_ptr
   call void @llvm.masked.store.v32i8.p0(<32 x i8> %val, ptr %a, i32 8, <32 x i1> %mask)
   ret void
 }
-declare void @llvm.masked.store.v32i8.p0(<32 x i8>, ptr, i32, <32 x i1>)
 
-define void @masked_store_v32i16(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
+define void @masked_store_v32i16(<32 x i16> %val, ptr %a, <32 x i1> %mask) {
 ; CHECK-LABEL: masked_store_v32i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a3, 32
-; CHECK-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
-; CHECK-NEXT:    vle16.v v8, (a2)
-; CHECK-NEXT:    vle16.v v12, (a0)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
-; CHECK-NEXT:    vse16.v v12, (a1), v0.t
+; CHECK-NEXT:    li a1, 32
+; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
-  %m = load <32 x i16>, ptr %m_ptr
-  %mask = icmp eq <32 x i16> %m, zeroinitializer
-  %val = load <32 x i16>, ptr %val_ptr
   call void @llvm.masked.store.v32i16.p0(<32 x i16> %val, ptr %a, i32 8, <32 x i1> %mask)
   ret void
 }
-declare void @llvm.masked.store.v32i16.p0(<32 x i16>, ptr, i32, <32 x i1>)
 
-define void @masked_store_v32i32(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
+define void @masked_store_v32i32(<32 x i32> %val, ptr %a, <32 x i1> %mask) {
 ; CHECK-LABEL: masked_store_v32i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a3, 32
-; CHECK-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a2)
-; CHECK-NEXT:    vle32.v v16, (a0)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
-; CHECK-NEXT:    vse32.v v16, (a1), v0.t
+; CHECK-NEXT:    li a1, 32
+; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT:    vse32.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
-  %m = load <32 x i32>, ptr %m_ptr
-  %mask = icmp eq <32 x i32> %m, zeroinitializer
-  %val = load <32 x i32>, ptr %val_ptr
   call void @llvm.masked.store.v32i32.p0(<32 x i32> %val, ptr %a, i32 8, <32 x i1> %mask)
   ret void
 }
-declare void @llvm.masked.store.v32i32.p0(<32 x i32>, ptr, i32, <32 x i1>)
 
-define void @masked_store_v32i64(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
+define void @masked_store_v32i64(<32 x i64> %val, ptr %a, <32 x i1> %mask) {
 ; CHECK-LABEL: masked_store_v32i64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    csrr a3, vlenb
-; CHECK-NEXT:    slli a3, a3, 4
-; CHECK-NEXT:    sub sp, sp, a3
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a2)
-; CHECK-NEXT:    addi a2, a2, 128
-; CHECK-NEXT:    vle64.v v16, (a2)
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    slli a2, a2, 3
-; CHECK-NEXT:    add a2, sp, a2
-; CHECK-NEXT:    addi a2, a2, 16
-; CHECK-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
-; CHECK-NEXT:    vle64.v v24, (a0)
-; CHECK-NEXT:    addi a0, a0, 128
-; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vmseq.vi v8, v16, 0
-; CHECK-NEXT:    vse64.v v24, (a1), v0.t
-; CHECK-NEXT:    addi a0, a1, 128
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    addi a1, sp, 16
-; CHECK-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vse64.v v8, (a0), v0.t
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
+; CHECK-NEXT:    vslidedown.vi v0, v0, 2
+; CHECK-NEXT:    addi a0, a0, 128
+; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; CHECK-NEXT:    vse64.v v16, (a0), v0.t
 ; CHECK-NEXT:    ret
-  %m = load <32 x i64>, ptr %m_ptr
-  %mask = icmp eq <32 x i64> %m, zeroinitializer
-  %val = load <32 x i64>, ptr %val_ptr
   call void @llvm.masked.store.v32i64.p0(<32 x i64> %val, ptr %a, i32 8, <32 x i1> %mask)
   ret void
 }
-declare void @llvm.masked.store.v32i64.p0(<32 x i64>, ptr, i32, <32 x i1>)
 
-define void @masked_store_v64i8(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
+define void @masked_store_v64i8(<64 x i8> %val, ptr %a, <64 x i1> %mask) {
 ; CHECK-LABEL: masked_store_v64i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a3, 64
-; CHECK-NEXT:    vsetvli zero, a3, e8, m4, ta, ma
-; CHECK-NEXT:    vle8.v v8, (a2)
-; CHECK-NEXT:    vle8.v v12, (a0)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
-; CHECK-NEXT:    vse8.v v12, (a1), v0.t
+; CHECK-NEXT:    li a1, 64
+; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-NEXT:    vse8.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
-  %m = load <64 x i8>, ptr %m_ptr
-  %mask = icmp eq <64 x i8> %m, zeroinitializer
-  %val = load <64 x i8>, ptr %val_ptr
   call void @llvm.masked.store.v64i8.p0(<64 x i8> %val, ptr %a, i32 8, <64 x i1> %mask)
   ret void
 }
-declare void @llvm.masked.store.v64i8.p0(<64 x i8>, ptr, i32, <64 x i1>)
 
-define void @masked_store_v64i16(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
+define void @masked_store_v64i16(<64 x i16> %val, ptr %a, <64 x i1> %mask) {
 ; CHECK-LABEL: masked_store_v64i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a3, 64
-; CHECK-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
-; CHECK-NEXT:    vle16.v v8, (a2)
-; CHECK-NEXT:    vle16.v v16, (a0)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
-; CHECK-NEXT:    vse16.v v16, (a1), v0.t
+; CHECK-NEXT:    li a1, 64
+; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
-  %m = load <64 x i16>, ptr %m_ptr
-  %mask = icmp eq <64 x i16> %m, zeroinitializer
-  %val = load <64 x i16>, ptr %val_ptr
   call void @llvm.masked.store.v64i16.p0(<64 x i16> %val, ptr %a, i32 8, <64 x i1> %mask)
   ret void
 }
-declare void @llvm.masked.store.v64i16.p0(<64 x i16>, ptr, i32, <64 x i1>)
 
-define void @masked_store_v64i32(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
+define void @masked_store_v64i32(<64 x i32> %val, ptr %a, <64 x i1> %mask) {
 ; CHECK-LABEL: masked_store_v64i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    csrr a3, vlenb
-; CHECK-NEXT:    slli a3, a3, 4
-; CHECK-NEXT:    sub sp, sp, a3
-; CHECK-NEXT:    li a3, 32
-; CHECK-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a2)
-; CHECK-NEXT:    addi a2, a2, 128
-; CHECK-NEXT:    vle32.v v16, (a2)
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    slli a2, a2, 3
-; CHECK-NEXT:    add a2, sp, a2
-; CHECK-NEXT:    addi a2, a2, 16
-; CHECK-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
-; CHECK-NEXT:    vle32.v v24, (a0)
-; CHECK-NEXT:    addi a0, a0, 128
-; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vmseq.vi v8, v16, 0
-; CHECK-NEXT:    vse32.v v24, (a1), v0.t
-; CHECK-NEXT:    addi a0, a1, 128
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    addi a1, sp, 16
-; CHECK-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT:    li a1, 32
+; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
 ; CHECK-NEXT:    vse32.v v8, (a0), v0.t
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
+; CHECK-NEXT:    vslidedown.vi v0, v0, 4
+; CHECK-NEXT:    addi a0, a0, 128
+; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT:    vse32.v v16, (a0), v0.t
 ; CHECK-NEXT:    ret
-  %m = load <64 x i32>, ptr %m_ptr
-  %mask = icmp eq <64 x i32> %m, zeroinitializer
-  %val = load <64 x i32>, ptr %val_ptr
   call void @llvm.masked.store.v64i32.p0(<64 x i32> %val, ptr %a, i32 8, <64 x i1> %mask)
   ret void
 }
-declare void @llvm.masked.store.v64i32.p0(<64 x i32>, ptr, i32, <64 x i1>)
 
-define void @masked_store_v128i8(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
+define void @masked_store_v128i8(<128 x i8> %val, ptr %a, <128 x i1> %mask) {
 ; CHECK-LABEL: masked_store_v128i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a3, 128
-; CHECK-NEXT:    vsetvli zero, a3, e8, m8, ta, ma
-; CHECK-NEXT:    vle8.v v8, (a2)
-; CHECK-NEXT:    vle8.v v16, (a0)
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
-; CHECK-NEXT:    vse8.v v16, (a1), v0.t
+; CHECK-NEXT:    li a1, 128
+; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT:    vse8.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
-  %m = load <128 x i8>, ptr %m_ptr
-  %mask = icmp eq <128 x i8> %m, zeroinitializer
-  %val = load <128 x i8>, ptr %val_ptr
   call void @llvm.masked.store.v128i8.p0(<128 x i8> %val, ptr %a, i32 8, <128 x i1> %mask)
   ret void
 }
-declare void @llvm.masked.store.v128i8.p0(<128 x i8>, ptr, i32, <128 x i1>)
 
-define void @masked_store_v128i16(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
+define void @masked_store_v128i16(<128 x i16> %val, ptr %a, <128 x i1> %mask) {
 ; CHECK-LABEL: masked_store_v128i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    csrr a3, vlenb
-; CHECK-NEXT:    slli a3, a3, 4
-; CHECK-NEXT:    sub sp, sp, a3
-; CHECK-NEXT:    li a3, 64
-; CHECK-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
-; CHECK-NEXT:    vle16.v v8, (a2)
-; CHECK-NEXT:    addi a2, a2, 128
-; CHECK-NEXT:    vle16.v v16, (a2)
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    slli a2, a2, 3
-; CHECK-NEXT:    add a2, sp, a2
-; CHECK-NEXT:    addi a2, a2, 16
-; CHECK-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
-; CHECK-NEXT:    vle16.v v24, (a0)
-; CHECK-NEXT:    addi a0, a0, 128
-; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vmseq.vi v8, v16, 0
-; CHECK-NEXT:    vse16.v v24, (a1), v0.t
-; CHECK-NEXT:    addi a0, a1, 128
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    addi a1, sp, 16
-; CHECK-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT:    li a1, 64
+; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
 ; CHECK-NEXT:    vse16.v v8, (a0), v0.t
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
+; CHECK-NEXT:    vslidedown.vi v0, v0, 8
+; CHECK-NEXT:    addi a0, a0, 128
+; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT:    vse16.v v16, (a0), v0.t
 ; CHECK-NEXT:    ret
-  %m = load <128 x i16>, ptr %m_ptr
-  %mask = icmp eq <128 x i16> %m, zeroinitializer
-  %val = load <128 x i16>, ptr %val_ptr
   call void @llvm.masked.store.v128i16.p0(<128 x i16> %val, ptr %a, i32 8, <128 x i1> %mask)
   ret void
 }
-declare void @llvm.masked.store.v128i16.p0(<128 x i16>, ptr, i32, <128 x i1>)
 
-define void @masked_store_v256i8(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind {
+define void @masked_store_v256i8(<256 x i8> %val, ptr %a, <256 x i1> %mask) {
 ; CHECK-LABEL: masked_store_v256i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    csrr a3, vlenb
-; CHECK-NEXT:    slli a3, a3, 4
-; CHECK-NEXT:    sub sp, sp, a3
-; CHECK-NEXT:    li a3, 128
-; CHECK-NEXT:    vsetvli zero, a3, e8, m8, ta, ma
-; CHECK-NEXT:    vle8.v v8, (a2)
-; CHECK-NEXT:    addi a2, a2, 128
-; CHECK-NEXT:    vle8.v v16, (a2)
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    slli a2, a2, 3
-; CHECK-NEXT:    add a2, sp, a2
-; CHECK-NEXT:    addi a2, a2, 16
-; CHECK-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
-; CHECK-NEXT:    vmseq.vi v0, v8, 0
-; CHECK-NEXT:    vle8.v v24, (a0)
-; CHECK-NEXT:    addi a0, a0, 128
-; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, sp, a0
-; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT:    vmseq.vi v8, v16, 0
-; CHECK-NEXT:    vse8.v v24, (a1), v0.t
-; CHECK-NEXT:    addi a0, a1, 128
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    addi a1, sp, 16
-; CHECK-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT:    li a2, 128
+; CHECK-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-NEXT:    vlm.v v24, (a1)
 ; CHECK-NEXT:    vse8.v v8, (a0), v0.t
-; CHECK-NEXT:    csrr a0, vlenb
-; CHECK-NEXT:    slli a0, a0, 4
-; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    addi a0, a0, 128
+; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vse8.v v16, (a0), v0.t
 ; CHECK-NEXT:    ret
-  %m = load <256 x i8>, ptr %m_ptr
-  %mask = icmp eq <256 x i8> %m, zeroinitializer
-  %val = load <256 x i8>, ptr %val_ptr
   call void @llvm.masked.store.v256i8.p0(<256 x i8> %val, ptr %a, i32 8, <256 x i1> %mask)
   ret void
 }
-declare void @llvm.masked.store.v256i8.p0(<256 x i8>, ptr, i32, <256 x i1>)
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; RV32: {{.*}}
-; RV64: {{.*}}
+