[llvm] [RISCV] Lower mgather/mscatter for zvfhmin/zvfbfmin (PR #114945)
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 4 23:31:12 PST 2024
https://github.com/lukel97 created https://github.com/llvm/llvm-project/pull/114945
In preparation for allowing zvfhmin and zvfbfmin in isLegalElementTypeForRVV, this patch lowers masked gathers and scatters for f16 and bf16 vectors.
We need to mark f16 and bf16 as legal in isLegalMaskedGatherScatter, otherwise ScalarizeMaskedMemIntrin will just scalarize the intrinsics; we can fold this back into isLegalElementTypeForRVV afterwards.
The scalarized codegen required #114938, #114927 and #114915 to avoid crashing.
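
For context, a minimal sketch of the legality rule described above (not the PR's actual diff): the subtarget accessors and the exact shape of the existing element-type check are assumptions for illustration and may not match the real RISCVTTIImpl/RISCVSubtarget code.

  // Sketch only: f16/bf16 gathers/scatters just move 16-bit lanes, so the
  // "min" vector FP extensions are enough; no FP arithmetic is performed.
  // Alignment handling elided for brevity.
  bool RISCVTTIImpl::isLegalMaskedGatherScatter(Type *DataType, Align Alignment) {
    Type *Elt = DataType->getScalarType();
    if (Elt->isHalfTy())
      return ST->hasVInstructionsF16Minimal();   // assumed accessor name
    if (Elt->isBFloatTy())
      return ST->hasVInstructionsBF16Minimal();  // assumed accessor name
    // Everything else keeps going through the existing check; once zvfhmin
    // and zvfbfmin are accepted there, the two cases above can be dropped.
    return isLegalElementTypeForRVV(Elt);        // assumed existing helper
  }

Without this, ScalarizeMaskedMemIntrin expands the intrinsic into the per-element branch-and-load sequences visible in the precommitted tests below.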
From fde526cff82551807369a18c12e8cbe17030ca1f Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Tue, 5 Nov 2024 15:19:02 +0800
Subject: [PATCH 1/2] Precommit tests
---
.../RISCV/rvv/fixed-vectors-masked-gather.ll | 7708 +++++++++++---
.../RISCV/rvv/fixed-vectors-masked-scatter.ll | 8925 +++++++++++++----
2 files changed, 13591 insertions(+), 3042 deletions(-)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
index 5802f45d311b37..2d7e1bf314f7ac 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -1,12 +1,21 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zvfh,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32V
-; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfh,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64V
-; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zvfh,+zve32f,+zvl128b -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZVE32F
-; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfh,+zve32f,+zvl128b -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVE32F
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32V,RV32V-ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64V,RV64V-ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32V,RV32V-ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64V,RV64V-ZVFHMIN
+
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zvfh,+zfbfmin,+zvfbfmin,+zve32f,+zvl128b -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZVE32F,RV32ZVE32F-ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfh,+zfbfmin,+zvfbfmin,+zve32f,+zvl128b -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVE32F,RV64ZVE32F-ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+zve32f,+zvl128b -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZVE32F,RV32ZVE32F-ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+zve32f,+zvl128b -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVE32F,RV64ZVE32F-ZVFHMIN
declare <1 x i8> @llvm.masked.gather.v1i8.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x i8>)
@@ -6874,31 +6883,52 @@ define <8 x i64> @mgather_baseidx_v8i64(ptr %base, <8 x i64> %idxs, <8 x i1> %m,
ret <8 x i64> %v
}
-declare <1 x half> @llvm.masked.gather.v1f16.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x half>)
+declare <1 x bfloat> @llvm.masked.gather.v1bf16.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x bfloat>)
-define <1 x half> @mgather_v1f16(<1 x ptr> %ptrs, <1 x i1> %m, <1 x half> %passthru) {
-; RV32V-LABEL: mgather_v1f16:
+define <1 x bfloat> @mgather_v1bf16(<1 x ptr> %ptrs, <1 x i1> %m, <1 x bfloat> %passthru) {
+; RV32V-LABEL: mgather_v1bf16:
; RV32V: # %bb.0:
-; RV32V-NEXT: vsetivli zero, 1, e16, mf4, ta, mu
-; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
+; RV32V-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
+; RV32V-NEXT: vfirst.m a0, v0
+; RV32V-NEXT: bnez a0, .LBB58_2
+; RV32V-NEXT: # %bb.1: # %cond.load
+; RV32V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; RV32V-NEXT: vmv.x.s a0, v8
+; RV32V-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; RV32V-NEXT: vle16.v v9, (a0)
+; RV32V-NEXT: .LBB58_2: # %else
; RV32V-NEXT: vmv1r.v v8, v9
; RV32V-NEXT: ret
;
-; RV64V-LABEL: mgather_v1f16:
+; RV64V-LABEL: mgather_v1bf16:
; RV64V: # %bb.0:
-; RV64V-NEXT: vsetivli zero, 1, e16, mf4, ta, mu
-; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
+; RV64V-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
+; RV64V-NEXT: vfirst.m a0, v0
+; RV64V-NEXT: bnez a0, .LBB58_2
+; RV64V-NEXT: # %bb.1: # %cond.load
+; RV64V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
+; RV64V-NEXT: vmv.x.s a0, v8
+; RV64V-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; RV64V-NEXT: vle16.v v9, (a0)
+; RV64V-NEXT: .LBB58_2: # %else
; RV64V-NEXT: vmv1r.v v8, v9
; RV64V-NEXT: ret
;
-; RV32ZVE32F-LABEL: mgather_v1f16:
+; RV32ZVE32F-LABEL: mgather_v1bf16:
; RV32ZVE32F: # %bb.0:
-; RV32ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu
-; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
+; RV32ZVE32F-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
+; RV32ZVE32F-NEXT: vfirst.m a0, v0
+; RV32ZVE32F-NEXT: bnez a0, .LBB58_2
+; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
+; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV32ZVE32F-NEXT: vmv.x.s a0, v8
+; RV32ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV32ZVE32F-NEXT: vle16.v v9, (a0)
+; RV32ZVE32F-NEXT: .LBB58_2: # %else
; RV32ZVE32F-NEXT: vmv1r.v v8, v9
; RV32ZVE32F-NEXT: ret
;
-; RV64ZVE32F-LABEL: mgather_v1f16:
+; RV64ZVE32F-LABEL: mgather_v1bf16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vfirst.m a1, v0
@@ -6908,35 +6938,107 @@ define <1 x half> @mgather_v1f16(<1 x ptr> %ptrs, <1 x i1> %m, <1 x half> %passt
; RV64ZVE32F-NEXT: vle16.v v8, (a0)
; RV64ZVE32F-NEXT: .LBB58_2: # %else
; RV64ZVE32F-NEXT: ret
- %v = call <1 x half> @llvm.masked.gather.v1f16.v1p0(<1 x ptr> %ptrs, i32 2, <1 x i1> %m, <1 x half> %passthru)
- ret <1 x half> %v
+ %v = call <1 x bfloat> @llvm.masked.gather.v1bf16.v1p0(<1 x ptr> %ptrs, i32 2, <1 x i1> %m, <1 x bfloat> %passthru)
+ ret <1 x bfloat> %v
}
-declare <2 x half> @llvm.masked.gather.v2f16.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x half>)
+declare <2 x bfloat> @llvm.masked.gather.v2bf16.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x bfloat>)
-define <2 x half> @mgather_v2f16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x half> %passthru) {
-; RV32V-LABEL: mgather_v2f16:
+define <2 x bfloat> @mgather_v2bf16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x bfloat> %passthru) {
+; RV32V-LABEL: mgather_v2bf16:
; RV32V: # %bb.0:
-; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
-; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
+; RV32V-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32V-NEXT: vmv.x.s a0, v0
+; RV32V-NEXT: andi a1, a0, 1
+; RV32V-NEXT: bnez a1, .LBB59_3
+; RV32V-NEXT: # %bb.1: # %else
+; RV32V-NEXT: andi a0, a0, 2
+; RV32V-NEXT: bnez a0, .LBB59_4
+; RV32V-NEXT: .LBB59_2: # %else2
+; RV32V-NEXT: vmv1r.v v8, v9
+; RV32V-NEXT: ret
+; RV32V-NEXT: .LBB59_3: # %cond.load
+; RV32V-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; RV32V-NEXT: vmv.x.s a1, v8
+; RV32V-NEXT: lh a1, 0(a1)
+; RV32V-NEXT: vsetvli zero, zero, e16, m2, tu, ma
+; RV32V-NEXT: vmv.s.x v9, a1
+; RV32V-NEXT: andi a0, a0, 2
+; RV32V-NEXT: beqz a0, .LBB59_2
+; RV32V-NEXT: .LBB59_4: # %cond.load1
+; RV32V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV32V-NEXT: vslidedown.vi v8, v8, 1
+; RV32V-NEXT: vmv.x.s a0, v8
+; RV32V-NEXT: lh a0, 0(a0)
+; RV32V-NEXT: vmv.s.x v8, a0
+; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV32V-NEXT: vslideup.vi v9, v8, 1
; RV32V-NEXT: vmv1r.v v8, v9
; RV32V-NEXT: ret
;
-; RV64V-LABEL: mgather_v2f16:
+; RV64V-LABEL: mgather_v2bf16:
; RV64V: # %bb.0:
-; RV64V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
-; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
+; RV64V-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64V-NEXT: vmv.x.s a0, v0
+; RV64V-NEXT: andi a1, a0, 1
+; RV64V-NEXT: bnez a1, .LBB59_3
+; RV64V-NEXT: # %bb.1: # %else
+; RV64V-NEXT: andi a0, a0, 2
+; RV64V-NEXT: bnez a0, .LBB59_4
+; RV64V-NEXT: .LBB59_2: # %else2
+; RV64V-NEXT: vmv1r.v v8, v9
+; RV64V-NEXT: ret
+; RV64V-NEXT: .LBB59_3: # %cond.load
+; RV64V-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; RV64V-NEXT: vmv.x.s a1, v8
+; RV64V-NEXT: lh a1, 0(a1)
+; RV64V-NEXT: vsetvli zero, zero, e16, m2, tu, ma
+; RV64V-NEXT: vmv.s.x v9, a1
+; RV64V-NEXT: andi a0, a0, 2
+; RV64V-NEXT: beqz a0, .LBB59_2
+; RV64V-NEXT: .LBB59_4: # %cond.load1
+; RV64V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64V-NEXT: vslidedown.vi v8, v8, 1
+; RV64V-NEXT: vmv.x.s a0, v8
+; RV64V-NEXT: lh a0, 0(a0)
+; RV64V-NEXT: vmv.s.x v8, a0
+; RV64V-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV64V-NEXT: vslideup.vi v9, v8, 1
; RV64V-NEXT: vmv1r.v v8, v9
; RV64V-NEXT: ret
;
-; RV32ZVE32F-LABEL: mgather_v2f16:
+; RV32ZVE32F-LABEL: mgather_v2bf16:
; RV32ZVE32F: # %bb.0:
-; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu
-; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
+; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32ZVE32F-NEXT: vmv.x.s a0, v0
+; RV32ZVE32F-NEXT: andi a1, a0, 1
+; RV32ZVE32F-NEXT: bnez a1, .LBB59_3
+; RV32ZVE32F-NEXT: # %bb.1: # %else
+; RV32ZVE32F-NEXT: andi a0, a0, 2
+; RV32ZVE32F-NEXT: bnez a0, .LBB59_4
+; RV32ZVE32F-NEXT: .LBB59_2: # %else2
+; RV32ZVE32F-NEXT: vmv1r.v v8, v9
+; RV32ZVE32F-NEXT: ret
+; RV32ZVE32F-NEXT: .LBB59_3: # %cond.load
+; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; RV32ZVE32F-NEXT: vmv.x.s a1, v8
+; RV32ZVE32F-NEXT: lh a1, 0(a1)
+; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma
+; RV32ZVE32F-NEXT: vmv.s.x v9, a1
+; RV32ZVE32F-NEXT: andi a0, a0, 2
+; RV32ZVE32F-NEXT: beqz a0, .LBB59_2
+; RV32ZVE32F-NEXT: .LBB59_4: # %cond.load1
+; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 1
+; RV32ZVE32F-NEXT: vmv.x.s a0, v8
+; RV32ZVE32F-NEXT: lh a0, 0(a0)
+; RV32ZVE32F-NEXT: vmv.s.x v8, a0
+; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
+; RV32ZVE32F-NEXT: vslideup.vi v9, v8, 1
; RV32ZVE32F-NEXT: vmv1r.v v8, v9
; RV32ZVE32F-NEXT: ret
;
-; RV64ZVE32F-LABEL: mgather_v2f16:
+; RV64ZVE32F-LABEL: mgather_v2bf16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
@@ -6948,40 +7050,140 @@ define <2 x half> @mgather_v2f16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x half> %passt
; RV64ZVE32F-NEXT: .LBB59_2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB59_3: # %cond.load
-; RV64ZVE32F-NEXT: flh fa5, 0(a0)
+; RV64ZVE32F-NEXT: lh a0, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB59_2
; RV64ZVE32F-NEXT: .LBB59_4: # %cond.load1
-; RV64ZVE32F-NEXT: flh fa5, 0(a1)
+; RV64ZVE32F-NEXT: lh a0, 0(a1)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
+; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: ret
- %v = call <2 x half> @llvm.masked.gather.v2f16.v2p0(<2 x ptr> %ptrs, i32 2, <2 x i1> %m, <2 x half> %passthru)
- ret <2 x half> %v
+ %v = call <2 x bfloat> @llvm.masked.gather.v2bf16.v2p0(<2 x ptr> %ptrs, i32 2, <2 x i1> %m, <2 x bfloat> %passthru)
+ ret <2 x bfloat> %v
}
-declare <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x half>)
+declare <4 x bfloat> @llvm.masked.gather.v4bf16.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x bfloat>)
-define <4 x half> @mgather_v4f16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x half> %passthru) {
-; RV32-LABEL: mgather_v4f16:
+define <4 x bfloat> @mgather_v4bf16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x bfloat> %passthru) {
+; RV32-LABEL: mgather_v4bf16:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
-; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vmv.x.s a0, v0
+; RV32-NEXT: andi a1, a0, 1
+; RV32-NEXT: bnez a1, .LBB60_5
+; RV32-NEXT: # %bb.1: # %else
+; RV32-NEXT: andi a1, a0, 2
+; RV32-NEXT: bnez a1, .LBB60_6
+; RV32-NEXT: .LBB60_2: # %else2
+; RV32-NEXT: andi a1, a0, 4
+; RV32-NEXT: bnez a1, .LBB60_7
+; RV32-NEXT: .LBB60_3: # %else5
+; RV32-NEXT: andi a0, a0, 8
+; RV32-NEXT: bnez a0, .LBB60_8
+; RV32-NEXT: .LBB60_4: # %else8
+; RV32-NEXT: vmv1r.v v8, v9
+; RV32-NEXT: ret
+; RV32-NEXT: .LBB60_5: # %cond.load
+; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.x.s a1, v8
+; RV32-NEXT: lh a1, 0(a1)
+; RV32-NEXT: vsetvli zero, zero, e16, m2, tu, ma
+; RV32-NEXT: vmv.s.x v9, a1
+; RV32-NEXT: andi a1, a0, 2
+; RV32-NEXT: beqz a1, .LBB60_2
+; RV32-NEXT: .LBB60_6: # %cond.load1
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v10, v8, 1
+; RV32-NEXT: vmv.x.s a1, v10
+; RV32-NEXT: lh a1, 0(a1)
+; RV32-NEXT: vmv.s.x v10, a1
+; RV32-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
+; RV32-NEXT: vslideup.vi v9, v10, 1
+; RV32-NEXT: andi a1, a0, 4
+; RV32-NEXT: beqz a1, .LBB60_3
+; RV32-NEXT: .LBB60_7: # %cond.load4
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v10, v8, 2
+; RV32-NEXT: vmv.x.s a1, v10
+; RV32-NEXT: lh a1, 0(a1)
+; RV32-NEXT: vmv.s.x v10, a1
+; RV32-NEXT: vsetivli zero, 3, e16, mf2, tu, ma
+; RV32-NEXT: vslideup.vi v9, v10, 2
+; RV32-NEXT: andi a0, a0, 8
+; RV32-NEXT: beqz a0, .LBB60_4
+; RV32-NEXT: .LBB60_8: # %cond.load7
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v8, 3
+; RV32-NEXT: vmv.x.s a0, v8
+; RV32-NEXT: lh a0, 0(a0)
+; RV32-NEXT: vmv.s.x v8, a0
+; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV32-NEXT: vslideup.vi v9, v8, 3
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
-; RV64V-LABEL: mgather_v4f16:
+; RV64V-LABEL: mgather_v4bf16:
; RV64V: # %bb.0:
-; RV64V-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
-; RV64V-NEXT: vluxei64.v v10, (zero), v8, v0.t
+; RV64V-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64V-NEXT: vmv.x.s a0, v0
+; RV64V-NEXT: andi a1, a0, 1
+; RV64V-NEXT: bnez a1, .LBB60_5
+; RV64V-NEXT: # %bb.1: # %else
+; RV64V-NEXT: andi a1, a0, 2
+; RV64V-NEXT: bnez a1, .LBB60_6
+; RV64V-NEXT: .LBB60_2: # %else2
+; RV64V-NEXT: andi a1, a0, 4
+; RV64V-NEXT: bnez a1, .LBB60_7
+; RV64V-NEXT: .LBB60_3: # %else5
+; RV64V-NEXT: andi a0, a0, 8
+; RV64V-NEXT: bnez a0, .LBB60_8
+; RV64V-NEXT: .LBB60_4: # %else8
+; RV64V-NEXT: vmv1r.v v8, v10
+; RV64V-NEXT: ret
+; RV64V-NEXT: .LBB60_5: # %cond.load
+; RV64V-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; RV64V-NEXT: vmv.x.s a1, v8
+; RV64V-NEXT: lh a1, 0(a1)
+; RV64V-NEXT: vsetvli zero, zero, e16, m2, tu, ma
+; RV64V-NEXT: vmv.s.x v10, a1
+; RV64V-NEXT: andi a1, a0, 2
+; RV64V-NEXT: beqz a1, .LBB60_2
+; RV64V-NEXT: .LBB60_6: # %cond.load1
+; RV64V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64V-NEXT: vslidedown.vi v11, v8, 1
+; RV64V-NEXT: vmv.x.s a1, v11
+; RV64V-NEXT: lh a1, 0(a1)
+; RV64V-NEXT: vmv.s.x v11, a1
+; RV64V-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
+; RV64V-NEXT: vslideup.vi v10, v11, 1
+; RV64V-NEXT: andi a1, a0, 4
+; RV64V-NEXT: beqz a1, .LBB60_3
+; RV64V-NEXT: .LBB60_7: # %cond.load4
+; RV64V-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64V-NEXT: vslidedown.vi v12, v8, 2
+; RV64V-NEXT: vmv.x.s a1, v12
+; RV64V-NEXT: lh a1, 0(a1)
+; RV64V-NEXT: vmv.s.x v11, a1
+; RV64V-NEXT: vsetivli zero, 3, e16, mf2, tu, ma
+; RV64V-NEXT: vslideup.vi v10, v11, 2
+; RV64V-NEXT: andi a0, a0, 8
+; RV64V-NEXT: beqz a0, .LBB60_4
+; RV64V-NEXT: .LBB60_8: # %cond.load7
+; RV64V-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64V-NEXT: vslidedown.vi v8, v8, 3
+; RV64V-NEXT: vmv.x.s a0, v8
+; RV64V-NEXT: lh a0, 0(a0)
+; RV64V-NEXT: vmv.s.x v8, a0
+; RV64V-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV64V-NEXT: vslideup.vi v10, v8, 3
; RV64V-NEXT: vmv1r.v v8, v10
; RV64V-NEXT: ret
;
-; RV64ZVE32F-LABEL: mgather_v4f16:
+; RV64ZVE32F-LABEL: mgather_v4bf16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
@@ -7000,110 +7202,358 @@ define <4 x half> @mgather_v4f16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x half> %passt
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB60_5: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
-; RV64ZVE32F-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB60_2
; RV64ZVE32F-NEXT: .LBB60_6: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
-; RV64ZVE32F-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
+; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB60_3
; RV64ZVE32F-NEXT: .LBB60_7: # %cond.load4
; RV64ZVE32F-NEXT: ld a2, 16(a0)
-; RV64ZVE32F-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, mf2, tu, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
+; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: beqz a1, .LBB60_4
; RV64ZVE32F-NEXT: .LBB60_8: # %cond.load7
; RV64ZVE32F-NEXT: ld a0, 24(a0)
-; RV64ZVE32F-NEXT: flh fa5, 0(a0)
+; RV64ZVE32F-NEXT: lh a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
+; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
; RV64ZVE32F-NEXT: ret
- %v = call <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> %m, <4 x half> %passthru)
- ret <4 x half> %v
+ %v = call <4 x bfloat> @llvm.masked.gather.v4bf16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> %m, <4 x bfloat> %passthru)
+ ret <4 x bfloat> %v
}
-define <4 x half> @mgather_truemask_v4f16(<4 x ptr> %ptrs, <4 x half> %passthru) {
-; RV32-LABEL: mgather_truemask_v4f16:
+define <4 x bfloat> @mgather_truemask_v4bf16(<4 x ptr> %ptrs, <4 x bfloat> %passthru) {
+; RV32-LABEL: mgather_truemask_v4bf16:
; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vmv.x.s a0, v8
+; RV32-NEXT: vslidedown.vi v9, v8, 1
+; RV32-NEXT: vmv.x.s a1, v9
+; RV32-NEXT: vslidedown.vi v9, v8, 2
+; RV32-NEXT: vmv.x.s a2, v9
+; RV32-NEXT: vslidedown.vi v8, v8, 3
+; RV32-NEXT: vmv.x.s a3, v8
+; RV32-NEXT: lh a0, 0(a0)
+; RV32-NEXT: lh a1, 0(a1)
+; RV32-NEXT: lh a2, 0(a2)
+; RV32-NEXT: lh a3, 0(a3)
; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT: vluxei32.v v9, (zero), v8
-; RV32-NEXT: vmv1r.v v8, v9
+; RV32-NEXT: vmv.v.x v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: vslide1down.vx v8, v8, a3
; RV32-NEXT: ret
;
-; RV64V-LABEL: mgather_truemask_v4f16:
+; RV64V-LABEL: mgather_truemask_v4bf16:
; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64V-NEXT: vmv.x.s a0, v8
+; RV64V-NEXT: vslidedown.vi v10, v8, 1
+; RV64V-NEXT: vmv.x.s a1, v10
+; RV64V-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64V-NEXT: vslidedown.vi v10, v8, 2
+; RV64V-NEXT: vmv.x.s a2, v10
+; RV64V-NEXT: vslidedown.vi v8, v8, 3
+; RV64V-NEXT: vmv.x.s a3, v8
+; RV64V-NEXT: lh a0, 0(a0)
+; RV64V-NEXT: lh a1, 0(a1)
+; RV64V-NEXT: lh a2, 0(a2)
+; RV64V-NEXT: lh a3, 0(a3)
; RV64V-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV64V-NEXT: vluxei64.v v10, (zero), v8
-; RV64V-NEXT: vmv1r.v v8, v10
+; RV64V-NEXT: vmv.v.x v8, a0
+; RV64V-NEXT: vslide1down.vx v8, v8, a1
+; RV64V-NEXT: vslide1down.vx v8, v8, a2
+; RV64V-NEXT: vslide1down.vx v8, v8, a3
; RV64V-NEXT: ret
;
-; RV64ZVE32F-LABEL: mgather_truemask_v4f16:
+; RV64ZVE32F-LABEL: mgather_truemask_v4bf16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ld a1, 0(a0)
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: ld a3, 16(a0)
; RV64ZVE32F-NEXT: ld a0, 24(a0)
-; RV64ZVE32F-NEXT: flh fa5, 0(a1)
-; RV64ZVE32F-NEXT: flh fa4, 0(a2)
-; RV64ZVE32F-NEXT: flh fa3, 0(a3)
-; RV64ZVE32F-NEXT: flh fa2, 0(a0)
+; RV64ZVE32F-NEXT: lh a1, 0(a1)
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-NEXT: lh a3, 0(a3)
+; RV64ZVE32F-NEXT: lh a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vfmv.v.f v8, fa5
-; RV64ZVE32F-NEXT: vfslide1down.vf v8, v8, fa4
-; RV64ZVE32F-NEXT: vfslide1down.vf v8, v8, fa3
-; RV64ZVE32F-NEXT: vfslide1down.vf v8, v8, fa2
+; RV64ZVE32F-NEXT: vmv.v.x v8, a1
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0
; RV64ZVE32F-NEXT: ret
- %v = call <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> splat (i1 1), <4 x half> %passthru)
- ret <4 x half> %v
+ %v = call <4 x bfloat> @llvm.masked.gather.v4bf16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> splat (i1 1), <4 x bfloat> %passthru)
+ ret <4 x bfloat> %v
}
-define <4 x half> @mgather_falsemask_v4f16(<4 x ptr> %ptrs, <4 x half> %passthru) {
-; RV32-LABEL: mgather_falsemask_v4f16:
+define <4 x bfloat> @mgather_falsemask_v4bf16(<4 x ptr> %ptrs, <4 x bfloat> %passthru) {
+; RV32-LABEL: mgather_falsemask_v4bf16:
; RV32: # %bb.0:
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
-; RV64V-LABEL: mgather_falsemask_v4f16:
+; RV64V-LABEL: mgather_falsemask_v4bf16:
; RV64V: # %bb.0:
; RV64V-NEXT: vmv1r.v v8, v10
; RV64V-NEXT: ret
;
-; RV64ZVE32F-LABEL: mgather_falsemask_v4f16:
+; RV64ZVE32F-LABEL: mgather_falsemask_v4bf16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ret
- %v = call <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> zeroinitializer, <4 x half> %passthru)
- ret <4 x half> %v
+ %v = call <4 x bfloat> @llvm.masked.gather.v4bf16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> zeroinitializer, <4 x bfloat> %passthru)
+ ret <4 x bfloat> %v
}
-declare <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x half>)
+declare <8 x bfloat> @llvm.masked.gather.v8bf16.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x bfloat>)
-define <8 x half> @mgather_v8f16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x half> %passthru) {
-; RV32-LABEL: mgather_v8f16:
+define <8 x bfloat> @mgather_v8bf16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x bfloat> %passthru) {
+; RV32-LABEL: mgather_v8bf16:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu
-; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t
-; RV32-NEXT: vmv.v.v v8, v10
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vmv.x.s a0, v0
+; RV32-NEXT: andi a1, a0, 1
+; RV32-NEXT: bnez a1, .LBB63_9
+; RV32-NEXT: # %bb.1: # %else
+; RV32-NEXT: andi a1, a0, 2
+; RV32-NEXT: bnez a1, .LBB63_10
+; RV32-NEXT: .LBB63_2: # %else2
+; RV32-NEXT: andi a1, a0, 4
+; RV32-NEXT: bnez a1, .LBB63_11
+; RV32-NEXT: .LBB63_3: # %else5
+; RV32-NEXT: andi a1, a0, 8
+; RV32-NEXT: bnez a1, .LBB63_12
+; RV32-NEXT: .LBB63_4: # %else8
+; RV32-NEXT: andi a1, a0, 16
+; RV32-NEXT: bnez a1, .LBB63_13
+; RV32-NEXT: .LBB63_5: # %else11
+; RV32-NEXT: andi a1, a0, 32
+; RV32-NEXT: bnez a1, .LBB63_14
+; RV32-NEXT: .LBB63_6: # %else14
+; RV32-NEXT: andi a1, a0, 64
+; RV32-NEXT: bnez a1, .LBB63_15
+; RV32-NEXT: .LBB63_7: # %else17
+; RV32-NEXT: andi a0, a0, -128
+; RV32-NEXT: bnez a0, .LBB63_16
+; RV32-NEXT: .LBB63_8: # %else20
+; RV32-NEXT: vmv1r.v v8, v10
+; RV32-NEXT: ret
+; RV32-NEXT: .LBB63_9: # %cond.load
+; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.x.s a1, v8
+; RV32-NEXT: lh a1, 0(a1)
+; RV32-NEXT: vsetvli zero, zero, e16, m2, tu, ma
+; RV32-NEXT: vmv.s.x v10, a1
+; RV32-NEXT: andi a1, a0, 2
+; RV32-NEXT: beqz a1, .LBB63_2
+; RV32-NEXT: .LBB63_10: # %cond.load1
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v11, v8, 1
+; RV32-NEXT: vmv.x.s a1, v11
+; RV32-NEXT: lh a1, 0(a1)
+; RV32-NEXT: vmv.s.x v11, a1
+; RV32-NEXT: vsetivli zero, 2, e16, m1, tu, ma
+; RV32-NEXT: vslideup.vi v10, v11, 1
+; RV32-NEXT: andi a1, a0, 4
+; RV32-NEXT: beqz a1, .LBB63_3
+; RV32-NEXT: .LBB63_11: # %cond.load4
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v11, v8, 2
+; RV32-NEXT: vmv.x.s a1, v11
+; RV32-NEXT: lh a1, 0(a1)
+; RV32-NEXT: vmv.s.x v11, a1
+; RV32-NEXT: vsetivli zero, 3, e16, m1, tu, ma
+; RV32-NEXT: vslideup.vi v10, v11, 2
+; RV32-NEXT: andi a1, a0, 8
+; RV32-NEXT: beqz a1, .LBB63_4
+; RV32-NEXT: .LBB63_12: # %cond.load7
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v11, v8, 3
+; RV32-NEXT: vmv.x.s a1, v11
+; RV32-NEXT: lh a1, 0(a1)
+; RV32-NEXT: vmv.s.x v11, a1
+; RV32-NEXT: vsetivli zero, 4, e16, m1, tu, ma
+; RV32-NEXT: vslideup.vi v10, v11, 3
+; RV32-NEXT: andi a1, a0, 16
+; RV32-NEXT: beqz a1, .LBB63_5
+; RV32-NEXT: .LBB63_13: # %cond.load10
+; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v12, v8, 4
+; RV32-NEXT: vmv.x.s a1, v12
+; RV32-NEXT: lh a1, 0(a1)
+; RV32-NEXT: vmv.s.x v11, a1
+; RV32-NEXT: vsetivli zero, 5, e16, m1, tu, ma
+; RV32-NEXT: vslideup.vi v10, v11, 4
+; RV32-NEXT: andi a1, a0, 32
+; RV32-NEXT: beqz a1, .LBB63_6
+; RV32-NEXT: .LBB63_14: # %cond.load13
+; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v12, v8, 5
+; RV32-NEXT: vmv.x.s a1, v12
+; RV32-NEXT: lh a1, 0(a1)
+; RV32-NEXT: vmv.s.x v11, a1
+; RV32-NEXT: vsetivli zero, 6, e16, m1, tu, ma
+; RV32-NEXT: vslideup.vi v10, v11, 5
+; RV32-NEXT: andi a1, a0, 64
+; RV32-NEXT: beqz a1, .LBB63_7
+; RV32-NEXT: .LBB63_15: # %cond.load16
+; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v12, v8, 6
+; RV32-NEXT: vmv.x.s a1, v12
+; RV32-NEXT: lh a1, 0(a1)
+; RV32-NEXT: vmv.s.x v11, a1
+; RV32-NEXT: vsetivli zero, 7, e16, m1, tu, ma
+; RV32-NEXT: vslideup.vi v10, v11, 6
+; RV32-NEXT: andi a0, a0, -128
+; RV32-NEXT: beqz a0, .LBB63_8
+; RV32-NEXT: .LBB63_16: # %cond.load19
+; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v8, 7
+; RV32-NEXT: vmv.x.s a0, v8
+; RV32-NEXT: lh a0, 0(a0)
+; RV32-NEXT: vmv.s.x v8, a0
+; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV32-NEXT: vslideup.vi v10, v8, 7
+; RV32-NEXT: vmv1r.v v8, v10
; RV32-NEXT: ret
;
-; RV64V-LABEL: mgather_v8f16:
+; RV64V-LABEL: mgather_v8bf16:
; RV64V: # %bb.0:
-; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, mu
-; RV64V-NEXT: vluxei64.v v12, (zero), v8, v0.t
-; RV64V-NEXT: vmv.v.v v8, v12
+; RV64V-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64V-NEXT: vmv.x.s a0, v0
+; RV64V-NEXT: andi a1, a0, 1
+; RV64V-NEXT: bnez a1, .LBB63_11
+; RV64V-NEXT: # %bb.1: # %else
+; RV64V-NEXT: andi a1, a0, 2
+; RV64V-NEXT: bnez a1, .LBB63_12
+; RV64V-NEXT: .LBB63_2: # %else2
+; RV64V-NEXT: andi a1, a0, 4
+; RV64V-NEXT: bnez a1, .LBB63_13
+; RV64V-NEXT: .LBB63_3: # %else5
+; RV64V-NEXT: andi a1, a0, 8
+; RV64V-NEXT: beqz a1, .LBB63_5
+; RV64V-NEXT: .LBB63_4: # %cond.load7
+; RV64V-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64V-NEXT: vslidedown.vi v14, v8, 3
+; RV64V-NEXT: vmv.x.s a1, v14
+; RV64V-NEXT: lh a1, 0(a1)
+; RV64V-NEXT: vmv.s.x v13, a1
+; RV64V-NEXT: vsetivli zero, 4, e16, m1, tu, ma
+; RV64V-NEXT: vslideup.vi v12, v13, 3
+; RV64V-NEXT: .LBB63_5: # %else8
+; RV64V-NEXT: addi sp, sp, -320
+; RV64V-NEXT: .cfi_def_cfa_offset 320
+; RV64V-NEXT: sd ra, 312(sp) # 8-byte Folded Spill
+; RV64V-NEXT: sd s0, 304(sp) # 8-byte Folded Spill
+; RV64V-NEXT: .cfi_offset ra, -8
+; RV64V-NEXT: .cfi_offset s0, -16
+; RV64V-NEXT: addi s0, sp, 320
+; RV64V-NEXT: .cfi_def_cfa s0, 0
+; RV64V-NEXT: andi sp, sp, -64
+; RV64V-NEXT: andi a1, a0, 16
+; RV64V-NEXT: bnez a1, .LBB63_14
+; RV64V-NEXT: # %bb.6: # %else11
+; RV64V-NEXT: andi a1, a0, 32
+; RV64V-NEXT: bnez a1, .LBB63_15
+; RV64V-NEXT: .LBB63_7: # %else14
+; RV64V-NEXT: andi a1, a0, 64
+; RV64V-NEXT: bnez a1, .LBB63_16
+; RV64V-NEXT: .LBB63_8: # %else17
+; RV64V-NEXT: andi a0, a0, -128
+; RV64V-NEXT: beqz a0, .LBB63_10
+; RV64V-NEXT: .LBB63_9: # %cond.load19
+; RV64V-NEXT: mv a0, sp
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vse64.v v8, (a0)
+; RV64V-NEXT: ld a0, 56(sp)
+; RV64V-NEXT: lh a0, 0(a0)
+; RV64V-NEXT: vmv.s.x v8, a0
+; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV64V-NEXT: vslideup.vi v12, v8, 7
+; RV64V-NEXT: .LBB63_10: # %else20
+; RV64V-NEXT: vmv1r.v v8, v12
+; RV64V-NEXT: addi sp, s0, -320
+; RV64V-NEXT: ld ra, 312(sp) # 8-byte Folded Reload
+; RV64V-NEXT: ld s0, 304(sp) # 8-byte Folded Reload
+; RV64V-NEXT: addi sp, sp, 320
; RV64V-NEXT: ret
-;
-; RV64ZVE32F-LABEL: mgather_v8f16:
+; RV64V-NEXT: .LBB63_11: # %cond.load
+; RV64V-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; RV64V-NEXT: vmv.x.s a1, v8
+; RV64V-NEXT: lh a1, 0(a1)
+; RV64V-NEXT: vsetvli zero, zero, e16, m2, tu, ma
+; RV64V-NEXT: vmv.s.x v12, a1
+; RV64V-NEXT: andi a1, a0, 2
+; RV64V-NEXT: beqz a1, .LBB63_2
+; RV64V-NEXT: .LBB63_12: # %cond.load1
+; RV64V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64V-NEXT: vslidedown.vi v13, v8, 1
+; RV64V-NEXT: vmv.x.s a1, v13
+; RV64V-NEXT: lh a1, 0(a1)
+; RV64V-NEXT: vmv.s.x v13, a1
+; RV64V-NEXT: vsetivli zero, 2, e16, m1, tu, ma
+; RV64V-NEXT: vslideup.vi v12, v13, 1
+; RV64V-NEXT: andi a1, a0, 4
+; RV64V-NEXT: beqz a1, .LBB63_3
+; RV64V-NEXT: .LBB63_13: # %cond.load4
+; RV64V-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64V-NEXT: vslidedown.vi v14, v8, 2
+; RV64V-NEXT: vmv.x.s a1, v14
+; RV64V-NEXT: lh a1, 0(a1)
+; RV64V-NEXT: vmv.s.x v13, a1
+; RV64V-NEXT: vsetivli zero, 3, e16, m1, tu, ma
+; RV64V-NEXT: vslideup.vi v12, v13, 2
+; RV64V-NEXT: andi a1, a0, 8
+; RV64V-NEXT: bnez a1, .LBB63_4
+; RV64V-NEXT: j .LBB63_5
+; RV64V-NEXT: .LBB63_14: # %cond.load10
+; RV64V-NEXT: addi a1, sp, 192
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vse64.v v8, (a1)
+; RV64V-NEXT: ld a1, 224(sp)
+; RV64V-NEXT: lh a1, 0(a1)
+; RV64V-NEXT: vmv.s.x v13, a1
+; RV64V-NEXT: vsetivli zero, 5, e16, m1, tu, ma
+; RV64V-NEXT: vslideup.vi v12, v13, 4
+; RV64V-NEXT: andi a1, a0, 32
+; RV64V-NEXT: beqz a1, .LBB63_7
+; RV64V-NEXT: .LBB63_15: # %cond.load13
+; RV64V-NEXT: addi a1, sp, 128
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vse64.v v8, (a1)
+; RV64V-NEXT: ld a1, 168(sp)
+; RV64V-NEXT: lh a1, 0(a1)
+; RV64V-NEXT: vmv.s.x v13, a1
+; RV64V-NEXT: vsetivli zero, 6, e16, m1, tu, ma
+; RV64V-NEXT: vslideup.vi v12, v13, 5
+; RV64V-NEXT: andi a1, a0, 64
+; RV64V-NEXT: beqz a1, .LBB63_8
+; RV64V-NEXT: .LBB63_16: # %cond.load16
+; RV64V-NEXT: addi a1, sp, 64
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vse64.v v8, (a1)
+; RV64V-NEXT: ld a1, 112(sp)
+; RV64V-NEXT: lh a1, 0(a1)
+; RV64V-NEXT: vmv.s.x v13, a1
+; RV64V-NEXT: vsetivli zero, 7, e16, m1, tu, ma
+; RV64V-NEXT: vslideup.vi v12, v13, 6
+; RV64V-NEXT: andi a0, a0, -128
+; RV64V-NEXT: bnez a0, .LBB63_9
+; RV64V-NEXT: j .LBB63_10
+;
+; RV64ZVE32F-LABEL: mgather_v8bf16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
@@ -7134,93 +7584,314 @@ define <8 x half> @mgather_v8f16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x half> %passt
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB63_9: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
-; RV64ZVE32F-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB63_2
; RV64ZVE32F-NEXT: .LBB63_10: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
-; RV64ZVE32F-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
+; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB63_3
; RV64ZVE32F-NEXT: .LBB63_11: # %cond.load4
; RV64ZVE32F-NEXT: ld a2, 16(a0)
-; RV64ZVE32F-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
+; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB63_4
; RV64ZVE32F-NEXT: .LBB63_12: # %cond.load7
; RV64ZVE32F-NEXT: ld a2, 24(a0)
-; RV64ZVE32F-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
+; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB63_5
; RV64ZVE32F-NEXT: .LBB63_13: # %cond.load10
; RV64ZVE32F-NEXT: ld a2, 32(a0)
-; RV64ZVE32F-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
+; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB63_6
; RV64ZVE32F-NEXT: .LBB63_14: # %cond.load13
; RV64ZVE32F-NEXT: ld a2, 40(a0)
-; RV64ZVE32F-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
+; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: beqz a2, .LBB63_7
; RV64ZVE32F-NEXT: .LBB63_15: # %cond.load16
; RV64ZVE32F-NEXT: ld a2, 48(a0)
-; RV64ZVE32F-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
+; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB63_8
; RV64ZVE32F-NEXT: .LBB63_16: # %cond.load19
; RV64ZVE32F-NEXT: ld a0, 56(a0)
-; RV64ZVE32F-NEXT: flh fa5, 0(a0)
+; RV64ZVE32F-NEXT: lh a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
+; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 7
; RV64ZVE32F-NEXT: ret
- %v = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x half> %passthru)
- ret <8 x half> %v
+ %v = call <8 x bfloat> @llvm.masked.gather.v8bf16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x bfloat> %passthru)
+ ret <8 x bfloat> %v
}
-define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x half> %passthru) {
-; RV32-LABEL: mgather_baseidx_v8i8_v8f16:
+define <8 x bfloat> @mgather_baseidx_v8i8_v8bf16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x bfloat> %passthru) {
+; RV32-LABEL: mgather_baseidx_v8i8_v8bf16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsext.vf4 v10, v8
; RV32-NEXT: vadd.vv v10, v10, v10
-; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t
-; RV32-NEXT: vmv.v.v v8, v9
+; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; RV32-NEXT: vmv.x.s a1, v0
+; RV32-NEXT: andi a2, a1, 1
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32-NEXT: vadd.vx v10, v10, a0
+; RV32-NEXT: bnez a2, .LBB64_9
+; RV32-NEXT: # %bb.1: # %else
+; RV32-NEXT: andi a0, a1, 2
+; RV32-NEXT: bnez a0, .LBB64_10
+; RV32-NEXT: .LBB64_2: # %else2
+; RV32-NEXT: andi a0, a1, 4
+; RV32-NEXT: bnez a0, .LBB64_11
+; RV32-NEXT: .LBB64_3: # %else5
+; RV32-NEXT: andi a0, a1, 8
+; RV32-NEXT: bnez a0, .LBB64_12
+; RV32-NEXT: .LBB64_4: # %else8
+; RV32-NEXT: andi a0, a1, 16
+; RV32-NEXT: bnez a0, .LBB64_13
+; RV32-NEXT: .LBB64_5: # %else11
+; RV32-NEXT: andi a0, a1, 32
+; RV32-NEXT: bnez a0, .LBB64_14
+; RV32-NEXT: .LBB64_6: # %else14
+; RV32-NEXT: andi a0, a1, 64
+; RV32-NEXT: bnez a0, .LBB64_15
+; RV32-NEXT: .LBB64_7: # %else17
+; RV32-NEXT: andi a0, a1, -128
+; RV32-NEXT: bnez a0, .LBB64_16
+; RV32-NEXT: .LBB64_8: # %else20
+; RV32-NEXT: vmv1r.v v8, v9
+; RV32-NEXT: ret
+; RV32-NEXT: .LBB64_9: # %cond.load
+; RV32-NEXT: vmv.x.s a0, v10
+; RV32-NEXT: lh a0, 0(a0)
+; RV32-NEXT: vsetvli zero, zero, e16, m1, tu, ma
+; RV32-NEXT: vmv.s.x v9, a0
+; RV32-NEXT: andi a0, a1, 2
+; RV32-NEXT: beqz a0, .LBB64_2
+; RV32-NEXT: .LBB64_10: # %cond.load1
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v10, 1
+; RV32-NEXT: vmv.x.s a0, v8
+; RV32-NEXT: lh a0, 0(a0)
+; RV32-NEXT: vmv.s.x v8, a0
+; RV32-NEXT: vsetivli zero, 2, e16, m1, tu, ma
+; RV32-NEXT: vslideup.vi v9, v8, 1
+; RV32-NEXT: andi a0, a1, 4
+; RV32-NEXT: beqz a0, .LBB64_3
+; RV32-NEXT: .LBB64_11: # %cond.load4
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v10, 2
+; RV32-NEXT: vmv.x.s a0, v8
+; RV32-NEXT: lh a0, 0(a0)
+; RV32-NEXT: vmv.s.x v8, a0
+; RV32-NEXT: vsetivli zero, 3, e16, m1, tu, ma
+; RV32-NEXT: vslideup.vi v9, v8, 2
+; RV32-NEXT: andi a0, a1, 8
+; RV32-NEXT: beqz a0, .LBB64_4
+; RV32-NEXT: .LBB64_12: # %cond.load7
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v10, 3
+; RV32-NEXT: vmv.x.s a0, v8
+; RV32-NEXT: lh a0, 0(a0)
+; RV32-NEXT: vmv.s.x v8, a0
+; RV32-NEXT: vsetivli zero, 4, e16, m1, tu, ma
+; RV32-NEXT: vslideup.vi v9, v8, 3
+; RV32-NEXT: andi a0, a1, 16
+; RV32-NEXT: beqz a0, .LBB64_5
+; RV32-NEXT: .LBB64_13: # %cond.load10
+; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v12, v10, 4
+; RV32-NEXT: vmv.x.s a0, v12
+; RV32-NEXT: lh a0, 0(a0)
+; RV32-NEXT: vmv.s.x v8, a0
+; RV32-NEXT: vsetivli zero, 5, e16, m1, tu, ma
+; RV32-NEXT: vslideup.vi v9, v8, 4
+; RV32-NEXT: andi a0, a1, 32
+; RV32-NEXT: beqz a0, .LBB64_6
+; RV32-NEXT: .LBB64_14: # %cond.load13
+; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v12, v10, 5
+; RV32-NEXT: vmv.x.s a0, v12
+; RV32-NEXT: lh a0, 0(a0)
+; RV32-NEXT: vmv.s.x v8, a0
+; RV32-NEXT: vsetivli zero, 6, e16, m1, tu, ma
+; RV32-NEXT: vslideup.vi v9, v8, 5
+; RV32-NEXT: andi a0, a1, 64
+; RV32-NEXT: beqz a0, .LBB64_7
+; RV32-NEXT: .LBB64_15: # %cond.load16
+; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v12, v10, 6
+; RV32-NEXT: vmv.x.s a0, v12
+; RV32-NEXT: lh a0, 0(a0)
+; RV32-NEXT: vmv.s.x v8, a0
+; RV32-NEXT: vsetivli zero, 7, e16, m1, tu, ma
+; RV32-NEXT: vslideup.vi v9, v8, 6
+; RV32-NEXT: andi a0, a1, -128
+; RV32-NEXT: beqz a0, .LBB64_8
+; RV32-NEXT: .LBB64_16: # %cond.load19
+; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v10, v10, 7
+; RV32-NEXT: vmv.x.s a0, v10
+; RV32-NEXT: lh a0, 0(a0)
+; RV32-NEXT: vmv.s.x v8, a0
+; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV32-NEXT: vslideup.vi v9, v8, 7
+; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
-; RV64V-LABEL: mgather_baseidx_v8i8_v8f16:
+; RV64V-LABEL: mgather_baseidx_v8i8_v8bf16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf8 v12, v8
; RV64V-NEXT: vadd.vv v12, v12, v12
-; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t
-; RV64V-NEXT: vmv.v.v v8, v9
+; RV64V-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; RV64V-NEXT: vmv.x.s a1, v0
+; RV64V-NEXT: andi a2, a1, 1
+; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; RV64V-NEXT: vadd.vx v12, v12, a0
+; RV64V-NEXT: bnez a2, .LBB64_11
+; RV64V-NEXT: # %bb.1: # %else
+; RV64V-NEXT: andi a0, a1, 2
+; RV64V-NEXT: bnez a0, .LBB64_12
+; RV64V-NEXT: .LBB64_2: # %else2
+; RV64V-NEXT: andi a0, a1, 4
+; RV64V-NEXT: bnez a0, .LBB64_13
+; RV64V-NEXT: .LBB64_3: # %else5
+; RV64V-NEXT: andi a0, a1, 8
+; RV64V-NEXT: beqz a0, .LBB64_5
+; RV64V-NEXT: .LBB64_4: # %cond.load7
+; RV64V-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64V-NEXT: vslidedown.vi v10, v12, 3
+; RV64V-NEXT: vmv.x.s a0, v10
+; RV64V-NEXT: lh a0, 0(a0)
+; RV64V-NEXT: vmv.s.x v8, a0
+; RV64V-NEXT: vsetivli zero, 4, e16, m1, tu, ma
+; RV64V-NEXT: vslideup.vi v9, v8, 3
+; RV64V-NEXT: .LBB64_5: # %else8
+; RV64V-NEXT: addi sp, sp, -320
+; RV64V-NEXT: .cfi_def_cfa_offset 320
+; RV64V-NEXT: sd ra, 312(sp) # 8-byte Folded Spill
+; RV64V-NEXT: sd s0, 304(sp) # 8-byte Folded Spill
+; RV64V-NEXT: .cfi_offset ra, -8
+; RV64V-NEXT: .cfi_offset s0, -16
+; RV64V-NEXT: addi s0, sp, 320
+; RV64V-NEXT: .cfi_def_cfa s0, 0
+; RV64V-NEXT: andi sp, sp, -64
+; RV64V-NEXT: andi a0, a1, 16
+; RV64V-NEXT: bnez a0, .LBB64_14
+; RV64V-NEXT: # %bb.6: # %else11
+; RV64V-NEXT: andi a0, a1, 32
+; RV64V-NEXT: bnez a0, .LBB64_15
+; RV64V-NEXT: .LBB64_7: # %else14
+; RV64V-NEXT: andi a0, a1, 64
+; RV64V-NEXT: bnez a0, .LBB64_16
+; RV64V-NEXT: .LBB64_8: # %else17
+; RV64V-NEXT: andi a0, a1, -128
+; RV64V-NEXT: beqz a0, .LBB64_10
+; RV64V-NEXT: .LBB64_9: # %cond.load19
+; RV64V-NEXT: mv a0, sp
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vse64.v v12, (a0)
+; RV64V-NEXT: ld a0, 56(sp)
+; RV64V-NEXT: lh a0, 0(a0)
+; RV64V-NEXT: vmv.s.x v8, a0
+; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV64V-NEXT: vslideup.vi v9, v8, 7
+; RV64V-NEXT: .LBB64_10: # %else20
+; RV64V-NEXT: vmv1r.v v8, v9
+; RV64V-NEXT: addi sp, s0, -320
+; RV64V-NEXT: ld ra, 312(sp) # 8-byte Folded Reload
+; RV64V-NEXT: ld s0, 304(sp) # 8-byte Folded Reload
+; RV64V-NEXT: addi sp, sp, 320
; RV64V-NEXT: ret
-;
-; RV64ZVE32F-LABEL: mgather_baseidx_v8i8_v8f16:
+; RV64V-NEXT: .LBB64_11: # %cond.load
+; RV64V-NEXT: vmv.x.s a0, v12
+; RV64V-NEXT: lh a0, 0(a0)
+; RV64V-NEXT: vsetvli zero, zero, e16, m1, tu, ma
+; RV64V-NEXT: vmv.s.x v9, a0
+; RV64V-NEXT: andi a0, a1, 2
+; RV64V-NEXT: beqz a0, .LBB64_2
+; RV64V-NEXT: .LBB64_12: # %cond.load1
+; RV64V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64V-NEXT: vslidedown.vi v8, v12, 1
+; RV64V-NEXT: vmv.x.s a0, v8
+; RV64V-NEXT: lh a0, 0(a0)
+; RV64V-NEXT: vmv.s.x v8, a0
+; RV64V-NEXT: vsetivli zero, 2, e16, m1, tu, ma
+; RV64V-NEXT: vslideup.vi v9, v8, 1
+; RV64V-NEXT: andi a0, a1, 4
+; RV64V-NEXT: beqz a0, .LBB64_3
+; RV64V-NEXT: .LBB64_13: # %cond.load4
+; RV64V-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64V-NEXT: vslidedown.vi v10, v12, 2
+; RV64V-NEXT: vmv.x.s a0, v10
+; RV64V-NEXT: lh a0, 0(a0)
+; RV64V-NEXT: vmv.s.x v8, a0
+; RV64V-NEXT: vsetivli zero, 3, e16, m1, tu, ma
+; RV64V-NEXT: vslideup.vi v9, v8, 2
+; RV64V-NEXT: andi a0, a1, 8
+; RV64V-NEXT: bnez a0, .LBB64_4
+; RV64V-NEXT: j .LBB64_5
+; RV64V-NEXT: .LBB64_14: # %cond.load10
+; RV64V-NEXT: addi a0, sp, 192
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vse64.v v12, (a0)
+; RV64V-NEXT: ld a0, 224(sp)
+; RV64V-NEXT: lh a0, 0(a0)
+; RV64V-NEXT: vmv.s.x v8, a0
+; RV64V-NEXT: vsetivli zero, 5, e16, m1, tu, ma
+; RV64V-NEXT: vslideup.vi v9, v8, 4
+; RV64V-NEXT: andi a0, a1, 32
+; RV64V-NEXT: beqz a0, .LBB64_7
+; RV64V-NEXT: .LBB64_15: # %cond.load13
+; RV64V-NEXT: addi a0, sp, 128
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vse64.v v12, (a0)
+; RV64V-NEXT: ld a0, 168(sp)
+; RV64V-NEXT: lh a0, 0(a0)
+; RV64V-NEXT: vmv.s.x v8, a0
+; RV64V-NEXT: vsetivli zero, 6, e16, m1, tu, ma
+; RV64V-NEXT: vslideup.vi v9, v8, 5
+; RV64V-NEXT: andi a0, a1, 64
+; RV64V-NEXT: beqz a0, .LBB64_8
+; RV64V-NEXT: .LBB64_16: # %cond.load16
+; RV64V-NEXT: addi a0, sp, 64
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vse64.v v12, (a0)
+; RV64V-NEXT: ld a0, 112(sp)
+; RV64V-NEXT: lh a0, 0(a0)
+; RV64V-NEXT: vmv.s.x v8, a0
+; RV64V-NEXT: vsetivli zero, 7, e16, m1, tu, ma
+; RV64V-NEXT: vslideup.vi v9, v8, 6
+; RV64V-NEXT: andi a0, a1, -128
+; RV64V-NEXT: bnez a0, .LBB64_9
+; RV64V-NEXT: j .LBB64_10
+;
+; RV64ZVE32F-LABEL: mgather_baseidx_v8i8_v8bf16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
@@ -7230,9 +7901,9 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
+; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: .LBB64_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB64_4
@@ -7242,9 +7913,9 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
+; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1
; RV64ZVE32F-NEXT: .LBB64_4: # %else2
@@ -7269,9 +7940,9 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5
; RV64ZVE32F-NEXT: .LBB64_9: # %else14
@@ -7289,9 +7960,9 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v11, fa5
+; RV64ZVE32F-NEXT: vmv.s.x v11, a2
; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
@@ -7302,9 +7973,9 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
@@ -7314,9 +7985,9 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
@@ -7326,9 +7997,9 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
+; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
@@ -7339,40 +8010,261 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: slli a1, a1, 1
; RV64ZVE32F-NEXT: add a0, a0, a1
-; RV64ZVE32F-NEXT: flh fa5, 0(a0)
+; RV64ZVE32F-NEXT: lh a0, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
- %ptrs = getelementptr inbounds half, ptr %base, <8 x i8> %idxs
- %v = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x half> %passthru)
- ret <8 x half> %v
+ %ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i8> %idxs
+ %v = call <8 x bfloat> @llvm.masked.gather.v8bf16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x bfloat> %passthru)
+ ret <8 x bfloat> %v
}
-define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x half> %passthru) {
-; RV32-LABEL: mgather_baseidx_sext_v8i8_v8f16:
+define <8 x bfloat> @mgather_baseidx_sext_v8i8_v8bf16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x bfloat> %passthru) {
+; RV32-LABEL: mgather_baseidx_sext_v8i8_v8bf16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsext.vf4 v10, v8
; RV32-NEXT: vadd.vv v10, v10, v10
-; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t
-; RV32-NEXT: vmv.v.v v8, v9
+; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; RV32-NEXT: vmv.x.s a1, v0
+; RV32-NEXT: andi a2, a1, 1
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32-NEXT: vadd.vx v10, v10, a0
+; RV32-NEXT: bnez a2, .LBB65_9
+; RV32-NEXT: # %bb.1: # %else
+; RV32-NEXT: andi a0, a1, 2
+; RV32-NEXT: bnez a0, .LBB65_10
+; RV32-NEXT: .LBB65_2: # %else2
+; RV32-NEXT: andi a0, a1, 4
+; RV32-NEXT: bnez a0, .LBB65_11
+; RV32-NEXT: .LBB65_3: # %else5
+; RV32-NEXT: andi a0, a1, 8
+; RV32-NEXT: bnez a0, .LBB65_12
+; RV32-NEXT: .LBB65_4: # %else8
+; RV32-NEXT: andi a0, a1, 16
+; RV32-NEXT: bnez a0, .LBB65_13
+; RV32-NEXT: .LBB65_5: # %else11
+; RV32-NEXT: andi a0, a1, 32
+; RV32-NEXT: bnez a0, .LBB65_14
+; RV32-NEXT: .LBB65_6: # %else14
+; RV32-NEXT: andi a0, a1, 64
+; RV32-NEXT: bnez a0, .LBB65_15
+; RV32-NEXT: .LBB65_7: # %else17
+; RV32-NEXT: andi a0, a1, -128
+; RV32-NEXT: bnez a0, .LBB65_16
+; RV32-NEXT: .LBB65_8: # %else20
+; RV32-NEXT: vmv1r.v v8, v9
+; RV32-NEXT: ret
+; RV32-NEXT: .LBB65_9: # %cond.load
+; RV32-NEXT: vmv.x.s a0, v10
+; RV32-NEXT: lh a0, 0(a0)
+; RV32-NEXT: vsetvli zero, zero, e16, m1, tu, ma
+; RV32-NEXT: vmv.s.x v9, a0
+; RV32-NEXT: andi a0, a1, 2
+; RV32-NEXT: beqz a0, .LBB65_2
+; RV32-NEXT: .LBB65_10: # %cond.load1
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v10, 1
+; RV32-NEXT: vmv.x.s a0, v8
+; RV32-NEXT: lh a0, 0(a0)
+; RV32-NEXT: vmv.s.x v8, a0
+; RV32-NEXT: vsetivli zero, 2, e16, m1, tu, ma
+; RV32-NEXT: vslideup.vi v9, v8, 1
+; RV32-NEXT: andi a0, a1, 4
+; RV32-NEXT: beqz a0, .LBB65_3
+; RV32-NEXT: .LBB65_11: # %cond.load4
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v10, 2
+; RV32-NEXT: vmv.x.s a0, v8
+; RV32-NEXT: lh a0, 0(a0)
+; RV32-NEXT: vmv.s.x v8, a0
+; RV32-NEXT: vsetivli zero, 3, e16, m1, tu, ma
+; RV32-NEXT: vslideup.vi v9, v8, 2
+; RV32-NEXT: andi a0, a1, 8
+; RV32-NEXT: beqz a0, .LBB65_4
+; RV32-NEXT: .LBB65_12: # %cond.load7
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v10, 3
+; RV32-NEXT: vmv.x.s a0, v8
+; RV32-NEXT: lh a0, 0(a0)
+; RV32-NEXT: vmv.s.x v8, a0
+; RV32-NEXT: vsetivli zero, 4, e16, m1, tu, ma
+; RV32-NEXT: vslideup.vi v9, v8, 3
+; RV32-NEXT: andi a0, a1, 16
+; RV32-NEXT: beqz a0, .LBB65_5
+; RV32-NEXT: .LBB65_13: # %cond.load10
+; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v12, v10, 4
+; RV32-NEXT: vmv.x.s a0, v12
+; RV32-NEXT: lh a0, 0(a0)
+; RV32-NEXT: vmv.s.x v8, a0
+; RV32-NEXT: vsetivli zero, 5, e16, m1, tu, ma
+; RV32-NEXT: vslideup.vi v9, v8, 4
+; RV32-NEXT: andi a0, a1, 32
+; RV32-NEXT: beqz a0, .LBB65_6
+; RV32-NEXT: .LBB65_14: # %cond.load13
+; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v12, v10, 5
+; RV32-NEXT: vmv.x.s a0, v12
+; RV32-NEXT: lh a0, 0(a0)
+; RV32-NEXT: vmv.s.x v8, a0
+; RV32-NEXT: vsetivli zero, 6, e16, m1, tu, ma
+; RV32-NEXT: vslideup.vi v9, v8, 5
+; RV32-NEXT: andi a0, a1, 64
+; RV32-NEXT: beqz a0, .LBB65_7
+; RV32-NEXT: .LBB65_15: # %cond.load16
+; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v12, v10, 6
+; RV32-NEXT: vmv.x.s a0, v12
+; RV32-NEXT: lh a0, 0(a0)
+; RV32-NEXT: vmv.s.x v8, a0
+; RV32-NEXT: vsetivli zero, 7, e16, m1, tu, ma
+; RV32-NEXT: vslideup.vi v9, v8, 6
+; RV32-NEXT: andi a0, a1, -128
+; RV32-NEXT: beqz a0, .LBB65_8
+; RV32-NEXT: .LBB65_16: # %cond.load19
+; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v10, v10, 7
+; RV32-NEXT: vmv.x.s a0, v10
+; RV32-NEXT: lh a0, 0(a0)
+; RV32-NEXT: vmv.s.x v8, a0
+; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV32-NEXT: vslideup.vi v9, v8, 7
+; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
-; RV64V-LABEL: mgather_baseidx_sext_v8i8_v8f16:
+; RV64V-LABEL: mgather_baseidx_sext_v8i8_v8bf16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf8 v12, v8
; RV64V-NEXT: vadd.vv v12, v12, v12
-; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t
-; RV64V-NEXT: vmv.v.v v8, v9
+; RV64V-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; RV64V-NEXT: vmv.x.s a1, v0
+; RV64V-NEXT: andi a2, a1, 1
+; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; RV64V-NEXT: vadd.vx v12, v12, a0
+; RV64V-NEXT: bnez a2, .LBB65_11
+; RV64V-NEXT: # %bb.1: # %else
+; RV64V-NEXT: andi a0, a1, 2
+; RV64V-NEXT: bnez a0, .LBB65_12
+; RV64V-NEXT: .LBB65_2: # %else2
+; RV64V-NEXT: andi a0, a1, 4
+; RV64V-NEXT: bnez a0, .LBB65_13
+; RV64V-NEXT: .LBB65_3: # %else5
+; RV64V-NEXT: andi a0, a1, 8
+; RV64V-NEXT: beqz a0, .LBB65_5
+; RV64V-NEXT: .LBB65_4: # %cond.load7
+; RV64V-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64V-NEXT: vslidedown.vi v10, v12, 3
+; RV64V-NEXT: vmv.x.s a0, v10
+; RV64V-NEXT: lh a0, 0(a0)
+; RV64V-NEXT: vmv.s.x v8, a0
+; RV64V-NEXT: vsetivli zero, 4, e16, m1, tu, ma
+; RV64V-NEXT: vslideup.vi v9, v8, 3
+; RV64V-NEXT: .LBB65_5: # %else8
+; RV64V-NEXT: addi sp, sp, -320
+; RV64V-NEXT: .cfi_def_cfa_offset 320
+; RV64V-NEXT: sd ra, 312(sp) # 8-byte Folded Spill
+; RV64V-NEXT: sd s0, 304(sp) # 8-byte Folded Spill
+; RV64V-NEXT: .cfi_offset ra, -8
+; RV64V-NEXT: .cfi_offset s0, -16
+; RV64V-NEXT: addi s0, sp, 320
+; RV64V-NEXT: .cfi_def_cfa s0, 0
+; RV64V-NEXT: andi sp, sp, -64
+; RV64V-NEXT: andi a0, a1, 16
+; RV64V-NEXT: bnez a0, .LBB65_14
+; RV64V-NEXT: # %bb.6: # %else11
+; RV64V-NEXT: andi a0, a1, 32
+; RV64V-NEXT: bnez a0, .LBB65_15
+; RV64V-NEXT: .LBB65_7: # %else14
+; RV64V-NEXT: andi a0, a1, 64
+; RV64V-NEXT: bnez a0, .LBB65_16
+; RV64V-NEXT: .LBB65_8: # %else17
+; RV64V-NEXT: andi a0, a1, -128
+; RV64V-NEXT: beqz a0, .LBB65_10
+; RV64V-NEXT: .LBB65_9: # %cond.load19
+; RV64V-NEXT: mv a0, sp
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vse64.v v12, (a0)
+; RV64V-NEXT: ld a0, 56(sp)
+; RV64V-NEXT: lh a0, 0(a0)
+; RV64V-NEXT: vmv.s.x v8, a0
+; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV64V-NEXT: vslideup.vi v9, v8, 7
+; RV64V-NEXT: .LBB65_10: # %else20
+; RV64V-NEXT: vmv1r.v v8, v9
+; RV64V-NEXT: addi sp, s0, -320
+; RV64V-NEXT: ld ra, 312(sp) # 8-byte Folded Reload
+; RV64V-NEXT: ld s0, 304(sp) # 8-byte Folded Reload
+; RV64V-NEXT: addi sp, sp, 320
; RV64V-NEXT: ret
-;
-; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i8_v8f16:
+; RV64V-NEXT: .LBB65_11: # %cond.load
+; RV64V-NEXT: vmv.x.s a0, v12
+; RV64V-NEXT: lh a0, 0(a0)
+; RV64V-NEXT: vsetvli zero, zero, e16, m1, tu, ma
+; RV64V-NEXT: vmv.s.x v9, a0
+; RV64V-NEXT: andi a0, a1, 2
+; RV64V-NEXT: beqz a0, .LBB65_2
+; RV64V-NEXT: .LBB65_12: # %cond.load1
+; RV64V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64V-NEXT: vslidedown.vi v8, v12, 1
+; RV64V-NEXT: vmv.x.s a0, v8
+; RV64V-NEXT: lh a0, 0(a0)
+; RV64V-NEXT: vmv.s.x v8, a0
+; RV64V-NEXT: vsetivli zero, 2, e16, m1, tu, ma
+; RV64V-NEXT: vslideup.vi v9, v8, 1
+; RV64V-NEXT: andi a0, a1, 4
+; RV64V-NEXT: beqz a0, .LBB65_3
+; RV64V-NEXT: .LBB65_13: # %cond.load4
+; RV64V-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64V-NEXT: vslidedown.vi v10, v12, 2
+; RV64V-NEXT: vmv.x.s a0, v10
+; RV64V-NEXT: lh a0, 0(a0)
+; RV64V-NEXT: vmv.s.x v8, a0
+; RV64V-NEXT: vsetivli zero, 3, e16, m1, tu, ma
+; RV64V-NEXT: vslideup.vi v9, v8, 2
+; RV64V-NEXT: andi a0, a1, 8
+; RV64V-NEXT: bnez a0, .LBB65_4
+; RV64V-NEXT: j .LBB65_5
+; RV64V-NEXT: .LBB65_14: # %cond.load10
+; RV64V-NEXT: addi a0, sp, 192
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vse64.v v12, (a0)
+; RV64V-NEXT: ld a0, 224(sp)
+; RV64V-NEXT: lh a0, 0(a0)
+; RV64V-NEXT: vmv.s.x v8, a0
+; RV64V-NEXT: vsetivli zero, 5, e16, m1, tu, ma
+; RV64V-NEXT: vslideup.vi v9, v8, 4
+; RV64V-NEXT: andi a0, a1, 32
+; RV64V-NEXT: beqz a0, .LBB65_7
+; RV64V-NEXT: .LBB65_15: # %cond.load13
+; RV64V-NEXT: addi a0, sp, 128
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vse64.v v12, (a0)
+; RV64V-NEXT: ld a0, 168(sp)
+; RV64V-NEXT: lh a0, 0(a0)
+; RV64V-NEXT: vmv.s.x v8, a0
+; RV64V-NEXT: vsetivli zero, 6, e16, m1, tu, ma
+; RV64V-NEXT: vslideup.vi v9, v8, 5
+; RV64V-NEXT: andi a0, a1, 64
+; RV64V-NEXT: beqz a0, .LBB65_8
+; RV64V-NEXT: .LBB65_16: # %cond.load16
+; RV64V-NEXT: addi a0, sp, 64
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vse64.v v12, (a0)
+; RV64V-NEXT: ld a0, 112(sp)
+; RV64V-NEXT: lh a0, 0(a0)
+; RV64V-NEXT: vmv.s.x v8, a0
+; RV64V-NEXT: vsetivli zero, 7, e16, m1, tu, ma
+; RV64V-NEXT: vslideup.vi v9, v8, 6
+; RV64V-NEXT: andi a0, a1, -128
+; RV64V-NEXT: bnez a0, .LBB65_9
+; RV64V-NEXT: j .LBB65_10
+;
+; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i8_v8bf16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
@@ -7382,9 +8274,9 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
+; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: .LBB65_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB65_4
@@ -7394,9 +8286,9 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
+; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1
; RV64ZVE32F-NEXT: .LBB65_4: # %else2
@@ -7421,9 +8313,9 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5
; RV64ZVE32F-NEXT: .LBB65_9: # %else14
@@ -7441,9 +8333,9 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v11, fa5
+; RV64ZVE32F-NEXT: vmv.s.x v11, a2
; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
@@ -7454,9 +8346,9 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
@@ -7466,9 +8358,9 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
@@ -7478,9 +8370,9 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
+; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
@@ -7491,39 +8383,262 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: slli a1, a1, 1
; RV64ZVE32F-NEXT: add a0, a0, a1
-; RV64ZVE32F-NEXT: flh fa5, 0(a0)
+; RV64ZVE32F-NEXT: lh a0, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
%eidxs = sext <8 x i8> %idxs to <8 x i16>
- %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs
- %v = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x half> %passthru)
- ret <8 x half> %v
+ %ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i16> %eidxs
+ %v = call <8 x bfloat> @llvm.masked.gather.v8bf16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x bfloat> %passthru)
+ ret <8 x bfloat> %v
}
-define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x half> %passthru) {
-; RV32-LABEL: mgather_baseidx_zext_v8i8_v8f16:
+define <8 x bfloat> @mgather_baseidx_zext_v8i8_v8bf16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x bfloat> %passthru) {
+; RV32-LABEL: mgather_baseidx_zext_v8i8_v8bf16:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT: vwaddu.vv v10, v8, v8
-; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV32-NEXT: vluxei16.v v9, (a0), v10, v0.t
-; RV32-NEXT: vmv.v.v v8, v9
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: vzext.vf4 v10, v8
+; RV32-NEXT: vadd.vv v10, v10, v10
+; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; RV32-NEXT: vmv.x.s a1, v0
+; RV32-NEXT: andi a2, a1, 1
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32-NEXT: vadd.vx v10, v10, a0
+; RV32-NEXT: bnez a2, .LBB66_9
+; RV32-NEXT: # %bb.1: # %else
+; RV32-NEXT: andi a0, a1, 2
+; RV32-NEXT: bnez a0, .LBB66_10
+; RV32-NEXT: .LBB66_2: # %else2
+; RV32-NEXT: andi a0, a1, 4
+; RV32-NEXT: bnez a0, .LBB66_11
+; RV32-NEXT: .LBB66_3: # %else5
+; RV32-NEXT: andi a0, a1, 8
+; RV32-NEXT: bnez a0, .LBB66_12
+; RV32-NEXT: .LBB66_4: # %else8
+; RV32-NEXT: andi a0, a1, 16
+; RV32-NEXT: bnez a0, .LBB66_13
+; RV32-NEXT: .LBB66_5: # %else11
+; RV32-NEXT: andi a0, a1, 32
+; RV32-NEXT: bnez a0, .LBB66_14
+; RV32-NEXT: .LBB66_6: # %else14
+; RV32-NEXT: andi a0, a1, 64
+; RV32-NEXT: bnez a0, .LBB66_15
+; RV32-NEXT: .LBB66_7: # %else17
+; RV32-NEXT: andi a0, a1, -128
+; RV32-NEXT: bnez a0, .LBB66_16
+; RV32-NEXT: .LBB66_8: # %else20
+; RV32-NEXT: vmv1r.v v8, v9
+; RV32-NEXT: ret
+; RV32-NEXT: .LBB66_9: # %cond.load
+; RV32-NEXT: vmv.x.s a0, v10
+; RV32-NEXT: lh a0, 0(a0)
+; RV32-NEXT: vsetvli zero, zero, e16, m1, tu, ma
+; RV32-NEXT: vmv.s.x v9, a0
+; RV32-NEXT: andi a0, a1, 2
+; RV32-NEXT: beqz a0, .LBB66_2
+; RV32-NEXT: .LBB66_10: # %cond.load1
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v10, 1
+; RV32-NEXT: vmv.x.s a0, v8
+; RV32-NEXT: lh a0, 0(a0)
+; RV32-NEXT: vmv.s.x v8, a0
+; RV32-NEXT: vsetivli zero, 2, e16, m1, tu, ma
+; RV32-NEXT: vslideup.vi v9, v8, 1
+; RV32-NEXT: andi a0, a1, 4
+; RV32-NEXT: beqz a0, .LBB66_3
+; RV32-NEXT: .LBB66_11: # %cond.load4
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v10, 2
+; RV32-NEXT: vmv.x.s a0, v8
+; RV32-NEXT: lh a0, 0(a0)
+; RV32-NEXT: vmv.s.x v8, a0
+; RV32-NEXT: vsetivli zero, 3, e16, m1, tu, ma
+; RV32-NEXT: vslideup.vi v9, v8, 2
+; RV32-NEXT: andi a0, a1, 8
+; RV32-NEXT: beqz a0, .LBB66_4
+; RV32-NEXT: .LBB66_12: # %cond.load7
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v10, 3
+; RV32-NEXT: vmv.x.s a0, v8
+; RV32-NEXT: lh a0, 0(a0)
+; RV32-NEXT: vmv.s.x v8, a0
+; RV32-NEXT: vsetivli zero, 4, e16, m1, tu, ma
+; RV32-NEXT: vslideup.vi v9, v8, 3
+; RV32-NEXT: andi a0, a1, 16
+; RV32-NEXT: beqz a0, .LBB66_5
+; RV32-NEXT: .LBB66_13: # %cond.load10
+; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v12, v10, 4
+; RV32-NEXT: vmv.x.s a0, v12
+; RV32-NEXT: lh a0, 0(a0)
+; RV32-NEXT: vmv.s.x v8, a0
+; RV32-NEXT: vsetivli zero, 5, e16, m1, tu, ma
+; RV32-NEXT: vslideup.vi v9, v8, 4
+; RV32-NEXT: andi a0, a1, 32
+; RV32-NEXT: beqz a0, .LBB66_6
+; RV32-NEXT: .LBB66_14: # %cond.load13
+; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v12, v10, 5
+; RV32-NEXT: vmv.x.s a0, v12
+; RV32-NEXT: lh a0, 0(a0)
+; RV32-NEXT: vmv.s.x v8, a0
+; RV32-NEXT: vsetivli zero, 6, e16, m1, tu, ma
+; RV32-NEXT: vslideup.vi v9, v8, 5
+; RV32-NEXT: andi a0, a1, 64
+; RV32-NEXT: beqz a0, .LBB66_7
+; RV32-NEXT: .LBB66_15: # %cond.load16
+; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v12, v10, 6
+; RV32-NEXT: vmv.x.s a0, v12
+; RV32-NEXT: lh a0, 0(a0)
+; RV32-NEXT: vmv.s.x v8, a0
+; RV32-NEXT: vsetivli zero, 7, e16, m1, tu, ma
+; RV32-NEXT: vslideup.vi v9, v8, 6
+; RV32-NEXT: andi a0, a1, -128
+; RV32-NEXT: beqz a0, .LBB66_8
+; RV32-NEXT: .LBB66_16: # %cond.load19
+; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v10, v10, 7
+; RV32-NEXT: vmv.x.s a0, v10
+; RV32-NEXT: lh a0, 0(a0)
+; RV32-NEXT: vmv.s.x v8, a0
+; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV32-NEXT: vslideup.vi v9, v8, 7
+; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
-; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8f16:
+; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8bf16:
; RV64V: # %bb.0:
-; RV64V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV64V-NEXT: vwaddu.vv v10, v8, v8
-; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV64V-NEXT: vluxei16.v v9, (a0), v10, v0.t
-; RV64V-NEXT: vmv.v.v v8, v9
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vzext.vf8 v12, v8
+; RV64V-NEXT: vadd.vv v12, v12, v12
+; RV64V-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; RV64V-NEXT: vmv.x.s a1, v0
+; RV64V-NEXT: andi a2, a1, 1
+; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; RV64V-NEXT: vadd.vx v12, v12, a0
+; RV64V-NEXT: bnez a2, .LBB66_11
+; RV64V-NEXT: # %bb.1: # %else
+; RV64V-NEXT: andi a0, a1, 2
+; RV64V-NEXT: bnez a0, .LBB66_12
+; RV64V-NEXT: .LBB66_2: # %else2
+; RV64V-NEXT: andi a0, a1, 4
+; RV64V-NEXT: bnez a0, .LBB66_13
+; RV64V-NEXT: .LBB66_3: # %else5
+; RV64V-NEXT: andi a0, a1, 8
+; RV64V-NEXT: beqz a0, .LBB66_5
+; RV64V-NEXT: .LBB66_4: # %cond.load7
+; RV64V-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64V-NEXT: vslidedown.vi v10, v12, 3
+; RV64V-NEXT: vmv.x.s a0, v10
+; RV64V-NEXT: lh a0, 0(a0)
+; RV64V-NEXT: vmv.s.x v8, a0
+; RV64V-NEXT: vsetivli zero, 4, e16, m1, tu, ma
+; RV64V-NEXT: vslideup.vi v9, v8, 3
+; RV64V-NEXT: .LBB66_5: # %else8
+; RV64V-NEXT: addi sp, sp, -320
+; RV64V-NEXT: .cfi_def_cfa_offset 320
+; RV64V-NEXT: sd ra, 312(sp) # 8-byte Folded Spill
+; RV64V-NEXT: sd s0, 304(sp) # 8-byte Folded Spill
+; RV64V-NEXT: .cfi_offset ra, -8
+; RV64V-NEXT: .cfi_offset s0, -16
+; RV64V-NEXT: addi s0, sp, 320
+; RV64V-NEXT: .cfi_def_cfa s0, 0
+; RV64V-NEXT: andi sp, sp, -64
+; RV64V-NEXT: andi a0, a1, 16
+; RV64V-NEXT: bnez a0, .LBB66_14
+; RV64V-NEXT: # %bb.6: # %else11
+; RV64V-NEXT: andi a0, a1, 32
+; RV64V-NEXT: bnez a0, .LBB66_15
+; RV64V-NEXT: .LBB66_7: # %else14
+; RV64V-NEXT: andi a0, a1, 64
+; RV64V-NEXT: bnez a0, .LBB66_16
+; RV64V-NEXT: .LBB66_8: # %else17
+; RV64V-NEXT: andi a0, a1, -128
+; RV64V-NEXT: beqz a0, .LBB66_10
+; RV64V-NEXT: .LBB66_9: # %cond.load19
+; RV64V-NEXT: mv a0, sp
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vse64.v v12, (a0)
+; RV64V-NEXT: ld a0, 56(sp)
+; RV64V-NEXT: lh a0, 0(a0)
+; RV64V-NEXT: vmv.s.x v8, a0
+; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV64V-NEXT: vslideup.vi v9, v8, 7
+; RV64V-NEXT: .LBB66_10: # %else20
+; RV64V-NEXT: vmv1r.v v8, v9
+; RV64V-NEXT: addi sp, s0, -320
+; RV64V-NEXT: ld ra, 312(sp) # 8-byte Folded Reload
+; RV64V-NEXT: ld s0, 304(sp) # 8-byte Folded Reload
+; RV64V-NEXT: addi sp, sp, 320
; RV64V-NEXT: ret
-;
-; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i8_v8f16:
+; RV64V-NEXT: .LBB66_11: # %cond.load
+; RV64V-NEXT: vmv.x.s a0, v12
+; RV64V-NEXT: lh a0, 0(a0)
+; RV64V-NEXT: vsetvli zero, zero, e16, m1, tu, ma
+; RV64V-NEXT: vmv.s.x v9, a0
+; RV64V-NEXT: andi a0, a1, 2
+; RV64V-NEXT: beqz a0, .LBB66_2
+; RV64V-NEXT: .LBB66_12: # %cond.load1
+; RV64V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64V-NEXT: vslidedown.vi v8, v12, 1
+; RV64V-NEXT: vmv.x.s a0, v8
+; RV64V-NEXT: lh a0, 0(a0)
+; RV64V-NEXT: vmv.s.x v8, a0
+; RV64V-NEXT: vsetivli zero, 2, e16, m1, tu, ma
+; RV64V-NEXT: vslideup.vi v9, v8, 1
+; RV64V-NEXT: andi a0, a1, 4
+; RV64V-NEXT: beqz a0, .LBB66_3
+; RV64V-NEXT: .LBB66_13: # %cond.load4
+; RV64V-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64V-NEXT: vslidedown.vi v10, v12, 2
+; RV64V-NEXT: vmv.x.s a0, v10
+; RV64V-NEXT: lh a0, 0(a0)
+; RV64V-NEXT: vmv.s.x v8, a0
+; RV64V-NEXT: vsetivli zero, 3, e16, m1, tu, ma
+; RV64V-NEXT: vslideup.vi v9, v8, 2
+; RV64V-NEXT: andi a0, a1, 8
+; RV64V-NEXT: bnez a0, .LBB66_4
+; RV64V-NEXT: j .LBB66_5
+; RV64V-NEXT: .LBB66_14: # %cond.load10
+; RV64V-NEXT: addi a0, sp, 192
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vse64.v v12, (a0)
+; RV64V-NEXT: ld a0, 224(sp)
+; RV64V-NEXT: lh a0, 0(a0)
+; RV64V-NEXT: vmv.s.x v8, a0
+; RV64V-NEXT: vsetivli zero, 5, e16, m1, tu, ma
+; RV64V-NEXT: vslideup.vi v9, v8, 4
+; RV64V-NEXT: andi a0, a1, 32
+; RV64V-NEXT: beqz a0, .LBB66_7
+; RV64V-NEXT: .LBB66_15: # %cond.load13
+; RV64V-NEXT: addi a0, sp, 128
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vse64.v v12, (a0)
+; RV64V-NEXT: ld a0, 168(sp)
+; RV64V-NEXT: lh a0, 0(a0)
+; RV64V-NEXT: vmv.s.x v8, a0
+; RV64V-NEXT: vsetivli zero, 6, e16, m1, tu, ma
+; RV64V-NEXT: vslideup.vi v9, v8, 5
+; RV64V-NEXT: andi a0, a1, 64
+; RV64V-NEXT: beqz a0, .LBB66_8
+; RV64V-NEXT: .LBB66_16: # %cond.load16
+; RV64V-NEXT: addi a0, sp, 64
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vse64.v v12, (a0)
+; RV64V-NEXT: ld a0, 112(sp)
+; RV64V-NEXT: lh a0, 0(a0)
+; RV64V-NEXT: vmv.s.x v8, a0
+; RV64V-NEXT: vsetivli zero, 7, e16, m1, tu, ma
+; RV64V-NEXT: vslideup.vi v9, v8, 6
+; RV64V-NEXT: andi a0, a1, -128
+; RV64V-NEXT: bnez a0, .LBB66_9
+; RV64V-NEXT: j .LBB66_10
+;
+; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i8_v8bf16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
@@ -7534,9 +8649,9 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
+; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: .LBB66_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB66_4
@@ -7547,9 +8662,9 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
+; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1
; RV64ZVE32F-NEXT: .LBB66_4: # %else2
@@ -7575,9 +8690,9 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5
; RV64ZVE32F-NEXT: .LBB66_9: # %else14
@@ -7596,9 +8711,9 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v11, fa5
+; RV64ZVE32F-NEXT: vmv.s.x v11, a2
; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
@@ -7610,9 +8725,9 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
@@ -7623,9 +8738,9 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
@@ -7636,9 +8751,9 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
+; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
@@ -7650,39 +8765,261 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-NEXT: andi a1, a1, 255
; RV64ZVE32F-NEXT: slli a1, a1, 1
; RV64ZVE32F-NEXT: add a0, a0, a1
-; RV64ZVE32F-NEXT: flh fa5, 0(a0)
+; RV64ZVE32F-NEXT: lh a0, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
%eidxs = zext <8 x i8> %idxs to <8 x i16>
- %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs
- %v = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x half> %passthru)
- ret <8 x half> %v
+ %ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i16> %eidxs
+ %v = call <8 x bfloat> @llvm.masked.gather.v8bf16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x bfloat> %passthru)
+ ret <8 x bfloat> %v
}
-define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x half> %passthru) {
-; RV32-LABEL: mgather_baseidx_v8f16:
+define <8 x bfloat> @mgather_baseidx_v8bf16(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x bfloat> %passthru) {
+; RV32-LABEL: mgather_baseidx_v8bf16:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu
+; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vwadd.vv v10, v8, v8
-; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t
-; RV32-NEXT: vmv.v.v v8, v9
+; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; RV32-NEXT: vmv.x.s a1, v0
+; RV32-NEXT: andi a2, a1, 1
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32-NEXT: vadd.vx v10, v10, a0
+; RV32-NEXT: bnez a2, .LBB67_9
+; RV32-NEXT: # %bb.1: # %else
+; RV32-NEXT: andi a0, a1, 2
+; RV32-NEXT: bnez a0, .LBB67_10
+; RV32-NEXT: .LBB67_2: # %else2
+; RV32-NEXT: andi a0, a1, 4
+; RV32-NEXT: bnez a0, .LBB67_11
+; RV32-NEXT: .LBB67_3: # %else5
+; RV32-NEXT: andi a0, a1, 8
+; RV32-NEXT: bnez a0, .LBB67_12
+; RV32-NEXT: .LBB67_4: # %else8
+; RV32-NEXT: andi a0, a1, 16
+; RV32-NEXT: bnez a0, .LBB67_13
+; RV32-NEXT: .LBB67_5: # %else11
+; RV32-NEXT: andi a0, a1, 32
+; RV32-NEXT: bnez a0, .LBB67_14
+; RV32-NEXT: .LBB67_6: # %else14
+; RV32-NEXT: andi a0, a1, 64
+; RV32-NEXT: bnez a0, .LBB67_15
+; RV32-NEXT: .LBB67_7: # %else17
+; RV32-NEXT: andi a0, a1, -128
+; RV32-NEXT: bnez a0, .LBB67_16
+; RV32-NEXT: .LBB67_8: # %else20
+; RV32-NEXT: vmv1r.v v8, v9
+; RV32-NEXT: ret
+; RV32-NEXT: .LBB67_9: # %cond.load
+; RV32-NEXT: vmv.x.s a0, v10
+; RV32-NEXT: lh a0, 0(a0)
+; RV32-NEXT: vsetvli zero, zero, e16, m1, tu, ma
+; RV32-NEXT: vmv.s.x v9, a0
+; RV32-NEXT: andi a0, a1, 2
+; RV32-NEXT: beqz a0, .LBB67_2
+; RV32-NEXT: .LBB67_10: # %cond.load1
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v10, 1
+; RV32-NEXT: vmv.x.s a0, v8
+; RV32-NEXT: lh a0, 0(a0)
+; RV32-NEXT: vmv.s.x v8, a0
+; RV32-NEXT: vsetivli zero, 2, e16, m1, tu, ma
+; RV32-NEXT: vslideup.vi v9, v8, 1
+; RV32-NEXT: andi a0, a1, 4
+; RV32-NEXT: beqz a0, .LBB67_3
+; RV32-NEXT: .LBB67_11: # %cond.load4
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v10, 2
+; RV32-NEXT: vmv.x.s a0, v8
+; RV32-NEXT: lh a0, 0(a0)
+; RV32-NEXT: vmv.s.x v8, a0
+; RV32-NEXT: vsetivli zero, 3, e16, m1, tu, ma
+; RV32-NEXT: vslideup.vi v9, v8, 2
+; RV32-NEXT: andi a0, a1, 8
+; RV32-NEXT: beqz a0, .LBB67_4
+; RV32-NEXT: .LBB67_12: # %cond.load7
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v10, 3
+; RV32-NEXT: vmv.x.s a0, v8
+; RV32-NEXT: lh a0, 0(a0)
+; RV32-NEXT: vmv.s.x v8, a0
+; RV32-NEXT: vsetivli zero, 4, e16, m1, tu, ma
+; RV32-NEXT: vslideup.vi v9, v8, 3
+; RV32-NEXT: andi a0, a1, 16
+; RV32-NEXT: beqz a0, .LBB67_5
+; RV32-NEXT: .LBB67_13: # %cond.load10
+; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v12, v10, 4
+; RV32-NEXT: vmv.x.s a0, v12
+; RV32-NEXT: lh a0, 0(a0)
+; RV32-NEXT: vmv.s.x v8, a0
+; RV32-NEXT: vsetivli zero, 5, e16, m1, tu, ma
+; RV32-NEXT: vslideup.vi v9, v8, 4
+; RV32-NEXT: andi a0, a1, 32
+; RV32-NEXT: beqz a0, .LBB67_6
+; RV32-NEXT: .LBB67_14: # %cond.load13
+; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v12, v10, 5
+; RV32-NEXT: vmv.x.s a0, v12
+; RV32-NEXT: lh a0, 0(a0)
+; RV32-NEXT: vmv.s.x v8, a0
+; RV32-NEXT: vsetivli zero, 6, e16, m1, tu, ma
+; RV32-NEXT: vslideup.vi v9, v8, 5
+; RV32-NEXT: andi a0, a1, 64
+; RV32-NEXT: beqz a0, .LBB67_7
+; RV32-NEXT: .LBB67_15: # %cond.load16
+; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v12, v10, 6
+; RV32-NEXT: vmv.x.s a0, v12
+; RV32-NEXT: lh a0, 0(a0)
+; RV32-NEXT: vmv.s.x v8, a0
+; RV32-NEXT: vsetivli zero, 7, e16, m1, tu, ma
+; RV32-NEXT: vslideup.vi v9, v8, 6
+; RV32-NEXT: andi a0, a1, -128
+; RV32-NEXT: beqz a0, .LBB67_8
+; RV32-NEXT: .LBB67_16: # %cond.load19
+; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v10, v10, 7
+; RV32-NEXT: vmv.x.s a0, v10
+; RV32-NEXT: lh a0, 0(a0)
+; RV32-NEXT: vmv.s.x v8, a0
+; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV32-NEXT: vslideup.vi v9, v8, 7
+; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
-; RV64V-LABEL: mgather_baseidx_v8f16:
+; RV64V-LABEL: mgather_baseidx_v8bf16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf4 v12, v8
; RV64V-NEXT: vadd.vv v12, v12, v12
-; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t
-; RV64V-NEXT: vmv.v.v v8, v9
+; RV64V-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; RV64V-NEXT: vmv.x.s a1, v0
+; RV64V-NEXT: andi a2, a1, 1
+; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; RV64V-NEXT: vadd.vx v12, v12, a0
+; RV64V-NEXT: bnez a2, .LBB67_11
+; RV64V-NEXT: # %bb.1: # %else
+; RV64V-NEXT: andi a0, a1, 2
+; RV64V-NEXT: bnez a0, .LBB67_12
+; RV64V-NEXT: .LBB67_2: # %else2
+; RV64V-NEXT: andi a0, a1, 4
+; RV64V-NEXT: bnez a0, .LBB67_13
+; RV64V-NEXT: .LBB67_3: # %else5
+; RV64V-NEXT: andi a0, a1, 8
+; RV64V-NEXT: beqz a0, .LBB67_5
+; RV64V-NEXT: .LBB67_4: # %cond.load7
+; RV64V-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64V-NEXT: vslidedown.vi v10, v12, 3
+; RV64V-NEXT: vmv.x.s a0, v10
+; RV64V-NEXT: lh a0, 0(a0)
+; RV64V-NEXT: vmv.s.x v8, a0
+; RV64V-NEXT: vsetivli zero, 4, e16, m1, tu, ma
+; RV64V-NEXT: vslideup.vi v9, v8, 3
+; RV64V-NEXT: .LBB67_5: # %else8
+; RV64V-NEXT: addi sp, sp, -320
+; RV64V-NEXT: .cfi_def_cfa_offset 320
+; RV64V-NEXT: sd ra, 312(sp) # 8-byte Folded Spill
+; RV64V-NEXT: sd s0, 304(sp) # 8-byte Folded Spill
+; RV64V-NEXT: .cfi_offset ra, -8
+; RV64V-NEXT: .cfi_offset s0, -16
+; RV64V-NEXT: addi s0, sp, 320
+; RV64V-NEXT: .cfi_def_cfa s0, 0
+; RV64V-NEXT: andi sp, sp, -64
+; RV64V-NEXT: andi a0, a1, 16
+; RV64V-NEXT: bnez a0, .LBB67_14
+; RV64V-NEXT: # %bb.6: # %else11
+; RV64V-NEXT: andi a0, a1, 32
+; RV64V-NEXT: bnez a0, .LBB67_15
+; RV64V-NEXT: .LBB67_7: # %else14
+; RV64V-NEXT: andi a0, a1, 64
+; RV64V-NEXT: bnez a0, .LBB67_16
+; RV64V-NEXT: .LBB67_8: # %else17
+; RV64V-NEXT: andi a0, a1, -128
+; RV64V-NEXT: beqz a0, .LBB67_10
+; RV64V-NEXT: .LBB67_9: # %cond.load19
+; RV64V-NEXT: mv a0, sp
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vse64.v v12, (a0)
+; RV64V-NEXT: ld a0, 56(sp)
+; RV64V-NEXT: lh a0, 0(a0)
+; RV64V-NEXT: vmv.s.x v8, a0
+; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV64V-NEXT: vslideup.vi v9, v8, 7
+; RV64V-NEXT: .LBB67_10: # %else20
+; RV64V-NEXT: vmv1r.v v8, v9
+; RV64V-NEXT: addi sp, s0, -320
+; RV64V-NEXT: ld ra, 312(sp) # 8-byte Folded Reload
+; RV64V-NEXT: ld s0, 304(sp) # 8-byte Folded Reload
+; RV64V-NEXT: addi sp, sp, 320
; RV64V-NEXT: ret
-;
-; RV64ZVE32F-LABEL: mgather_baseidx_v8f16:
+; RV64V-NEXT: .LBB67_11: # %cond.load
+; RV64V-NEXT: vmv.x.s a0, v12
+; RV64V-NEXT: lh a0, 0(a0)
+; RV64V-NEXT: vsetvli zero, zero, e16, m1, tu, ma
+; RV64V-NEXT: vmv.s.x v9, a0
+; RV64V-NEXT: andi a0, a1, 2
+; RV64V-NEXT: beqz a0, .LBB67_2
+; RV64V-NEXT: .LBB67_12: # %cond.load1
+; RV64V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64V-NEXT: vslidedown.vi v8, v12, 1
+; RV64V-NEXT: vmv.x.s a0, v8
+; RV64V-NEXT: lh a0, 0(a0)
+; RV64V-NEXT: vmv.s.x v8, a0
+; RV64V-NEXT: vsetivli zero, 2, e16, m1, tu, ma
+; RV64V-NEXT: vslideup.vi v9, v8, 1
+; RV64V-NEXT: andi a0, a1, 4
+; RV64V-NEXT: beqz a0, .LBB67_3
+; RV64V-NEXT: .LBB67_13: # %cond.load4
+; RV64V-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64V-NEXT: vslidedown.vi v10, v12, 2
+; RV64V-NEXT: vmv.x.s a0, v10
+; RV64V-NEXT: lh a0, 0(a0)
+; RV64V-NEXT: vmv.s.x v8, a0
+; RV64V-NEXT: vsetivli zero, 3, e16, m1, tu, ma
+; RV64V-NEXT: vslideup.vi v9, v8, 2
+; RV64V-NEXT: andi a0, a1, 8
+; RV64V-NEXT: bnez a0, .LBB67_4
+; RV64V-NEXT: j .LBB67_5
+; RV64V-NEXT: .LBB67_14: # %cond.load10
+; RV64V-NEXT: addi a0, sp, 192
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vse64.v v12, (a0)
+; RV64V-NEXT: ld a0, 224(sp)
+; RV64V-NEXT: lh a0, 0(a0)
+; RV64V-NEXT: vmv.s.x v8, a0
+; RV64V-NEXT: vsetivli zero, 5, e16, m1, tu, ma
+; RV64V-NEXT: vslideup.vi v9, v8, 4
+; RV64V-NEXT: andi a0, a1, 32
+; RV64V-NEXT: beqz a0, .LBB67_7
+; RV64V-NEXT: .LBB67_15: # %cond.load13
+; RV64V-NEXT: addi a0, sp, 128
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vse64.v v12, (a0)
+; RV64V-NEXT: ld a0, 168(sp)
+; RV64V-NEXT: lh a0, 0(a0)
+; RV64V-NEXT: vmv.s.x v8, a0
+; RV64V-NEXT: vsetivli zero, 6, e16, m1, tu, ma
+; RV64V-NEXT: vslideup.vi v9, v8, 5
+; RV64V-NEXT: andi a0, a1, 64
+; RV64V-NEXT: beqz a0, .LBB67_8
+; RV64V-NEXT: .LBB67_16: # %cond.load16
+; RV64V-NEXT: addi a0, sp, 64
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vse64.v v12, (a0)
+; RV64V-NEXT: ld a0, 112(sp)
+; RV64V-NEXT: lh a0, 0(a0)
+; RV64V-NEXT: vmv.s.x v8, a0
+; RV64V-NEXT: vsetivli zero, 7, e16, m1, tu, ma
+; RV64V-NEXT: vslideup.vi v9, v8, 6
+; RV64V-NEXT: andi a0, a1, -128
+; RV64V-NEXT: bnez a0, .LBB67_9
+; RV64V-NEXT: j .LBB67_10
+;
+; RV64ZVE32F-LABEL: mgather_baseidx_v8bf16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
@@ -7693,8 +9030,8 @@ define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: flh fa5, 0(a2)
-; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: .LBB67_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB67_4
@@ -7704,8 +9041,8 @@ define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: flh fa5, 0(a2)
-; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1
; RV64ZVE32F-NEXT: .LBB67_4: # %else2
@@ -7730,8 +9067,8 @@ define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: flh fa5, 0(a2)
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5
; RV64ZVE32F-NEXT: .LBB67_9: # %else14
@@ -7749,8 +9086,8 @@ define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: flh fa5, 0(a2)
-; RV64ZVE32F-NEXT: vfmv.s.f v11, fa5
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-NEXT: vmv.s.x v11, a2
; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
@@ -7761,8 +9098,8 @@ define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: flh fa5, 0(a2)
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
@@ -7772,8 +9109,8 @@ define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: flh fa5, 0(a2)
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB67_8
@@ -7782,8 +9119,8 @@ define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: flh fa5, 0(a2)
-; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
+; RV64ZVE32F-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
@@ -7794,12 +9131,3819 @@ define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: slli a1, a1, 1
; RV64ZVE32F-NEXT: add a0, a0, a1
-; RV64ZVE32F-NEXT: flh fa5, 0(a0)
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-NEXT: lh a0, 0(a0)
+; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
+ %ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i16> %idxs
+ %v = call <8 x bfloat> @llvm.masked.gather.v8bf16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x bfloat> %passthru)
+ ret <8 x bfloat> %v
+}
+
+declare <1 x half> @llvm.masked.gather.v1f16.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x half>)
+
+define <1 x half> @mgather_v1f16(<1 x ptr> %ptrs, <1 x i1> %m, <1 x half> %passthru) {
+; RV32V-ZVFH-LABEL: mgather_v1f16:
+; RV32V-ZVFH: # %bb.0:
+; RV32V-ZVFH-NEXT: vsetivli zero, 1, e16, mf4, ta, mu
+; RV32V-ZVFH-NEXT: vluxei32.v v9, (zero), v8, v0.t
+; RV32V-ZVFH-NEXT: vmv1r.v v8, v9
+; RV32V-ZVFH-NEXT: ret
+;
+; RV64V-ZVFH-LABEL: mgather_v1f16:
+; RV64V-ZVFH: # %bb.0:
+; RV64V-ZVFH-NEXT: vsetivli zero, 1, e16, mf4, ta, mu
+; RV64V-ZVFH-NEXT: vluxei64.v v9, (zero), v8, v0.t
+; RV64V-ZVFH-NEXT: vmv1r.v v8, v9
+; RV64V-ZVFH-NEXT: ret
+;
+; RV32V-ZVFHMIN-LABEL: mgather_v1f16:
+; RV32V-ZVFHMIN: # %bb.0:
+; RV32V-ZVFHMIN-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
+; RV32V-ZVFHMIN-NEXT: vfirst.m a0, v0
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB68_2
+; RV32V-ZVFHMIN-NEXT: # %bb.1: # %cond.load
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; RV32V-ZVFHMIN-NEXT: vle16.v v9, (a0)
+; RV32V-ZVFHMIN-NEXT: .LBB68_2: # %else
+; RV32V-ZVFHMIN-NEXT: vmv1r.v v8, v9
+; RV32V-ZVFHMIN-NEXT: ret
+;
+; RV64V-ZVFHMIN-LABEL: mgather_v1f16:
+; RV64V-ZVFHMIN: # %bb.0:
+; RV64V-ZVFHMIN-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
+; RV64V-ZVFHMIN-NEXT: vfirst.m a0, v0
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB68_2
+; RV64V-ZVFHMIN-NEXT: # %bb.1: # %cond.load
+; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; RV64V-ZVFHMIN-NEXT: vle16.v v9, (a0)
+; RV64V-ZVFHMIN-NEXT: .LBB68_2: # %else
+; RV64V-ZVFHMIN-NEXT: vmv1r.v v8, v9
+; RV64V-ZVFHMIN-NEXT: ret
+;
+; RV32ZVE32F-ZVFH-LABEL: mgather_v1f16:
+; RV32ZVE32F-ZVFH: # %bb.0:
+; RV32ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, mu
+; RV32ZVE32F-ZVFH-NEXT: vluxei32.v v9, (zero), v8, v0.t
+; RV32ZVE32F-ZVFH-NEXT: vmv1r.v v8, v9
+; RV32ZVE32F-ZVFH-NEXT: ret
+;
+; RV64ZVE32F-LABEL: mgather_v1f16:
+; RV64ZVE32F: # %bb.0:
+; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vfirst.m a1, v0
+; RV64ZVE32F-NEXT: bnez a1, .LBB68_2
+; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT: vle16.v v8, (a0)
+; RV64ZVE32F-NEXT: .LBB68_2: # %else
+; RV64ZVE32F-NEXT: ret
+;
+; RV32ZVE32F-ZVFHMIN-LABEL: mgather_v1f16:
+; RV32ZVE32F-ZVFHMIN: # %bb.0:
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vfirst.m a0, v0
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB68_2
+; RV32ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %cond.load
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vle16.v v9, (a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB68_2: # %else
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: ret
+ %v = call <1 x half> @llvm.masked.gather.v1f16.v1p0(<1 x ptr> %ptrs, i32 2, <1 x i1> %m, <1 x half> %passthru)
+ ret <1 x half> %v
+}
+
+declare <2 x half> @llvm.masked.gather.v2f16.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x half>)
+
+define <2 x half> @mgather_v2f16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x half> %passthru) {
+; RV32V-ZVFH-LABEL: mgather_v2f16:
+; RV32V-ZVFH: # %bb.0:
+; RV32V-ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
+; RV32V-ZVFH-NEXT: vluxei32.v v9, (zero), v8, v0.t
+; RV32V-ZVFH-NEXT: vmv1r.v v8, v9
+; RV32V-ZVFH-NEXT: ret
+;
+; RV64V-ZVFH-LABEL: mgather_v2f16:
+; RV64V-ZVFH: # %bb.0:
+; RV64V-ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
+; RV64V-ZVFH-NEXT: vluxei64.v v9, (zero), v8, v0.t
+; RV64V-ZVFH-NEXT: vmv1r.v v8, v9
+; RV64V-ZVFH-NEXT: ret
+;
+; RV32V-ZVFHMIN-LABEL: mgather_v2f16:
+; RV32V-ZVFHMIN: # %bb.0:
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v0
+; RV32V-ZVFHMIN-NEXT: andi a1, a0, 1
+; RV32V-ZVFHMIN-NEXT: bnez a1, .LBB69_3
+; RV32V-ZVFHMIN-NEXT: # %bb.1: # %else
+; RV32V-ZVFHMIN-NEXT: andi a0, a0, 2
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB69_4
+; RV32V-ZVFHMIN-NEXT: .LBB69_2: # %else2
+; RV32V-ZVFHMIN-NEXT: vmv1r.v v8, v9
+; RV32V-ZVFHMIN-NEXT: ret
+; RV32V-ZVFHMIN-NEXT: .LBB69_3: # %cond.load
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v8
+; RV32V-ZVFHMIN-NEXT: lh a1, 0(a1)
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, tu, ma
+; RV32V-ZVFHMIN-NEXT: vmv.s.x v9, a1
+; RV32V-ZVFHMIN-NEXT: andi a0, a0, 2
+; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB69_2
+; RV32V-ZVFHMIN-NEXT: .LBB69_4: # %cond.load1
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32V-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 1
+; RV32V-ZVFHMIN-NEXT: vmv1r.v v8, v9
+; RV32V-ZVFHMIN-NEXT: ret
+;
+; RV64V-ZVFHMIN-LABEL: mgather_v2f16:
+; RV64V-ZVFHMIN: # %bb.0:
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v0
+; RV64V-ZVFHMIN-NEXT: andi a1, a0, 1
+; RV64V-ZVFHMIN-NEXT: bnez a1, .LBB69_3
+; RV64V-ZVFHMIN-NEXT: # %bb.1: # %else
+; RV64V-ZVFHMIN-NEXT: andi a0, a0, 2
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB69_4
+; RV64V-ZVFHMIN-NEXT: .LBB69_2: # %else2
+; RV64V-ZVFHMIN-NEXT: vmv1r.v v8, v9
+; RV64V-ZVFHMIN-NEXT: ret
+; RV64V-ZVFHMIN-NEXT: .LBB69_3: # %cond.load
+; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v8
+; RV64V-ZVFHMIN-NEXT: lh a1, 0(a1)
+; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, tu, ma
+; RV64V-ZVFHMIN-NEXT: vmv.s.x v9, a1
+; RV64V-ZVFHMIN-NEXT: andi a0, a0, 2
+; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB69_2
+; RV64V-ZVFHMIN-NEXT: .LBB69_4: # %cond.load1
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV64V-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 1
+; RV64V-ZVFHMIN-NEXT: vmv1r.v v8, v9
+; RV64V-ZVFHMIN-NEXT: ret
+;
+; RV32ZVE32F-ZVFH-LABEL: mgather_v2f16:
+; RV32ZVE32F-ZVFH: # %bb.0:
+; RV32ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, mf2, ta, mu
+; RV32ZVE32F-ZVFH-NEXT: vluxei32.v v9, (zero), v8, v0.t
+; RV32ZVE32F-ZVFH-NEXT: vmv1r.v v8, v9
+; RV32ZVE32F-ZVFH-NEXT: ret
+;
+; RV64ZVE32F-ZVFH-LABEL: mgather_v2f16:
+; RV64ZVE32F-ZVFH: # %bb.0:
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v0
+; RV64ZVE32F-ZVFH-NEXT: andi a3, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: bnez a3, .LBB69_3
+; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %else
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 2
+; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB69_4
+; RV64ZVE32F-ZVFH-NEXT: .LBB69_2: # %else2
+; RV64ZVE32F-ZVFH-NEXT: ret
+; RV64ZVE32F-ZVFH-NEXT: .LBB69_3: # %cond.load
+; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a0)
+; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m2, tu, ma
+; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 2
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB69_2
+; RV64ZVE32F-ZVFH-NEXT: .LBB69_4: # %cond.load1
+; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a1)
+; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v8, v9, 1
+; RV64ZVE32F-ZVFH-NEXT: ret
+;
+; RV32ZVE32F-ZVFHMIN-LABEL: mgather_v2f16:
+; RV32ZVE32F-ZVFHMIN: # %bb.0:
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v0
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 1
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB69_3
+; RV32ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a0, 2
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB69_4
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB69_2: # %else2
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: ret
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB69_3: # %cond.load
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v8
+; RV32ZVE32F-ZVFHMIN-NEXT: lh a1, 0(a1)
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, tu, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a1
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a0, 2
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB69_2
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB69_4: # %cond.load1
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 1
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: ret
+;
+; RV64ZVE32F-ZVFHMIN-LABEL: mgather_v2f16:
+; RV64ZVE32F-ZVFHMIN: # %bb.0:
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v0
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a3, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a3, .LBB69_3
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB69_4
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_2: # %else2
+; RV64ZVE32F-ZVFHMIN-NEXT: ret
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_3: # %cond.load
+; RV64ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, tu, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB69_2
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_4: # %cond.load1
+; RV64ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a1)
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a0
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v8, v9, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: ret
+ %v = call <2 x half> @llvm.masked.gather.v2f16.v2p0(<2 x ptr> %ptrs, i32 2, <2 x i1> %m, <2 x half> %passthru)
+ ret <2 x half> %v
+}
+
+declare <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x half>)
+
+define <4 x half> @mgather_v4f16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x half> %passthru) {
+; RV32V-ZVFH-LABEL: mgather_v4f16:
+; RV32V-ZVFH: # %bb.0:
+; RV32V-ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
+; RV32V-ZVFH-NEXT: vluxei32.v v9, (zero), v8, v0.t
+; RV32V-ZVFH-NEXT: vmv1r.v v8, v9
+; RV32V-ZVFH-NEXT: ret
+;
+; RV64V-ZVFH-LABEL: mgather_v4f16:
+; RV64V-ZVFH: # %bb.0:
+; RV64V-ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
+; RV64V-ZVFH-NEXT: vluxei64.v v10, (zero), v8, v0.t
+; RV64V-ZVFH-NEXT: vmv1r.v v8, v10
+; RV64V-ZVFH-NEXT: ret
+;
+; RV32V-ZVFHMIN-LABEL: mgather_v4f16:
+; RV32V-ZVFHMIN: # %bb.0:
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v0
+; RV32V-ZVFHMIN-NEXT: andi a1, a0, 1
+; RV32V-ZVFHMIN-NEXT: bnez a1, .LBB70_5
+; RV32V-ZVFHMIN-NEXT: # %bb.1: # %else
+; RV32V-ZVFHMIN-NEXT: andi a1, a0, 2
+; RV32V-ZVFHMIN-NEXT: bnez a1, .LBB70_6
+; RV32V-ZVFHMIN-NEXT: .LBB70_2: # %else2
+; RV32V-ZVFHMIN-NEXT: andi a1, a0, 4
+; RV32V-ZVFHMIN-NEXT: bnez a1, .LBB70_7
+; RV32V-ZVFHMIN-NEXT: .LBB70_3: # %else5
+; RV32V-ZVFHMIN-NEXT: andi a0, a0, 8
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB70_8
+; RV32V-ZVFHMIN-NEXT: .LBB70_4: # %else8
+; RV32V-ZVFHMIN-NEXT: vmv1r.v v8, v9
+; RV32V-ZVFHMIN-NEXT: ret
+; RV32V-ZVFHMIN-NEXT: .LBB70_5: # %cond.load
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v8
+; RV32V-ZVFHMIN-NEXT: lh a1, 0(a1)
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, tu, ma
+; RV32V-ZVFHMIN-NEXT: vmv.s.x v9, a1
+; RV32V-ZVFHMIN-NEXT: andi a1, a0, 2
+; RV32V-ZVFHMIN-NEXT: beqz a1, .LBB70_2
+; RV32V-ZVFHMIN-NEXT: .LBB70_6: # %cond.load1
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 1
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v10
+; RV32V-ZVFHMIN-NEXT: lh a1, 0(a1)
+; RV32V-ZVFHMIN-NEXT: vmv.s.x v10, a1
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
+; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v10, 1
+; RV32V-ZVFHMIN-NEXT: andi a1, a0, 4
+; RV32V-ZVFHMIN-NEXT: beqz a1, .LBB70_3
+; RV32V-ZVFHMIN-NEXT: .LBB70_7: # %cond.load4
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 2
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v10
+; RV32V-ZVFHMIN-NEXT: lh a1, 0(a1)
+; RV32V-ZVFHMIN-NEXT: vmv.s.x v10, a1
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 3, e16, mf2, tu, ma
+; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v10, 2
+; RV32V-ZVFHMIN-NEXT: andi a0, a0, 8
+; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB70_4
+; RV32V-ZVFHMIN-NEXT: .LBB70_8: # %cond.load7
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 3
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32V-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 3
+; RV32V-ZVFHMIN-NEXT: vmv1r.v v8, v9
+; RV32V-ZVFHMIN-NEXT: ret
+;
+; RV64V-ZVFHMIN-LABEL: mgather_v4f16:
+; RV64V-ZVFHMIN: # %bb.0:
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v0
+; RV64V-ZVFHMIN-NEXT: andi a1, a0, 1
+; RV64V-ZVFHMIN-NEXT: bnez a1, .LBB70_5
+; RV64V-ZVFHMIN-NEXT: # %bb.1: # %else
+; RV64V-ZVFHMIN-NEXT: andi a1, a0, 2
+; RV64V-ZVFHMIN-NEXT: bnez a1, .LBB70_6
+; RV64V-ZVFHMIN-NEXT: .LBB70_2: # %else2
+; RV64V-ZVFHMIN-NEXT: andi a1, a0, 4
+; RV64V-ZVFHMIN-NEXT: bnez a1, .LBB70_7
+; RV64V-ZVFHMIN-NEXT: .LBB70_3: # %else5
+; RV64V-ZVFHMIN-NEXT: andi a0, a0, 8
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB70_8
+; RV64V-ZVFHMIN-NEXT: .LBB70_4: # %else8
+; RV64V-ZVFHMIN-NEXT: vmv1r.v v8, v10
+; RV64V-ZVFHMIN-NEXT: ret
+; RV64V-ZVFHMIN-NEXT: .LBB70_5: # %cond.load
+; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v8
+; RV64V-ZVFHMIN-NEXT: lh a1, 0(a1)
+; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, tu, ma
+; RV64V-ZVFHMIN-NEXT: vmv.s.x v10, a1
+; RV64V-ZVFHMIN-NEXT: andi a1, a0, 2
+; RV64V-ZVFHMIN-NEXT: beqz a1, .LBB70_2
+; RV64V-ZVFHMIN-NEXT: .LBB70_6: # %cond.load1
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v11, v8, 1
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v11
+; RV64V-ZVFHMIN-NEXT: lh a1, 0(a1)
+; RV64V-ZVFHMIN-NEXT: vmv.s.x v11, a1
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
+; RV64V-ZVFHMIN-NEXT: vslideup.vi v10, v11, 1
+; RV64V-ZVFHMIN-NEXT: andi a1, a0, 4
+; RV64V-ZVFHMIN-NEXT: beqz a1, .LBB70_3
+; RV64V-ZVFHMIN-NEXT: .LBB70_7: # %cond.load4
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v12, v8, 2
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v12
+; RV64V-ZVFHMIN-NEXT: lh a1, 0(a1)
+; RV64V-ZVFHMIN-NEXT: vmv.s.x v11, a1
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 3, e16, mf2, tu, ma
+; RV64V-ZVFHMIN-NEXT: vslideup.vi v10, v11, 2
+; RV64V-ZVFHMIN-NEXT: andi a0, a0, 8
+; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB70_4
+; RV64V-ZVFHMIN-NEXT: .LBB70_8: # %cond.load7
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 3
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV64V-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslideup.vi v10, v8, 3
+; RV64V-ZVFHMIN-NEXT: vmv1r.v v8, v10
+; RV64V-ZVFHMIN-NEXT: ret
+;
+; RV32ZVE32F-ZVFH-LABEL: mgather_v4f16:
+; RV32ZVE32F-ZVFH: # %bb.0:
+; RV32ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
+; RV32ZVE32F-ZVFH-NEXT: vluxei32.v v9, (zero), v8, v0.t
+; RV32ZVE32F-ZVFH-NEXT: vmv1r.v v8, v9
+; RV32ZVE32F-ZVFH-NEXT: ret
+;
+; RV64ZVE32F-ZVFH-LABEL: mgather_v4f16:
+; RV64ZVE32F-ZVFH: # %bb.0:
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v0
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 1
+; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB70_5
+; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %else
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 2
+; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB70_6
+; RV64ZVE32F-ZVFH-NEXT: .LBB70_2: # %else2
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4
+; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB70_7
+; RV64ZVE32F-ZVFH-NEXT: .LBB70_3: # %else5
+; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, 8
+; RV64ZVE32F-ZVFH-NEXT: bnez a1, .LBB70_8
+; RV64ZVE32F-ZVFH-NEXT: .LBB70_4: # %else8
+; RV64ZVE32F-ZVFH-NEXT: ret
+; RV64ZVE32F-ZVFH-NEXT: .LBB70_5: # %cond.load
+; RV64ZVE32F-ZVFH-NEXT: ld a2, 0(a0)
+; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m2, tu, ma
+; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 2
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB70_2
+; RV64ZVE32F-ZVFH-NEXT: .LBB70_6: # %cond.load1
+; RV64ZVE32F-ZVFH-NEXT: ld a2, 8(a0)
+; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
+; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v8, v9, 1
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB70_3
+; RV64ZVE32F-ZVFH-NEXT: .LBB70_7: # %cond.load4
+; RV64ZVE32F-ZVFH-NEXT: ld a2, 16(a0)
+; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 3, e16, mf2, tu, ma
+; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5
+; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v8, v9, 2
+; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, 8
+; RV64ZVE32F-ZVFH-NEXT: beqz a1, .LBB70_4
+; RV64ZVE32F-ZVFH-NEXT: .LBB70_8: # %cond.load7
+; RV64ZVE32F-ZVFH-NEXT: ld a0, 24(a0)
+; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a0)
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5
+; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v8, v9, 3
+; RV64ZVE32F-ZVFH-NEXT: ret
+;
+; RV32ZVE32F-ZVFHMIN-LABEL: mgather_v4f16:
+; RV32ZVE32F-ZVFHMIN: # %bb.0:
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v0
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 1
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB70_5
+; RV32ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 2
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB70_6
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB70_2: # %else2
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 4
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB70_7
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB70_3: # %else5
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a0, 8
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB70_8
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB70_4: # %else8
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: ret
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB70_5: # %cond.load
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v8
+; RV32ZVE32F-ZVFHMIN-NEXT: lh a1, 0(a1)
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, tu, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a1
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 2
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB70_2
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB70_6: # %cond.load1
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 1
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v10
+; RV32ZVE32F-ZVFHMIN-NEXT: lh a1, 0(a1)
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v10, a1
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v10, 1
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 4
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB70_3
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB70_7: # %cond.load4
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 2
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v10
+; RV32ZVE32F-ZVFHMIN-NEXT: lh a1, 0(a1)
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v10, a1
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 3, e16, mf2, tu, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v10, 2
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a0, 8
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB70_4
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB70_8: # %cond.load7
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 3
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 3
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: ret
+;
+; RV64ZVE32F-ZVFHMIN-LABEL: mgather_v4f16:
+; RV64ZVE32F-ZVFHMIN: # %bb.0:
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v0
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB70_5
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB70_6
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_2: # %else2
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB70_7
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_3: # %else5
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, 8
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB70_8
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_4: # %else8
+; RV64ZVE32F-ZVFHMIN-NEXT: ret
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_5: # %cond.load
+; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 0(a0)
+; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, tu, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB70_2
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_6: # %cond.load1
+; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 8(a0)
+; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v8, v9, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB70_3
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_7: # %cond.load4
+; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 16(a0)
+; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 3, e16, mf2, tu, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v8, v9, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, 8
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB70_4
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_8: # %cond.load7
+; RV64ZVE32F-ZVFHMIN-NEXT: ld a0, 24(a0)
+; RV64ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a0
+; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v8, v9, 3
+; RV64ZVE32F-ZVFHMIN-NEXT: ret
+ %v = call <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> %m, <4 x half> %passthru)
+ ret <4 x half> %v
+}
+
+define <4 x half> @mgather_truemask_v4f16(<4 x ptr> %ptrs, <4 x half> %passthru) {
+; RV32V-ZVFH-LABEL: mgather_truemask_v4f16:
+; RV32V-ZVFH: # %bb.0:
+; RV32V-ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV32V-ZVFH-NEXT: vluxei32.v v9, (zero), v8
+; RV32V-ZVFH-NEXT: vmv1r.v v8, v9
+; RV32V-ZVFH-NEXT: ret
+;
+; RV64V-ZVFH-LABEL: mgather_truemask_v4f16:
+; RV64V-ZVFH: # %bb.0:
+; RV64V-ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV64V-ZVFH-NEXT: vluxei64.v v10, (zero), v8
+; RV64V-ZVFH-NEXT: vmv1r.v v8, v10
+; RV64V-ZVFH-NEXT: ret
+;
+; RV32V-ZVFHMIN-LABEL: mgather_truemask_v4f16:
+; RV32V-ZVFHMIN: # %bb.0:
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 1
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v9
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 2
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a2, v9
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 3
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a3, v8
+; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32V-ZVFHMIN-NEXT: lh a1, 0(a1)
+; RV32V-ZVFHMIN-NEXT: lh a2, 0(a2)
+; RV32V-ZVFHMIN-NEXT: lh a3, 0(a3)
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vmv.v.x v8, a0
+; RV32V-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a1
+; RV32V-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a2
+; RV32V-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a3
+; RV32V-ZVFHMIN-NEXT: ret
+;
+; RV64V-ZVFHMIN-LABEL: mgather_truemask_v4f16:
+; RV64V-ZVFHMIN: # %bb.0:
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 1
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v10
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 2
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a2, v10
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 3
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a3, v8
+; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV64V-ZVFHMIN-NEXT: lh a1, 0(a1)
+; RV64V-ZVFHMIN-NEXT: lh a2, 0(a2)
+; RV64V-ZVFHMIN-NEXT: lh a3, 0(a3)
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV64V-ZVFHMIN-NEXT: vmv.v.x v8, a0
+; RV64V-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a1
+; RV64V-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a2
+; RV64V-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a3
+; RV64V-ZVFHMIN-NEXT: ret
+;
+; RV32ZVE32F-ZVFH-LABEL: mgather_truemask_v4f16:
+; RV32ZVE32F-ZVFH: # %bb.0:
+; RV32ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV32ZVE32F-ZVFH-NEXT: vluxei32.v v9, (zero), v8
+; RV32ZVE32F-ZVFH-NEXT: vmv1r.v v8, v9
+; RV32ZVE32F-ZVFH-NEXT: ret
+;
+; RV64ZVE32F-ZVFH-LABEL: mgather_truemask_v4f16:
+; RV64ZVE32F-ZVFH: # %bb.0:
+; RV64ZVE32F-ZVFH-NEXT: ld a1, 0(a0)
+; RV64ZVE32F-ZVFH-NEXT: ld a2, 8(a0)
+; RV64ZVE32F-ZVFH-NEXT: ld a3, 16(a0)
+; RV64ZVE32F-ZVFH-NEXT: ld a0, 24(a0)
+; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a1)
+; RV64ZVE32F-ZVFH-NEXT: flh fa4, 0(a2)
+; RV64ZVE32F-ZVFH-NEXT: flh fa3, 0(a3)
+; RV64ZVE32F-ZVFH-NEXT: flh fa2, 0(a0)
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vfmv.v.f v8, fa5
+; RV64ZVE32F-ZVFH-NEXT: vfslide1down.vf v8, v8, fa4
+; RV64ZVE32F-ZVFH-NEXT: vfslide1down.vf v8, v8, fa3
+; RV64ZVE32F-ZVFH-NEXT: vfslide1down.vf v8, v8, fa2
+; RV64ZVE32F-ZVFH-NEXT: ret
+;
+; RV32ZVE32F-ZVFHMIN-LABEL: mgather_truemask_v4f16:
+; RV32ZVE32F-ZVFHMIN: # %bb.0:
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 1
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 2
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 3
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v8
+; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: lh a1, 0(a1)
+; RV32ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
+; RV32ZVE32F-ZVFHMIN-NEXT: lh a3, 0(a3)
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.v.x v8, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a1
+; RV32ZVE32F-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a2
+; RV32ZVE32F-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a3
+; RV32ZVE32F-ZVFHMIN-NEXT: ret
+;
+; RV64ZVE32F-ZVFHMIN-LABEL: mgather_truemask_v4f16:
+; RV64ZVE32F-ZVFHMIN: # %bb.0:
+; RV64ZVE32F-ZVFHMIN-NEXT: ld a1, 0(a0)
+; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 8(a0)
+; RV64ZVE32F-ZVFHMIN-NEXT: ld a3, 16(a0)
+; RV64ZVE32F-ZVFHMIN-NEXT: ld a0, 24(a0)
+; RV64ZVE32F-ZVFHMIN-NEXT: lh a1, 0(a1)
+; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: lh a3, 0(a3)
+; RV64ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.v.x v8, a1
+; RV64ZVE32F-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a3
+; RV64ZVE32F-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0
+; RV64ZVE32F-ZVFHMIN-NEXT: ret
+ %v = call <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> splat (i1 1), <4 x half> %passthru)
+ ret <4 x half> %v
+}
+
+define <4 x half> @mgather_falsemask_v4f16(<4 x ptr> %ptrs, <4 x half> %passthru) {
+; RV32-LABEL: mgather_falsemask_v4f16:
+; RV32: # %bb.0:
+; RV32-NEXT: vmv1r.v v8, v9
+; RV32-NEXT: ret
+;
+; RV64V-LABEL: mgather_falsemask_v4f16:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vmv1r.v v8, v10
+; RV64V-NEXT: ret
+;
+; RV64ZVE32F-LABEL: mgather_falsemask_v4f16:
+; RV64ZVE32F: # %bb.0:
+; RV64ZVE32F-NEXT: ret
+ %v = call <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> zeroinitializer, <4 x half> %passthru)
+ ret <4 x half> %v
+}
+
+declare <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x half>)
+
+define <8 x half> @mgather_v8f16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x half> %passthru) {
+; RV32V-ZVFH-LABEL: mgather_v8f16:
+; RV32V-ZVFH: # %bb.0:
+; RV32V-ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, mu
+; RV32V-ZVFH-NEXT: vluxei32.v v10, (zero), v8, v0.t
+; RV32V-ZVFH-NEXT: vmv.v.v v8, v10
+; RV32V-ZVFH-NEXT: ret
+;
+; RV64V-ZVFH-LABEL: mgather_v8f16:
+; RV64V-ZVFH: # %bb.0:
+; RV64V-ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, mu
+; RV64V-ZVFH-NEXT: vluxei64.v v12, (zero), v8, v0.t
+; RV64V-ZVFH-NEXT: vmv.v.v v8, v12
+; RV64V-ZVFH-NEXT: ret
+;
+; RV32V-ZVFHMIN-LABEL: mgather_v8f16:
+; RV32V-ZVFHMIN: # %bb.0:
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v0
+; RV32V-ZVFHMIN-NEXT: andi a1, a0, 1
+; RV32V-ZVFHMIN-NEXT: bnez a1, .LBB73_9
+; RV32V-ZVFHMIN-NEXT: # %bb.1: # %else
+; RV32V-ZVFHMIN-NEXT: andi a1, a0, 2
+; RV32V-ZVFHMIN-NEXT: bnez a1, .LBB73_10
+; RV32V-ZVFHMIN-NEXT: .LBB73_2: # %else2
+; RV32V-ZVFHMIN-NEXT: andi a1, a0, 4
+; RV32V-ZVFHMIN-NEXT: bnez a1, .LBB73_11
+; RV32V-ZVFHMIN-NEXT: .LBB73_3: # %else5
+; RV32V-ZVFHMIN-NEXT: andi a1, a0, 8
+; RV32V-ZVFHMIN-NEXT: bnez a1, .LBB73_12
+; RV32V-ZVFHMIN-NEXT: .LBB73_4: # %else8
+; RV32V-ZVFHMIN-NEXT: andi a1, a0, 16
+; RV32V-ZVFHMIN-NEXT: bnez a1, .LBB73_13
+; RV32V-ZVFHMIN-NEXT: .LBB73_5: # %else11
+; RV32V-ZVFHMIN-NEXT: andi a1, a0, 32
+; RV32V-ZVFHMIN-NEXT: bnez a1, .LBB73_14
+; RV32V-ZVFHMIN-NEXT: .LBB73_6: # %else14
+; RV32V-ZVFHMIN-NEXT: andi a1, a0, 64
+; RV32V-ZVFHMIN-NEXT: bnez a1, .LBB73_15
+; RV32V-ZVFHMIN-NEXT: .LBB73_7: # %else17
+; RV32V-ZVFHMIN-NEXT: andi a0, a0, -128
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB73_16
+; RV32V-ZVFHMIN-NEXT: .LBB73_8: # %else20
+; RV32V-ZVFHMIN-NEXT: vmv1r.v v8, v10
+; RV32V-ZVFHMIN-NEXT: ret
+; RV32V-ZVFHMIN-NEXT: .LBB73_9: # %cond.load
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v8
+; RV32V-ZVFHMIN-NEXT: lh a1, 0(a1)
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, tu, ma
+; RV32V-ZVFHMIN-NEXT: vmv.s.x v10, a1
+; RV32V-ZVFHMIN-NEXT: andi a1, a0, 2
+; RV32V-ZVFHMIN-NEXT: beqz a1, .LBB73_2
+; RV32V-ZVFHMIN-NEXT: .LBB73_10: # %cond.load1
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v11, v8, 1
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v11
+; RV32V-ZVFHMIN-NEXT: lh a1, 0(a1)
+; RV32V-ZVFHMIN-NEXT: vmv.s.x v11, a1
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 2, e16, m1, tu, ma
+; RV32V-ZVFHMIN-NEXT: vslideup.vi v10, v11, 1
+; RV32V-ZVFHMIN-NEXT: andi a1, a0, 4
+; RV32V-ZVFHMIN-NEXT: beqz a1, .LBB73_3
+; RV32V-ZVFHMIN-NEXT: .LBB73_11: # %cond.load4
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v11, v8, 2
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v11
+; RV32V-ZVFHMIN-NEXT: lh a1, 0(a1)
+; RV32V-ZVFHMIN-NEXT: vmv.s.x v11, a1
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 3, e16, m1, tu, ma
+; RV32V-ZVFHMIN-NEXT: vslideup.vi v10, v11, 2
+; RV32V-ZVFHMIN-NEXT: andi a1, a0, 8
+; RV32V-ZVFHMIN-NEXT: beqz a1, .LBB73_4
+; RV32V-ZVFHMIN-NEXT: .LBB73_12: # %cond.load7
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v11, v8, 3
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v11
+; RV32V-ZVFHMIN-NEXT: lh a1, 0(a1)
+; RV32V-ZVFHMIN-NEXT: vmv.s.x v11, a1
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma
+; RV32V-ZVFHMIN-NEXT: vslideup.vi v10, v11, 3
+; RV32V-ZVFHMIN-NEXT: andi a1, a0, 16
+; RV32V-ZVFHMIN-NEXT: beqz a1, .LBB73_5
+; RV32V-ZVFHMIN-NEXT: .LBB73_13: # %cond.load10
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v12, v8, 4
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v12
+; RV32V-ZVFHMIN-NEXT: lh a1, 0(a1)
+; RV32V-ZVFHMIN-NEXT: vmv.s.x v11, a1
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 5, e16, m1, tu, ma
+; RV32V-ZVFHMIN-NEXT: vslideup.vi v10, v11, 4
+; RV32V-ZVFHMIN-NEXT: andi a1, a0, 32
+; RV32V-ZVFHMIN-NEXT: beqz a1, .LBB73_6
+; RV32V-ZVFHMIN-NEXT: .LBB73_14: # %cond.load13
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v12, v8, 5
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v12
+; RV32V-ZVFHMIN-NEXT: lh a1, 0(a1)
+; RV32V-ZVFHMIN-NEXT: vmv.s.x v11, a1
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, tu, ma
+; RV32V-ZVFHMIN-NEXT: vslideup.vi v10, v11, 5
+; RV32V-ZVFHMIN-NEXT: andi a1, a0, 64
+; RV32V-ZVFHMIN-NEXT: beqz a1, .LBB73_7
+; RV32V-ZVFHMIN-NEXT: .LBB73_15: # %cond.load16
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v12, v8, 6
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v12
+; RV32V-ZVFHMIN-NEXT: lh a1, 0(a1)
+; RV32V-ZVFHMIN-NEXT: vmv.s.x v11, a1
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 7, e16, m1, tu, ma
+; RV32V-ZVFHMIN-NEXT: vslideup.vi v10, v11, 6
+; RV32V-ZVFHMIN-NEXT: andi a0, a0, -128
+; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB73_8
+; RV32V-ZVFHMIN-NEXT: .LBB73_16: # %cond.load19
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32V-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslideup.vi v10, v8, 7
+; RV32V-ZVFHMIN-NEXT: vmv1r.v v8, v10
+; RV32V-ZVFHMIN-NEXT: ret
+;
+; RV64V-ZVFHMIN-LABEL: mgather_v8f16:
+; RV64V-ZVFHMIN: # %bb.0:
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v0
+; RV64V-ZVFHMIN-NEXT: andi a1, a0, 1
+; RV64V-ZVFHMIN-NEXT: bnez a1, .LBB73_11
+; RV64V-ZVFHMIN-NEXT: # %bb.1: # %else
+; RV64V-ZVFHMIN-NEXT: andi a1, a0, 2
+; RV64V-ZVFHMIN-NEXT: bnez a1, .LBB73_12
+; RV64V-ZVFHMIN-NEXT: .LBB73_2: # %else2
+; RV64V-ZVFHMIN-NEXT: andi a1, a0, 4
+; RV64V-ZVFHMIN-NEXT: bnez a1, .LBB73_13
+; RV64V-ZVFHMIN-NEXT: .LBB73_3: # %else5
+; RV64V-ZVFHMIN-NEXT: andi a1, a0, 8
+; RV64V-ZVFHMIN-NEXT: beqz a1, .LBB73_5
+; RV64V-ZVFHMIN-NEXT: .LBB73_4: # %cond.load7
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v14, v8, 3
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v14
+; RV64V-ZVFHMIN-NEXT: lh a1, 0(a1)
+; RV64V-ZVFHMIN-NEXT: vmv.s.x v13, a1
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma
+; RV64V-ZVFHMIN-NEXT: vslideup.vi v12, v13, 3
+; RV64V-ZVFHMIN-NEXT: .LBB73_5: # %else8
+; RV64V-ZVFHMIN-NEXT: addi sp, sp, -320
+; RV64V-ZVFHMIN-NEXT: .cfi_def_cfa_offset 320
+; RV64V-ZVFHMIN-NEXT: sd ra, 312(sp) # 8-byte Folded Spill
+; RV64V-ZVFHMIN-NEXT: sd s0, 304(sp) # 8-byte Folded Spill
+; RV64V-ZVFHMIN-NEXT: .cfi_offset ra, -8
+; RV64V-ZVFHMIN-NEXT: .cfi_offset s0, -16
+; RV64V-ZVFHMIN-NEXT: addi s0, sp, 320
+; RV64V-ZVFHMIN-NEXT: .cfi_def_cfa s0, 0
+; RV64V-ZVFHMIN-NEXT: andi sp, sp, -64
+; RV64V-ZVFHMIN-NEXT: andi a1, a0, 16
+; RV64V-ZVFHMIN-NEXT: bnez a1, .LBB73_14
+; RV64V-ZVFHMIN-NEXT: # %bb.6: # %else11
+; RV64V-ZVFHMIN-NEXT: andi a1, a0, 32
+; RV64V-ZVFHMIN-NEXT: bnez a1, .LBB73_15
+; RV64V-ZVFHMIN-NEXT: .LBB73_7: # %else14
+; RV64V-ZVFHMIN-NEXT: andi a1, a0, 64
+; RV64V-ZVFHMIN-NEXT: bnez a1, .LBB73_16
+; RV64V-ZVFHMIN-NEXT: .LBB73_8: # %else17
+; RV64V-ZVFHMIN-NEXT: andi a0, a0, -128
+; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB73_10
+; RV64V-ZVFHMIN-NEXT: .LBB73_9: # %cond.load19
+; RV64V-ZVFHMIN-NEXT: mv a0, sp
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-ZVFHMIN-NEXT: vse64.v v8, (a0)
+; RV64V-ZVFHMIN-NEXT: ld a0, 56(sp)
+; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV64V-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslideup.vi v12, v8, 7
+; RV64V-ZVFHMIN-NEXT: .LBB73_10: # %else20
+; RV64V-ZVFHMIN-NEXT: vmv1r.v v8, v12
+; RV64V-ZVFHMIN-NEXT: addi sp, s0, -320
+; RV64V-ZVFHMIN-NEXT: ld ra, 312(sp) # 8-byte Folded Reload
+; RV64V-ZVFHMIN-NEXT: ld s0, 304(sp) # 8-byte Folded Reload
+; RV64V-ZVFHMIN-NEXT: addi sp, sp, 320
+; RV64V-ZVFHMIN-NEXT: ret
+; RV64V-ZVFHMIN-NEXT: .LBB73_11: # %cond.load
+; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v8
+; RV64V-ZVFHMIN-NEXT: lh a1, 0(a1)
+; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, tu, ma
+; RV64V-ZVFHMIN-NEXT: vmv.s.x v12, a1
+; RV64V-ZVFHMIN-NEXT: andi a1, a0, 2
+; RV64V-ZVFHMIN-NEXT: beqz a1, .LBB73_2
+; RV64V-ZVFHMIN-NEXT: .LBB73_12: # %cond.load1
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v13, v8, 1
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v13
+; RV64V-ZVFHMIN-NEXT: lh a1, 0(a1)
+; RV64V-ZVFHMIN-NEXT: vmv.s.x v13, a1
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 2, e16, m1, tu, ma
+; RV64V-ZVFHMIN-NEXT: vslideup.vi v12, v13, 1
+; RV64V-ZVFHMIN-NEXT: andi a1, a0, 4
+; RV64V-ZVFHMIN-NEXT: beqz a1, .LBB73_3
+; RV64V-ZVFHMIN-NEXT: .LBB73_13: # %cond.load4
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v14, v8, 2
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v14
+; RV64V-ZVFHMIN-NEXT: lh a1, 0(a1)
+; RV64V-ZVFHMIN-NEXT: vmv.s.x v13, a1
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 3, e16, m1, tu, ma
+; RV64V-ZVFHMIN-NEXT: vslideup.vi v12, v13, 2
+; RV64V-ZVFHMIN-NEXT: andi a1, a0, 8
+; RV64V-ZVFHMIN-NEXT: bnez a1, .LBB73_4
+; RV64V-ZVFHMIN-NEXT: j .LBB73_5
+; RV64V-ZVFHMIN-NEXT: .LBB73_14: # %cond.load10
+; RV64V-ZVFHMIN-NEXT: addi a1, sp, 192
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-ZVFHMIN-NEXT: vse64.v v8, (a1)
+; RV64V-ZVFHMIN-NEXT: ld a1, 224(sp)
+; RV64V-ZVFHMIN-NEXT: lh a1, 0(a1)
+; RV64V-ZVFHMIN-NEXT: vmv.s.x v13, a1
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 5, e16, m1, tu, ma
+; RV64V-ZVFHMIN-NEXT: vslideup.vi v12, v13, 4
+; RV64V-ZVFHMIN-NEXT: andi a1, a0, 32
+; RV64V-ZVFHMIN-NEXT: beqz a1, .LBB73_7
+; RV64V-ZVFHMIN-NEXT: .LBB73_15: # %cond.load13
+; RV64V-ZVFHMIN-NEXT: addi a1, sp, 128
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-ZVFHMIN-NEXT: vse64.v v8, (a1)
+; RV64V-ZVFHMIN-NEXT: ld a1, 168(sp)
+; RV64V-ZVFHMIN-NEXT: lh a1, 0(a1)
+; RV64V-ZVFHMIN-NEXT: vmv.s.x v13, a1
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, tu, ma
+; RV64V-ZVFHMIN-NEXT: vslideup.vi v12, v13, 5
+; RV64V-ZVFHMIN-NEXT: andi a1, a0, 64
+; RV64V-ZVFHMIN-NEXT: beqz a1, .LBB73_8
+; RV64V-ZVFHMIN-NEXT: .LBB73_16: # %cond.load16
+; RV64V-ZVFHMIN-NEXT: addi a1, sp, 64
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-ZVFHMIN-NEXT: vse64.v v8, (a1)
+; RV64V-ZVFHMIN-NEXT: ld a1, 112(sp)
+; RV64V-ZVFHMIN-NEXT: lh a1, 0(a1)
+; RV64V-ZVFHMIN-NEXT: vmv.s.x v13, a1
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 7, e16, m1, tu, ma
+; RV64V-ZVFHMIN-NEXT: vslideup.vi v12, v13, 6
+; RV64V-ZVFHMIN-NEXT: andi a0, a0, -128
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB73_9
+; RV64V-ZVFHMIN-NEXT: j .LBB73_10
+;
+; RV32ZVE32F-ZVFH-LABEL: mgather_v8f16:
+; RV32ZVE32F-ZVFH: # %bb.0:
+; RV32ZVE32F-ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, mu
+; RV32ZVE32F-ZVFH-NEXT: vluxei32.v v10, (zero), v8, v0.t
+; RV32ZVE32F-ZVFH-NEXT: vmv.v.v v8, v10
+; RV32ZVE32F-ZVFH-NEXT: ret
+;
+; RV64ZVE32F-ZVFH-LABEL: mgather_v8f16:
+; RV64ZVE32F-ZVFH: # %bb.0:
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v0
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 1
+; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB73_9
+; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %else
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 2
+; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB73_10
+; RV64ZVE32F-ZVFH-NEXT: .LBB73_2: # %else2
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4
+; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB73_11
+; RV64ZVE32F-ZVFH-NEXT: .LBB73_3: # %else5
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
+; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB73_12
+; RV64ZVE32F-ZVFH-NEXT: .LBB73_4: # %else8
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
+; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB73_13
+; RV64ZVE32F-ZVFH-NEXT: .LBB73_5: # %else11
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
+; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB73_14
+; RV64ZVE32F-ZVFH-NEXT: .LBB73_6: # %else14
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64
+; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB73_15
+; RV64ZVE32F-ZVFH-NEXT: .LBB73_7: # %else17
+; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
+; RV64ZVE32F-ZVFH-NEXT: bnez a1, .LBB73_16
+; RV64ZVE32F-ZVFH-NEXT: .LBB73_8: # %else20
+; RV64ZVE32F-ZVFH-NEXT: ret
+; RV64ZVE32F-ZVFH-NEXT: .LBB73_9: # %cond.load
+; RV64ZVE32F-ZVFH-NEXT: ld a2, 0(a0)
+; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m2, tu, ma
+; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 2
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB73_2
+; RV64ZVE32F-ZVFH-NEXT: .LBB73_10: # %cond.load1
+; RV64ZVE32F-ZVFH-NEXT: ld a2, 8(a0)
+; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, m1, tu, ma
+; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v8, v9, 1
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB73_3
+; RV64ZVE32F-ZVFH-NEXT: .LBB73_11: # %cond.load4
+; RV64ZVE32F-ZVFH-NEXT: ld a2, 16(a0)
+; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 3, e16, m1, tu, ma
+; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5
+; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v8, v9, 2
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB73_4
+; RV64ZVE32F-ZVFH-NEXT: .LBB73_12: # %cond.load7
+; RV64ZVE32F-ZVFH-NEXT: ld a2, 24(a0)
+; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e16, m1, tu, ma
+; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5
+; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v8, v9, 3
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB73_5
+; RV64ZVE32F-ZVFH-NEXT: .LBB73_13: # %cond.load10
+; RV64ZVE32F-ZVFH-NEXT: ld a2, 32(a0)
+; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 5, e16, m1, tu, ma
+; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5
+; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v8, v9, 4
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB73_6
+; RV64ZVE32F-ZVFH-NEXT: .LBB73_14: # %cond.load13
+; RV64ZVE32F-ZVFH-NEXT: ld a2, 40(a0)
+; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 6, e16, m1, tu, ma
+; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5
+; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v8, v9, 5
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB73_7
+; RV64ZVE32F-ZVFH-NEXT: .LBB73_15: # %cond.load16
+; RV64ZVE32F-ZVFH-NEXT: ld a2, 48(a0)
+; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 7, e16, m1, tu, ma
+; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5
+; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v8, v9, 6
+; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
+; RV64ZVE32F-ZVFH-NEXT: beqz a1, .LBB73_8
+; RV64ZVE32F-ZVFH-NEXT: .LBB73_16: # %cond.load19
+; RV64ZVE32F-ZVFH-NEXT: ld a0, 56(a0)
+; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a0)
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5
+; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v8, v9, 7
+; RV64ZVE32F-ZVFH-NEXT: ret
+;
+; RV32ZVE32F-ZVFHMIN-LABEL: mgather_v8f16:
+; RV32ZVE32F-ZVFHMIN: # %bb.0:
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v0
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 1
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB73_9
+; RV32ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 2
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB73_10
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB73_2: # %else2
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 4
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB73_11
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB73_3: # %else5
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 8
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB73_12
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB73_4: # %else8
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 16
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB73_13
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB73_5: # %else11
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 32
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB73_14
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB73_6: # %else14
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 64
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB73_15
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB73_7: # %else17
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a0, -128
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB73_16
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB73_8: # %else20
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v10
+; RV32ZVE32F-ZVFHMIN-NEXT: ret
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB73_9: # %cond.load
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v8
+; RV32ZVE32F-ZVFHMIN-NEXT: lh a1, 0(a1)
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, tu, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v10, a1
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 2
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB73_2
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB73_10: # %cond.load1
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v11, v8, 1
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v11
+; RV32ZVE32F-ZVFHMIN-NEXT: lh a1, 0(a1)
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v11, a1
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, m1, tu, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v10, v11, 1
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 4
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB73_3
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB73_11: # %cond.load4
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v11, v8, 2
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v11
+; RV32ZVE32F-ZVFHMIN-NEXT: lh a1, 0(a1)
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v11, a1
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 3, e16, m1, tu, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v10, v11, 2
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 8
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB73_4
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB73_12: # %cond.load7
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v11, v8, 3
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v11
+; RV32ZVE32F-ZVFHMIN-NEXT: lh a1, 0(a1)
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v11, a1
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v10, v11, 3
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 16
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB73_5
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB73_13: # %cond.load10
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v12, v8, 4
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v12
+; RV32ZVE32F-ZVFHMIN-NEXT: lh a1, 0(a1)
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v11, a1
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 5, e16, m1, tu, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v10, v11, 4
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 32
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB73_6
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB73_14: # %cond.load13
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v12, v8, 5
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v12
+; RV32ZVE32F-ZVFHMIN-NEXT: lh a1, 0(a1)
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v11, a1
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, tu, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v10, v11, 5
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 64
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB73_7
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB73_15: # %cond.load16
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v12, v8, 6
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v12
+; RV32ZVE32F-ZVFHMIN-NEXT: lh a1, 0(a1)
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v11, a1
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 7, e16, m1, tu, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v10, v11, 6
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a0, -128
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB73_8
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB73_16: # %cond.load19
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v10, v8, 7
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v10
+; RV32ZVE32F-ZVFHMIN-NEXT: ret
+;
+; RV64ZVE32F-ZVFHMIN-LABEL: mgather_v8f16:
+; RV64ZVE32F-ZVFHMIN: # %bb.0:
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v0
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB73_9
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB73_10
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_2: # %else2
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB73_11
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_3: # %else5
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB73_12
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_4: # %else8
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB73_13
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_5: # %else11
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB73_14
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_6: # %else14
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB73_15
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_7: # %else17
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB73_16
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_8: # %else20
+; RV64ZVE32F-ZVFHMIN-NEXT: ret
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_9: # %cond.load
+; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 0(a0)
+; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, tu, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB73_2
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_10: # %cond.load1
+; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 8(a0)
+; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, m1, tu, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v8, v9, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB73_3
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_11: # %cond.load4
+; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 16(a0)
+; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 3, e16, m1, tu, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v8, v9, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB73_4
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_12: # %cond.load7
+; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 24(a0)
+; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v8, v9, 3
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB73_5
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_13: # %cond.load10
+; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 32(a0)
+; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 5, e16, m1, tu, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v8, v9, 4
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB73_6
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_14: # %cond.load13
+; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 40(a0)
+; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, tu, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v8, v9, 5
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB73_7
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_15: # %cond.load16
+; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 48(a0)
+; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 7, e16, m1, tu, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v8, v9, 6
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB73_8
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB73_16: # %cond.load19
+; RV64ZVE32F-ZVFHMIN-NEXT: ld a0, 56(a0)
+; RV64ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a0
+; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v8, v9, 7
+; RV64ZVE32F-ZVFHMIN-NEXT: ret
+ %v = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x half> %passthru)
+ ret <8 x half> %v
+}
+
+define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x half> %passthru) {
+; RV32V-ZVFH-LABEL: mgather_baseidx_v8i8_v8f16:
+; RV32V-ZVFH: # %bb.0:
+; RV32V-ZVFH-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32V-ZVFH-NEXT: vsext.vf4 v10, v8
+; RV32V-ZVFH-NEXT: vadd.vv v10, v10, v10
+; RV32V-ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; RV32V-ZVFH-NEXT: vluxei32.v v9, (a0), v10, v0.t
+; RV32V-ZVFH-NEXT: vmv.v.v v8, v9
+; RV32V-ZVFH-NEXT: ret
+;
+; RV64V-ZVFH-LABEL: mgather_baseidx_v8i8_v8f16:
+; RV64V-ZVFH: # %bb.0:
+; RV64V-ZVFH-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-ZVFH-NEXT: vsext.vf8 v12, v8
+; RV64V-ZVFH-NEXT: vadd.vv v12, v12, v12
+; RV64V-ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; RV64V-ZVFH-NEXT: vluxei64.v v9, (a0), v12, v0.t
+; RV64V-ZVFH-NEXT: vmv.v.v v8, v9
+; RV64V-ZVFH-NEXT: ret
+;
+; RV32V-ZVFHMIN-LABEL: mgather_baseidx_v8i8_v8f16:
+; RV32V-ZVFHMIN: # %bb.0:
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vsext.vf4 v10, v8
+; RV32V-ZVFHMIN-NEXT: vadd.vv v10, v10, v10
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v0
+; RV32V-ZVFHMIN-NEXT: andi a2, a1, 1
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vadd.vx v10, v10, a0
+; RV32V-ZVFHMIN-NEXT: bnez a2, .LBB74_9
+; RV32V-ZVFHMIN-NEXT: # %bb.1: # %else
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 2
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB74_10
+; RV32V-ZVFHMIN-NEXT: .LBB74_2: # %else2
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 4
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB74_11
+; RV32V-ZVFHMIN-NEXT: .LBB74_3: # %else5
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 8
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB74_12
+; RV32V-ZVFHMIN-NEXT: .LBB74_4: # %else8
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 16
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB74_13
+; RV32V-ZVFHMIN-NEXT: .LBB74_5: # %else11
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 32
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB74_14
+; RV32V-ZVFHMIN-NEXT: .LBB74_6: # %else14
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 64
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB74_15
+; RV32V-ZVFHMIN-NEXT: .LBB74_7: # %else17
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, -128
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB74_16
+; RV32V-ZVFHMIN-NEXT: .LBB74_8: # %else20
+; RV32V-ZVFHMIN-NEXT: vmv1r.v v8, v9
+; RV32V-ZVFHMIN-NEXT: ret
+; RV32V-ZVFHMIN-NEXT: .LBB74_9: # %cond.load
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v10
+; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, tu, ma
+; RV32V-ZVFHMIN-NEXT: vmv.s.x v9, a0
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 2
+; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB74_2
+; RV32V-ZVFHMIN-NEXT: .LBB74_10: # %cond.load1
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 1
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32V-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 2, e16, m1, tu, ma
+; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 1
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 4
+; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB74_3
+; RV32V-ZVFHMIN-NEXT: .LBB74_11: # %cond.load4
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 2
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32V-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 3, e16, m1, tu, ma
+; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 2
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 8
+; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB74_4
+; RV32V-ZVFHMIN-NEXT: .LBB74_12: # %cond.load7
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 3
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32V-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma
+; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 3
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 16
+; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB74_5
+; RV32V-ZVFHMIN-NEXT: .LBB74_13: # %cond.load10
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 4
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v12
+; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32V-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 5, e16, m1, tu, ma
+; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 4
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 32
+; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB74_6
+; RV32V-ZVFHMIN-NEXT: .LBB74_14: # %cond.load13
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 5
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v12
+; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32V-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, tu, ma
+; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 5
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 64
+; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB74_7
+; RV32V-ZVFHMIN-NEXT: .LBB74_15: # %cond.load16
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 6
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v12
+; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32V-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 7, e16, m1, tu, ma
+; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 6
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, -128
+; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB74_8
+; RV32V-ZVFHMIN-NEXT: .LBB74_16: # %cond.load19
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v10, v10, 7
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v10
+; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32V-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 7
+; RV32V-ZVFHMIN-NEXT: vmv1r.v v8, v9
+; RV32V-ZVFHMIN-NEXT: ret
+;
+; RV64V-ZVFHMIN-LABEL: mgather_baseidx_v8i8_v8f16:
+; RV64V-ZVFHMIN: # %bb.0:
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-ZVFHMIN-NEXT: vsext.vf8 v12, v8
+; RV64V-ZVFHMIN-NEXT: vadd.vv v12, v12, v12
+; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v0
+; RV64V-ZVFHMIN-NEXT: andi a2, a1, 1
+; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; RV64V-ZVFHMIN-NEXT: vadd.vx v12, v12, a0
+; RV64V-ZVFHMIN-NEXT: bnez a2, .LBB74_11
+; RV64V-ZVFHMIN-NEXT: # %bb.1: # %else
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 2
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB74_12
+; RV64V-ZVFHMIN-NEXT: .LBB74_2: # %else2
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 4
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB74_13
+; RV64V-ZVFHMIN-NEXT: .LBB74_3: # %else5
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 8
+; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB74_5
+; RV64V-ZVFHMIN-NEXT: .LBB74_4: # %cond.load7
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v10, v12, 3
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v10
+; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV64V-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma
+; RV64V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 3
+; RV64V-ZVFHMIN-NEXT: .LBB74_5: # %else8
+; RV64V-ZVFHMIN-NEXT: addi sp, sp, -320
+; RV64V-ZVFHMIN-NEXT: .cfi_def_cfa_offset 320
+; RV64V-ZVFHMIN-NEXT: sd ra, 312(sp) # 8-byte Folded Spill
+; RV64V-ZVFHMIN-NEXT: sd s0, 304(sp) # 8-byte Folded Spill
+; RV64V-ZVFHMIN-NEXT: .cfi_offset ra, -8
+; RV64V-ZVFHMIN-NEXT: .cfi_offset s0, -16
+; RV64V-ZVFHMIN-NEXT: addi s0, sp, 320
+; RV64V-ZVFHMIN-NEXT: .cfi_def_cfa s0, 0
+; RV64V-ZVFHMIN-NEXT: andi sp, sp, -64
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 16
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB74_14
+; RV64V-ZVFHMIN-NEXT: # %bb.6: # %else11
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 32
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB74_15
+; RV64V-ZVFHMIN-NEXT: .LBB74_7: # %else14
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 64
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB74_16
+; RV64V-ZVFHMIN-NEXT: .LBB74_8: # %else17
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, -128
+; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB74_10
+; RV64V-ZVFHMIN-NEXT: .LBB74_9: # %cond.load19
+; RV64V-ZVFHMIN-NEXT: mv a0, sp
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
+; RV64V-ZVFHMIN-NEXT: ld a0, 56(sp)
+; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV64V-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 7
+; RV64V-ZVFHMIN-NEXT: .LBB74_10: # %else20
+; RV64V-ZVFHMIN-NEXT: vmv1r.v v8, v9
+; RV64V-ZVFHMIN-NEXT: addi sp, s0, -320
+; RV64V-ZVFHMIN-NEXT: ld ra, 312(sp) # 8-byte Folded Reload
+; RV64V-ZVFHMIN-NEXT: ld s0, 304(sp) # 8-byte Folded Reload
+; RV64V-ZVFHMIN-NEXT: addi sp, sp, 320
+; RV64V-ZVFHMIN-NEXT: ret
+; RV64V-ZVFHMIN-NEXT: .LBB74_11: # %cond.load
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v12
+; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, tu, ma
+; RV64V-ZVFHMIN-NEXT: vmv.s.x v9, a0
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 2
+; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB74_2
+; RV64V-ZVFHMIN-NEXT: .LBB74_12: # %cond.load1
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v8, v12, 1
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV64V-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 2, e16, m1, tu, ma
+; RV64V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 1
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 4
+; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB74_3
+; RV64V-ZVFHMIN-NEXT: .LBB74_13: # %cond.load4
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v10, v12, 2
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v10
+; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV64V-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 3, e16, m1, tu, ma
+; RV64V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 2
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 8
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB74_4
+; RV64V-ZVFHMIN-NEXT: j .LBB74_5
+; RV64V-ZVFHMIN-NEXT: .LBB74_14: # %cond.load10
+; RV64V-ZVFHMIN-NEXT: addi a0, sp, 192
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
+; RV64V-ZVFHMIN-NEXT: ld a0, 224(sp)
+; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV64V-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 5, e16, m1, tu, ma
+; RV64V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 4
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 32
+; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB74_7
+; RV64V-ZVFHMIN-NEXT: .LBB74_15: # %cond.load13
+; RV64V-ZVFHMIN-NEXT: addi a0, sp, 128
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
+; RV64V-ZVFHMIN-NEXT: ld a0, 168(sp)
+; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV64V-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, tu, ma
+; RV64V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 5
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 64
+; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB74_8
+; RV64V-ZVFHMIN-NEXT: .LBB74_16: # %cond.load16
+; RV64V-ZVFHMIN-NEXT: addi a0, sp, 64
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
+; RV64V-ZVFHMIN-NEXT: ld a0, 112(sp)
+; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV64V-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 7, e16, m1, tu, ma
+; RV64V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 6
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, -128
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB74_9
+; RV64V-ZVFHMIN-NEXT: j .LBB74_10
+;
+; RV32ZVE32F-ZVFH-LABEL: mgather_baseidx_v8i8_v8f16:
+; RV32ZVE32F-ZVFH: # %bb.0:
+; RV32ZVE32F-ZVFH-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32ZVE32F-ZVFH-NEXT: vsext.vf4 v10, v8
+; RV32ZVE32F-ZVFH-NEXT: vadd.vv v10, v10, v10
+; RV32ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; RV32ZVE32F-ZVFH-NEXT: vluxei32.v v9, (a0), v10, v0.t
+; RV32ZVE32F-ZVFH-NEXT: vmv.v.v v8, v9
+; RV32ZVE32F-ZVFH-NEXT: ret
+;
+; RV64ZVE32F-ZVFH-LABEL: mgather_baseidx_v8i8_v8f16:
+; RV64ZVE32F-ZVFH: # %bb.0:
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v0
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 1
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB74_2
+; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %cond.load
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m2, tu, ma
+; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5
+; RV64ZVE32F-ZVFH-NEXT: .LBB74_2: # %else
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 2
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB74_4
+; RV64ZVE32F-ZVFH-NEXT: # %bb.3: # %cond.load1
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 1
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v10, fa5
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, m1, tu, ma
+; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v10, 1
+; RV64ZVE32F-ZVFH-NEXT: .LBB74_4: # %else2
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 4
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 2
+; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB74_12
+; RV64ZVE32F-ZVFH-NEXT: # %bb.5: # %else5
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
+; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB74_13
+; RV64ZVE32F-ZVFH-NEXT: .LBB74_6: # %else8
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
+; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB74_14
+; RV64ZVE32F-ZVFH-NEXT: .LBB74_7: # %else11
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB74_9
+; RV64ZVE32F-ZVFH-NEXT: .LBB74_8: # %cond.load13
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v10, 1
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 6, e16, m1, tu, ma
+; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 5
+; RV64ZVE32F-ZVFH-NEXT: .LBB74_9: # %else14
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v10, 2
+; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB74_15
+; RV64ZVE32F-ZVFH-NEXT: # %bb.10: # %else17
+; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
+; RV64ZVE32F-ZVFH-NEXT: bnez a1, .LBB74_16
+; RV64ZVE32F-ZVFH-NEXT: .LBB74_11: # %else20
+; RV64ZVE32F-ZVFH-NEXT: vmv1r.v v8, v9
+; RV64ZVE32F-ZVFH-NEXT: ret
+; RV64ZVE32F-ZVFH-NEXT: .LBB74_12: # %cond.load4
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v11, fa5
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 3, e16, m1, tu, ma
+; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v11, 2
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB74_6
+; RV64ZVE32F-ZVFH-NEXT: .LBB74_13: # %cond.load7
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 1
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e16, m1, tu, ma
+; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 3
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB74_7
+; RV64ZVE32F-ZVFH-NEXT: .LBB74_14: # %cond.load10
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 5, e16, m1, tu, ma
+; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 4
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
+; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB74_8
+; RV64ZVE32F-ZVFH-NEXT: j .LBB74_9
+; RV64ZVE32F-ZVFH-NEXT: .LBB74_15: # %cond.load16
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v10, fa5
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 7, e16, m1, tu, ma
+; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v10, 6
+; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
+; RV64ZVE32F-ZVFH-NEXT: beqz a1, .LBB74_11
+; RV64ZVE32F-ZVFH-NEXT: .LBB74_16: # %cond.load19
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 1
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v8
+; RV64ZVE32F-ZVFH-NEXT: slli a1, a1, 1
+; RV64ZVE32F-ZVFH-NEXT: add a0, a0, a1
+; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a0)
+; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 7
+; RV64ZVE32F-ZVFH-NEXT: vmv1r.v v8, v9
+; RV64ZVE32F-ZVFH-NEXT: ret
+;
+; RV32ZVE32F-ZVFHMIN-LABEL: mgather_baseidx_v8i8_v8f16:
+; RV32ZVE32F-ZVFHMIN: # %bb.0:
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vsext.vf4 v10, v8
+; RV32ZVE32F-ZVFHMIN-NEXT: vadd.vv v10, v10, v10
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v0
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 1
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vadd.vx v10, v10, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB74_9
+; RV32ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 2
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB74_10
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB74_2: # %else2
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 4
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB74_11
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB74_3: # %else5
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 8
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB74_12
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB74_4: # %else8
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 16
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB74_13
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB74_5: # %else11
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 32
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB74_14
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB74_6: # %else14
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 64
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB74_15
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB74_7: # %else17
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, -128
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB74_16
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB74_8: # %else20
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: ret
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB74_9: # %cond.load
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v10
+; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, tu, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 2
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB74_2
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB74_10: # %cond.load1
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 1
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, m1, tu, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 1
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 4
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB74_3
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB74_11: # %cond.load4
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 2
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 3, e16, m1, tu, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 2
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 8
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB74_4
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB74_12: # %cond.load7
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 3
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 3
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 16
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB74_5
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB74_13: # %cond.load10
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 4
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v12
+; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 5, e16, m1, tu, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 4
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 32
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB74_6
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB74_14: # %cond.load13
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 5
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v12
+; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, tu, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 5
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 64
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB74_7
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB74_15: # %cond.load16
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 6
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v12
+; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 7, e16, m1, tu, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 6
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, -128
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB74_8
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB74_16: # %cond.load19
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v10, 7
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v10
+; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 7
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: ret
+;
+; RV64ZVE32F-ZVFHMIN-LABEL: mgather_baseidx_v8i8_v8f16:
+; RV64ZVE32F-ZVFHMIN: # %bb.0:
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v0
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB74_2
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %cond.load
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, tu, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_2: # %else
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB74_4
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.3: # %cond.load1
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v10, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, m1, tu, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v10, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_4: # %else2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 4
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB74_12
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.5: # %else5
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB74_13
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_6: # %else8
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB74_14
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_7: # %else11
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB74_9
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_8: # %cond.load13
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, tu, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 5
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_9: # %else14
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB74_15
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.10: # %else17
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB74_16
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_11: # %else20
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: ret
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_12: # %cond.load4
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v11, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 3, e16, m1, tu, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v11, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB74_6
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_13: # %cond.load7
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 3
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB74_7
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_14: # %cond.load10
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 5, e16, m1, tu, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 4
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB74_8
+; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB74_9
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_15: # %cond.load16
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v10, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 7, e16, m1, tu, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v10, 6
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB74_11
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_16: # %cond.load19
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v8
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a1, a1, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a0, a0, a1
+; RV64ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 7
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: ret
+ %ptrs = getelementptr inbounds half, ptr %base, <8 x i8> %idxs
+ %v = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x half> %passthru)
+ ret <8 x half> %v
+}
+
+define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x half> %passthru) {
+; RV32V-ZVFH-LABEL: mgather_baseidx_sext_v8i8_v8f16:
+; RV32V-ZVFH: # %bb.0:
+; RV32V-ZVFH-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32V-ZVFH-NEXT: vsext.vf4 v10, v8
+; RV32V-ZVFH-NEXT: vadd.vv v10, v10, v10
+; RV32V-ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; RV32V-ZVFH-NEXT: vluxei32.v v9, (a0), v10, v0.t
+; RV32V-ZVFH-NEXT: vmv.v.v v8, v9
+; RV32V-ZVFH-NEXT: ret
+;
+; RV64V-ZVFH-LABEL: mgather_baseidx_sext_v8i8_v8f16:
+; RV64V-ZVFH: # %bb.0:
+; RV64V-ZVFH-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-ZVFH-NEXT: vsext.vf8 v12, v8
+; RV64V-ZVFH-NEXT: vadd.vv v12, v12, v12
+; RV64V-ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; RV64V-ZVFH-NEXT: vluxei64.v v9, (a0), v12, v0.t
+; RV64V-ZVFH-NEXT: vmv.v.v v8, v9
+; RV64V-ZVFH-NEXT: ret
+;
+; RV32V-ZVFHMIN-LABEL: mgather_baseidx_sext_v8i8_v8f16:
+; RV32V-ZVFHMIN: # %bb.0:
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vsext.vf4 v10, v8
+; RV32V-ZVFHMIN-NEXT: vadd.vv v10, v10, v10
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v0
+; RV32V-ZVFHMIN-NEXT: andi a2, a1, 1
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vadd.vx v10, v10, a0
+; RV32V-ZVFHMIN-NEXT: bnez a2, .LBB75_9
+; RV32V-ZVFHMIN-NEXT: # %bb.1: # %else
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 2
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB75_10
+; RV32V-ZVFHMIN-NEXT: .LBB75_2: # %else2
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 4
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB75_11
+; RV32V-ZVFHMIN-NEXT: .LBB75_3: # %else5
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 8
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB75_12
+; RV32V-ZVFHMIN-NEXT: .LBB75_4: # %else8
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 16
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB75_13
+; RV32V-ZVFHMIN-NEXT: .LBB75_5: # %else11
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 32
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB75_14
+; RV32V-ZVFHMIN-NEXT: .LBB75_6: # %else14
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 64
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB75_15
+; RV32V-ZVFHMIN-NEXT: .LBB75_7: # %else17
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, -128
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB75_16
+; RV32V-ZVFHMIN-NEXT: .LBB75_8: # %else20
+; RV32V-ZVFHMIN-NEXT: vmv1r.v v8, v9
+; RV32V-ZVFHMIN-NEXT: ret
+; RV32V-ZVFHMIN-NEXT: .LBB75_9: # %cond.load
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v10
+; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, tu, ma
+; RV32V-ZVFHMIN-NEXT: vmv.s.x v9, a0
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 2
+; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB75_2
+; RV32V-ZVFHMIN-NEXT: .LBB75_10: # %cond.load1
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 1
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32V-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 2, e16, m1, tu, ma
+; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 1
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 4
+; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB75_3
+; RV32V-ZVFHMIN-NEXT: .LBB75_11: # %cond.load4
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 2
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32V-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 3, e16, m1, tu, ma
+; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 2
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 8
+; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB75_4
+; RV32V-ZVFHMIN-NEXT: .LBB75_12: # %cond.load7
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 3
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32V-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma
+; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 3
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 16
+; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB75_5
+; RV32V-ZVFHMIN-NEXT: .LBB75_13: # %cond.load10
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 4
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v12
+; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32V-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 5, e16, m1, tu, ma
+; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 4
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 32
+; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB75_6
+; RV32V-ZVFHMIN-NEXT: .LBB75_14: # %cond.load13
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 5
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v12
+; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32V-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, tu, ma
+; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 5
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 64
+; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB75_7
+; RV32V-ZVFHMIN-NEXT: .LBB75_15: # %cond.load16
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 6
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v12
+; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32V-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 7, e16, m1, tu, ma
+; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 6
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, -128
+; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB75_8
+; RV32V-ZVFHMIN-NEXT: .LBB75_16: # %cond.load19
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v10, v10, 7
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v10
+; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32V-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 7
+; RV32V-ZVFHMIN-NEXT: vmv1r.v v8, v9
+; RV32V-ZVFHMIN-NEXT: ret
+;
+; RV64V-ZVFHMIN-LABEL: mgather_baseidx_sext_v8i8_v8f16:
+; RV64V-ZVFHMIN: # %bb.0:
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-ZVFHMIN-NEXT: vsext.vf8 v12, v8
+; RV64V-ZVFHMIN-NEXT: vadd.vv v12, v12, v12
+; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v0
+; RV64V-ZVFHMIN-NEXT: andi a2, a1, 1
+; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; RV64V-ZVFHMIN-NEXT: vadd.vx v12, v12, a0
+; RV64V-ZVFHMIN-NEXT: bnez a2, .LBB75_11
+; RV64V-ZVFHMIN-NEXT: # %bb.1: # %else
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 2
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB75_12
+; RV64V-ZVFHMIN-NEXT: .LBB75_2: # %else2
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 4
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB75_13
+; RV64V-ZVFHMIN-NEXT: .LBB75_3: # %else5
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 8
+; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB75_5
+; RV64V-ZVFHMIN-NEXT: .LBB75_4: # %cond.load7
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v10, v12, 3
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v10
+; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV64V-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma
+; RV64V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 3
+; RV64V-ZVFHMIN-NEXT: .LBB75_5: # %else8
+; RV64V-ZVFHMIN-NEXT: addi sp, sp, -320
+; RV64V-ZVFHMIN-NEXT: .cfi_def_cfa_offset 320
+; RV64V-ZVFHMIN-NEXT: sd ra, 312(sp) # 8-byte Folded Spill
+; RV64V-ZVFHMIN-NEXT: sd s0, 304(sp) # 8-byte Folded Spill
+; RV64V-ZVFHMIN-NEXT: .cfi_offset ra, -8
+; RV64V-ZVFHMIN-NEXT: .cfi_offset s0, -16
+; RV64V-ZVFHMIN-NEXT: addi s0, sp, 320
+; RV64V-ZVFHMIN-NEXT: .cfi_def_cfa s0, 0
+; RV64V-ZVFHMIN-NEXT: andi sp, sp, -64
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 16
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB75_14
+; RV64V-ZVFHMIN-NEXT: # %bb.6: # %else11
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 32
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB75_15
+; RV64V-ZVFHMIN-NEXT: .LBB75_7: # %else14
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 64
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB75_16
+; RV64V-ZVFHMIN-NEXT: .LBB75_8: # %else17
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, -128
+; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB75_10
+; RV64V-ZVFHMIN-NEXT: .LBB75_9: # %cond.load19
+; RV64V-ZVFHMIN-NEXT: mv a0, sp
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
+; RV64V-ZVFHMIN-NEXT: ld a0, 56(sp)
+; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV64V-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 7
+; RV64V-ZVFHMIN-NEXT: .LBB75_10: # %else20
+; RV64V-ZVFHMIN-NEXT: vmv1r.v v8, v9
+; RV64V-ZVFHMIN-NEXT: addi sp, s0, -320
+; RV64V-ZVFHMIN-NEXT: ld ra, 312(sp) # 8-byte Folded Reload
+; RV64V-ZVFHMIN-NEXT: ld s0, 304(sp) # 8-byte Folded Reload
+; RV64V-ZVFHMIN-NEXT: addi sp, sp, 320
+; RV64V-ZVFHMIN-NEXT: ret
+; RV64V-ZVFHMIN-NEXT: .LBB75_11: # %cond.load
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v12
+; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, tu, ma
+; RV64V-ZVFHMIN-NEXT: vmv.s.x v9, a0
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 2
+; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB75_2
+; RV64V-ZVFHMIN-NEXT: .LBB75_12: # %cond.load1
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v8, v12, 1
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV64V-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 2, e16, m1, tu, ma
+; RV64V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 1
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 4
+; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB75_3
+; RV64V-ZVFHMIN-NEXT: .LBB75_13: # %cond.load4
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v10, v12, 2
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v10
+; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV64V-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 3, e16, m1, tu, ma
+; RV64V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 2
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 8
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB75_4
+; RV64V-ZVFHMIN-NEXT: j .LBB75_5
+; RV64V-ZVFHMIN-NEXT: .LBB75_14: # %cond.load10
+; RV64V-ZVFHMIN-NEXT: addi a0, sp, 192
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
+; RV64V-ZVFHMIN-NEXT: ld a0, 224(sp)
+; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV64V-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 5, e16, m1, tu, ma
+; RV64V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 4
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 32
+; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB75_7
+; RV64V-ZVFHMIN-NEXT: .LBB75_15: # %cond.load13
+; RV64V-ZVFHMIN-NEXT: addi a0, sp, 128
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
+; RV64V-ZVFHMIN-NEXT: ld a0, 168(sp)
+; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV64V-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, tu, ma
+; RV64V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 5
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 64
+; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB75_8
+; RV64V-ZVFHMIN-NEXT: .LBB75_16: # %cond.load16
+; RV64V-ZVFHMIN-NEXT: addi a0, sp, 64
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
+; RV64V-ZVFHMIN-NEXT: ld a0, 112(sp)
+; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV64V-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 7, e16, m1, tu, ma
+; RV64V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 6
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, -128
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB75_9
+; RV64V-ZVFHMIN-NEXT: j .LBB75_10
+;
+; RV32ZVE32F-ZVFH-LABEL: mgather_baseidx_sext_v8i8_v8f16:
+; RV32ZVE32F-ZVFH: # %bb.0:
+; RV32ZVE32F-ZVFH-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32ZVE32F-ZVFH-NEXT: vsext.vf4 v10, v8
+; RV32ZVE32F-ZVFH-NEXT: vadd.vv v10, v10, v10
+; RV32ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; RV32ZVE32F-ZVFH-NEXT: vluxei32.v v9, (a0), v10, v0.t
+; RV32ZVE32F-ZVFH-NEXT: vmv.v.v v8, v9
+; RV32ZVE32F-ZVFH-NEXT: ret
+;
+; RV64ZVE32F-ZVFH-LABEL: mgather_baseidx_sext_v8i8_v8f16:
+; RV64ZVE32F-ZVFH: # %bb.0:
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v0
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 1
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB75_2
+; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %cond.load
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m2, tu, ma
+; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5
+; RV64ZVE32F-ZVFH-NEXT: .LBB75_2: # %else
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 2
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB75_4
+; RV64ZVE32F-ZVFH-NEXT: # %bb.3: # %cond.load1
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 1
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v10, fa5
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, m1, tu, ma
+; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v10, 1
+; RV64ZVE32F-ZVFH-NEXT: .LBB75_4: # %else2
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 4
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 2
+; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB75_12
+; RV64ZVE32F-ZVFH-NEXT: # %bb.5: # %else5
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
+; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB75_13
+; RV64ZVE32F-ZVFH-NEXT: .LBB75_6: # %else8
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
+; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB75_14
+; RV64ZVE32F-ZVFH-NEXT: .LBB75_7: # %else11
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB75_9
+; RV64ZVE32F-ZVFH-NEXT: .LBB75_8: # %cond.load13
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v10, 1
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 6, e16, m1, tu, ma
+; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 5
+; RV64ZVE32F-ZVFH-NEXT: .LBB75_9: # %else14
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v10, 2
+; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB75_15
+; RV64ZVE32F-ZVFH-NEXT: # %bb.10: # %else17
+; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
+; RV64ZVE32F-ZVFH-NEXT: bnez a1, .LBB75_16
+; RV64ZVE32F-ZVFH-NEXT: .LBB75_11: # %else20
+; RV64ZVE32F-ZVFH-NEXT: vmv1r.v v8, v9
+; RV64ZVE32F-ZVFH-NEXT: ret
+; RV64ZVE32F-ZVFH-NEXT: .LBB75_12: # %cond.load4
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v11, fa5
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 3, e16, m1, tu, ma
+; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v11, 2
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB75_6
+; RV64ZVE32F-ZVFH-NEXT: .LBB75_13: # %cond.load7
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 1
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e16, m1, tu, ma
+; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 3
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB75_7
+; RV64ZVE32F-ZVFH-NEXT: .LBB75_14: # %cond.load10
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 5, e16, m1, tu, ma
+; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 4
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
+; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB75_8
+; RV64ZVE32F-ZVFH-NEXT: j .LBB75_9
+; RV64ZVE32F-ZVFH-NEXT: .LBB75_15: # %cond.load16
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v10, fa5
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 7, e16, m1, tu, ma
+; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v10, 6
+; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
+; RV64ZVE32F-ZVFH-NEXT: beqz a1, .LBB75_11
+; RV64ZVE32F-ZVFH-NEXT: .LBB75_16: # %cond.load19
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 1
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v8
+; RV64ZVE32F-ZVFH-NEXT: slli a1, a1, 1
+; RV64ZVE32F-ZVFH-NEXT: add a0, a0, a1
+; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a0)
+; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 7
+; RV64ZVE32F-ZVFH-NEXT: vmv1r.v v8, v9
+; RV64ZVE32F-ZVFH-NEXT: ret
+;
+; RV32ZVE32F-ZVFHMIN-LABEL: mgather_baseidx_sext_v8i8_v8f16:
+; RV32ZVE32F-ZVFHMIN: # %bb.0:
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vsext.vf4 v10, v8
+; RV32ZVE32F-ZVFHMIN-NEXT: vadd.vv v10, v10, v10
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v0
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 1
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vadd.vx v10, v10, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB75_9
+; RV32ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 2
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB75_10
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB75_2: # %else2
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 4
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB75_11
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB75_3: # %else5
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 8
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB75_12
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB75_4: # %else8
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 16
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB75_13
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB75_5: # %else11
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 32
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB75_14
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB75_6: # %else14
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 64
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB75_15
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB75_7: # %else17
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, -128
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB75_16
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB75_8: # %else20
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: ret
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB75_9: # %cond.load
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v10
+; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, tu, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 2
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB75_2
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB75_10: # %cond.load1
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 1
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, m1, tu, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 1
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 4
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB75_3
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB75_11: # %cond.load4
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 2
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 3, e16, m1, tu, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 2
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 8
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB75_4
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB75_12: # %cond.load7
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 3
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 3
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 16
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB75_5
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB75_13: # %cond.load10
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 4
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v12
+; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 5, e16, m1, tu, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 4
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 32
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB75_6
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB75_14: # %cond.load13
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 5
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v12
+; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, tu, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 5
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 64
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB75_7
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB75_15: # %cond.load16
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 6
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v12
+; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 7, e16, m1, tu, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 6
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, -128
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB75_8
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB75_16: # %cond.load19
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v10, 7
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v10
+; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 7
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: ret
+;
+; RV64ZVE32F-ZVFHMIN-LABEL: mgather_baseidx_sext_v8i8_v8f16:
+; RV64ZVE32F-ZVFHMIN: # %bb.0:
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v0
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB75_2
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %cond.load
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, tu, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_2: # %else
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB75_4
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.3: # %cond.load1
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v10, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, m1, tu, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v10, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_4: # %else2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 4
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB75_12
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.5: # %else5
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB75_13
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_6: # %else8
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB75_14
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_7: # %else11
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB75_9
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_8: # %cond.load13
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, tu, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 5
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_9: # %else14
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB75_15
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.10: # %else17
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB75_16
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_11: # %else20
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: ret
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_12: # %cond.load4
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v11, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 3, e16, m1, tu, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v11, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB75_6
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_13: # %cond.load7
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 3
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB75_7
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_14: # %cond.load10
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 5, e16, m1, tu, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 4
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB75_8
+; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB75_9
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_15: # %cond.load16
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v10, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 7, e16, m1, tu, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v10, 6
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB75_11
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_16: # %cond.load19
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v8
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a1, a1, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a0, a0, a1
+; RV64ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 7
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: ret
+ %eidxs = sext <8 x i8> %idxs to <8 x i16>
+ %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs
+ %v = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x half> %passthru)
+ ret <8 x half> %v
+}
+
+define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x half> %passthru) {
+; RV32V-ZVFH-LABEL: mgather_baseidx_zext_v8i8_v8f16:
+; RV32V-ZVFH: # %bb.0:
+; RV32V-ZVFH-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV32V-ZVFH-NEXT: vwaddu.vv v10, v8, v8
+; RV32V-ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; RV32V-ZVFH-NEXT: vluxei16.v v9, (a0), v10, v0.t
+; RV32V-ZVFH-NEXT: vmv.v.v v8, v9
+; RV32V-ZVFH-NEXT: ret
+;
+; RV64V-ZVFH-LABEL: mgather_baseidx_zext_v8i8_v8f16:
+; RV64V-ZVFH: # %bb.0:
+; RV64V-ZVFH-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64V-ZVFH-NEXT: vwaddu.vv v10, v8, v8
+; RV64V-ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; RV64V-ZVFH-NEXT: vluxei16.v v9, (a0), v10, v0.t
+; RV64V-ZVFH-NEXT: vmv.v.v v8, v9
+; RV64V-ZVFH-NEXT: ret
+;
+; RV32V-ZVFHMIN-LABEL: mgather_baseidx_zext_v8i8_v8f16:
+; RV32V-ZVFHMIN: # %bb.0:
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vzext.vf4 v10, v8
+; RV32V-ZVFHMIN-NEXT: vadd.vv v10, v10, v10
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v0
+; RV32V-ZVFHMIN-NEXT: andi a2, a1, 1
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vadd.vx v10, v10, a0
+; RV32V-ZVFHMIN-NEXT: bnez a2, .LBB76_9
+; RV32V-ZVFHMIN-NEXT: # %bb.1: # %else
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 2
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB76_10
+; RV32V-ZVFHMIN-NEXT: .LBB76_2: # %else2
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 4
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB76_11
+; RV32V-ZVFHMIN-NEXT: .LBB76_3: # %else5
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 8
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB76_12
+; RV32V-ZVFHMIN-NEXT: .LBB76_4: # %else8
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 16
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB76_13
+; RV32V-ZVFHMIN-NEXT: .LBB76_5: # %else11
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 32
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB76_14
+; RV32V-ZVFHMIN-NEXT: .LBB76_6: # %else14
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 64
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB76_15
+; RV32V-ZVFHMIN-NEXT: .LBB76_7: # %else17
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, -128
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB76_16
+; RV32V-ZVFHMIN-NEXT: .LBB76_8: # %else20
+; RV32V-ZVFHMIN-NEXT: vmv1r.v v8, v9
+; RV32V-ZVFHMIN-NEXT: ret
+; RV32V-ZVFHMIN-NEXT: .LBB76_9: # %cond.load
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v10
+; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, tu, ma
+; RV32V-ZVFHMIN-NEXT: vmv.s.x v9, a0
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 2
+; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB76_2
+; RV32V-ZVFHMIN-NEXT: .LBB76_10: # %cond.load1
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 1
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32V-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 2, e16, m1, tu, ma
+; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 1
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 4
+; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB76_3
+; RV32V-ZVFHMIN-NEXT: .LBB76_11: # %cond.load4
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 2
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32V-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 3, e16, m1, tu, ma
+; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 2
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 8
+; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB76_4
+; RV32V-ZVFHMIN-NEXT: .LBB76_12: # %cond.load7
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 3
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32V-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma
+; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 3
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 16
+; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB76_5
+; RV32V-ZVFHMIN-NEXT: .LBB76_13: # %cond.load10
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 4
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v12
+; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32V-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 5, e16, m1, tu, ma
+; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 4
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 32
+; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB76_6
+; RV32V-ZVFHMIN-NEXT: .LBB76_14: # %cond.load13
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 5
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v12
+; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32V-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, tu, ma
+; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 5
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 64
+; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB76_7
+; RV32V-ZVFHMIN-NEXT: .LBB76_15: # %cond.load16
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 6
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v12
+; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32V-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 7, e16, m1, tu, ma
+; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 6
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, -128
+; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB76_8
+; RV32V-ZVFHMIN-NEXT: .LBB76_16: # %cond.load19
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v10, v10, 7
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v10
+; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32V-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 7
+; RV32V-ZVFHMIN-NEXT: vmv1r.v v8, v9
+; RV32V-ZVFHMIN-NEXT: ret
+;
+; RV64V-ZVFHMIN-LABEL: mgather_baseidx_zext_v8i8_v8f16:
+; RV64V-ZVFHMIN: # %bb.0:
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-ZVFHMIN-NEXT: vzext.vf8 v12, v8
+; RV64V-ZVFHMIN-NEXT: vadd.vv v12, v12, v12
+; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v0
+; RV64V-ZVFHMIN-NEXT: andi a2, a1, 1
+; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; RV64V-ZVFHMIN-NEXT: vadd.vx v12, v12, a0
+; RV64V-ZVFHMIN-NEXT: bnez a2, .LBB76_11
+; RV64V-ZVFHMIN-NEXT: # %bb.1: # %else
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 2
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB76_12
+; RV64V-ZVFHMIN-NEXT: .LBB76_2: # %else2
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 4
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB76_13
+; RV64V-ZVFHMIN-NEXT: .LBB76_3: # %else5
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 8
+; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB76_5
+; RV64V-ZVFHMIN-NEXT: .LBB76_4: # %cond.load7
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v10, v12, 3
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v10
+; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV64V-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma
+; RV64V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 3
+; RV64V-ZVFHMIN-NEXT: .LBB76_5: # %else8
+; RV64V-ZVFHMIN-NEXT: addi sp, sp, -320
+; RV64V-ZVFHMIN-NEXT: .cfi_def_cfa_offset 320
+; RV64V-ZVFHMIN-NEXT: sd ra, 312(sp) # 8-byte Folded Spill
+; RV64V-ZVFHMIN-NEXT: sd s0, 304(sp) # 8-byte Folded Spill
+; RV64V-ZVFHMIN-NEXT: .cfi_offset ra, -8
+; RV64V-ZVFHMIN-NEXT: .cfi_offset s0, -16
+; RV64V-ZVFHMIN-NEXT: addi s0, sp, 320
+; RV64V-ZVFHMIN-NEXT: .cfi_def_cfa s0, 0
+; RV64V-ZVFHMIN-NEXT: andi sp, sp, -64
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 16
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB76_14
+; RV64V-ZVFHMIN-NEXT: # %bb.6: # %else11
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 32
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB76_15
+; RV64V-ZVFHMIN-NEXT: .LBB76_7: # %else14
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 64
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB76_16
+; RV64V-ZVFHMIN-NEXT: .LBB76_8: # %else17
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, -128
+; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB76_10
+; RV64V-ZVFHMIN-NEXT: .LBB76_9: # %cond.load19
+; RV64V-ZVFHMIN-NEXT: mv a0, sp
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
+; RV64V-ZVFHMIN-NEXT: ld a0, 56(sp)
+; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV64V-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 7
+; RV64V-ZVFHMIN-NEXT: .LBB76_10: # %else20
+; RV64V-ZVFHMIN-NEXT: vmv1r.v v8, v9
+; RV64V-ZVFHMIN-NEXT: addi sp, s0, -320
+; RV64V-ZVFHMIN-NEXT: ld ra, 312(sp) # 8-byte Folded Reload
+; RV64V-ZVFHMIN-NEXT: ld s0, 304(sp) # 8-byte Folded Reload
+; RV64V-ZVFHMIN-NEXT: addi sp, sp, 320
+; RV64V-ZVFHMIN-NEXT: ret
+; RV64V-ZVFHMIN-NEXT: .LBB76_11: # %cond.load
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v12
+; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, tu, ma
+; RV64V-ZVFHMIN-NEXT: vmv.s.x v9, a0
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 2
+; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB76_2
+; RV64V-ZVFHMIN-NEXT: .LBB76_12: # %cond.load1
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v8, v12, 1
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV64V-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 2, e16, m1, tu, ma
+; RV64V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 1
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 4
+; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB76_3
+; RV64V-ZVFHMIN-NEXT: .LBB76_13: # %cond.load4
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v10, v12, 2
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v10
+; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV64V-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 3, e16, m1, tu, ma
+; RV64V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 2
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 8
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB76_4
+; RV64V-ZVFHMIN-NEXT: j .LBB76_5
+; RV64V-ZVFHMIN-NEXT: .LBB76_14: # %cond.load10
+; RV64V-ZVFHMIN-NEXT: addi a0, sp, 192
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
+; RV64V-ZVFHMIN-NEXT: ld a0, 224(sp)
+; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV64V-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 5, e16, m1, tu, ma
+; RV64V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 4
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 32
+; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB76_7
+; RV64V-ZVFHMIN-NEXT: .LBB76_15: # %cond.load13
+; RV64V-ZVFHMIN-NEXT: addi a0, sp, 128
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
+; RV64V-ZVFHMIN-NEXT: ld a0, 168(sp)
+; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV64V-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, tu, ma
+; RV64V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 5
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 64
+; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB76_8
+; RV64V-ZVFHMIN-NEXT: .LBB76_16: # %cond.load16
+; RV64V-ZVFHMIN-NEXT: addi a0, sp, 64
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
+; RV64V-ZVFHMIN-NEXT: ld a0, 112(sp)
+; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV64V-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 7, e16, m1, tu, ma
+; RV64V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 6
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, -128
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB76_9
+; RV64V-ZVFHMIN-NEXT: j .LBB76_10
+;
+; RV32ZVE32F-ZVFH-LABEL: mgather_baseidx_zext_v8i8_v8f16:
+; RV32ZVE32F-ZVFH: # %bb.0:
+; RV32ZVE32F-ZVFH-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV32ZVE32F-ZVFH-NEXT: vwaddu.vv v10, v8, v8
+; RV32ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; RV32ZVE32F-ZVFH-NEXT: vluxei16.v v9, (a0), v10, v0.t
+; RV32ZVE32F-ZVFH-NEXT: vmv.v.v v8, v9
+; RV32ZVE32F-ZVFH-NEXT: ret
+;
+; RV64ZVE32F-ZVFH-LABEL: mgather_baseidx_zext_v8i8_v8f16:
+; RV64ZVE32F-ZVFH: # %bb.0:
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v0
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 1
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB76_2
+; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %cond.load
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m2, tu, ma
+; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5
+; RV64ZVE32F-ZVFH-NEXT: .LBB76_2: # %else
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 2
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB76_4
+; RV64ZVE32F-ZVFH-NEXT: # %bb.3: # %cond.load1
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 1
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v10, fa5
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, m1, tu, ma
+; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v10, 1
+; RV64ZVE32F-ZVFH-NEXT: .LBB76_4: # %else2
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 4
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 2
+; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB76_12
+; RV64ZVE32F-ZVFH-NEXT: # %bb.5: # %else5
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
+; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB76_13
+; RV64ZVE32F-ZVFH-NEXT: .LBB76_6: # %else8
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
+; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB76_14
+; RV64ZVE32F-ZVFH-NEXT: .LBB76_7: # %else11
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB76_9
+; RV64ZVE32F-ZVFH-NEXT: .LBB76_8: # %cond.load13
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v10, 1
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 6, e16, m1, tu, ma
+; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 5
+; RV64ZVE32F-ZVFH-NEXT: .LBB76_9: # %else14
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v10, 2
+; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB76_15
+; RV64ZVE32F-ZVFH-NEXT: # %bb.10: # %else17
+; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
+; RV64ZVE32F-ZVFH-NEXT: bnez a1, .LBB76_16
+; RV64ZVE32F-ZVFH-NEXT: .LBB76_11: # %else20
+; RV64ZVE32F-ZVFH-NEXT: vmv1r.v v8, v9
+; RV64ZVE32F-ZVFH-NEXT: ret
+; RV64ZVE32F-ZVFH-NEXT: .LBB76_12: # %cond.load4
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v11, fa5
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 3, e16, m1, tu, ma
+; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v11, 2
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB76_6
+; RV64ZVE32F-ZVFH-NEXT: .LBB76_13: # %cond.load7
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 1
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e16, m1, tu, ma
+; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 3
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB76_7
+; RV64ZVE32F-ZVFH-NEXT: .LBB76_14: # %cond.load10
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 5, e16, m1, tu, ma
+; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 4
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
+; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB76_8
+; RV64ZVE32F-ZVFH-NEXT: j .LBB76_9
+; RV64ZVE32F-ZVFH-NEXT: .LBB76_15: # %cond.load16
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v10, fa5
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 7, e16, m1, tu, ma
+; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v10, 6
+; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
+; RV64ZVE32F-ZVFH-NEXT: beqz a1, .LBB76_11
+; RV64ZVE32F-ZVFH-NEXT: .LBB76_16: # %cond.load19
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 1
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v8
+; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, 255
+; RV64ZVE32F-ZVFH-NEXT: slli a1, a1, 1
+; RV64ZVE32F-ZVFH-NEXT: add a0, a0, a1
+; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a0)
+; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 7
+; RV64ZVE32F-ZVFH-NEXT: vmv1r.v v8, v9
+; RV64ZVE32F-ZVFH-NEXT: ret
+;
+; RV32ZVE32F-ZVFHMIN-LABEL: mgather_baseidx_zext_v8i8_v8f16:
+; RV32ZVE32F-ZVFHMIN: # %bb.0:
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vzext.vf4 v10, v8
+; RV32ZVE32F-ZVFHMIN-NEXT: vadd.vv v10, v10, v10
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v0
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 1
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vadd.vx v10, v10, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB76_9
+; RV32ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 2
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB76_10
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB76_2: # %else2
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 4
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB76_11
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB76_3: # %else5
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 8
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB76_12
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB76_4: # %else8
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 16
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB76_13
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB76_5: # %else11
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 32
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB76_14
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB76_6: # %else14
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 64
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB76_15
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB76_7: # %else17
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, -128
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB76_16
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB76_8: # %else20
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: ret
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB76_9: # %cond.load
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v10
+; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, tu, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 2
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB76_2
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB76_10: # %cond.load1
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 1
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, m1, tu, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 1
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 4
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB76_3
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB76_11: # %cond.load4
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 2
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 3, e16, m1, tu, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 2
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 8
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB76_4
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB76_12: # %cond.load7
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 3
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 3
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 16
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB76_5
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB76_13: # %cond.load10
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 4
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v12
+; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 5, e16, m1, tu, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 4
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 32
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB76_6
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB76_14: # %cond.load13
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 5
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v12
+; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, tu, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 5
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 64
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB76_7
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB76_15: # %cond.load16
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 6
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v12
+; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 7, e16, m1, tu, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 6
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, -128
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB76_8
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB76_16: # %cond.load19
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v10, 7
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v10
+; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 7
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: ret
+;
+; RV64ZVE32F-ZVFHMIN-LABEL: mgather_baseidx_zext_v8i8_v8f16:
+; RV64ZVE32F-ZVFHMIN: # %bb.0:
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v0
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB76_2
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %cond.load
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, tu, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_2: # %else
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB76_4
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.3: # %cond.load1
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v10, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, m1, tu, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v10, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_4: # %else2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 4
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB76_12
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.5: # %else5
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB76_13
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_6: # %else8
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB76_14
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_7: # %else11
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB76_9
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_8: # %cond.load13
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, tu, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 5
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_9: # %else14
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB76_15
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.10: # %else17
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB76_16
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_11: # %else20
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: ret
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_12: # %cond.load4
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v11, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 3, e16, m1, tu, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v11, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB76_6
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_13: # %cond.load7
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 3
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB76_7
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_14: # %cond.load10
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 5, e16, m1, tu, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 4
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB76_8
+; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB76_9
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_15: # %cond.load16
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v10, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 7, e16, m1, tu, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v10, 6
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB76_11
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_16: # %cond.load19
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v8
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, 255
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a1, a1, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a0, a0, a1
+; RV64ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 7
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: ret
+ %eidxs = zext <8 x i8> %idxs to <8 x i16>
+ %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs
+ %v = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x half> %passthru)
+ ret <8 x half> %v
+}
+
+define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x half> %passthru) {
+; RV32V-ZVFH-LABEL: mgather_baseidx_v8f16:
+; RV32V-ZVFH: # %bb.0:
+; RV32V-ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, mu
+; RV32V-ZVFH-NEXT: vwadd.vv v10, v8, v8
+; RV32V-ZVFH-NEXT: vluxei32.v v9, (a0), v10, v0.t
+; RV32V-ZVFH-NEXT: vmv.v.v v8, v9
+; RV32V-ZVFH-NEXT: ret
+;
+; RV64V-ZVFH-LABEL: mgather_baseidx_v8f16:
+; RV64V-ZVFH: # %bb.0:
+; RV64V-ZVFH-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-ZVFH-NEXT: vsext.vf4 v12, v8
+; RV64V-ZVFH-NEXT: vadd.vv v12, v12, v12
+; RV64V-ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; RV64V-ZVFH-NEXT: vluxei64.v v9, (a0), v12, v0.t
+; RV64V-ZVFH-NEXT: vmv.v.v v8, v9
+; RV64V-ZVFH-NEXT: ret
+;
+; RV32V-ZVFHMIN-LABEL: mgather_baseidx_v8f16:
+; RV32V-ZVFHMIN: # %bb.0:
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vwadd.vv v10, v8, v8
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v0
+; RV32V-ZVFHMIN-NEXT: andi a2, a1, 1
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vadd.vx v10, v10, a0
+; RV32V-ZVFHMIN-NEXT: bnez a2, .LBB77_9
+; RV32V-ZVFHMIN-NEXT: # %bb.1: # %else
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 2
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB77_10
+; RV32V-ZVFHMIN-NEXT: .LBB77_2: # %else2
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 4
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB77_11
+; RV32V-ZVFHMIN-NEXT: .LBB77_3: # %else5
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 8
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB77_12
+; RV32V-ZVFHMIN-NEXT: .LBB77_4: # %else8
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 16
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB77_13
+; RV32V-ZVFHMIN-NEXT: .LBB77_5: # %else11
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 32
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB77_14
+; RV32V-ZVFHMIN-NEXT: .LBB77_6: # %else14
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 64
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB77_15
+; RV32V-ZVFHMIN-NEXT: .LBB77_7: # %else17
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, -128
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB77_16
+; RV32V-ZVFHMIN-NEXT: .LBB77_8: # %else20
+; RV32V-ZVFHMIN-NEXT: vmv1r.v v8, v9
+; RV32V-ZVFHMIN-NEXT: ret
+; RV32V-ZVFHMIN-NEXT: .LBB77_9: # %cond.load
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v10
+; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, tu, ma
+; RV32V-ZVFHMIN-NEXT: vmv.s.x v9, a0
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 2
+; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB77_2
+; RV32V-ZVFHMIN-NEXT: .LBB77_10: # %cond.load1
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 1
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32V-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 2, e16, m1, tu, ma
+; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 1
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 4
+; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB77_3
+; RV32V-ZVFHMIN-NEXT: .LBB77_11: # %cond.load4
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 2
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32V-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 3, e16, m1, tu, ma
+; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 2
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 8
+; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB77_4
+; RV32V-ZVFHMIN-NEXT: .LBB77_12: # %cond.load7
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 3
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32V-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma
+; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 3
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 16
+; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB77_5
+; RV32V-ZVFHMIN-NEXT: .LBB77_13: # %cond.load10
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 4
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v12
+; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32V-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 5, e16, m1, tu, ma
+; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 4
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 32
+; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB77_6
+; RV32V-ZVFHMIN-NEXT: .LBB77_14: # %cond.load13
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 5
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v12
+; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32V-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, tu, ma
+; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 5
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 64
+; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB77_7
+; RV32V-ZVFHMIN-NEXT: .LBB77_15: # %cond.load16
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 6
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v12
+; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32V-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 7, e16, m1, tu, ma
+; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 6
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, -128
+; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB77_8
+; RV32V-ZVFHMIN-NEXT: .LBB77_16: # %cond.load19
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v10, v10, 7
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v10
+; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32V-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 7
+; RV32V-ZVFHMIN-NEXT: vmv1r.v v8, v9
+; RV32V-ZVFHMIN-NEXT: ret
+;
+; RV64V-ZVFHMIN-LABEL: mgather_baseidx_v8f16:
+; RV64V-ZVFHMIN: # %bb.0:
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-ZVFHMIN-NEXT: vsext.vf4 v12, v8
+; RV64V-ZVFHMIN-NEXT: vadd.vv v12, v12, v12
+; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v0
+; RV64V-ZVFHMIN-NEXT: andi a2, a1, 1
+; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; RV64V-ZVFHMIN-NEXT: vadd.vx v12, v12, a0
+; RV64V-ZVFHMIN-NEXT: bnez a2, .LBB77_11
+; RV64V-ZVFHMIN-NEXT: # %bb.1: # %else
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 2
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB77_12
+; RV64V-ZVFHMIN-NEXT: .LBB77_2: # %else2
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 4
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB77_13
+; RV64V-ZVFHMIN-NEXT: .LBB77_3: # %else5
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 8
+; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB77_5
+; RV64V-ZVFHMIN-NEXT: .LBB77_4: # %cond.load7
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v10, v12, 3
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v10
+; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV64V-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma
+; RV64V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 3
+; RV64V-ZVFHMIN-NEXT: .LBB77_5: # %else8
+; RV64V-ZVFHMIN-NEXT: addi sp, sp, -320
+; RV64V-ZVFHMIN-NEXT: .cfi_def_cfa_offset 320
+; RV64V-ZVFHMIN-NEXT: sd ra, 312(sp) # 8-byte Folded Spill
+; RV64V-ZVFHMIN-NEXT: sd s0, 304(sp) # 8-byte Folded Spill
+; RV64V-ZVFHMIN-NEXT: .cfi_offset ra, -8
+; RV64V-ZVFHMIN-NEXT: .cfi_offset s0, -16
+; RV64V-ZVFHMIN-NEXT: addi s0, sp, 320
+; RV64V-ZVFHMIN-NEXT: .cfi_def_cfa s0, 0
+; RV64V-ZVFHMIN-NEXT: andi sp, sp, -64
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 16
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB77_14
+; RV64V-ZVFHMIN-NEXT: # %bb.6: # %else11
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 32
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB77_15
+; RV64V-ZVFHMIN-NEXT: .LBB77_7: # %else14
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 64
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB77_16
+; RV64V-ZVFHMIN-NEXT: .LBB77_8: # %else17
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, -128
+; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB77_10
+; RV64V-ZVFHMIN-NEXT: .LBB77_9: # %cond.load19
+; RV64V-ZVFHMIN-NEXT: mv a0, sp
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
+; RV64V-ZVFHMIN-NEXT: ld a0, 56(sp)
+; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV64V-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 7
+; RV64V-ZVFHMIN-NEXT: .LBB77_10: # %else20
+; RV64V-ZVFHMIN-NEXT: vmv1r.v v8, v9
+; RV64V-ZVFHMIN-NEXT: addi sp, s0, -320
+; RV64V-ZVFHMIN-NEXT: ld ra, 312(sp) # 8-byte Folded Reload
+; RV64V-ZVFHMIN-NEXT: ld s0, 304(sp) # 8-byte Folded Reload
+; RV64V-ZVFHMIN-NEXT: addi sp, sp, 320
+; RV64V-ZVFHMIN-NEXT: ret
+; RV64V-ZVFHMIN-NEXT: .LBB77_11: # %cond.load
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v12
+; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, tu, ma
+; RV64V-ZVFHMIN-NEXT: vmv.s.x v9, a0
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 2
+; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB77_2
+; RV64V-ZVFHMIN-NEXT: .LBB77_12: # %cond.load1
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v8, v12, 1
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV64V-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 2, e16, m1, tu, ma
+; RV64V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 1
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 4
+; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB77_3
+; RV64V-ZVFHMIN-NEXT: .LBB77_13: # %cond.load4
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v10, v12, 2
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v10
+; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV64V-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 3, e16, m1, tu, ma
+; RV64V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 2
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 8
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB77_4
+; RV64V-ZVFHMIN-NEXT: j .LBB77_5
+; RV64V-ZVFHMIN-NEXT: .LBB77_14: # %cond.load10
+; RV64V-ZVFHMIN-NEXT: addi a0, sp, 192
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
+; RV64V-ZVFHMIN-NEXT: ld a0, 224(sp)
+; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV64V-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 5, e16, m1, tu, ma
+; RV64V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 4
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 32
+; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB77_7
+; RV64V-ZVFHMIN-NEXT: .LBB77_15: # %cond.load13
+; RV64V-ZVFHMIN-NEXT: addi a0, sp, 128
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
+; RV64V-ZVFHMIN-NEXT: ld a0, 168(sp)
+; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV64V-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, tu, ma
+; RV64V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 5
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 64
+; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB77_8
+; RV64V-ZVFHMIN-NEXT: .LBB77_16: # %cond.load16
+; RV64V-ZVFHMIN-NEXT: addi a0, sp, 64
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
+; RV64V-ZVFHMIN-NEXT: ld a0, 112(sp)
+; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV64V-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 7, e16, m1, tu, ma
+; RV64V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 6
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, -128
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB77_9
+; RV64V-ZVFHMIN-NEXT: j .LBB77_10
+;
+; RV32ZVE32F-ZVFH-LABEL: mgather_baseidx_v8f16:
+; RV32ZVE32F-ZVFH: # %bb.0:
+; RV32ZVE32F-ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, mu
+; RV32ZVE32F-ZVFH-NEXT: vwadd.vv v10, v8, v8
+; RV32ZVE32F-ZVFH-NEXT: vluxei32.v v9, (a0), v10, v0.t
+; RV32ZVE32F-ZVFH-NEXT: vmv.v.v v8, v9
+; RV32ZVE32F-ZVFH-NEXT: ret
+;
+; RV64ZVE32F-ZVFH-LABEL: mgather_baseidx_v8f16:
+; RV64ZVE32F-ZVFH: # %bb.0:
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v0
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 1
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB77_2
+; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %cond.load
+; RV64ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m2, tu, ma
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v9, fa5
+; RV64ZVE32F-ZVFH-NEXT: .LBB77_2: # %else
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 2
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB77_4
+; RV64ZVE32F-ZVFH-NEXT: # %bb.3: # %cond.load1
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 1
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v10, fa5
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, m1, tu, ma
+; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v10, 1
+; RV64ZVE32F-ZVFH-NEXT: .LBB77_4: # %else2
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e16, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 4
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 2
+; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB77_12
+; RV64ZVE32F-ZVFH-NEXT: # %bb.5: # %else5
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
+; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB77_13
+; RV64ZVE32F-ZVFH-NEXT: .LBB77_6: # %else8
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
+; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB77_14
+; RV64ZVE32F-ZVFH-NEXT: .LBB77_7: # %else11
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB77_9
+; RV64ZVE32F-ZVFH-NEXT: .LBB77_8: # %cond.load13
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v10, 1
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 6, e16, m1, tu, ma
+; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 5
+; RV64ZVE32F-ZVFH-NEXT: .LBB77_9: # %else14
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v10, 2
+; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB77_15
+; RV64ZVE32F-ZVFH-NEXT: # %bb.10: # %else17
+; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
+; RV64ZVE32F-ZVFH-NEXT: bnez a1, .LBB77_16
+; RV64ZVE32F-ZVFH-NEXT: .LBB77_11: # %else20
+; RV64ZVE32F-ZVFH-NEXT: vmv1r.v v8, v9
+; RV64ZVE32F-ZVFH-NEXT: ret
+; RV64ZVE32F-ZVFH-NEXT: .LBB77_12: # %cond.load4
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v11, fa5
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 3, e16, m1, tu, ma
+; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v11, 2
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB77_6
+; RV64ZVE32F-ZVFH-NEXT: .LBB77_13: # %cond.load7
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 1
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e16, m1, tu, ma
+; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 3
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB77_7
+; RV64ZVE32F-ZVFH-NEXT: .LBB77_14: # %cond.load10
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 5, e16, m1, tu, ma
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 4
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
+; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB77_8
+; RV64ZVE32F-ZVFH-NEXT: j .LBB77_9
+; RV64ZVE32F-ZVFH-NEXT: .LBB77_15: # %cond.load16
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a2)
+; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v10, fa5
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 7, e16, m1, tu, ma
+; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v10, 6
+; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
+; RV64ZVE32F-ZVFH-NEXT: beqz a1, .LBB77_11
+; RV64ZVE32F-ZVFH-NEXT: .LBB77_16: # %cond.load19
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 1
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v8
+; RV64ZVE32F-ZVFH-NEXT: slli a1, a1, 1
+; RV64ZVE32F-ZVFH-NEXT: add a0, a0, a1
+; RV64ZVE32F-ZVFH-NEXT: flh fa5, 0(a0)
+; RV64ZVE32F-ZVFH-NEXT: vfmv.s.f v8, fa5
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 7
+; RV64ZVE32F-ZVFH-NEXT: vmv1r.v v8, v9
+; RV64ZVE32F-ZVFH-NEXT: ret
+;
+; RV32ZVE32F-ZVFHMIN-LABEL: mgather_baseidx_v8f16:
+; RV32ZVE32F-ZVFHMIN: # %bb.0:
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vwadd.vv v10, v8, v8
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v0
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 1
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vadd.vx v10, v10, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB77_9
+; RV32ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 2
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB77_10
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB77_2: # %else2
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 4
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB77_11
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB77_3: # %else5
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 8
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB77_12
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB77_4: # %else8
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 16
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB77_13
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB77_5: # %else11
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 32
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB77_14
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB77_6: # %else14
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 64
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB77_15
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB77_7: # %else17
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, -128
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB77_16
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB77_8: # %else20
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: ret
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB77_9: # %cond.load
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v10
+; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, tu, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 2
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB77_2
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB77_10: # %cond.load1
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 1
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, m1, tu, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 1
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 4
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB77_3
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB77_11: # %cond.load4
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 2
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 3, e16, m1, tu, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 2
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 8
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB77_4
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB77_12: # %cond.load7
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 3
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 3
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 16
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB77_5
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB77_13: # %cond.load10
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 4
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v12
+; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 5, e16, m1, tu, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 4
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 32
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB77_6
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB77_14: # %cond.load13
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 5
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v12
+; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, tu, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 5
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 64
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB77_7
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB77_15: # %cond.load16
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 6
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v12
+; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 7, e16, m1, tu, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 6
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, -128
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB77_8
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB77_16: # %cond.load19
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v10, 7
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v10
+; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 7
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: ret
+;
+; RV64ZVE32F-ZVFHMIN-LABEL: mgather_baseidx_v8f16:
+; RV64ZVE32F-ZVFHMIN: # %bb.0:
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v0
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB77_2
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %cond.load
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, tu, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_2: # %else
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB77_4
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.3: # %cond.load1
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v10, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, m1, tu, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v10, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_4: # %else2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 4
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB77_12
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.5: # %else5
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB77_13
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_6: # %else8
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB77_14
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_7: # %else11
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB77_9
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_8: # %cond.load13
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, tu, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 5
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_9: # %else14
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB77_15
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.10: # %else17
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB77_16
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_11: # %else20
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: ret
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_12: # %cond.load4
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v11, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 3, e16, m1, tu, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v11, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB77_6
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_13: # %cond.load7
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 3
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB77_7
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_14: # %cond.load10
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 5, e16, m1, tu, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 4
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB77_8
+; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB77_9
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_15: # %cond.load16
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v10, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 7, e16, m1, tu, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v10, 6
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB77_11
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_16: # %cond.load19
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v8
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a1, a1, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a0, a0, a1
+; RV64ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 7
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: ret
%ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %idxs
%v = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x half> %passthru)
ret <8 x half> %v
@@ -7833,11 +12977,11 @@ define <1 x float> @mgather_v1f32(<1 x ptr> %ptrs, <1 x i1> %m, <1 x float> %pas
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vfirst.m a1, v0
-; RV64ZVE32F-NEXT: bnez a1, .LBB68_2
+; RV64ZVE32F-NEXT: bnez a1, .LBB78_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vle32.v v8, (a0)
-; RV64ZVE32F-NEXT: .LBB68_2: # %else
+; RV64ZVE32F-NEXT: .LBB78_2: # %else
; RV64ZVE32F-NEXT: ret
%v = call <1 x float> @llvm.masked.gather.v1f32.v1p0(<1 x ptr> %ptrs, i32 4, <1 x i1> %m, <1 x float> %passthru)
ret <1 x float> %v
@@ -7872,19 +13016,19 @@ define <2 x float> @mgather_v2f32(<2 x ptr> %ptrs, <2 x i1> %m, <2 x float> %pas
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
-; RV64ZVE32F-NEXT: bnez a3, .LBB69_3
+; RV64ZVE32F-NEXT: bnez a3, .LBB79_3
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB69_4
-; RV64ZVE32F-NEXT: .LBB69_2: # %else2
+; RV64ZVE32F-NEXT: bnez a2, .LBB79_4
+; RV64ZVE32F-NEXT: .LBB79_2: # %else2
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB69_3: # %cond.load
+; RV64ZVE32F-NEXT: .LBB79_3: # %cond.load
; RV64ZVE32F-NEXT: flw fa5, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: andi a2, a2, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB69_2
-; RV64ZVE32F-NEXT: .LBB69_4: # %cond.load1
+; RV64ZVE32F-NEXT: beqz a2, .LBB79_2
+; RV64ZVE32F-NEXT: .LBB79_4: # %cond.load1
; RV64ZVE32F-NEXT: flw fa5, 0(a1)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
@@ -7917,26 +13061,26 @@ define <4 x float> @mgather_v4f32(<4 x ptr> %ptrs, <4 x i1> %m, <4 x float> %pas
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
-; RV64ZVE32F-NEXT: bnez a2, .LBB70_5
+; RV64ZVE32F-NEXT: bnez a2, .LBB80_5
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB70_6
-; RV64ZVE32F-NEXT: .LBB70_2: # %else2
+; RV64ZVE32F-NEXT: bnez a2, .LBB80_6
+; RV64ZVE32F-NEXT: .LBB80_2: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
-; RV64ZVE32F-NEXT: bnez a2, .LBB70_7
-; RV64ZVE32F-NEXT: .LBB70_3: # %else5
+; RV64ZVE32F-NEXT: bnez a2, .LBB80_7
+; RV64ZVE32F-NEXT: .LBB80_3: # %else5
; RV64ZVE32F-NEXT: andi a1, a1, 8
-; RV64ZVE32F-NEXT: bnez a1, .LBB70_8
-; RV64ZVE32F-NEXT: .LBB70_4: # %else8
+; RV64ZVE32F-NEXT: bnez a1, .LBB80_8
+; RV64ZVE32F-NEXT: .LBB80_4: # %else8
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB70_5: # %cond.load
+; RV64ZVE32F-NEXT: .LBB80_5: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: andi a2, a1, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB70_2
-; RV64ZVE32F-NEXT: .LBB70_6: # %cond.load1
+; RV64ZVE32F-NEXT: beqz a2, .LBB80_2
+; RV64ZVE32F-NEXT: .LBB80_6: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
@@ -7944,16 +13088,16 @@ define <4 x float> @mgather_v4f32(<4 x ptr> %ptrs, <4 x i1> %m, <4 x float> %pas
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
-; RV64ZVE32F-NEXT: beqz a2, .LBB70_3
-; RV64ZVE32F-NEXT: .LBB70_7: # %cond.load4
+; RV64ZVE32F-NEXT: beqz a2, .LBB80_3
+; RV64ZVE32F-NEXT: .LBB80_7: # %cond.load4
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
; RV64ZVE32F-NEXT: andi a1, a1, 8
-; RV64ZVE32F-NEXT: beqz a1, .LBB70_4
-; RV64ZVE32F-NEXT: .LBB70_8: # %cond.load7
+; RV64ZVE32F-NEXT: beqz a1, .LBB80_4
+; RV64ZVE32F-NEXT: .LBB80_8: # %cond.load7
; RV64ZVE32F-NEXT: ld a0, 24(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
@@ -8038,38 +13182,38 @@ define <8 x float> @mgather_v8f32(<8 x ptr> %ptrs, <8 x i1> %m, <8 x float> %pas
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
-; RV64ZVE32F-NEXT: bnez a2, .LBB73_9
+; RV64ZVE32F-NEXT: bnez a2, .LBB83_9
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB73_10
-; RV64ZVE32F-NEXT: .LBB73_2: # %else2
+; RV64ZVE32F-NEXT: bnez a2, .LBB83_10
+; RV64ZVE32F-NEXT: .LBB83_2: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
-; RV64ZVE32F-NEXT: bnez a2, .LBB73_11
-; RV64ZVE32F-NEXT: .LBB73_3: # %else5
+; RV64ZVE32F-NEXT: bnez a2, .LBB83_11
+; RV64ZVE32F-NEXT: .LBB83_3: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB73_12
-; RV64ZVE32F-NEXT: .LBB73_4: # %else8
+; RV64ZVE32F-NEXT: bnez a2, .LBB83_12
+; RV64ZVE32F-NEXT: .LBB83_4: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB73_13
-; RV64ZVE32F-NEXT: .LBB73_5: # %else11
+; RV64ZVE32F-NEXT: bnez a2, .LBB83_13
+; RV64ZVE32F-NEXT: .LBB83_5: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB73_14
-; RV64ZVE32F-NEXT: .LBB73_6: # %else14
+; RV64ZVE32F-NEXT: bnez a2, .LBB83_14
+; RV64ZVE32F-NEXT: .LBB83_6: # %else14
; RV64ZVE32F-NEXT: andi a2, a1, 64
-; RV64ZVE32F-NEXT: bnez a2, .LBB73_15
-; RV64ZVE32F-NEXT: .LBB73_7: # %else17
+; RV64ZVE32F-NEXT: bnez a2, .LBB83_15
+; RV64ZVE32F-NEXT: .LBB83_7: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: bnez a1, .LBB73_16
-; RV64ZVE32F-NEXT: .LBB73_8: # %else20
+; RV64ZVE32F-NEXT: bnez a1, .LBB83_16
+; RV64ZVE32F-NEXT: .LBB83_8: # %else20
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB73_9: # %cond.load
+; RV64ZVE32F-NEXT: .LBB83_9: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: andi a2, a1, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB73_2
-; RV64ZVE32F-NEXT: .LBB73_10: # %cond.load1
+; RV64ZVE32F-NEXT: beqz a2, .LBB83_2
+; RV64ZVE32F-NEXT: .LBB83_10: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
@@ -8077,48 +13221,48 @@ define <8 x float> @mgather_v8f32(<8 x ptr> %ptrs, <8 x i1> %m, <8 x float> %pas
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
-; RV64ZVE32F-NEXT: beqz a2, .LBB73_3
-; RV64ZVE32F-NEXT: .LBB73_11: # %cond.load4
+; RV64ZVE32F-NEXT: beqz a2, .LBB83_3
+; RV64ZVE32F-NEXT: .LBB83_11: # %cond.load4
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB73_4
-; RV64ZVE32F-NEXT: .LBB73_12: # %cond.load7
+; RV64ZVE32F-NEXT: beqz a2, .LBB83_4
+; RV64ZVE32F-NEXT: .LBB83_12: # %cond.load7
; RV64ZVE32F-NEXT: ld a2, 24(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB73_5
-; RV64ZVE32F-NEXT: .LBB73_13: # %cond.load10
+; RV64ZVE32F-NEXT: beqz a2, .LBB83_5
+; RV64ZVE32F-NEXT: .LBB83_13: # %cond.load10
; RV64ZVE32F-NEXT: ld a2, 32(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB73_6
-; RV64ZVE32F-NEXT: .LBB73_14: # %cond.load13
+; RV64ZVE32F-NEXT: beqz a2, .LBB83_6
+; RV64ZVE32F-NEXT: .LBB83_14: # %cond.load13
; RV64ZVE32F-NEXT: ld a2, 40(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 5
; RV64ZVE32F-NEXT: andi a2, a1, 64
-; RV64ZVE32F-NEXT: beqz a2, .LBB73_7
-; RV64ZVE32F-NEXT: .LBB73_15: # %cond.load16
+; RV64ZVE32F-NEXT: beqz a2, .LBB83_7
+; RV64ZVE32F-NEXT: .LBB83_15: # %cond.load16
; RV64ZVE32F-NEXT: ld a2, 48(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB73_8
-; RV64ZVE32F-NEXT: .LBB73_16: # %cond.load19
+; RV64ZVE32F-NEXT: beqz a1, .LBB83_8
+; RV64ZVE32F-NEXT: .LBB83_16: # %cond.load19
; RV64ZVE32F-NEXT: ld a0, 56(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
@@ -8154,7 +13298,7 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
-; RV64ZVE32F-NEXT: beqz a2, .LBB74_2
+; RV64ZVE32F-NEXT: beqz a2, .LBB84_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
@@ -8162,9 +13306,9 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
-; RV64ZVE32F-NEXT: .LBB74_2: # %else
+; RV64ZVE32F-NEXT: .LBB84_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB74_4
+; RV64ZVE32F-NEXT: beqz a2, .LBB84_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
@@ -8176,23 +13320,23 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
-; RV64ZVE32F-NEXT: .LBB74_4: # %else2
+; RV64ZVE32F-NEXT: .LBB84_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB74_12
+; RV64ZVE32F-NEXT: bnez a2, .LBB84_12
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB74_13
-; RV64ZVE32F-NEXT: .LBB74_6: # %else8
+; RV64ZVE32F-NEXT: bnez a2, .LBB84_13
+; RV64ZVE32F-NEXT: .LBB84_6: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB74_14
-; RV64ZVE32F-NEXT: .LBB74_7: # %else11
+; RV64ZVE32F-NEXT: bnez a2, .LBB84_14
+; RV64ZVE32F-NEXT: .LBB84_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB74_9
-; RV64ZVE32F-NEXT: .LBB74_8: # %cond.load13
+; RV64ZVE32F-NEXT: beqz a2, .LBB84_9
+; RV64ZVE32F-NEXT: .LBB84_8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -8203,18 +13347,18 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5
-; RV64ZVE32F-NEXT: .LBB74_9: # %else14
+; RV64ZVE32F-NEXT: .LBB84_9: # %else14
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB74_15
+; RV64ZVE32F-NEXT: bnez a2, .LBB84_15
; RV64ZVE32F-NEXT: # %bb.10: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: bnez a1, .LBB74_16
-; RV64ZVE32F-NEXT: .LBB74_11: # %else20
+; RV64ZVE32F-NEXT: bnez a1, .LBB84_16
+; RV64ZVE32F-NEXT: .LBB84_11: # %else20
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB74_12: # %cond.load4
+; RV64ZVE32F-NEXT: .LBB84_12: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -8224,8 +13368,8 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB74_6
-; RV64ZVE32F-NEXT: .LBB74_13: # %cond.load7
+; RV64ZVE32F-NEXT: beqz a2, .LBB84_6
+; RV64ZVE32F-NEXT: .LBB84_13: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -8237,8 +13381,8 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB74_7
-; RV64ZVE32F-NEXT: .LBB74_14: # %cond.load10
+; RV64ZVE32F-NEXT: beqz a2, .LBB84_7
+; RV64ZVE32F-NEXT: .LBB84_14: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 2
@@ -8249,9 +13393,9 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB74_8
-; RV64ZVE32F-NEXT: j .LBB74_9
-; RV64ZVE32F-NEXT: .LBB74_15: # %cond.load16
+; RV64ZVE32F-NEXT: bnez a2, .LBB84_8
+; RV64ZVE32F-NEXT: j .LBB84_9
+; RV64ZVE32F-NEXT: .LBB84_15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -8261,8 +13405,8 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB74_11
-; RV64ZVE32F-NEXT: .LBB74_16: # %cond.load19
+; RV64ZVE32F-NEXT: beqz a1, .LBB84_11
+; RV64ZVE32F-NEXT: .LBB84_16: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
@@ -8305,7 +13449,7 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
-; RV64ZVE32F-NEXT: beqz a2, .LBB75_2
+; RV64ZVE32F-NEXT: beqz a2, .LBB85_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
@@ -8313,9 +13457,9 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
-; RV64ZVE32F-NEXT: .LBB75_2: # %else
+; RV64ZVE32F-NEXT: .LBB85_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB75_4
+; RV64ZVE32F-NEXT: beqz a2, .LBB85_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
@@ -8327,23 +13471,23 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
-; RV64ZVE32F-NEXT: .LBB75_4: # %else2
+; RV64ZVE32F-NEXT: .LBB85_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB75_12
+; RV64ZVE32F-NEXT: bnez a2, .LBB85_12
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB75_13
-; RV64ZVE32F-NEXT: .LBB75_6: # %else8
+; RV64ZVE32F-NEXT: bnez a2, .LBB85_13
+; RV64ZVE32F-NEXT: .LBB85_6: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB75_14
-; RV64ZVE32F-NEXT: .LBB75_7: # %else11
+; RV64ZVE32F-NEXT: bnez a2, .LBB85_14
+; RV64ZVE32F-NEXT: .LBB85_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB75_9
-; RV64ZVE32F-NEXT: .LBB75_8: # %cond.load13
+; RV64ZVE32F-NEXT: beqz a2, .LBB85_9
+; RV64ZVE32F-NEXT: .LBB85_8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -8354,18 +13498,18 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5
-; RV64ZVE32F-NEXT: .LBB75_9: # %else14
+; RV64ZVE32F-NEXT: .LBB85_9: # %else14
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB75_15
+; RV64ZVE32F-NEXT: bnez a2, .LBB85_15
; RV64ZVE32F-NEXT: # %bb.10: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: bnez a1, .LBB75_16
-; RV64ZVE32F-NEXT: .LBB75_11: # %else20
+; RV64ZVE32F-NEXT: bnez a1, .LBB85_16
+; RV64ZVE32F-NEXT: .LBB85_11: # %else20
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB75_12: # %cond.load4
+; RV64ZVE32F-NEXT: .LBB85_12: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -8375,8 +13519,8 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB75_6
-; RV64ZVE32F-NEXT: .LBB75_13: # %cond.load7
+; RV64ZVE32F-NEXT: beqz a2, .LBB85_6
+; RV64ZVE32F-NEXT: .LBB85_13: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -8388,8 +13532,8 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB75_7
-; RV64ZVE32F-NEXT: .LBB75_14: # %cond.load10
+; RV64ZVE32F-NEXT: beqz a2, .LBB85_7
+; RV64ZVE32F-NEXT: .LBB85_14: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 2
@@ -8400,9 +13544,9 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB75_8
-; RV64ZVE32F-NEXT: j .LBB75_9
-; RV64ZVE32F-NEXT: .LBB75_15: # %cond.load16
+; RV64ZVE32F-NEXT: bnez a2, .LBB85_8
+; RV64ZVE32F-NEXT: j .LBB85_9
+; RV64ZVE32F-NEXT: .LBB85_15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -8412,8 +13556,8 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB75_11
-; RV64ZVE32F-NEXT: .LBB75_16: # %cond.load19
+; RV64ZVE32F-NEXT: beqz a1, .LBB85_11
+; RV64ZVE32F-NEXT: .LBB85_16: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
@@ -8458,7 +13602,7 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
-; RV64ZVE32F-NEXT: beqz a2, .LBB76_2
+; RV64ZVE32F-NEXT: beqz a2, .LBB86_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
@@ -8467,9 +13611,9 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
-; RV64ZVE32F-NEXT: .LBB76_2: # %else
+; RV64ZVE32F-NEXT: .LBB86_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB76_4
+; RV64ZVE32F-NEXT: beqz a2, .LBB86_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
@@ -8482,23 +13626,23 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
-; RV64ZVE32F-NEXT: .LBB76_4: # %else2
+; RV64ZVE32F-NEXT: .LBB86_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB76_12
+; RV64ZVE32F-NEXT: bnez a2, .LBB86_12
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB76_13
-; RV64ZVE32F-NEXT: .LBB76_6: # %else8
+; RV64ZVE32F-NEXT: bnez a2, .LBB86_13
+; RV64ZVE32F-NEXT: .LBB86_6: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB76_14
-; RV64ZVE32F-NEXT: .LBB76_7: # %else11
+; RV64ZVE32F-NEXT: bnez a2, .LBB86_14
+; RV64ZVE32F-NEXT: .LBB86_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB76_9
-; RV64ZVE32F-NEXT: .LBB76_8: # %cond.load13
+; RV64ZVE32F-NEXT: beqz a2, .LBB86_9
+; RV64ZVE32F-NEXT: .LBB86_8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -8510,18 +13654,18 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5
-; RV64ZVE32F-NEXT: .LBB76_9: # %else14
+; RV64ZVE32F-NEXT: .LBB86_9: # %else14
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB76_15
+; RV64ZVE32F-NEXT: bnez a2, .LBB86_15
; RV64ZVE32F-NEXT: # %bb.10: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: bnez a1, .LBB76_16
-; RV64ZVE32F-NEXT: .LBB76_11: # %else20
+; RV64ZVE32F-NEXT: bnez a1, .LBB86_16
+; RV64ZVE32F-NEXT: .LBB86_11: # %else20
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB76_12: # %cond.load4
+; RV64ZVE32F-NEXT: .LBB86_12: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 2
@@ -8532,8 +13676,8 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB76_6
-; RV64ZVE32F-NEXT: .LBB76_13: # %cond.load7
+; RV64ZVE32F-NEXT: beqz a2, .LBB86_6
+; RV64ZVE32F-NEXT: .LBB86_13: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -8546,8 +13690,8 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB76_7
-; RV64ZVE32F-NEXT: .LBB76_14: # %cond.load10
+; RV64ZVE32F-NEXT: beqz a2, .LBB86_7
+; RV64ZVE32F-NEXT: .LBB86_14: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: andi a2, a2, 255
@@ -8559,9 +13703,9 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB76_8
-; RV64ZVE32F-NEXT: j .LBB76_9
-; RV64ZVE32F-NEXT: .LBB76_15: # %cond.load16
+; RV64ZVE32F-NEXT: bnez a2, .LBB86_8
+; RV64ZVE32F-NEXT: j .LBB86_9
+; RV64ZVE32F-NEXT: .LBB86_15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 2
@@ -8572,8 +13716,8 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB76_11
-; RV64ZVE32F-NEXT: .LBB76_16: # %cond.load19
+; RV64ZVE32F-NEXT: beqz a1, .LBB86_11
+; RV64ZVE32F-NEXT: .LBB86_16: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
@@ -8618,7 +13762,7 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
-; RV64ZVE32F-NEXT: beqz a2, .LBB77_2
+; RV64ZVE32F-NEXT: beqz a2, .LBB87_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -8627,9 +13771,9 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
-; RV64ZVE32F-NEXT: .LBB77_2: # %else
+; RV64ZVE32F-NEXT: .LBB87_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB77_4
+; RV64ZVE32F-NEXT: beqz a2, .LBB87_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
@@ -8641,23 +13785,23 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
-; RV64ZVE32F-NEXT: .LBB77_4: # %else2
+; RV64ZVE32F-NEXT: .LBB87_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB77_12
+; RV64ZVE32F-NEXT: bnez a2, .LBB87_12
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB77_13
-; RV64ZVE32F-NEXT: .LBB77_6: # %else8
+; RV64ZVE32F-NEXT: bnez a2, .LBB87_13
+; RV64ZVE32F-NEXT: .LBB87_6: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB77_14
-; RV64ZVE32F-NEXT: .LBB77_7: # %else11
+; RV64ZVE32F-NEXT: bnez a2, .LBB87_14
+; RV64ZVE32F-NEXT: .LBB87_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB77_9
-; RV64ZVE32F-NEXT: .LBB77_8: # %cond.load13
+; RV64ZVE32F-NEXT: beqz a2, .LBB87_9
+; RV64ZVE32F-NEXT: .LBB87_8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -8668,18 +13812,18 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5
-; RV64ZVE32F-NEXT: .LBB77_9: # %else14
+; RV64ZVE32F-NEXT: .LBB87_9: # %else14
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB77_15
+; RV64ZVE32F-NEXT: bnez a2, .LBB87_15
; RV64ZVE32F-NEXT: # %bb.10: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: bnez a1, .LBB77_16
-; RV64ZVE32F-NEXT: .LBB77_11: # %else20
+; RV64ZVE32F-NEXT: bnez a1, .LBB87_16
+; RV64ZVE32F-NEXT: .LBB87_11: # %else20
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB77_12: # %cond.load4
+; RV64ZVE32F-NEXT: .LBB87_12: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -8689,8 +13833,8 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB77_6
-; RV64ZVE32F-NEXT: .LBB77_13: # %cond.load7
+; RV64ZVE32F-NEXT: beqz a2, .LBB87_6
+; RV64ZVE32F-NEXT: .LBB87_13: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -8702,8 +13846,8 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB77_7
-; RV64ZVE32F-NEXT: .LBB77_14: # %cond.load10
+; RV64ZVE32F-NEXT: beqz a2, .LBB87_7
+; RV64ZVE32F-NEXT: .LBB87_14: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 2
@@ -8714,9 +13858,9 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB77_8
-; RV64ZVE32F-NEXT: j .LBB77_9
-; RV64ZVE32F-NEXT: .LBB77_15: # %cond.load16
+; RV64ZVE32F-NEXT: bnez a2, .LBB87_8
+; RV64ZVE32F-NEXT: j .LBB87_9
+; RV64ZVE32F-NEXT: .LBB87_15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -8726,8 +13870,8 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB77_11
-; RV64ZVE32F-NEXT: .LBB77_16: # %cond.load19
+; RV64ZVE32F-NEXT: beqz a1, .LBB87_11
+; RV64ZVE32F-NEXT: .LBB87_16: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
@@ -8770,7 +13914,7 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
-; RV64ZVE32F-NEXT: beqz a2, .LBB78_2
+; RV64ZVE32F-NEXT: beqz a2, .LBB88_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -8779,9 +13923,9 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
-; RV64ZVE32F-NEXT: .LBB78_2: # %else
+; RV64ZVE32F-NEXT: .LBB88_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB78_4
+; RV64ZVE32F-NEXT: beqz a2, .LBB88_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
@@ -8793,23 +13937,23 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
-; RV64ZVE32F-NEXT: .LBB78_4: # %else2
+; RV64ZVE32F-NEXT: .LBB88_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB78_12
+; RV64ZVE32F-NEXT: bnez a2, .LBB88_12
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB78_13
-; RV64ZVE32F-NEXT: .LBB78_6: # %else8
+; RV64ZVE32F-NEXT: bnez a2, .LBB88_13
+; RV64ZVE32F-NEXT: .LBB88_6: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB78_14
-; RV64ZVE32F-NEXT: .LBB78_7: # %else11
+; RV64ZVE32F-NEXT: bnez a2, .LBB88_14
+; RV64ZVE32F-NEXT: .LBB88_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB78_9
-; RV64ZVE32F-NEXT: .LBB78_8: # %cond.load13
+; RV64ZVE32F-NEXT: beqz a2, .LBB88_9
+; RV64ZVE32F-NEXT: .LBB88_8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -8820,18 +13964,18 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5
-; RV64ZVE32F-NEXT: .LBB78_9: # %else14
+; RV64ZVE32F-NEXT: .LBB88_9: # %else14
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB78_15
+; RV64ZVE32F-NEXT: bnez a2, .LBB88_15
; RV64ZVE32F-NEXT: # %bb.10: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: bnez a1, .LBB78_16
-; RV64ZVE32F-NEXT: .LBB78_11: # %else20
+; RV64ZVE32F-NEXT: bnez a1, .LBB88_16
+; RV64ZVE32F-NEXT: .LBB88_11: # %else20
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB78_12: # %cond.load4
+; RV64ZVE32F-NEXT: .LBB88_12: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -8841,8 +13985,8 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB78_6
-; RV64ZVE32F-NEXT: .LBB78_13: # %cond.load7
+; RV64ZVE32F-NEXT: beqz a2, .LBB88_6
+; RV64ZVE32F-NEXT: .LBB88_13: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -8854,8 +13998,8 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB78_7
-; RV64ZVE32F-NEXT: .LBB78_14: # %cond.load10
+; RV64ZVE32F-NEXT: beqz a2, .LBB88_7
+; RV64ZVE32F-NEXT: .LBB88_14: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 2
@@ -8866,9 +14010,9 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB78_8
-; RV64ZVE32F-NEXT: j .LBB78_9
-; RV64ZVE32F-NEXT: .LBB78_15: # %cond.load16
+; RV64ZVE32F-NEXT: bnez a2, .LBB88_8
+; RV64ZVE32F-NEXT: j .LBB88_9
+; RV64ZVE32F-NEXT: .LBB88_15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -8878,8 +14022,8 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB78_11
-; RV64ZVE32F-NEXT: .LBB78_16: # %cond.load19
+; RV64ZVE32F-NEXT: beqz a1, .LBB88_11
+; RV64ZVE32F-NEXT: .LBB88_16: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
@@ -8924,7 +14068,7 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: addiw a1, a1, -1
-; RV64ZVE32F-NEXT: beqz a3, .LBB79_2
+; RV64ZVE32F-NEXT: beqz a3, .LBB89_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
@@ -8934,9 +14078,9 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: flw fa5, 0(a3)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
-; RV64ZVE32F-NEXT: .LBB79_2: # %else
+; RV64ZVE32F-NEXT: .LBB89_2: # %else
; RV64ZVE32F-NEXT: andi a3, a2, 2
-; RV64ZVE32F-NEXT: beqz a3, .LBB79_4
+; RV64ZVE32F-NEXT: beqz a3, .LBB89_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
@@ -8949,23 +14093,23 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
-; RV64ZVE32F-NEXT: .LBB79_4: # %else2
+; RV64ZVE32F-NEXT: .LBB89_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a3, a2, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a3, .LBB79_12
+; RV64ZVE32F-NEXT: bnez a3, .LBB89_12
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a3, a2, 8
-; RV64ZVE32F-NEXT: bnez a3, .LBB79_13
-; RV64ZVE32F-NEXT: .LBB79_6: # %else8
+; RV64ZVE32F-NEXT: bnez a3, .LBB89_13
+; RV64ZVE32F-NEXT: .LBB89_6: # %else8
; RV64ZVE32F-NEXT: andi a3, a2, 16
-; RV64ZVE32F-NEXT: bnez a3, .LBB79_14
-; RV64ZVE32F-NEXT: .LBB79_7: # %else11
+; RV64ZVE32F-NEXT: bnez a3, .LBB89_14
+; RV64ZVE32F-NEXT: .LBB89_7: # %else11
; RV64ZVE32F-NEXT: andi a3, a2, 32
-; RV64ZVE32F-NEXT: beqz a3, .LBB79_9
-; RV64ZVE32F-NEXT: .LBB79_8: # %cond.load13
+; RV64ZVE32F-NEXT: beqz a3, .LBB89_9
+; RV64ZVE32F-NEXT: .LBB89_8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
@@ -8977,18 +14121,18 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5
-; RV64ZVE32F-NEXT: .LBB79_9: # %else14
+; RV64ZVE32F-NEXT: .LBB89_9: # %else14
; RV64ZVE32F-NEXT: andi a3, a2, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
-; RV64ZVE32F-NEXT: bnez a3, .LBB79_15
+; RV64ZVE32F-NEXT: bnez a3, .LBB89_15
; RV64ZVE32F-NEXT: # %bb.10: # %else17
; RV64ZVE32F-NEXT: andi a2, a2, -128
-; RV64ZVE32F-NEXT: bnez a2, .LBB79_16
-; RV64ZVE32F-NEXT: .LBB79_11: # %else20
+; RV64ZVE32F-NEXT: bnez a2, .LBB89_16
+; RV64ZVE32F-NEXT: .LBB89_11: # %else20
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB79_12: # %cond.load4
+; RV64ZVE32F-NEXT: .LBB89_12: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: and a3, a3, a1
; RV64ZVE32F-NEXT: slli a3, a3, 2
@@ -8999,8 +14143,8 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a3, a2, 8
-; RV64ZVE32F-NEXT: beqz a3, .LBB79_6
-; RV64ZVE32F-NEXT: .LBB79_13: # %cond.load7
+; RV64ZVE32F-NEXT: beqz a3, .LBB89_6
+; RV64ZVE32F-NEXT: .LBB89_13: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
@@ -9013,8 +14157,8 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
; RV64ZVE32F-NEXT: andi a3, a2, 16
-; RV64ZVE32F-NEXT: beqz a3, .LBB79_7
-; RV64ZVE32F-NEXT: .LBB79_14: # %cond.load10
+; RV64ZVE32F-NEXT: beqz a3, .LBB89_7
+; RV64ZVE32F-NEXT: .LBB89_14: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: and a3, a3, a1
@@ -9026,9 +14170,9 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
; RV64ZVE32F-NEXT: andi a3, a2, 32
-; RV64ZVE32F-NEXT: bnez a3, .LBB79_8
-; RV64ZVE32F-NEXT: j .LBB79_9
-; RV64ZVE32F-NEXT: .LBB79_15: # %cond.load16
+; RV64ZVE32F-NEXT: bnez a3, .LBB89_8
+; RV64ZVE32F-NEXT: j .LBB89_9
+; RV64ZVE32F-NEXT: .LBB89_15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: and a3, a3, a1
; RV64ZVE32F-NEXT: slli a3, a3, 2
@@ -9039,8 +14183,8 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
; RV64ZVE32F-NEXT: andi a2, a2, -128
-; RV64ZVE32F-NEXT: beqz a2, .LBB79_11
-; RV64ZVE32F-NEXT: .LBB79_16: # %cond.load19
+; RV64ZVE32F-NEXT: beqz a2, .LBB89_11
+; RV64ZVE32F-NEXT: .LBB89_16: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -9084,7 +14228,7 @@ define <8 x float> @mgather_baseidx_v8f32(ptr %base, <8 x i32> %idxs, <8 x i1> %
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
-; RV64ZVE32F-NEXT: beqz a2, .LBB80_2
+; RV64ZVE32F-NEXT: beqz a2, .LBB90_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -9092,9 +14236,9 @@ define <8 x float> @mgather_baseidx_v8f32(ptr %base, <8 x i32> %idxs, <8 x i1> %
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
-; RV64ZVE32F-NEXT: .LBB80_2: # %else
+; RV64ZVE32F-NEXT: .LBB90_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB80_4
+; RV64ZVE32F-NEXT: beqz a2, .LBB90_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1
@@ -9104,23 +14248,23 @@ define <8 x float> @mgather_baseidx_v8f32(ptr %base, <8 x i32> %idxs, <8 x i1> %
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
-; RV64ZVE32F-NEXT: .LBB80_4: # %else2
+; RV64ZVE32F-NEXT: .LBB90_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB80_12
+; RV64ZVE32F-NEXT: bnez a2, .LBB90_12
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB80_13
-; RV64ZVE32F-NEXT: .LBB80_6: # %else8
+; RV64ZVE32F-NEXT: bnez a2, .LBB90_13
+; RV64ZVE32F-NEXT: .LBB90_6: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB80_14
-; RV64ZVE32F-NEXT: .LBB80_7: # %else11
+; RV64ZVE32F-NEXT: bnez a2, .LBB90_14
+; RV64ZVE32F-NEXT: .LBB90_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB80_9
-; RV64ZVE32F-NEXT: .LBB80_8: # %cond.load13
+; RV64ZVE32F-NEXT: beqz a2, .LBB90_9
+; RV64ZVE32F-NEXT: .LBB90_8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -9130,18 +14274,18 @@ define <8 x float> @mgather_baseidx_v8f32(ptr %base, <8 x i32> %idxs, <8 x i1> %
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
-; RV64ZVE32F-NEXT: .LBB80_9: # %else14
+; RV64ZVE32F-NEXT: .LBB90_9: # %else14
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v12, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB80_15
+; RV64ZVE32F-NEXT: bnez a2, .LBB90_15
; RV64ZVE32F-NEXT: # %bb.10: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: bnez a1, .LBB80_16
-; RV64ZVE32F-NEXT: .LBB80_11: # %else20
+; RV64ZVE32F-NEXT: bnez a1, .LBB90_16
+; RV64ZVE32F-NEXT: .LBB90_11: # %else20
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB80_12: # %cond.load4
+; RV64ZVE32F-NEXT: .LBB90_12: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -9150,8 +14294,8 @@ define <8 x float> @mgather_baseidx_v8f32(ptr %base, <8 x i32> %idxs, <8 x i1> %
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB80_6
-; RV64ZVE32F-NEXT: .LBB80_13: # %cond.load7
+; RV64ZVE32F-NEXT: beqz a2, .LBB90_6
+; RV64ZVE32F-NEXT: .LBB90_13: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -9161,8 +14305,8 @@ define <8 x float> @mgather_baseidx_v8f32(ptr %base, <8 x i32> %idxs, <8 x i1> %
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB80_7
-; RV64ZVE32F-NEXT: .LBB80_14: # %cond.load10
+; RV64ZVE32F-NEXT: beqz a2, .LBB90_7
+; RV64ZVE32F-NEXT: .LBB90_14: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: slli a2, a2, 2
@@ -9171,9 +14315,9 @@ define <8 x float> @mgather_baseidx_v8f32(ptr %base, <8 x i32> %idxs, <8 x i1> %
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB80_8
-; RV64ZVE32F-NEXT: j .LBB80_9
-; RV64ZVE32F-NEXT: .LBB80_15: # %cond.load16
+; RV64ZVE32F-NEXT: bnez a2, .LBB90_8
+; RV64ZVE32F-NEXT: j .LBB90_9
+; RV64ZVE32F-NEXT: .LBB90_15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -9182,8 +14326,8 @@ define <8 x float> @mgather_baseidx_v8f32(ptr %base, <8 x i32> %idxs, <8 x i1> %
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB80_11
-; RV64ZVE32F-NEXT: .LBB80_16: # %cond.load19
+; RV64ZVE32F-NEXT: beqz a1, .LBB90_11
+; RV64ZVE32F-NEXT: .LBB90_16: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
@@ -9221,22 +14365,22 @@ define <1 x double> @mgather_v1f64(<1 x ptr> %ptrs, <1 x i1> %m, <1 x double> %p
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
; RV32ZVE32F-NEXT: vfirst.m a0, v0
-; RV32ZVE32F-NEXT: bnez a0, .LBB81_2
+; RV32ZVE32F-NEXT: bnez a0, .LBB91_2
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a0)
-; RV32ZVE32F-NEXT: .LBB81_2: # %else
+; RV32ZVE32F-NEXT: .LBB91_2: # %else
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v1f64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vfirst.m a1, v0
-; RV64ZVE32F-NEXT: bnez a1, .LBB81_2
+; RV64ZVE32F-NEXT: bnez a1, .LBB91_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: fld fa0, 0(a0)
-; RV64ZVE32F-NEXT: .LBB81_2: # %else
+; RV64ZVE32F-NEXT: .LBB91_2: # %else
; RV64ZVE32F-NEXT: ret
%v = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> %ptrs, i32 8, <1 x i1> %m, <1 x double> %passthru)
ret <1 x double> %v
@@ -9264,19 +14408,19 @@ define <2 x double> @mgather_v2f64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x double> %p
; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a0, v0
; RV32ZVE32F-NEXT: andi a1, a0, 1
-; RV32ZVE32F-NEXT: bnez a1, .LBB82_3
+; RV32ZVE32F-NEXT: bnez a1, .LBB92_3
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, a0, 2
-; RV32ZVE32F-NEXT: bnez a0, .LBB82_4
-; RV32ZVE32F-NEXT: .LBB82_2: # %else2
+; RV32ZVE32F-NEXT: bnez a0, .LBB92_4
+; RV32ZVE32F-NEXT: .LBB92_2: # %else2
; RV32ZVE32F-NEXT: ret
-; RV32ZVE32F-NEXT: .LBB82_3: # %cond.load
+; RV32ZVE32F-NEXT: .LBB92_3: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a1)
; RV32ZVE32F-NEXT: andi a0, a0, 2
-; RV32ZVE32F-NEXT: beqz a0, .LBB82_2
-; RV32ZVE32F-NEXT: .LBB82_4: # %cond.load1
+; RV32ZVE32F-NEXT: beqz a0, .LBB92_2
+; RV32ZVE32F-NEXT: .LBB92_4: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
@@ -9288,17 +14432,17 @@ define <2 x double> @mgather_v2f64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x double> %p
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
-; RV64ZVE32F-NEXT: bnez a3, .LBB82_3
+; RV64ZVE32F-NEXT: bnez a3, .LBB92_3
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB82_4
-; RV64ZVE32F-NEXT: .LBB82_2: # %else2
+; RV64ZVE32F-NEXT: bnez a2, .LBB92_4
+; RV64ZVE32F-NEXT: .LBB92_2: # %else2
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB82_3: # %cond.load
+; RV64ZVE32F-NEXT: .LBB92_3: # %cond.load
; RV64ZVE32F-NEXT: fld fa0, 0(a0)
; RV64ZVE32F-NEXT: andi a2, a2, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB82_2
-; RV64ZVE32F-NEXT: .LBB82_4: # %cond.load1
+; RV64ZVE32F-NEXT: beqz a2, .LBB92_2
+; RV64ZVE32F-NEXT: .LBB92_4: # %cond.load1
; RV64ZVE32F-NEXT: fld fa1, 0(a1)
; RV64ZVE32F-NEXT: ret
%v = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 8, <2 x i1> %m, <2 x double> %passthru)
@@ -9327,89 +14471,89 @@ define <4 x double> @mgather_v4f64(<4 x ptr> %ptrs, <4 x i1> %m, <4 x double> %p
; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v0
; RV32ZVE32F-NEXT: andi a2, a1, 1
-; RV32ZVE32F-NEXT: bnez a2, .LBB83_6
+; RV32ZVE32F-NEXT: bnez a2, .LBB93_6
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a2, a1, 2
-; RV32ZVE32F-NEXT: bnez a2, .LBB83_7
-; RV32ZVE32F-NEXT: .LBB83_2: # %else2
+; RV32ZVE32F-NEXT: bnez a2, .LBB93_7
+; RV32ZVE32F-NEXT: .LBB93_2: # %else2
; RV32ZVE32F-NEXT: andi a2, a1, 4
-; RV32ZVE32F-NEXT: bnez a2, .LBB83_8
-; RV32ZVE32F-NEXT: .LBB83_3: # %else5
+; RV32ZVE32F-NEXT: bnez a2, .LBB93_8
+; RV32ZVE32F-NEXT: .LBB93_3: # %else5
; RV32ZVE32F-NEXT: andi a1, a1, 8
-; RV32ZVE32F-NEXT: beqz a1, .LBB83_5
-; RV32ZVE32F-NEXT: .LBB83_4: # %cond.load7
+; RV32ZVE32F-NEXT: beqz a1, .LBB93_5
+; RV32ZVE32F-NEXT: .LBB93_4: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa3, 0(a1)
-; RV32ZVE32F-NEXT: .LBB83_5: # %else8
+; RV32ZVE32F-NEXT: .LBB93_5: # %else8
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
; RV32ZVE32F-NEXT: fsd fa3, 24(a0)
; RV32ZVE32F-NEXT: ret
-; RV32ZVE32F-NEXT: .LBB83_6: # %cond.load
+; RV32ZVE32F-NEXT: .LBB93_6: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 2
-; RV32ZVE32F-NEXT: beqz a2, .LBB83_2
-; RV32ZVE32F-NEXT: .LBB83_7: # %cond.load1
+; RV32ZVE32F-NEXT: beqz a2, .LBB93_2
+; RV32ZVE32F-NEXT: .LBB93_7: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a2, v9
; RV32ZVE32F-NEXT: fld fa1, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 4
-; RV32ZVE32F-NEXT: beqz a2, .LBB83_3
-; RV32ZVE32F-NEXT: .LBB83_8: # %cond.load4
+; RV32ZVE32F-NEXT: beqz a2, .LBB93_3
+; RV32ZVE32F-NEXT: .LBB93_8: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a2, v9
; RV32ZVE32F-NEXT: fld fa2, 0(a2)
; RV32ZVE32F-NEXT: andi a1, a1, 8
-; RV32ZVE32F-NEXT: bnez a1, .LBB83_4
-; RV32ZVE32F-NEXT: j .LBB83_5
+; RV32ZVE32F-NEXT: bnez a1, .LBB93_4
+; RV32ZVE32F-NEXT: j .LBB93_5
;
; RV64ZVE32F-LABEL: mgather_v4f64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
-; RV64ZVE32F-NEXT: bnez a3, .LBB83_6
+; RV64ZVE32F-NEXT: bnez a3, .LBB93_6
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a3, a2, 2
-; RV64ZVE32F-NEXT: bnez a3, .LBB83_7
-; RV64ZVE32F-NEXT: .LBB83_2: # %else2
+; RV64ZVE32F-NEXT: bnez a3, .LBB93_7
+; RV64ZVE32F-NEXT: .LBB93_2: # %else2
; RV64ZVE32F-NEXT: andi a3, a2, 4
-; RV64ZVE32F-NEXT: bnez a3, .LBB83_8
-; RV64ZVE32F-NEXT: .LBB83_3: # %else5
+; RV64ZVE32F-NEXT: bnez a3, .LBB93_8
+; RV64ZVE32F-NEXT: .LBB93_3: # %else5
; RV64ZVE32F-NEXT: andi a2, a2, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB83_5
-; RV64ZVE32F-NEXT: .LBB83_4: # %cond.load7
+; RV64ZVE32F-NEXT: beqz a2, .LBB93_5
+; RV64ZVE32F-NEXT: .LBB93_4: # %cond.load7
; RV64ZVE32F-NEXT: ld a1, 24(a1)
; RV64ZVE32F-NEXT: fld fa3, 0(a1)
-; RV64ZVE32F-NEXT: .LBB83_5: # %else8
+; RV64ZVE32F-NEXT: .LBB93_5: # %else8
; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
; RV64ZVE32F-NEXT: fsd fa3, 24(a0)
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB83_6: # %cond.load
+; RV64ZVE32F-NEXT: .LBB93_6: # %cond.load
; RV64ZVE32F-NEXT: ld a3, 0(a1)
; RV64ZVE32F-NEXT: fld fa0, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 2
-; RV64ZVE32F-NEXT: beqz a3, .LBB83_2
-; RV64ZVE32F-NEXT: .LBB83_7: # %cond.load1
+; RV64ZVE32F-NEXT: beqz a3, .LBB93_2
+; RV64ZVE32F-NEXT: .LBB93_7: # %cond.load1
; RV64ZVE32F-NEXT: ld a3, 8(a1)
; RV64ZVE32F-NEXT: fld fa1, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 4
-; RV64ZVE32F-NEXT: beqz a3, .LBB83_3
-; RV64ZVE32F-NEXT: .LBB83_8: # %cond.load4
+; RV64ZVE32F-NEXT: beqz a3, .LBB93_3
+; RV64ZVE32F-NEXT: .LBB93_8: # %cond.load4
; RV64ZVE32F-NEXT: ld a3, 16(a1)
; RV64ZVE32F-NEXT: fld fa2, 0(a3)
; RV64ZVE32F-NEXT: andi a2, a2, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB83_4
-; RV64ZVE32F-NEXT: j .LBB83_5
+; RV64ZVE32F-NEXT: bnez a2, .LBB93_4
+; RV64ZVE32F-NEXT: j .LBB93_5
%v = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %ptrs, i32 8, <4 x i1> %m, <4 x double> %passthru)
ret <4 x double> %v
}
@@ -9519,34 +14663,34 @@ define <8 x double> @mgather_v8f64(<8 x ptr> %ptrs, <8 x i1> %m, <8 x double> %p
; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v0
; RV32ZVE32F-NEXT: andi a2, a1, 1
-; RV32ZVE32F-NEXT: bnez a2, .LBB86_10
+; RV32ZVE32F-NEXT: bnez a2, .LBB96_10
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a2, a1, 2
-; RV32ZVE32F-NEXT: bnez a2, .LBB86_11
-; RV32ZVE32F-NEXT: .LBB86_2: # %else2
+; RV32ZVE32F-NEXT: bnez a2, .LBB96_11
+; RV32ZVE32F-NEXT: .LBB96_2: # %else2
; RV32ZVE32F-NEXT: andi a2, a1, 4
-; RV32ZVE32F-NEXT: bnez a2, .LBB86_12
-; RV32ZVE32F-NEXT: .LBB86_3: # %else5
+; RV32ZVE32F-NEXT: bnez a2, .LBB96_12
+; RV32ZVE32F-NEXT: .LBB96_3: # %else5
; RV32ZVE32F-NEXT: andi a2, a1, 8
-; RV32ZVE32F-NEXT: bnez a2, .LBB86_13
-; RV32ZVE32F-NEXT: .LBB86_4: # %else8
+; RV32ZVE32F-NEXT: bnez a2, .LBB96_13
+; RV32ZVE32F-NEXT: .LBB96_4: # %else8
; RV32ZVE32F-NEXT: andi a2, a1, 16
-; RV32ZVE32F-NEXT: bnez a2, .LBB86_14
-; RV32ZVE32F-NEXT: .LBB86_5: # %else11
+; RV32ZVE32F-NEXT: bnez a2, .LBB96_14
+; RV32ZVE32F-NEXT: .LBB96_5: # %else11
; RV32ZVE32F-NEXT: andi a2, a1, 32
-; RV32ZVE32F-NEXT: bnez a2, .LBB86_15
-; RV32ZVE32F-NEXT: .LBB86_6: # %else14
+; RV32ZVE32F-NEXT: bnez a2, .LBB96_15
+; RV32ZVE32F-NEXT: .LBB96_6: # %else14
; RV32ZVE32F-NEXT: andi a2, a1, 64
-; RV32ZVE32F-NEXT: bnez a2, .LBB86_16
-; RV32ZVE32F-NEXT: .LBB86_7: # %else17
+; RV32ZVE32F-NEXT: bnez a2, .LBB96_16
+; RV32ZVE32F-NEXT: .LBB96_7: # %else17
; RV32ZVE32F-NEXT: andi a1, a1, -128
-; RV32ZVE32F-NEXT: beqz a1, .LBB86_9
-; RV32ZVE32F-NEXT: .LBB86_8: # %cond.load19
+; RV32ZVE32F-NEXT: beqz a1, .LBB96_9
+; RV32ZVE32F-NEXT: .LBB96_8: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa7, 0(a1)
-; RV32ZVE32F-NEXT: .LBB86_9: # %else20
+; RV32ZVE32F-NEXT: .LBB96_9: # %else20
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
@@ -9556,87 +14700,87 @@ define <8 x double> @mgather_v8f64(<8 x ptr> %ptrs, <8 x i1> %m, <8 x double> %p
; RV32ZVE32F-NEXT: fsd fa6, 48(a0)
; RV32ZVE32F-NEXT: fsd fa7, 56(a0)
; RV32ZVE32F-NEXT: ret
-; RV32ZVE32F-NEXT: .LBB86_10: # %cond.load
+; RV32ZVE32F-NEXT: .LBB96_10: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 2
-; RV32ZVE32F-NEXT: beqz a2, .LBB86_2
-; RV32ZVE32F-NEXT: .LBB86_11: # %cond.load1
+; RV32ZVE32F-NEXT: beqz a2, .LBB96_2
+; RV32ZVE32F-NEXT: .LBB96_11: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa1, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 4
-; RV32ZVE32F-NEXT: beqz a2, .LBB86_3
-; RV32ZVE32F-NEXT: .LBB86_12: # %cond.load4
+; RV32ZVE32F-NEXT: beqz a2, .LBB96_3
+; RV32ZVE32F-NEXT: .LBB96_12: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa2, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 8
-; RV32ZVE32F-NEXT: beqz a2, .LBB86_4
-; RV32ZVE32F-NEXT: .LBB86_13: # %cond.load7
+; RV32ZVE32F-NEXT: beqz a2, .LBB96_4
+; RV32ZVE32F-NEXT: .LBB96_13: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa3, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 16
-; RV32ZVE32F-NEXT: beqz a2, .LBB86_5
-; RV32ZVE32F-NEXT: .LBB86_14: # %cond.load10
+; RV32ZVE32F-NEXT: beqz a2, .LBB96_5
+; RV32ZVE32F-NEXT: .LBB96_14: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa4, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 32
-; RV32ZVE32F-NEXT: beqz a2, .LBB86_6
-; RV32ZVE32F-NEXT: .LBB86_15: # %cond.load13
+; RV32ZVE32F-NEXT: beqz a2, .LBB96_6
+; RV32ZVE32F-NEXT: .LBB96_15: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa5, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 64
-; RV32ZVE32F-NEXT: beqz a2, .LBB86_7
-; RV32ZVE32F-NEXT: .LBB86_16: # %cond.load16
+; RV32ZVE32F-NEXT: beqz a2, .LBB96_7
+; RV32ZVE32F-NEXT: .LBB96_16: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa6, 0(a2)
; RV32ZVE32F-NEXT: andi a1, a1, -128
-; RV32ZVE32F-NEXT: bnez a1, .LBB86_8
-; RV32ZVE32F-NEXT: j .LBB86_9
+; RV32ZVE32F-NEXT: bnez a1, .LBB96_8
+; RV32ZVE32F-NEXT: j .LBB96_9
;
; RV64ZVE32F-LABEL: mgather_v8f64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
-; RV64ZVE32F-NEXT: bnez a3, .LBB86_10
+; RV64ZVE32F-NEXT: bnez a3, .LBB96_10
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a3, a2, 2
-; RV64ZVE32F-NEXT: bnez a3, .LBB86_11
-; RV64ZVE32F-NEXT: .LBB86_2: # %else2
+; RV64ZVE32F-NEXT: bnez a3, .LBB96_11
+; RV64ZVE32F-NEXT: .LBB96_2: # %else2
; RV64ZVE32F-NEXT: andi a3, a2, 4
-; RV64ZVE32F-NEXT: bnez a3, .LBB86_12
-; RV64ZVE32F-NEXT: .LBB86_3: # %else5
+; RV64ZVE32F-NEXT: bnez a3, .LBB96_12
+; RV64ZVE32F-NEXT: .LBB96_3: # %else5
; RV64ZVE32F-NEXT: andi a3, a2, 8
-; RV64ZVE32F-NEXT: bnez a3, .LBB86_13
-; RV64ZVE32F-NEXT: .LBB86_4: # %else8
+; RV64ZVE32F-NEXT: bnez a3, .LBB96_13
+; RV64ZVE32F-NEXT: .LBB96_4: # %else8
; RV64ZVE32F-NEXT: andi a3, a2, 16
-; RV64ZVE32F-NEXT: bnez a3, .LBB86_14
-; RV64ZVE32F-NEXT: .LBB86_5: # %else11
+; RV64ZVE32F-NEXT: bnez a3, .LBB96_14
+; RV64ZVE32F-NEXT: .LBB96_5: # %else11
; RV64ZVE32F-NEXT: andi a3, a2, 32
-; RV64ZVE32F-NEXT: bnez a3, .LBB86_15
-; RV64ZVE32F-NEXT: .LBB86_6: # %else14
+; RV64ZVE32F-NEXT: bnez a3, .LBB96_15
+; RV64ZVE32F-NEXT: .LBB96_6: # %else14
; RV64ZVE32F-NEXT: andi a3, a2, 64
-; RV64ZVE32F-NEXT: bnez a3, .LBB86_16
-; RV64ZVE32F-NEXT: .LBB86_7: # %else17
+; RV64ZVE32F-NEXT: bnez a3, .LBB96_16
+; RV64ZVE32F-NEXT: .LBB96_7: # %else17
; RV64ZVE32F-NEXT: andi a2, a2, -128
-; RV64ZVE32F-NEXT: beqz a2, .LBB86_9
-; RV64ZVE32F-NEXT: .LBB86_8: # %cond.load19
+; RV64ZVE32F-NEXT: beqz a2, .LBB96_9
+; RV64ZVE32F-NEXT: .LBB96_8: # %cond.load19
; RV64ZVE32F-NEXT: ld a1, 56(a1)
; RV64ZVE32F-NEXT: fld fa7, 0(a1)
-; RV64ZVE32F-NEXT: .LBB86_9: # %else20
+; RV64ZVE32F-NEXT: .LBB96_9: # %else20
; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
@@ -9646,42 +14790,42 @@ define <8 x double> @mgather_v8f64(<8 x ptr> %ptrs, <8 x i1> %m, <8 x double> %p
; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB86_10: # %cond.load
+; RV64ZVE32F-NEXT: .LBB96_10: # %cond.load
; RV64ZVE32F-NEXT: ld a3, 0(a1)
; RV64ZVE32F-NEXT: fld fa0, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 2
-; RV64ZVE32F-NEXT: beqz a3, .LBB86_2
-; RV64ZVE32F-NEXT: .LBB86_11: # %cond.load1
+; RV64ZVE32F-NEXT: beqz a3, .LBB96_2
+; RV64ZVE32F-NEXT: .LBB96_11: # %cond.load1
; RV64ZVE32F-NEXT: ld a3, 8(a1)
; RV64ZVE32F-NEXT: fld fa1, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 4
-; RV64ZVE32F-NEXT: beqz a3, .LBB86_3
-; RV64ZVE32F-NEXT: .LBB86_12: # %cond.load4
+; RV64ZVE32F-NEXT: beqz a3, .LBB96_3
+; RV64ZVE32F-NEXT: .LBB96_12: # %cond.load4
; RV64ZVE32F-NEXT: ld a3, 16(a1)
; RV64ZVE32F-NEXT: fld fa2, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 8
-; RV64ZVE32F-NEXT: beqz a3, .LBB86_4
-; RV64ZVE32F-NEXT: .LBB86_13: # %cond.load7
+; RV64ZVE32F-NEXT: beqz a3, .LBB96_4
+; RV64ZVE32F-NEXT: .LBB96_13: # %cond.load7
; RV64ZVE32F-NEXT: ld a3, 24(a1)
; RV64ZVE32F-NEXT: fld fa3, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 16
-; RV64ZVE32F-NEXT: beqz a3, .LBB86_5
-; RV64ZVE32F-NEXT: .LBB86_14: # %cond.load10
+; RV64ZVE32F-NEXT: beqz a3, .LBB96_5
+; RV64ZVE32F-NEXT: .LBB96_14: # %cond.load10
; RV64ZVE32F-NEXT: ld a3, 32(a1)
; RV64ZVE32F-NEXT: fld fa4, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 32
-; RV64ZVE32F-NEXT: beqz a3, .LBB86_6
-; RV64ZVE32F-NEXT: .LBB86_15: # %cond.load13
+; RV64ZVE32F-NEXT: beqz a3, .LBB96_6
+; RV64ZVE32F-NEXT: .LBB96_15: # %cond.load13
; RV64ZVE32F-NEXT: ld a3, 40(a1)
; RV64ZVE32F-NEXT: fld fa5, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 64
-; RV64ZVE32F-NEXT: beqz a3, .LBB86_7
-; RV64ZVE32F-NEXT: .LBB86_16: # %cond.load16
+; RV64ZVE32F-NEXT: beqz a3, .LBB96_7
+; RV64ZVE32F-NEXT: .LBB96_16: # %cond.load16
; RV64ZVE32F-NEXT: ld a3, 48(a1)
; RV64ZVE32F-NEXT: fld fa6, 0(a3)
; RV64ZVE32F-NEXT: andi a2, a2, -128
-; RV64ZVE32F-NEXT: bnez a2, .LBB86_8
-; RV64ZVE32F-NEXT: j .LBB86_9
+; RV64ZVE32F-NEXT: bnez a2, .LBB96_8
+; RV64ZVE32F-NEXT: j .LBB96_9
%v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
ret <8 x double> %v
}
@@ -9716,34 +14860,34 @@ define <8 x double> @mgather_baseidx_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x
; RV32ZVE32F-NEXT: andi a3, a2, 1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
-; RV32ZVE32F-NEXT: bnez a3, .LBB87_10
+; RV32ZVE32F-NEXT: bnez a3, .LBB97_10
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a1, a2, 2
-; RV32ZVE32F-NEXT: bnez a1, .LBB87_11
-; RV32ZVE32F-NEXT: .LBB87_2: # %else2
+; RV32ZVE32F-NEXT: bnez a1, .LBB97_11
+; RV32ZVE32F-NEXT: .LBB97_2: # %else2
; RV32ZVE32F-NEXT: andi a1, a2, 4
-; RV32ZVE32F-NEXT: bnez a1, .LBB87_12
-; RV32ZVE32F-NEXT: .LBB87_3: # %else5
+; RV32ZVE32F-NEXT: bnez a1, .LBB97_12
+; RV32ZVE32F-NEXT: .LBB97_3: # %else5
; RV32ZVE32F-NEXT: andi a1, a2, 8
-; RV32ZVE32F-NEXT: bnez a1, .LBB87_13
-; RV32ZVE32F-NEXT: .LBB87_4: # %else8
+; RV32ZVE32F-NEXT: bnez a1, .LBB97_13
+; RV32ZVE32F-NEXT: .LBB97_4: # %else8
; RV32ZVE32F-NEXT: andi a1, a2, 16
-; RV32ZVE32F-NEXT: bnez a1, .LBB87_14
-; RV32ZVE32F-NEXT: .LBB87_5: # %else11
+; RV32ZVE32F-NEXT: bnez a1, .LBB97_14
+; RV32ZVE32F-NEXT: .LBB97_5: # %else11
; RV32ZVE32F-NEXT: andi a1, a2, 32
-; RV32ZVE32F-NEXT: bnez a1, .LBB87_15
-; RV32ZVE32F-NEXT: .LBB87_6: # %else14
+; RV32ZVE32F-NEXT: bnez a1, .LBB97_15
+; RV32ZVE32F-NEXT: .LBB97_6: # %else14
; RV32ZVE32F-NEXT: andi a1, a2, 64
-; RV32ZVE32F-NEXT: bnez a1, .LBB87_16
-; RV32ZVE32F-NEXT: .LBB87_7: # %else17
+; RV32ZVE32F-NEXT: bnez a1, .LBB97_16
+; RV32ZVE32F-NEXT: .LBB97_7: # %else17
; RV32ZVE32F-NEXT: andi a1, a2, -128
-; RV32ZVE32F-NEXT: beqz a1, .LBB87_9
-; RV32ZVE32F-NEXT: .LBB87_8: # %cond.load19
+; RV32ZVE32F-NEXT: beqz a1, .LBB97_9
+; RV32ZVE32F-NEXT: .LBB97_8: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa7, 0(a1)
-; RV32ZVE32F-NEXT: .LBB87_9: # %else20
+; RV32ZVE32F-NEXT: .LBB97_9: # %else20
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
@@ -9753,69 +14897,69 @@ define <8 x double> @mgather_baseidx_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x
; RV32ZVE32F-NEXT: fsd fa6, 48(a0)
; RV32ZVE32F-NEXT: fsd fa7, 56(a0)
; RV32ZVE32F-NEXT: ret
-; RV32ZVE32F-NEXT: .LBB87_10: # %cond.load
+; RV32ZVE32F-NEXT: .LBB97_10: # %cond.load
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, 2
-; RV32ZVE32F-NEXT: beqz a1, .LBB87_2
-; RV32ZVE32F-NEXT: .LBB87_11: # %cond.load1
+; RV32ZVE32F-NEXT: beqz a1, .LBB97_2
+; RV32ZVE32F-NEXT: .LBB97_11: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa1, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, 4
-; RV32ZVE32F-NEXT: beqz a1, .LBB87_3
-; RV32ZVE32F-NEXT: .LBB87_12: # %cond.load4
+; RV32ZVE32F-NEXT: beqz a1, .LBB97_3
+; RV32ZVE32F-NEXT: .LBB97_12: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa2, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, 8
-; RV32ZVE32F-NEXT: beqz a1, .LBB87_4
-; RV32ZVE32F-NEXT: .LBB87_13: # %cond.load7
+; RV32ZVE32F-NEXT: beqz a1, .LBB97_4
+; RV32ZVE32F-NEXT: .LBB97_13: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa3, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, 16
-; RV32ZVE32F-NEXT: beqz a1, .LBB87_5
-; RV32ZVE32F-NEXT: .LBB87_14: # %cond.load10
+; RV32ZVE32F-NEXT: beqz a1, .LBB97_5
+; RV32ZVE32F-NEXT: .LBB97_14: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa4, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, 32
-; RV32ZVE32F-NEXT: beqz a1, .LBB87_6
-; RV32ZVE32F-NEXT: .LBB87_15: # %cond.load13
+; RV32ZVE32F-NEXT: beqz a1, .LBB97_6
+; RV32ZVE32F-NEXT: .LBB97_15: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa5, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, 64
-; RV32ZVE32F-NEXT: beqz a1, .LBB87_7
-; RV32ZVE32F-NEXT: .LBB87_16: # %cond.load16
+; RV32ZVE32F-NEXT: beqz a1, .LBB97_7
+; RV32ZVE32F-NEXT: .LBB97_16: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa6, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, -128
-; RV32ZVE32F-NEXT: bnez a1, .LBB87_8
-; RV32ZVE32F-NEXT: j .LBB87_9
+; RV32ZVE32F-NEXT: bnez a1, .LBB97_8
+; RV32ZVE32F-NEXT: j .LBB97_9
;
; RV64ZVE32F-LABEL: mgather_baseidx_v8i8_v8f64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
-; RV64ZVE32F-NEXT: beqz a3, .LBB87_2
+; RV64ZVE32F-NEXT: beqz a3, .LBB97_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa0, 0(a3)
-; RV64ZVE32F-NEXT: .LBB87_2: # %else
+; RV64ZVE32F-NEXT: .LBB97_2: # %else
; RV64ZVE32F-NEXT: andi a3, a2, 2
-; RV64ZVE32F-NEXT: beqz a3, .LBB87_4
+; RV64ZVE32F-NEXT: beqz a3, .LBB97_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
@@ -9823,47 +14967,47 @@ define <8 x double> @mgather_baseidx_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa1, 0(a3)
-; RV64ZVE32F-NEXT: .LBB87_4: # %else2
+; RV64ZVE32F-NEXT: .LBB97_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a3, a2, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a3, .LBB87_14
+; RV64ZVE32F-NEXT: bnez a3, .LBB97_14
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a3, a2, 8
-; RV64ZVE32F-NEXT: bnez a3, .LBB87_15
-; RV64ZVE32F-NEXT: .LBB87_6: # %else8
+; RV64ZVE32F-NEXT: bnez a3, .LBB97_15
+; RV64ZVE32F-NEXT: .LBB97_6: # %else8
; RV64ZVE32F-NEXT: andi a3, a2, 16
-; RV64ZVE32F-NEXT: bnez a3, .LBB87_16
-; RV64ZVE32F-NEXT: .LBB87_7: # %else11
+; RV64ZVE32F-NEXT: bnez a3, .LBB97_16
+; RV64ZVE32F-NEXT: .LBB97_7: # %else11
; RV64ZVE32F-NEXT: andi a3, a2, 32
-; RV64ZVE32F-NEXT: beqz a3, .LBB87_9
-; RV64ZVE32F-NEXT: .LBB87_8: # %cond.load13
+; RV64ZVE32F-NEXT: beqz a3, .LBB97_9
+; RV64ZVE32F-NEXT: .LBB97_8: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa5, 0(a3)
-; RV64ZVE32F-NEXT: .LBB87_9: # %else14
+; RV64ZVE32F-NEXT: .LBB97_9: # %else14
; RV64ZVE32F-NEXT: andi a3, a2, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
-; RV64ZVE32F-NEXT: beqz a3, .LBB87_11
+; RV64ZVE32F-NEXT: beqz a3, .LBB97_11
; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa6, 0(a3)
-; RV64ZVE32F-NEXT: .LBB87_11: # %else17
+; RV64ZVE32F-NEXT: .LBB97_11: # %else17
; RV64ZVE32F-NEXT: andi a2, a2, -128
-; RV64ZVE32F-NEXT: beqz a2, .LBB87_13
+; RV64ZVE32F-NEXT: beqz a2, .LBB97_13
; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: fld fa7, 0(a1)
-; RV64ZVE32F-NEXT: .LBB87_13: # %else20
+; RV64ZVE32F-NEXT: .LBB97_13: # %else20
; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
@@ -9873,29 +15017,29 @@ define <8 x double> @mgather_baseidx_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x
; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB87_14: # %cond.load4
+; RV64ZVE32F-NEXT: .LBB97_14: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa2, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 8
-; RV64ZVE32F-NEXT: beqz a3, .LBB87_6
-; RV64ZVE32F-NEXT: .LBB87_15: # %cond.load7
+; RV64ZVE32F-NEXT: beqz a3, .LBB97_6
+; RV64ZVE32F-NEXT: .LBB97_15: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa3, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 16
-; RV64ZVE32F-NEXT: beqz a3, .LBB87_7
-; RV64ZVE32F-NEXT: .LBB87_16: # %cond.load10
+; RV64ZVE32F-NEXT: beqz a3, .LBB97_7
+; RV64ZVE32F-NEXT: .LBB97_16: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa4, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 32
-; RV64ZVE32F-NEXT: bnez a3, .LBB87_8
-; RV64ZVE32F-NEXT: j .LBB87_9
+; RV64ZVE32F-NEXT: bnez a3, .LBB97_8
+; RV64ZVE32F-NEXT: j .LBB97_9
%ptrs = getelementptr inbounds double, ptr %base, <8 x i8> %idxs
%v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
ret <8 x double> %v
@@ -9931,34 +15075,34 @@ define <8 x double> @mgather_baseidx_sext_v8i8_v8f64(ptr %base, <8 x i8> %idxs,
; RV32ZVE32F-NEXT: andi a3, a2, 1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
-; RV32ZVE32F-NEXT: bnez a3, .LBB88_10
+; RV32ZVE32F-NEXT: bnez a3, .LBB98_10
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a1, a2, 2
-; RV32ZVE32F-NEXT: bnez a1, .LBB88_11
-; RV32ZVE32F-NEXT: .LBB88_2: # %else2
+; RV32ZVE32F-NEXT: bnez a1, .LBB98_11
+; RV32ZVE32F-NEXT: .LBB98_2: # %else2
; RV32ZVE32F-NEXT: andi a1, a2, 4
-; RV32ZVE32F-NEXT: bnez a1, .LBB88_12
-; RV32ZVE32F-NEXT: .LBB88_3: # %else5
+; RV32ZVE32F-NEXT: bnez a1, .LBB98_12
+; RV32ZVE32F-NEXT: .LBB98_3: # %else5
; RV32ZVE32F-NEXT: andi a1, a2, 8
-; RV32ZVE32F-NEXT: bnez a1, .LBB88_13
-; RV32ZVE32F-NEXT: .LBB88_4: # %else8
+; RV32ZVE32F-NEXT: bnez a1, .LBB98_13
+; RV32ZVE32F-NEXT: .LBB98_4: # %else8
; RV32ZVE32F-NEXT: andi a1, a2, 16
-; RV32ZVE32F-NEXT: bnez a1, .LBB88_14
-; RV32ZVE32F-NEXT: .LBB88_5: # %else11
+; RV32ZVE32F-NEXT: bnez a1, .LBB98_14
+; RV32ZVE32F-NEXT: .LBB98_5: # %else11
; RV32ZVE32F-NEXT: andi a1, a2, 32
-; RV32ZVE32F-NEXT: bnez a1, .LBB88_15
-; RV32ZVE32F-NEXT: .LBB88_6: # %else14
+; RV32ZVE32F-NEXT: bnez a1, .LBB98_15
+; RV32ZVE32F-NEXT: .LBB98_6: # %else14
; RV32ZVE32F-NEXT: andi a1, a2, 64
-; RV32ZVE32F-NEXT: bnez a1, .LBB88_16
-; RV32ZVE32F-NEXT: .LBB88_7: # %else17
+; RV32ZVE32F-NEXT: bnez a1, .LBB98_16
+; RV32ZVE32F-NEXT: .LBB98_7: # %else17
; RV32ZVE32F-NEXT: andi a1, a2, -128
-; RV32ZVE32F-NEXT: beqz a1, .LBB88_9
-; RV32ZVE32F-NEXT: .LBB88_8: # %cond.load19
+; RV32ZVE32F-NEXT: beqz a1, .LBB98_9
+; RV32ZVE32F-NEXT: .LBB98_8: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa7, 0(a1)
-; RV32ZVE32F-NEXT: .LBB88_9: # %else20
+; RV32ZVE32F-NEXT: .LBB98_9: # %else20
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
@@ -9968,69 +15112,69 @@ define <8 x double> @mgather_baseidx_sext_v8i8_v8f64(ptr %base, <8 x i8> %idxs,
; RV32ZVE32F-NEXT: fsd fa6, 48(a0)
; RV32ZVE32F-NEXT: fsd fa7, 56(a0)
; RV32ZVE32F-NEXT: ret
-; RV32ZVE32F-NEXT: .LBB88_10: # %cond.load
+; RV32ZVE32F-NEXT: .LBB98_10: # %cond.load
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, 2
-; RV32ZVE32F-NEXT: beqz a1, .LBB88_2
-; RV32ZVE32F-NEXT: .LBB88_11: # %cond.load1
+; RV32ZVE32F-NEXT: beqz a1, .LBB98_2
+; RV32ZVE32F-NEXT: .LBB98_11: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa1, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, 4
-; RV32ZVE32F-NEXT: beqz a1, .LBB88_3
-; RV32ZVE32F-NEXT: .LBB88_12: # %cond.load4
+; RV32ZVE32F-NEXT: beqz a1, .LBB98_3
+; RV32ZVE32F-NEXT: .LBB98_12: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa2, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, 8
-; RV32ZVE32F-NEXT: beqz a1, .LBB88_4
-; RV32ZVE32F-NEXT: .LBB88_13: # %cond.load7
+; RV32ZVE32F-NEXT: beqz a1, .LBB98_4
+; RV32ZVE32F-NEXT: .LBB98_13: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa3, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, 16
-; RV32ZVE32F-NEXT: beqz a1, .LBB88_5
-; RV32ZVE32F-NEXT: .LBB88_14: # %cond.load10
+; RV32ZVE32F-NEXT: beqz a1, .LBB98_5
+; RV32ZVE32F-NEXT: .LBB98_14: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa4, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, 32
-; RV32ZVE32F-NEXT: beqz a1, .LBB88_6
-; RV32ZVE32F-NEXT: .LBB88_15: # %cond.load13
+; RV32ZVE32F-NEXT: beqz a1, .LBB98_6
+; RV32ZVE32F-NEXT: .LBB98_15: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa5, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, 64
-; RV32ZVE32F-NEXT: beqz a1, .LBB88_7
-; RV32ZVE32F-NEXT: .LBB88_16: # %cond.load16
+; RV32ZVE32F-NEXT: beqz a1, .LBB98_7
+; RV32ZVE32F-NEXT: .LBB98_16: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa6, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, -128
-; RV32ZVE32F-NEXT: bnez a1, .LBB88_8
-; RV32ZVE32F-NEXT: j .LBB88_9
+; RV32ZVE32F-NEXT: bnez a1, .LBB98_8
+; RV32ZVE32F-NEXT: j .LBB98_9
;
; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i8_v8f64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
-; RV64ZVE32F-NEXT: beqz a3, .LBB88_2
+; RV64ZVE32F-NEXT: beqz a3, .LBB98_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa0, 0(a3)
-; RV64ZVE32F-NEXT: .LBB88_2: # %else
+; RV64ZVE32F-NEXT: .LBB98_2: # %else
; RV64ZVE32F-NEXT: andi a3, a2, 2
-; RV64ZVE32F-NEXT: beqz a3, .LBB88_4
+; RV64ZVE32F-NEXT: beqz a3, .LBB98_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
@@ -10038,47 +15182,47 @@ define <8 x double> @mgather_baseidx_sext_v8i8_v8f64(ptr %base, <8 x i8> %idxs,
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa1, 0(a3)
-; RV64ZVE32F-NEXT: .LBB88_4: # %else2
+; RV64ZVE32F-NEXT: .LBB98_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a3, a2, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a3, .LBB88_14
+; RV64ZVE32F-NEXT: bnez a3, .LBB98_14
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a3, a2, 8
-; RV64ZVE32F-NEXT: bnez a3, .LBB88_15
-; RV64ZVE32F-NEXT: .LBB88_6: # %else8
+; RV64ZVE32F-NEXT: bnez a3, .LBB98_15
+; RV64ZVE32F-NEXT: .LBB98_6: # %else8
; RV64ZVE32F-NEXT: andi a3, a2, 16
-; RV64ZVE32F-NEXT: bnez a3, .LBB88_16
-; RV64ZVE32F-NEXT: .LBB88_7: # %else11
+; RV64ZVE32F-NEXT: bnez a3, .LBB98_16
+; RV64ZVE32F-NEXT: .LBB98_7: # %else11
; RV64ZVE32F-NEXT: andi a3, a2, 32
-; RV64ZVE32F-NEXT: beqz a3, .LBB88_9
-; RV64ZVE32F-NEXT: .LBB88_8: # %cond.load13
+; RV64ZVE32F-NEXT: beqz a3, .LBB98_9
+; RV64ZVE32F-NEXT: .LBB98_8: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa5, 0(a3)
-; RV64ZVE32F-NEXT: .LBB88_9: # %else14
+; RV64ZVE32F-NEXT: .LBB98_9: # %else14
; RV64ZVE32F-NEXT: andi a3, a2, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
-; RV64ZVE32F-NEXT: beqz a3, .LBB88_11
+; RV64ZVE32F-NEXT: beqz a3, .LBB98_11
; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa6, 0(a3)
-; RV64ZVE32F-NEXT: .LBB88_11: # %else17
+; RV64ZVE32F-NEXT: .LBB98_11: # %else17
; RV64ZVE32F-NEXT: andi a2, a2, -128
-; RV64ZVE32F-NEXT: beqz a2, .LBB88_13
+; RV64ZVE32F-NEXT: beqz a2, .LBB98_13
; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: fld fa7, 0(a1)
-; RV64ZVE32F-NEXT: .LBB88_13: # %else20
+; RV64ZVE32F-NEXT: .LBB98_13: # %else20
; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
@@ -10088,29 +15232,29 @@ define <8 x double> @mgather_baseidx_sext_v8i8_v8f64(ptr %base, <8 x i8> %idxs,
; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB88_14: # %cond.load4
+; RV64ZVE32F-NEXT: .LBB98_14: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa2, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 8
-; RV64ZVE32F-NEXT: beqz a3, .LBB88_6
-; RV64ZVE32F-NEXT: .LBB88_15: # %cond.load7
+; RV64ZVE32F-NEXT: beqz a3, .LBB98_6
+; RV64ZVE32F-NEXT: .LBB98_15: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa3, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 16
-; RV64ZVE32F-NEXT: beqz a3, .LBB88_7
-; RV64ZVE32F-NEXT: .LBB88_16: # %cond.load10
+; RV64ZVE32F-NEXT: beqz a3, .LBB98_7
+; RV64ZVE32F-NEXT: .LBB98_16: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa4, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 32
-; RV64ZVE32F-NEXT: bnez a3, .LBB88_8
-; RV64ZVE32F-NEXT: j .LBB88_9
+; RV64ZVE32F-NEXT: bnez a3, .LBB98_8
+; RV64ZVE32F-NEXT: j .LBB98_9
%eidxs = sext <8 x i8> %idxs to <8 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
%v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
@@ -10148,34 +15292,34 @@ define <8 x double> @mgather_baseidx_zext_v8i8_v8f64(ptr %base, <8 x i8> %idxs,
; RV32ZVE32F-NEXT: andi a3, a2, 1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
-; RV32ZVE32F-NEXT: bnez a3, .LBB89_10
+; RV32ZVE32F-NEXT: bnez a3, .LBB99_10
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a1, a2, 2
-; RV32ZVE32F-NEXT: bnez a1, .LBB89_11
-; RV32ZVE32F-NEXT: .LBB89_2: # %else2
+; RV32ZVE32F-NEXT: bnez a1, .LBB99_11
+; RV32ZVE32F-NEXT: .LBB99_2: # %else2
; RV32ZVE32F-NEXT: andi a1, a2, 4
-; RV32ZVE32F-NEXT: bnez a1, .LBB89_12
-; RV32ZVE32F-NEXT: .LBB89_3: # %else5
+; RV32ZVE32F-NEXT: bnez a1, .LBB99_12
+; RV32ZVE32F-NEXT: .LBB99_3: # %else5
; RV32ZVE32F-NEXT: andi a1, a2, 8
-; RV32ZVE32F-NEXT: bnez a1, .LBB89_13
-; RV32ZVE32F-NEXT: .LBB89_4: # %else8
+; RV32ZVE32F-NEXT: bnez a1, .LBB99_13
+; RV32ZVE32F-NEXT: .LBB99_4: # %else8
; RV32ZVE32F-NEXT: andi a1, a2, 16
-; RV32ZVE32F-NEXT: bnez a1, .LBB89_14
-; RV32ZVE32F-NEXT: .LBB89_5: # %else11
+; RV32ZVE32F-NEXT: bnez a1, .LBB99_14
+; RV32ZVE32F-NEXT: .LBB99_5: # %else11
; RV32ZVE32F-NEXT: andi a1, a2, 32
-; RV32ZVE32F-NEXT: bnez a1, .LBB89_15
-; RV32ZVE32F-NEXT: .LBB89_6: # %else14
+; RV32ZVE32F-NEXT: bnez a1, .LBB99_15
+; RV32ZVE32F-NEXT: .LBB99_6: # %else14
; RV32ZVE32F-NEXT: andi a1, a2, 64
-; RV32ZVE32F-NEXT: bnez a1, .LBB89_16
-; RV32ZVE32F-NEXT: .LBB89_7: # %else17
+; RV32ZVE32F-NEXT: bnez a1, .LBB99_16
+; RV32ZVE32F-NEXT: .LBB99_7: # %else17
; RV32ZVE32F-NEXT: andi a1, a2, -128
-; RV32ZVE32F-NEXT: beqz a1, .LBB89_9
-; RV32ZVE32F-NEXT: .LBB89_8: # %cond.load19
+; RV32ZVE32F-NEXT: beqz a1, .LBB99_9
+; RV32ZVE32F-NEXT: .LBB99_8: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa7, 0(a1)
-; RV32ZVE32F-NEXT: .LBB89_9: # %else20
+; RV32ZVE32F-NEXT: .LBB99_9: # %else20
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
@@ -10185,70 +15329,70 @@ define <8 x double> @mgather_baseidx_zext_v8i8_v8f64(ptr %base, <8 x i8> %idxs,
; RV32ZVE32F-NEXT: fsd fa6, 48(a0)
; RV32ZVE32F-NEXT: fsd fa7, 56(a0)
; RV32ZVE32F-NEXT: ret
-; RV32ZVE32F-NEXT: .LBB89_10: # %cond.load
+; RV32ZVE32F-NEXT: .LBB99_10: # %cond.load
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, 2
-; RV32ZVE32F-NEXT: beqz a1, .LBB89_2
-; RV32ZVE32F-NEXT: .LBB89_11: # %cond.load1
+; RV32ZVE32F-NEXT: beqz a1, .LBB99_2
+; RV32ZVE32F-NEXT: .LBB99_11: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa1, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, 4
-; RV32ZVE32F-NEXT: beqz a1, .LBB89_3
-; RV32ZVE32F-NEXT: .LBB89_12: # %cond.load4
+; RV32ZVE32F-NEXT: beqz a1, .LBB99_3
+; RV32ZVE32F-NEXT: .LBB99_12: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa2, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, 8
-; RV32ZVE32F-NEXT: beqz a1, .LBB89_4
-; RV32ZVE32F-NEXT: .LBB89_13: # %cond.load7
+; RV32ZVE32F-NEXT: beqz a1, .LBB99_4
+; RV32ZVE32F-NEXT: .LBB99_13: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa3, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, 16
-; RV32ZVE32F-NEXT: beqz a1, .LBB89_5
-; RV32ZVE32F-NEXT: .LBB89_14: # %cond.load10
+; RV32ZVE32F-NEXT: beqz a1, .LBB99_5
+; RV32ZVE32F-NEXT: .LBB99_14: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa4, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, 32
-; RV32ZVE32F-NEXT: beqz a1, .LBB89_6
-; RV32ZVE32F-NEXT: .LBB89_15: # %cond.load13
+; RV32ZVE32F-NEXT: beqz a1, .LBB99_6
+; RV32ZVE32F-NEXT: .LBB99_15: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa5, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, 64
-; RV32ZVE32F-NEXT: beqz a1, .LBB89_7
-; RV32ZVE32F-NEXT: .LBB89_16: # %cond.load16
+; RV32ZVE32F-NEXT: beqz a1, .LBB99_7
+; RV32ZVE32F-NEXT: .LBB99_16: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa6, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, -128
-; RV32ZVE32F-NEXT: bnez a1, .LBB89_8
-; RV32ZVE32F-NEXT: j .LBB89_9
+; RV32ZVE32F-NEXT: bnez a1, .LBB99_8
+; RV32ZVE32F-NEXT: j .LBB99_9
;
; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i8_v8f64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
-; RV64ZVE32F-NEXT: beqz a3, .LBB89_2
+; RV64ZVE32F-NEXT: beqz a3, .LBB99_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: andi a3, a3, 255
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa0, 0(a3)
-; RV64ZVE32F-NEXT: .LBB89_2: # %else
+; RV64ZVE32F-NEXT: .LBB99_2: # %else
; RV64ZVE32F-NEXT: andi a3, a2, 2
-; RV64ZVE32F-NEXT: beqz a3, .LBB89_4
+; RV64ZVE32F-NEXT: beqz a3, .LBB99_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
@@ -10257,42 +15401,42 @@ define <8 x double> @mgather_baseidx_zext_v8i8_v8f64(ptr %base, <8 x i8> %idxs,
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa1, 0(a3)
-; RV64ZVE32F-NEXT: .LBB89_4: # %else2
+; RV64ZVE32F-NEXT: .LBB99_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a3, a2, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a3, .LBB89_14
+; RV64ZVE32F-NEXT: bnez a3, .LBB99_14
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a3, a2, 8
-; RV64ZVE32F-NEXT: bnez a3, .LBB89_15
-; RV64ZVE32F-NEXT: .LBB89_6: # %else8
+; RV64ZVE32F-NEXT: bnez a3, .LBB99_15
+; RV64ZVE32F-NEXT: .LBB99_6: # %else8
; RV64ZVE32F-NEXT: andi a3, a2, 16
-; RV64ZVE32F-NEXT: bnez a3, .LBB89_16
-; RV64ZVE32F-NEXT: .LBB89_7: # %else11
+; RV64ZVE32F-NEXT: bnez a3, .LBB99_16
+; RV64ZVE32F-NEXT: .LBB99_7: # %else11
; RV64ZVE32F-NEXT: andi a3, a2, 32
-; RV64ZVE32F-NEXT: beqz a3, .LBB89_9
-; RV64ZVE32F-NEXT: .LBB89_8: # %cond.load13
+; RV64ZVE32F-NEXT: beqz a3, .LBB99_9
+; RV64ZVE32F-NEXT: .LBB99_8: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: andi a3, a3, 255
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa5, 0(a3)
-; RV64ZVE32F-NEXT: .LBB89_9: # %else14
+; RV64ZVE32F-NEXT: .LBB99_9: # %else14
; RV64ZVE32F-NEXT: andi a3, a2, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
-; RV64ZVE32F-NEXT: beqz a3, .LBB89_11
+; RV64ZVE32F-NEXT: beqz a3, .LBB99_11
; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: andi a3, a3, 255
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa6, 0(a3)
-; RV64ZVE32F-NEXT: .LBB89_11: # %else17
+; RV64ZVE32F-NEXT: .LBB99_11: # %else17
; RV64ZVE32F-NEXT: andi a2, a2, -128
-; RV64ZVE32F-NEXT: beqz a2, .LBB89_13
+; RV64ZVE32F-NEXT: beqz a2, .LBB99_13
; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -10300,7 +15444,7 @@ define <8 x double> @mgather_baseidx_zext_v8i8_v8f64(ptr %base, <8 x i8> %idxs,
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: fld fa7, 0(a1)
-; RV64ZVE32F-NEXT: .LBB89_13: # %else20
+; RV64ZVE32F-NEXT: .LBB99_13: # %else20
; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
@@ -10310,15 +15454,15 @@ define <8 x double> @mgather_baseidx_zext_v8i8_v8f64(ptr %base, <8 x i8> %idxs,
; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB89_14: # %cond.load4
+; RV64ZVE32F-NEXT: .LBB99_14: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: andi a3, a3, 255
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa2, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 8
-; RV64ZVE32F-NEXT: beqz a3, .LBB89_6
-; RV64ZVE32F-NEXT: .LBB89_15: # %cond.load7
+; RV64ZVE32F-NEXT: beqz a3, .LBB99_6
+; RV64ZVE32F-NEXT: .LBB99_15: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: andi a3, a3, 255
@@ -10326,16 +15470,16 @@ define <8 x double> @mgather_baseidx_zext_v8i8_v8f64(ptr %base, <8 x i8> %idxs,
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa3, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 16
-; RV64ZVE32F-NEXT: beqz a3, .LBB89_7
-; RV64ZVE32F-NEXT: .LBB89_16: # %cond.load10
+; RV64ZVE32F-NEXT: beqz a3, .LBB99_7
+; RV64ZVE32F-NEXT: .LBB99_16: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: andi a3, a3, 255
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa4, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 32
-; RV64ZVE32F-NEXT: bnez a3, .LBB89_8
-; RV64ZVE32F-NEXT: j .LBB89_9
+; RV64ZVE32F-NEXT: bnez a3, .LBB99_8
+; RV64ZVE32F-NEXT: j .LBB99_9
%eidxs = zext <8 x i8> %idxs to <8 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
%v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
@@ -10372,34 +15516,34 @@ define <8 x double> @mgather_baseidx_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8
; RV32ZVE32F-NEXT: andi a3, a2, 1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
-; RV32ZVE32F-NEXT: bnez a3, .LBB90_10
+; RV32ZVE32F-NEXT: bnez a3, .LBB100_10
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a1, a2, 2
-; RV32ZVE32F-NEXT: bnez a1, .LBB90_11
-; RV32ZVE32F-NEXT: .LBB90_2: # %else2
+; RV32ZVE32F-NEXT: bnez a1, .LBB100_11
+; RV32ZVE32F-NEXT: .LBB100_2: # %else2
; RV32ZVE32F-NEXT: andi a1, a2, 4
-; RV32ZVE32F-NEXT: bnez a1, .LBB90_12
-; RV32ZVE32F-NEXT: .LBB90_3: # %else5
+; RV32ZVE32F-NEXT: bnez a1, .LBB100_12
+; RV32ZVE32F-NEXT: .LBB100_3: # %else5
; RV32ZVE32F-NEXT: andi a1, a2, 8
-; RV32ZVE32F-NEXT: bnez a1, .LBB90_13
-; RV32ZVE32F-NEXT: .LBB90_4: # %else8
+; RV32ZVE32F-NEXT: bnez a1, .LBB100_13
+; RV32ZVE32F-NEXT: .LBB100_4: # %else8
; RV32ZVE32F-NEXT: andi a1, a2, 16
-; RV32ZVE32F-NEXT: bnez a1, .LBB90_14
-; RV32ZVE32F-NEXT: .LBB90_5: # %else11
+; RV32ZVE32F-NEXT: bnez a1, .LBB100_14
+; RV32ZVE32F-NEXT: .LBB100_5: # %else11
; RV32ZVE32F-NEXT: andi a1, a2, 32
-; RV32ZVE32F-NEXT: bnez a1, .LBB90_15
-; RV32ZVE32F-NEXT: .LBB90_6: # %else14
+; RV32ZVE32F-NEXT: bnez a1, .LBB100_15
+; RV32ZVE32F-NEXT: .LBB100_6: # %else14
; RV32ZVE32F-NEXT: andi a1, a2, 64
-; RV32ZVE32F-NEXT: bnez a1, .LBB90_16
-; RV32ZVE32F-NEXT: .LBB90_7: # %else17
+; RV32ZVE32F-NEXT: bnez a1, .LBB100_16
+; RV32ZVE32F-NEXT: .LBB100_7: # %else17
; RV32ZVE32F-NEXT: andi a1, a2, -128
-; RV32ZVE32F-NEXT: beqz a1, .LBB90_9
-; RV32ZVE32F-NEXT: .LBB90_8: # %cond.load19
+; RV32ZVE32F-NEXT: beqz a1, .LBB100_9
+; RV32ZVE32F-NEXT: .LBB100_8: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa7, 0(a1)
-; RV32ZVE32F-NEXT: .LBB90_9: # %else20
+; RV32ZVE32F-NEXT: .LBB100_9: # %else20
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
@@ -10409,70 +15553,70 @@ define <8 x double> @mgather_baseidx_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8
; RV32ZVE32F-NEXT: fsd fa6, 48(a0)
; RV32ZVE32F-NEXT: fsd fa7, 56(a0)
; RV32ZVE32F-NEXT: ret
-; RV32ZVE32F-NEXT: .LBB90_10: # %cond.load
+; RV32ZVE32F-NEXT: .LBB100_10: # %cond.load
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, 2
-; RV32ZVE32F-NEXT: beqz a1, .LBB90_2
-; RV32ZVE32F-NEXT: .LBB90_11: # %cond.load1
+; RV32ZVE32F-NEXT: beqz a1, .LBB100_2
+; RV32ZVE32F-NEXT: .LBB100_11: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa1, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, 4
-; RV32ZVE32F-NEXT: beqz a1, .LBB90_3
-; RV32ZVE32F-NEXT: .LBB90_12: # %cond.load4
+; RV32ZVE32F-NEXT: beqz a1, .LBB100_3
+; RV32ZVE32F-NEXT: .LBB100_12: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa2, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, 8
-; RV32ZVE32F-NEXT: beqz a1, .LBB90_4
-; RV32ZVE32F-NEXT: .LBB90_13: # %cond.load7
+; RV32ZVE32F-NEXT: beqz a1, .LBB100_4
+; RV32ZVE32F-NEXT: .LBB100_13: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa3, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, 16
-; RV32ZVE32F-NEXT: beqz a1, .LBB90_5
-; RV32ZVE32F-NEXT: .LBB90_14: # %cond.load10
+; RV32ZVE32F-NEXT: beqz a1, .LBB100_5
+; RV32ZVE32F-NEXT: .LBB100_14: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa4, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, 32
-; RV32ZVE32F-NEXT: beqz a1, .LBB90_6
-; RV32ZVE32F-NEXT: .LBB90_15: # %cond.load13
+; RV32ZVE32F-NEXT: beqz a1, .LBB100_6
+; RV32ZVE32F-NEXT: .LBB100_15: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa5, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, 64
-; RV32ZVE32F-NEXT: beqz a1, .LBB90_7
-; RV32ZVE32F-NEXT: .LBB90_16: # %cond.load16
+; RV32ZVE32F-NEXT: beqz a1, .LBB100_7
+; RV32ZVE32F-NEXT: .LBB100_16: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa6, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, -128
-; RV32ZVE32F-NEXT: bnez a1, .LBB90_8
-; RV32ZVE32F-NEXT: j .LBB90_9
+; RV32ZVE32F-NEXT: bnez a1, .LBB100_8
+; RV32ZVE32F-NEXT: j .LBB100_9
;
; RV64ZVE32F-LABEL: mgather_baseidx_v8i16_v8f64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
-; RV64ZVE32F-NEXT: beqz a3, .LBB90_2
+; RV64ZVE32F-NEXT: beqz a3, .LBB100_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa0, 0(a3)
-; RV64ZVE32F-NEXT: .LBB90_2: # %else
+; RV64ZVE32F-NEXT: .LBB100_2: # %else
; RV64ZVE32F-NEXT: andi a3, a2, 2
-; RV64ZVE32F-NEXT: beqz a3, .LBB90_4
+; RV64ZVE32F-NEXT: beqz a3, .LBB100_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
@@ -10480,47 +15624,47 @@ define <8 x double> @mgather_baseidx_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa1, 0(a3)
-; RV64ZVE32F-NEXT: .LBB90_4: # %else2
+; RV64ZVE32F-NEXT: .LBB100_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a3, a2, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a3, .LBB90_14
+; RV64ZVE32F-NEXT: bnez a3, .LBB100_14
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a3, a2, 8
-; RV64ZVE32F-NEXT: bnez a3, .LBB90_15
-; RV64ZVE32F-NEXT: .LBB90_6: # %else8
+; RV64ZVE32F-NEXT: bnez a3, .LBB100_15
+; RV64ZVE32F-NEXT: .LBB100_6: # %else8
; RV64ZVE32F-NEXT: andi a3, a2, 16
-; RV64ZVE32F-NEXT: bnez a3, .LBB90_16
-; RV64ZVE32F-NEXT: .LBB90_7: # %else11
+; RV64ZVE32F-NEXT: bnez a3, .LBB100_16
+; RV64ZVE32F-NEXT: .LBB100_7: # %else11
; RV64ZVE32F-NEXT: andi a3, a2, 32
-; RV64ZVE32F-NEXT: beqz a3, .LBB90_9
-; RV64ZVE32F-NEXT: .LBB90_8: # %cond.load13
+; RV64ZVE32F-NEXT: beqz a3, .LBB100_9
+; RV64ZVE32F-NEXT: .LBB100_8: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa5, 0(a3)
-; RV64ZVE32F-NEXT: .LBB90_9: # %else14
+; RV64ZVE32F-NEXT: .LBB100_9: # %else14
; RV64ZVE32F-NEXT: andi a3, a2, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
-; RV64ZVE32F-NEXT: beqz a3, .LBB90_11
+; RV64ZVE32F-NEXT: beqz a3, .LBB100_11
; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa6, 0(a3)
-; RV64ZVE32F-NEXT: .LBB90_11: # %else17
+; RV64ZVE32F-NEXT: .LBB100_11: # %else17
; RV64ZVE32F-NEXT: andi a2, a2, -128
-; RV64ZVE32F-NEXT: beqz a2, .LBB90_13
+; RV64ZVE32F-NEXT: beqz a2, .LBB100_13
; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: fld fa7, 0(a1)
-; RV64ZVE32F-NEXT: .LBB90_13: # %else20
+; RV64ZVE32F-NEXT: .LBB100_13: # %else20
; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
@@ -10530,29 +15674,29 @@ define <8 x double> @mgather_baseidx_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8
; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB90_14: # %cond.load4
+; RV64ZVE32F-NEXT: .LBB100_14: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa2, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 8
-; RV64ZVE32F-NEXT: beqz a3, .LBB90_6
-; RV64ZVE32F-NEXT: .LBB90_15: # %cond.load7
+; RV64ZVE32F-NEXT: beqz a3, .LBB100_6
+; RV64ZVE32F-NEXT: .LBB100_15: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa3, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 16
-; RV64ZVE32F-NEXT: beqz a3, .LBB90_7
-; RV64ZVE32F-NEXT: .LBB90_16: # %cond.load10
+; RV64ZVE32F-NEXT: beqz a3, .LBB100_7
+; RV64ZVE32F-NEXT: .LBB100_16: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa4, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 32
-; RV64ZVE32F-NEXT: bnez a3, .LBB90_8
-; RV64ZVE32F-NEXT: j .LBB90_9
+; RV64ZVE32F-NEXT: bnez a3, .LBB100_8
+; RV64ZVE32F-NEXT: j .LBB100_9
%ptrs = getelementptr inbounds double, ptr %base, <8 x i16> %idxs
%v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
ret <8 x double> %v
@@ -10588,34 +15732,34 @@ define <8 x double> @mgather_baseidx_sext_v8i16_v8f64(ptr %base, <8 x i16> %idxs
; RV32ZVE32F-NEXT: andi a3, a2, 1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
-; RV32ZVE32F-NEXT: bnez a3, .LBB91_10
+; RV32ZVE32F-NEXT: bnez a3, .LBB101_10
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a1, a2, 2
-; RV32ZVE32F-NEXT: bnez a1, .LBB91_11
-; RV32ZVE32F-NEXT: .LBB91_2: # %else2
+; RV32ZVE32F-NEXT: bnez a1, .LBB101_11
+; RV32ZVE32F-NEXT: .LBB101_2: # %else2
; RV32ZVE32F-NEXT: andi a1, a2, 4
-; RV32ZVE32F-NEXT: bnez a1, .LBB91_12
-; RV32ZVE32F-NEXT: .LBB91_3: # %else5
+; RV32ZVE32F-NEXT: bnez a1, .LBB101_12
+; RV32ZVE32F-NEXT: .LBB101_3: # %else5
; RV32ZVE32F-NEXT: andi a1, a2, 8
-; RV32ZVE32F-NEXT: bnez a1, .LBB91_13
-; RV32ZVE32F-NEXT: .LBB91_4: # %else8
+; RV32ZVE32F-NEXT: bnez a1, .LBB101_13
+; RV32ZVE32F-NEXT: .LBB101_4: # %else8
; RV32ZVE32F-NEXT: andi a1, a2, 16
-; RV32ZVE32F-NEXT: bnez a1, .LBB91_14
-; RV32ZVE32F-NEXT: .LBB91_5: # %else11
+; RV32ZVE32F-NEXT: bnez a1, .LBB101_14
+; RV32ZVE32F-NEXT: .LBB101_5: # %else11
; RV32ZVE32F-NEXT: andi a1, a2, 32
-; RV32ZVE32F-NEXT: bnez a1, .LBB91_15
-; RV32ZVE32F-NEXT: .LBB91_6: # %else14
+; RV32ZVE32F-NEXT: bnez a1, .LBB101_15
+; RV32ZVE32F-NEXT: .LBB101_6: # %else14
; RV32ZVE32F-NEXT: andi a1, a2, 64
-; RV32ZVE32F-NEXT: bnez a1, .LBB91_16
-; RV32ZVE32F-NEXT: .LBB91_7: # %else17
+; RV32ZVE32F-NEXT: bnez a1, .LBB101_16
+; RV32ZVE32F-NEXT: .LBB101_7: # %else17
; RV32ZVE32F-NEXT: andi a1, a2, -128
-; RV32ZVE32F-NEXT: beqz a1, .LBB91_9
-; RV32ZVE32F-NEXT: .LBB91_8: # %cond.load19
+; RV32ZVE32F-NEXT: beqz a1, .LBB101_9
+; RV32ZVE32F-NEXT: .LBB101_8: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa7, 0(a1)
-; RV32ZVE32F-NEXT: .LBB91_9: # %else20
+; RV32ZVE32F-NEXT: .LBB101_9: # %else20
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
@@ -10625,70 +15769,70 @@ define <8 x double> @mgather_baseidx_sext_v8i16_v8f64(ptr %base, <8 x i16> %idxs
; RV32ZVE32F-NEXT: fsd fa6, 48(a0)
; RV32ZVE32F-NEXT: fsd fa7, 56(a0)
; RV32ZVE32F-NEXT: ret
-; RV32ZVE32F-NEXT: .LBB91_10: # %cond.load
+; RV32ZVE32F-NEXT: .LBB101_10: # %cond.load
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, 2
-; RV32ZVE32F-NEXT: beqz a1, .LBB91_2
-; RV32ZVE32F-NEXT: .LBB91_11: # %cond.load1
+; RV32ZVE32F-NEXT: beqz a1, .LBB101_2
+; RV32ZVE32F-NEXT: .LBB101_11: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa1, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, 4
-; RV32ZVE32F-NEXT: beqz a1, .LBB91_3
-; RV32ZVE32F-NEXT: .LBB91_12: # %cond.load4
+; RV32ZVE32F-NEXT: beqz a1, .LBB101_3
+; RV32ZVE32F-NEXT: .LBB101_12: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa2, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, 8
-; RV32ZVE32F-NEXT: beqz a1, .LBB91_4
-; RV32ZVE32F-NEXT: .LBB91_13: # %cond.load7
+; RV32ZVE32F-NEXT: beqz a1, .LBB101_4
+; RV32ZVE32F-NEXT: .LBB101_13: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa3, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, 16
-; RV32ZVE32F-NEXT: beqz a1, .LBB91_5
-; RV32ZVE32F-NEXT: .LBB91_14: # %cond.load10
+; RV32ZVE32F-NEXT: beqz a1, .LBB101_5
+; RV32ZVE32F-NEXT: .LBB101_14: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa4, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, 32
-; RV32ZVE32F-NEXT: beqz a1, .LBB91_6
-; RV32ZVE32F-NEXT: .LBB91_15: # %cond.load13
+; RV32ZVE32F-NEXT: beqz a1, .LBB101_6
+; RV32ZVE32F-NEXT: .LBB101_15: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa5, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, 64
-; RV32ZVE32F-NEXT: beqz a1, .LBB91_7
-; RV32ZVE32F-NEXT: .LBB91_16: # %cond.load16
+; RV32ZVE32F-NEXT: beqz a1, .LBB101_7
+; RV32ZVE32F-NEXT: .LBB101_16: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa6, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, -128
-; RV32ZVE32F-NEXT: bnez a1, .LBB91_8
-; RV32ZVE32F-NEXT: j .LBB91_9
+; RV32ZVE32F-NEXT: bnez a1, .LBB101_8
+; RV32ZVE32F-NEXT: j .LBB101_9
;
; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i16_v8f64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
-; RV64ZVE32F-NEXT: beqz a3, .LBB91_2
+; RV64ZVE32F-NEXT: beqz a3, .LBB101_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa0, 0(a3)
-; RV64ZVE32F-NEXT: .LBB91_2: # %else
+; RV64ZVE32F-NEXT: .LBB101_2: # %else
; RV64ZVE32F-NEXT: andi a3, a2, 2
-; RV64ZVE32F-NEXT: beqz a3, .LBB91_4
+; RV64ZVE32F-NEXT: beqz a3, .LBB101_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
@@ -10696,47 +15840,47 @@ define <8 x double> @mgather_baseidx_sext_v8i16_v8f64(ptr %base, <8 x i16> %idxs
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa1, 0(a3)
-; RV64ZVE32F-NEXT: .LBB91_4: # %else2
+; RV64ZVE32F-NEXT: .LBB101_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a3, a2, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a3, .LBB91_14
+; RV64ZVE32F-NEXT: bnez a3, .LBB101_14
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a3, a2, 8
-; RV64ZVE32F-NEXT: bnez a3, .LBB91_15
-; RV64ZVE32F-NEXT: .LBB91_6: # %else8
+; RV64ZVE32F-NEXT: bnez a3, .LBB101_15
+; RV64ZVE32F-NEXT: .LBB101_6: # %else8
; RV64ZVE32F-NEXT: andi a3, a2, 16
-; RV64ZVE32F-NEXT: bnez a3, .LBB91_16
-; RV64ZVE32F-NEXT: .LBB91_7: # %else11
+; RV64ZVE32F-NEXT: bnez a3, .LBB101_16
+; RV64ZVE32F-NEXT: .LBB101_7: # %else11
; RV64ZVE32F-NEXT: andi a3, a2, 32
-; RV64ZVE32F-NEXT: beqz a3, .LBB91_9
-; RV64ZVE32F-NEXT: .LBB91_8: # %cond.load13
+; RV64ZVE32F-NEXT: beqz a3, .LBB101_9
+; RV64ZVE32F-NEXT: .LBB101_8: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa5, 0(a3)
-; RV64ZVE32F-NEXT: .LBB91_9: # %else14
+; RV64ZVE32F-NEXT: .LBB101_9: # %else14
; RV64ZVE32F-NEXT: andi a3, a2, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
-; RV64ZVE32F-NEXT: beqz a3, .LBB91_11
+; RV64ZVE32F-NEXT: beqz a3, .LBB101_11
; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa6, 0(a3)
-; RV64ZVE32F-NEXT: .LBB91_11: # %else17
+; RV64ZVE32F-NEXT: .LBB101_11: # %else17
; RV64ZVE32F-NEXT: andi a2, a2, -128
-; RV64ZVE32F-NEXT: beqz a2, .LBB91_13
+; RV64ZVE32F-NEXT: beqz a2, .LBB101_13
; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: fld fa7, 0(a1)
-; RV64ZVE32F-NEXT: .LBB91_13: # %else20
+; RV64ZVE32F-NEXT: .LBB101_13: # %else20
; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
@@ -10746,29 +15890,29 @@ define <8 x double> @mgather_baseidx_sext_v8i16_v8f64(ptr %base, <8 x i16> %idxs
; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB91_14: # %cond.load4
+; RV64ZVE32F-NEXT: .LBB101_14: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa2, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 8
-; RV64ZVE32F-NEXT: beqz a3, .LBB91_6
-; RV64ZVE32F-NEXT: .LBB91_15: # %cond.load7
+; RV64ZVE32F-NEXT: beqz a3, .LBB101_6
+; RV64ZVE32F-NEXT: .LBB101_15: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa3, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 16
-; RV64ZVE32F-NEXT: beqz a3, .LBB91_7
-; RV64ZVE32F-NEXT: .LBB91_16: # %cond.load10
+; RV64ZVE32F-NEXT: beqz a3, .LBB101_7
+; RV64ZVE32F-NEXT: .LBB101_16: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa4, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 32
-; RV64ZVE32F-NEXT: bnez a3, .LBB91_8
-; RV64ZVE32F-NEXT: j .LBB91_9
+; RV64ZVE32F-NEXT: bnez a3, .LBB101_8
+; RV64ZVE32F-NEXT: j .LBB101_9
%eidxs = sext <8 x i16> %idxs to <8 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
%v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
@@ -10806,34 +15950,34 @@ define <8 x double> @mgather_baseidx_zext_v8i16_v8f64(ptr %base, <8 x i16> %idxs
; RV32ZVE32F-NEXT: andi a3, a2, 1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
-; RV32ZVE32F-NEXT: bnez a3, .LBB92_10
+; RV32ZVE32F-NEXT: bnez a3, .LBB102_10
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a1, a2, 2
-; RV32ZVE32F-NEXT: bnez a1, .LBB92_11
-; RV32ZVE32F-NEXT: .LBB92_2: # %else2
+; RV32ZVE32F-NEXT: bnez a1, .LBB102_11
+; RV32ZVE32F-NEXT: .LBB102_2: # %else2
; RV32ZVE32F-NEXT: andi a1, a2, 4
-; RV32ZVE32F-NEXT: bnez a1, .LBB92_12
-; RV32ZVE32F-NEXT: .LBB92_3: # %else5
+; RV32ZVE32F-NEXT: bnez a1, .LBB102_12
+; RV32ZVE32F-NEXT: .LBB102_3: # %else5
; RV32ZVE32F-NEXT: andi a1, a2, 8
-; RV32ZVE32F-NEXT: bnez a1, .LBB92_13
-; RV32ZVE32F-NEXT: .LBB92_4: # %else8
+; RV32ZVE32F-NEXT: bnez a1, .LBB102_13
+; RV32ZVE32F-NEXT: .LBB102_4: # %else8
; RV32ZVE32F-NEXT: andi a1, a2, 16
-; RV32ZVE32F-NEXT: bnez a1, .LBB92_14
-; RV32ZVE32F-NEXT: .LBB92_5: # %else11
+; RV32ZVE32F-NEXT: bnez a1, .LBB102_14
+; RV32ZVE32F-NEXT: .LBB102_5: # %else11
; RV32ZVE32F-NEXT: andi a1, a2, 32
-; RV32ZVE32F-NEXT: bnez a1, .LBB92_15
-; RV32ZVE32F-NEXT: .LBB92_6: # %else14
+; RV32ZVE32F-NEXT: bnez a1, .LBB102_15
+; RV32ZVE32F-NEXT: .LBB102_6: # %else14
; RV32ZVE32F-NEXT: andi a1, a2, 64
-; RV32ZVE32F-NEXT: bnez a1, .LBB92_16
-; RV32ZVE32F-NEXT: .LBB92_7: # %else17
+; RV32ZVE32F-NEXT: bnez a1, .LBB102_16
+; RV32ZVE32F-NEXT: .LBB102_7: # %else17
; RV32ZVE32F-NEXT: andi a1, a2, -128
-; RV32ZVE32F-NEXT: beqz a1, .LBB92_9
-; RV32ZVE32F-NEXT: .LBB92_8: # %cond.load19
+; RV32ZVE32F-NEXT: beqz a1, .LBB102_9
+; RV32ZVE32F-NEXT: .LBB102_8: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa7, 0(a1)
-; RV32ZVE32F-NEXT: .LBB92_9: # %else20
+; RV32ZVE32F-NEXT: .LBB102_9: # %else20
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
@@ -10843,54 +15987,54 @@ define <8 x double> @mgather_baseidx_zext_v8i16_v8f64(ptr %base, <8 x i16> %idxs
; RV32ZVE32F-NEXT: fsd fa6, 48(a0)
; RV32ZVE32F-NEXT: fsd fa7, 56(a0)
; RV32ZVE32F-NEXT: ret
-; RV32ZVE32F-NEXT: .LBB92_10: # %cond.load
+; RV32ZVE32F-NEXT: .LBB102_10: # %cond.load
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, 2
-; RV32ZVE32F-NEXT: beqz a1, .LBB92_2
-; RV32ZVE32F-NEXT: .LBB92_11: # %cond.load1
+; RV32ZVE32F-NEXT: beqz a1, .LBB102_2
+; RV32ZVE32F-NEXT: .LBB102_11: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa1, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, 4
-; RV32ZVE32F-NEXT: beqz a1, .LBB92_3
-; RV32ZVE32F-NEXT: .LBB92_12: # %cond.load4
+; RV32ZVE32F-NEXT: beqz a1, .LBB102_3
+; RV32ZVE32F-NEXT: .LBB102_12: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa2, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, 8
-; RV32ZVE32F-NEXT: beqz a1, .LBB92_4
-; RV32ZVE32F-NEXT: .LBB92_13: # %cond.load7
+; RV32ZVE32F-NEXT: beqz a1, .LBB102_4
+; RV32ZVE32F-NEXT: .LBB102_13: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa3, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, 16
-; RV32ZVE32F-NEXT: beqz a1, .LBB92_5
-; RV32ZVE32F-NEXT: .LBB92_14: # %cond.load10
+; RV32ZVE32F-NEXT: beqz a1, .LBB102_5
+; RV32ZVE32F-NEXT: .LBB102_14: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa4, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, 32
-; RV32ZVE32F-NEXT: beqz a1, .LBB92_6
-; RV32ZVE32F-NEXT: .LBB92_15: # %cond.load13
+; RV32ZVE32F-NEXT: beqz a1, .LBB102_6
+; RV32ZVE32F-NEXT: .LBB102_15: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa5, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, 64
-; RV32ZVE32F-NEXT: beqz a1, .LBB92_7
-; RV32ZVE32F-NEXT: .LBB92_16: # %cond.load16
+; RV32ZVE32F-NEXT: beqz a1, .LBB102_7
+; RV32ZVE32F-NEXT: .LBB102_16: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa6, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, -128
-; RV32ZVE32F-NEXT: bnez a1, .LBB92_8
-; RV32ZVE32F-NEXT: j .LBB92_9
+; RV32ZVE32F-NEXT: bnez a1, .LBB102_8
+; RV32ZVE32F-NEXT: j .LBB102_9
;
; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i16_v8f64:
; RV64ZVE32F: # %bb.0:
@@ -10899,7 +16043,7 @@ define <8 x double> @mgather_baseidx_zext_v8i16_v8f64(ptr %base, <8 x i16> %idxs
; RV64ZVE32F-NEXT: vmv.x.s a3, v0
; RV64ZVE32F-NEXT: andi a4, a3, 1
; RV64ZVE32F-NEXT: addiw a2, a2, -1
-; RV64ZVE32F-NEXT: beqz a4, .LBB92_2
+; RV64ZVE32F-NEXT: beqz a4, .LBB102_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a4, v8
@@ -10907,9 +16051,9 @@ define <8 x double> @mgather_baseidx_zext_v8i16_v8f64(ptr %base, <8 x i16> %idxs
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: fld fa0, 0(a4)
-; RV64ZVE32F-NEXT: .LBB92_2: # %else
+; RV64ZVE32F-NEXT: .LBB102_2: # %else
; RV64ZVE32F-NEXT: andi a4, a3, 2
-; RV64ZVE32F-NEXT: beqz a4, .LBB92_4
+; RV64ZVE32F-NEXT: beqz a4, .LBB102_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
@@ -10918,42 +16062,42 @@ define <8 x double> @mgather_baseidx_zext_v8i16_v8f64(ptr %base, <8 x i16> %idxs
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: fld fa1, 0(a4)
-; RV64ZVE32F-NEXT: .LBB92_4: # %else2
+; RV64ZVE32F-NEXT: .LBB102_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a4, a3, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a4, .LBB92_14
+; RV64ZVE32F-NEXT: bnez a4, .LBB102_14
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a4, a3, 8
-; RV64ZVE32F-NEXT: bnez a4, .LBB92_15
-; RV64ZVE32F-NEXT: .LBB92_6: # %else8
+; RV64ZVE32F-NEXT: bnez a4, .LBB102_15
+; RV64ZVE32F-NEXT: .LBB102_6: # %else8
; RV64ZVE32F-NEXT: andi a4, a3, 16
-; RV64ZVE32F-NEXT: bnez a4, .LBB92_16
-; RV64ZVE32F-NEXT: .LBB92_7: # %else11
+; RV64ZVE32F-NEXT: bnez a4, .LBB102_16
+; RV64ZVE32F-NEXT: .LBB102_7: # %else11
; RV64ZVE32F-NEXT: andi a4, a3, 32
-; RV64ZVE32F-NEXT: beqz a4, .LBB92_9
-; RV64ZVE32F-NEXT: .LBB92_8: # %cond.load13
+; RV64ZVE32F-NEXT: beqz a4, .LBB102_9
+; RV64ZVE32F-NEXT: .LBB102_8: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a4, v8
; RV64ZVE32F-NEXT: and a4, a4, a2
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: fld fa5, 0(a4)
-; RV64ZVE32F-NEXT: .LBB92_9: # %else14
+; RV64ZVE32F-NEXT: .LBB102_9: # %else14
; RV64ZVE32F-NEXT: andi a4, a3, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
-; RV64ZVE32F-NEXT: beqz a4, .LBB92_11
+; RV64ZVE32F-NEXT: beqz a4, .LBB102_11
; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a4, v8
; RV64ZVE32F-NEXT: and a4, a4, a2
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: fld fa6, 0(a4)
-; RV64ZVE32F-NEXT: .LBB92_11: # %else17
+; RV64ZVE32F-NEXT: .LBB102_11: # %else17
; RV64ZVE32F-NEXT: andi a3, a3, -128
-; RV64ZVE32F-NEXT: beqz a3, .LBB92_13
+; RV64ZVE32F-NEXT: beqz a3, .LBB102_13
; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
@@ -10961,7 +16105,7 @@ define <8 x double> @mgather_baseidx_zext_v8i16_v8f64(ptr %base, <8 x i16> %idxs
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: fld fa7, 0(a1)
-; RV64ZVE32F-NEXT: .LBB92_13: # %else20
+; RV64ZVE32F-NEXT: .LBB102_13: # %else20
; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
@@ -10971,15 +16115,15 @@ define <8 x double> @mgather_baseidx_zext_v8i16_v8f64(ptr %base, <8 x i16> %idxs
; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB92_14: # %cond.load4
+; RV64ZVE32F-NEXT: .LBB102_14: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a4, v8
; RV64ZVE32F-NEXT: and a4, a4, a2
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: fld fa2, 0(a4)
; RV64ZVE32F-NEXT: andi a4, a3, 8
-; RV64ZVE32F-NEXT: beqz a4, .LBB92_6
-; RV64ZVE32F-NEXT: .LBB92_15: # %cond.load7
+; RV64ZVE32F-NEXT: beqz a4, .LBB102_6
+; RV64ZVE32F-NEXT: .LBB102_15: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a4, v8
; RV64ZVE32F-NEXT: and a4, a4, a2
@@ -10987,16 +16131,16 @@ define <8 x double> @mgather_baseidx_zext_v8i16_v8f64(ptr %base, <8 x i16> %idxs
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: fld fa3, 0(a4)
; RV64ZVE32F-NEXT: andi a4, a3, 16
-; RV64ZVE32F-NEXT: beqz a4, .LBB92_7
-; RV64ZVE32F-NEXT: .LBB92_16: # %cond.load10
+; RV64ZVE32F-NEXT: beqz a4, .LBB102_7
+; RV64ZVE32F-NEXT: .LBB102_16: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s a4, v9
; RV64ZVE32F-NEXT: and a4, a4, a2
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: fld fa4, 0(a4)
; RV64ZVE32F-NEXT: andi a4, a3, 32
-; RV64ZVE32F-NEXT: bnez a4, .LBB92_8
-; RV64ZVE32F-NEXT: j .LBB92_9
+; RV64ZVE32F-NEXT: bnez a4, .LBB102_8
+; RV64ZVE32F-NEXT: j .LBB102_9
%eidxs = zext <8 x i16> %idxs to <8 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
%v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
@@ -11031,34 +16175,34 @@ define <8 x double> @mgather_baseidx_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8
; RV32ZVE32F-NEXT: andi a3, a2, 1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
-; RV32ZVE32F-NEXT: bnez a3, .LBB93_10
+; RV32ZVE32F-NEXT: bnez a3, .LBB103_10
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a1, a2, 2
-; RV32ZVE32F-NEXT: bnez a1, .LBB93_11
-; RV32ZVE32F-NEXT: .LBB93_2: # %else2
+; RV32ZVE32F-NEXT: bnez a1, .LBB103_11
+; RV32ZVE32F-NEXT: .LBB103_2: # %else2
; RV32ZVE32F-NEXT: andi a1, a2, 4
-; RV32ZVE32F-NEXT: bnez a1, .LBB93_12
-; RV32ZVE32F-NEXT: .LBB93_3: # %else5
+; RV32ZVE32F-NEXT: bnez a1, .LBB103_12
+; RV32ZVE32F-NEXT: .LBB103_3: # %else5
; RV32ZVE32F-NEXT: andi a1, a2, 8
-; RV32ZVE32F-NEXT: bnez a1, .LBB93_13
-; RV32ZVE32F-NEXT: .LBB93_4: # %else8
+; RV32ZVE32F-NEXT: bnez a1, .LBB103_13
+; RV32ZVE32F-NEXT: .LBB103_4: # %else8
; RV32ZVE32F-NEXT: andi a1, a2, 16
-; RV32ZVE32F-NEXT: bnez a1, .LBB93_14
-; RV32ZVE32F-NEXT: .LBB93_5: # %else11
+; RV32ZVE32F-NEXT: bnez a1, .LBB103_14
+; RV32ZVE32F-NEXT: .LBB103_5: # %else11
; RV32ZVE32F-NEXT: andi a1, a2, 32
-; RV32ZVE32F-NEXT: bnez a1, .LBB93_15
-; RV32ZVE32F-NEXT: .LBB93_6: # %else14
+; RV32ZVE32F-NEXT: bnez a1, .LBB103_15
+; RV32ZVE32F-NEXT: .LBB103_6: # %else14
; RV32ZVE32F-NEXT: andi a1, a2, 64
-; RV32ZVE32F-NEXT: bnez a1, .LBB93_16
-; RV32ZVE32F-NEXT: .LBB93_7: # %else17
+; RV32ZVE32F-NEXT: bnez a1, .LBB103_16
+; RV32ZVE32F-NEXT: .LBB103_7: # %else17
; RV32ZVE32F-NEXT: andi a1, a2, -128
-; RV32ZVE32F-NEXT: beqz a1, .LBB93_9
-; RV32ZVE32F-NEXT: .LBB93_8: # %cond.load19
+; RV32ZVE32F-NEXT: beqz a1, .LBB103_9
+; RV32ZVE32F-NEXT: .LBB103_8: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa7, 0(a1)
-; RV32ZVE32F-NEXT: .LBB93_9: # %else20
+; RV32ZVE32F-NEXT: .LBB103_9: # %else20
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
@@ -11068,70 +16212,70 @@ define <8 x double> @mgather_baseidx_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8
; RV32ZVE32F-NEXT: fsd fa6, 48(a0)
; RV32ZVE32F-NEXT: fsd fa7, 56(a0)
; RV32ZVE32F-NEXT: ret
-; RV32ZVE32F-NEXT: .LBB93_10: # %cond.load
+; RV32ZVE32F-NEXT: .LBB103_10: # %cond.load
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, 2
-; RV32ZVE32F-NEXT: beqz a1, .LBB93_2
-; RV32ZVE32F-NEXT: .LBB93_11: # %cond.load1
+; RV32ZVE32F-NEXT: beqz a1, .LBB103_2
+; RV32ZVE32F-NEXT: .LBB103_11: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa1, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, 4
-; RV32ZVE32F-NEXT: beqz a1, .LBB93_3
-; RV32ZVE32F-NEXT: .LBB93_12: # %cond.load4
+; RV32ZVE32F-NEXT: beqz a1, .LBB103_3
+; RV32ZVE32F-NEXT: .LBB103_12: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa2, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, 8
-; RV32ZVE32F-NEXT: beqz a1, .LBB93_4
-; RV32ZVE32F-NEXT: .LBB93_13: # %cond.load7
+; RV32ZVE32F-NEXT: beqz a1, .LBB103_4
+; RV32ZVE32F-NEXT: .LBB103_13: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa3, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, 16
-; RV32ZVE32F-NEXT: beqz a1, .LBB93_5
-; RV32ZVE32F-NEXT: .LBB93_14: # %cond.load10
+; RV32ZVE32F-NEXT: beqz a1, .LBB103_5
+; RV32ZVE32F-NEXT: .LBB103_14: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa4, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, 32
-; RV32ZVE32F-NEXT: beqz a1, .LBB93_6
-; RV32ZVE32F-NEXT: .LBB93_15: # %cond.load13
+; RV32ZVE32F-NEXT: beqz a1, .LBB103_6
+; RV32ZVE32F-NEXT: .LBB103_15: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa5, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, 64
-; RV32ZVE32F-NEXT: beqz a1, .LBB93_7
-; RV32ZVE32F-NEXT: .LBB93_16: # %cond.load16
+; RV32ZVE32F-NEXT: beqz a1, .LBB103_7
+; RV32ZVE32F-NEXT: .LBB103_16: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa6, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, -128
-; RV32ZVE32F-NEXT: bnez a1, .LBB93_8
-; RV32ZVE32F-NEXT: j .LBB93_9
+; RV32ZVE32F-NEXT: bnez a1, .LBB103_8
+; RV32ZVE32F-NEXT: j .LBB103_9
;
; RV64ZVE32F-LABEL: mgather_baseidx_v8i32_v8f64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
-; RV64ZVE32F-NEXT: beqz a3, .LBB93_2
+; RV64ZVE32F-NEXT: beqz a3, .LBB103_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa0, 0(a3)
-; RV64ZVE32F-NEXT: .LBB93_2: # %else
+; RV64ZVE32F-NEXT: .LBB103_2: # %else
; RV64ZVE32F-NEXT: andi a3, a2, 2
-; RV64ZVE32F-NEXT: beqz a3, .LBB93_4
+; RV64ZVE32F-NEXT: beqz a3, .LBB103_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
@@ -11139,47 +16283,47 @@ define <8 x double> @mgather_baseidx_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa1, 0(a3)
-; RV64ZVE32F-NEXT: .LBB93_4: # %else2
+; RV64ZVE32F-NEXT: .LBB103_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a3, a2, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a3, .LBB93_14
+; RV64ZVE32F-NEXT: bnez a3, .LBB103_14
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a3, a2, 8
-; RV64ZVE32F-NEXT: bnez a3, .LBB93_15
-; RV64ZVE32F-NEXT: .LBB93_6: # %else8
+; RV64ZVE32F-NEXT: bnez a3, .LBB103_15
+; RV64ZVE32F-NEXT: .LBB103_6: # %else8
; RV64ZVE32F-NEXT: andi a3, a2, 16
-; RV64ZVE32F-NEXT: bnez a3, .LBB93_16
-; RV64ZVE32F-NEXT: .LBB93_7: # %else11
+; RV64ZVE32F-NEXT: bnez a3, .LBB103_16
+; RV64ZVE32F-NEXT: .LBB103_7: # %else11
; RV64ZVE32F-NEXT: andi a3, a2, 32
-; RV64ZVE32F-NEXT: beqz a3, .LBB93_9
-; RV64ZVE32F-NEXT: .LBB93_8: # %cond.load13
+; RV64ZVE32F-NEXT: beqz a3, .LBB103_9
+; RV64ZVE32F-NEXT: .LBB103_8: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa5, 0(a3)
-; RV64ZVE32F-NEXT: .LBB93_9: # %else14
+; RV64ZVE32F-NEXT: .LBB103_9: # %else14
; RV64ZVE32F-NEXT: andi a3, a2, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
-; RV64ZVE32F-NEXT: beqz a3, .LBB93_11
+; RV64ZVE32F-NEXT: beqz a3, .LBB103_11
; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa6, 0(a3)
-; RV64ZVE32F-NEXT: .LBB93_11: # %else17
+; RV64ZVE32F-NEXT: .LBB103_11: # %else17
; RV64ZVE32F-NEXT: andi a2, a2, -128
-; RV64ZVE32F-NEXT: beqz a2, .LBB93_13
+; RV64ZVE32F-NEXT: beqz a2, .LBB103_13
; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: fld fa7, 0(a1)
-; RV64ZVE32F-NEXT: .LBB93_13: # %else20
+; RV64ZVE32F-NEXT: .LBB103_13: # %else20
; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
@@ -11189,29 +16333,29 @@ define <8 x double> @mgather_baseidx_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8
; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB93_14: # %cond.load4
+; RV64ZVE32F-NEXT: .LBB103_14: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa2, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 8
-; RV64ZVE32F-NEXT: beqz a3, .LBB93_6
-; RV64ZVE32F-NEXT: .LBB93_15: # %cond.load7
+; RV64ZVE32F-NEXT: beqz a3, .LBB103_6
+; RV64ZVE32F-NEXT: .LBB103_15: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa3, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 16
-; RV64ZVE32F-NEXT: beqz a3, .LBB93_7
-; RV64ZVE32F-NEXT: .LBB93_16: # %cond.load10
+; RV64ZVE32F-NEXT: beqz a3, .LBB103_7
+; RV64ZVE32F-NEXT: .LBB103_16: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa4, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 32
-; RV64ZVE32F-NEXT: bnez a3, .LBB93_8
-; RV64ZVE32F-NEXT: j .LBB93_9
+; RV64ZVE32F-NEXT: bnez a3, .LBB103_8
+; RV64ZVE32F-NEXT: j .LBB103_9
%ptrs = getelementptr inbounds double, ptr %base, <8 x i32> %idxs
%v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
ret <8 x double> %v
@@ -11245,34 +16389,34 @@ define <8 x double> @mgather_baseidx_sext_v8i32_v8f64(ptr %base, <8 x i32> %idxs
; RV32ZVE32F-NEXT: andi a3, a2, 1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
-; RV32ZVE32F-NEXT: bnez a3, .LBB94_10
+; RV32ZVE32F-NEXT: bnez a3, .LBB104_10
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a1, a2, 2
-; RV32ZVE32F-NEXT: bnez a1, .LBB94_11
-; RV32ZVE32F-NEXT: .LBB94_2: # %else2
+; RV32ZVE32F-NEXT: bnez a1, .LBB104_11
+; RV32ZVE32F-NEXT: .LBB104_2: # %else2
; RV32ZVE32F-NEXT: andi a1, a2, 4
-; RV32ZVE32F-NEXT: bnez a1, .LBB94_12
-; RV32ZVE32F-NEXT: .LBB94_3: # %else5
+; RV32ZVE32F-NEXT: bnez a1, .LBB104_12
+; RV32ZVE32F-NEXT: .LBB104_3: # %else5
; RV32ZVE32F-NEXT: andi a1, a2, 8
-; RV32ZVE32F-NEXT: bnez a1, .LBB94_13
-; RV32ZVE32F-NEXT: .LBB94_4: # %else8
+; RV32ZVE32F-NEXT: bnez a1, .LBB104_13
+; RV32ZVE32F-NEXT: .LBB104_4: # %else8
; RV32ZVE32F-NEXT: andi a1, a2, 16
-; RV32ZVE32F-NEXT: bnez a1, .LBB94_14
-; RV32ZVE32F-NEXT: .LBB94_5: # %else11
+; RV32ZVE32F-NEXT: bnez a1, .LBB104_14
+; RV32ZVE32F-NEXT: .LBB104_5: # %else11
; RV32ZVE32F-NEXT: andi a1, a2, 32
-; RV32ZVE32F-NEXT: bnez a1, .LBB94_15
-; RV32ZVE32F-NEXT: .LBB94_6: # %else14
+; RV32ZVE32F-NEXT: bnez a1, .LBB104_15
+; RV32ZVE32F-NEXT: .LBB104_6: # %else14
; RV32ZVE32F-NEXT: andi a1, a2, 64
-; RV32ZVE32F-NEXT: bnez a1, .LBB94_16
-; RV32ZVE32F-NEXT: .LBB94_7: # %else17
+; RV32ZVE32F-NEXT: bnez a1, .LBB104_16
+; RV32ZVE32F-NEXT: .LBB104_7: # %else17
; RV32ZVE32F-NEXT: andi a1, a2, -128
-; RV32ZVE32F-NEXT: beqz a1, .LBB94_9
-; RV32ZVE32F-NEXT: .LBB94_8: # %cond.load19
+; RV32ZVE32F-NEXT: beqz a1, .LBB104_9
+; RV32ZVE32F-NEXT: .LBB104_8: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa7, 0(a1)
-; RV32ZVE32F-NEXT: .LBB94_9: # %else20
+; RV32ZVE32F-NEXT: .LBB104_9: # %else20
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
@@ -11282,70 +16426,70 @@ define <8 x double> @mgather_baseidx_sext_v8i32_v8f64(ptr %base, <8 x i32> %idxs
; RV32ZVE32F-NEXT: fsd fa6, 48(a0)
; RV32ZVE32F-NEXT: fsd fa7, 56(a0)
; RV32ZVE32F-NEXT: ret
-; RV32ZVE32F-NEXT: .LBB94_10: # %cond.load
+; RV32ZVE32F-NEXT: .LBB104_10: # %cond.load
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, 2
-; RV32ZVE32F-NEXT: beqz a1, .LBB94_2
-; RV32ZVE32F-NEXT: .LBB94_11: # %cond.load1
+; RV32ZVE32F-NEXT: beqz a1, .LBB104_2
+; RV32ZVE32F-NEXT: .LBB104_11: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa1, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, 4
-; RV32ZVE32F-NEXT: beqz a1, .LBB94_3
-; RV32ZVE32F-NEXT: .LBB94_12: # %cond.load4
+; RV32ZVE32F-NEXT: beqz a1, .LBB104_3
+; RV32ZVE32F-NEXT: .LBB104_12: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa2, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, 8
-; RV32ZVE32F-NEXT: beqz a1, .LBB94_4
-; RV32ZVE32F-NEXT: .LBB94_13: # %cond.load7
+; RV32ZVE32F-NEXT: beqz a1, .LBB104_4
+; RV32ZVE32F-NEXT: .LBB104_13: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa3, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, 16
-; RV32ZVE32F-NEXT: beqz a1, .LBB94_5
-; RV32ZVE32F-NEXT: .LBB94_14: # %cond.load10
+; RV32ZVE32F-NEXT: beqz a1, .LBB104_5
+; RV32ZVE32F-NEXT: .LBB104_14: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa4, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, 32
-; RV32ZVE32F-NEXT: beqz a1, .LBB94_6
-; RV32ZVE32F-NEXT: .LBB94_15: # %cond.load13
+; RV32ZVE32F-NEXT: beqz a1, .LBB104_6
+; RV32ZVE32F-NEXT: .LBB104_15: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa5, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, 64
-; RV32ZVE32F-NEXT: beqz a1, .LBB94_7
-; RV32ZVE32F-NEXT: .LBB94_16: # %cond.load16
+; RV32ZVE32F-NEXT: beqz a1, .LBB104_7
+; RV32ZVE32F-NEXT: .LBB104_16: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa6, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, -128
-; RV32ZVE32F-NEXT: bnez a1, .LBB94_8
-; RV32ZVE32F-NEXT: j .LBB94_9
+; RV32ZVE32F-NEXT: bnez a1, .LBB104_8
+; RV32ZVE32F-NEXT: j .LBB104_9
;
; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i32_v8f64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
-; RV64ZVE32F-NEXT: beqz a3, .LBB94_2
+; RV64ZVE32F-NEXT: beqz a3, .LBB104_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa0, 0(a3)
-; RV64ZVE32F-NEXT: .LBB94_2: # %else
+; RV64ZVE32F-NEXT: .LBB104_2: # %else
; RV64ZVE32F-NEXT: andi a3, a2, 2
-; RV64ZVE32F-NEXT: beqz a3, .LBB94_4
+; RV64ZVE32F-NEXT: beqz a3, .LBB104_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
@@ -11353,47 +16497,47 @@ define <8 x double> @mgather_baseidx_sext_v8i32_v8f64(ptr %base, <8 x i32> %idxs
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa1, 0(a3)
-; RV64ZVE32F-NEXT: .LBB94_4: # %else2
+; RV64ZVE32F-NEXT: .LBB104_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a3, a2, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a3, .LBB94_14
+; RV64ZVE32F-NEXT: bnez a3, .LBB104_14
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a3, a2, 8
-; RV64ZVE32F-NEXT: bnez a3, .LBB94_15
-; RV64ZVE32F-NEXT: .LBB94_6: # %else8
+; RV64ZVE32F-NEXT: bnez a3, .LBB104_15
+; RV64ZVE32F-NEXT: .LBB104_6: # %else8
; RV64ZVE32F-NEXT: andi a3, a2, 16
-; RV64ZVE32F-NEXT: bnez a3, .LBB94_16
-; RV64ZVE32F-NEXT: .LBB94_7: # %else11
+; RV64ZVE32F-NEXT: bnez a3, .LBB104_16
+; RV64ZVE32F-NEXT: .LBB104_7: # %else11
; RV64ZVE32F-NEXT: andi a3, a2, 32
-; RV64ZVE32F-NEXT: beqz a3, .LBB94_9
-; RV64ZVE32F-NEXT: .LBB94_8: # %cond.load13
+; RV64ZVE32F-NEXT: beqz a3, .LBB104_9
+; RV64ZVE32F-NEXT: .LBB104_8: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa5, 0(a3)
-; RV64ZVE32F-NEXT: .LBB94_9: # %else14
+; RV64ZVE32F-NEXT: .LBB104_9: # %else14
; RV64ZVE32F-NEXT: andi a3, a2, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
-; RV64ZVE32F-NEXT: beqz a3, .LBB94_11
+; RV64ZVE32F-NEXT: beqz a3, .LBB104_11
; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa6, 0(a3)
-; RV64ZVE32F-NEXT: .LBB94_11: # %else17
+; RV64ZVE32F-NEXT: .LBB104_11: # %else17
; RV64ZVE32F-NEXT: andi a2, a2, -128
-; RV64ZVE32F-NEXT: beqz a2, .LBB94_13
+; RV64ZVE32F-NEXT: beqz a2, .LBB104_13
; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: fld fa7, 0(a1)
-; RV64ZVE32F-NEXT: .LBB94_13: # %else20
+; RV64ZVE32F-NEXT: .LBB104_13: # %else20
; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
@@ -11403,29 +16547,29 @@ define <8 x double> @mgather_baseidx_sext_v8i32_v8f64(ptr %base, <8 x i32> %idxs
; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB94_14: # %cond.load4
+; RV64ZVE32F-NEXT: .LBB104_14: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa2, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 8
-; RV64ZVE32F-NEXT: beqz a3, .LBB94_6
-; RV64ZVE32F-NEXT: .LBB94_15: # %cond.load7
+; RV64ZVE32F-NEXT: beqz a3, .LBB104_6
+; RV64ZVE32F-NEXT: .LBB104_15: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa3, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 16
-; RV64ZVE32F-NEXT: beqz a3, .LBB94_7
-; RV64ZVE32F-NEXT: .LBB94_16: # %cond.load10
+; RV64ZVE32F-NEXT: beqz a3, .LBB104_7
+; RV64ZVE32F-NEXT: .LBB104_16: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa4, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 32
-; RV64ZVE32F-NEXT: bnez a3, .LBB94_8
-; RV64ZVE32F-NEXT: j .LBB94_9
+; RV64ZVE32F-NEXT: bnez a3, .LBB104_8
+; RV64ZVE32F-NEXT: j .LBB104_9
%eidxs = sext <8 x i32> %idxs to <8 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
%v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
@@ -11460,34 +16604,34 @@ define <8 x double> @mgather_baseidx_zext_v8i32_v8f64(ptr %base, <8 x i32> %idxs
; RV32ZVE32F-NEXT: andi a3, a2, 1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
-; RV32ZVE32F-NEXT: bnez a3, .LBB95_10
+; RV32ZVE32F-NEXT: bnez a3, .LBB105_10
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a1, a2, 2
-; RV32ZVE32F-NEXT: bnez a1, .LBB95_11
-; RV32ZVE32F-NEXT: .LBB95_2: # %else2
+; RV32ZVE32F-NEXT: bnez a1, .LBB105_11
+; RV32ZVE32F-NEXT: .LBB105_2: # %else2
; RV32ZVE32F-NEXT: andi a1, a2, 4
-; RV32ZVE32F-NEXT: bnez a1, .LBB95_12
-; RV32ZVE32F-NEXT: .LBB95_3: # %else5
+; RV32ZVE32F-NEXT: bnez a1, .LBB105_12
+; RV32ZVE32F-NEXT: .LBB105_3: # %else5
; RV32ZVE32F-NEXT: andi a1, a2, 8
-; RV32ZVE32F-NEXT: bnez a1, .LBB95_13
-; RV32ZVE32F-NEXT: .LBB95_4: # %else8
+; RV32ZVE32F-NEXT: bnez a1, .LBB105_13
+; RV32ZVE32F-NEXT: .LBB105_4: # %else8
; RV32ZVE32F-NEXT: andi a1, a2, 16
-; RV32ZVE32F-NEXT: bnez a1, .LBB95_14
-; RV32ZVE32F-NEXT: .LBB95_5: # %else11
+; RV32ZVE32F-NEXT: bnez a1, .LBB105_14
+; RV32ZVE32F-NEXT: .LBB105_5: # %else11
; RV32ZVE32F-NEXT: andi a1, a2, 32
-; RV32ZVE32F-NEXT: bnez a1, .LBB95_15
-; RV32ZVE32F-NEXT: .LBB95_6: # %else14
+; RV32ZVE32F-NEXT: bnez a1, .LBB105_15
+; RV32ZVE32F-NEXT: .LBB105_6: # %else14
; RV32ZVE32F-NEXT: andi a1, a2, 64
-; RV32ZVE32F-NEXT: bnez a1, .LBB95_16
-; RV32ZVE32F-NEXT: .LBB95_7: # %else17
+; RV32ZVE32F-NEXT: bnez a1, .LBB105_16
+; RV32ZVE32F-NEXT: .LBB105_7: # %else17
; RV32ZVE32F-NEXT: andi a1, a2, -128
-; RV32ZVE32F-NEXT: beqz a1, .LBB95_9
-; RV32ZVE32F-NEXT: .LBB95_8: # %cond.load19
+; RV32ZVE32F-NEXT: beqz a1, .LBB105_9
+; RV32ZVE32F-NEXT: .LBB105_8: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa7, 0(a1)
-; RV32ZVE32F-NEXT: .LBB95_9: # %else20
+; RV32ZVE32F-NEXT: .LBB105_9: # %else20
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
@@ -11497,61 +16641,61 @@ define <8 x double> @mgather_baseidx_zext_v8i32_v8f64(ptr %base, <8 x i32> %idxs
; RV32ZVE32F-NEXT: fsd fa6, 48(a0)
; RV32ZVE32F-NEXT: fsd fa7, 56(a0)
; RV32ZVE32F-NEXT: ret
-; RV32ZVE32F-NEXT: .LBB95_10: # %cond.load
+; RV32ZVE32F-NEXT: .LBB105_10: # %cond.load
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, 2
-; RV32ZVE32F-NEXT: beqz a1, .LBB95_2
-; RV32ZVE32F-NEXT: .LBB95_11: # %cond.load1
+; RV32ZVE32F-NEXT: beqz a1, .LBB105_2
+; RV32ZVE32F-NEXT: .LBB105_11: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa1, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, 4
-; RV32ZVE32F-NEXT: beqz a1, .LBB95_3
-; RV32ZVE32F-NEXT: .LBB95_12: # %cond.load4
+; RV32ZVE32F-NEXT: beqz a1, .LBB105_3
+; RV32ZVE32F-NEXT: .LBB105_12: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa2, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, 8
-; RV32ZVE32F-NEXT: beqz a1, .LBB95_4
-; RV32ZVE32F-NEXT: .LBB95_13: # %cond.load7
+; RV32ZVE32F-NEXT: beqz a1, .LBB105_4
+; RV32ZVE32F-NEXT: .LBB105_13: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa3, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, 16
-; RV32ZVE32F-NEXT: beqz a1, .LBB95_5
-; RV32ZVE32F-NEXT: .LBB95_14: # %cond.load10
+; RV32ZVE32F-NEXT: beqz a1, .LBB105_5
+; RV32ZVE32F-NEXT: .LBB105_14: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa4, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, 32
-; RV32ZVE32F-NEXT: beqz a1, .LBB95_6
-; RV32ZVE32F-NEXT: .LBB95_15: # %cond.load13
+; RV32ZVE32F-NEXT: beqz a1, .LBB105_6
+; RV32ZVE32F-NEXT: .LBB105_15: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa5, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, 64
-; RV32ZVE32F-NEXT: beqz a1, .LBB95_7
-; RV32ZVE32F-NEXT: .LBB95_16: # %cond.load16
+; RV32ZVE32F-NEXT: beqz a1, .LBB105_7
+; RV32ZVE32F-NEXT: .LBB105_16: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa6, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, -128
-; RV32ZVE32F-NEXT: bnez a1, .LBB95_8
-; RV32ZVE32F-NEXT: j .LBB95_9
+; RV32ZVE32F-NEXT: bnez a1, .LBB105_8
+; RV32ZVE32F-NEXT: j .LBB105_9
;
; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i32_v8f64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
-; RV64ZVE32F-NEXT: beqz a3, .LBB95_2
+; RV64ZVE32F-NEXT: beqz a3, .LBB105_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
@@ -11559,9 +16703,9 @@ define <8 x double> @mgather_baseidx_zext_v8i32_v8f64(ptr %base, <8 x i32> %idxs
; RV64ZVE32F-NEXT: srli a3, a3, 29
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa0, 0(a3)
-; RV64ZVE32F-NEXT: .LBB95_2: # %else
+; RV64ZVE32F-NEXT: .LBB105_2: # %else
; RV64ZVE32F-NEXT: andi a3, a2, 2
-; RV64ZVE32F-NEXT: beqz a3, .LBB95_4
+; RV64ZVE32F-NEXT: beqz a3, .LBB105_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
@@ -11570,42 +16714,42 @@ define <8 x double> @mgather_baseidx_zext_v8i32_v8f64(ptr %base, <8 x i32> %idxs
; RV64ZVE32F-NEXT: srli a3, a3, 29
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa1, 0(a3)
-; RV64ZVE32F-NEXT: .LBB95_4: # %else2
+; RV64ZVE32F-NEXT: .LBB105_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a3, a2, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a3, .LBB95_14
+; RV64ZVE32F-NEXT: bnez a3, .LBB105_14
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a3, a2, 8
-; RV64ZVE32F-NEXT: bnez a3, .LBB95_15
-; RV64ZVE32F-NEXT: .LBB95_6: # %else8
+; RV64ZVE32F-NEXT: bnez a3, .LBB105_15
+; RV64ZVE32F-NEXT: .LBB105_6: # %else8
; RV64ZVE32F-NEXT: andi a3, a2, 16
-; RV64ZVE32F-NEXT: bnez a3, .LBB95_16
-; RV64ZVE32F-NEXT: .LBB95_7: # %else11
+; RV64ZVE32F-NEXT: bnez a3, .LBB105_16
+; RV64ZVE32F-NEXT: .LBB105_7: # %else11
; RV64ZVE32F-NEXT: andi a3, a2, 32
-; RV64ZVE32F-NEXT: beqz a3, .LBB95_9
-; RV64ZVE32F-NEXT: .LBB95_8: # %cond.load13
+; RV64ZVE32F-NEXT: beqz a3, .LBB105_9
+; RV64ZVE32F-NEXT: .LBB105_8: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 32
; RV64ZVE32F-NEXT: srli a3, a3, 29
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa5, 0(a3)
-; RV64ZVE32F-NEXT: .LBB95_9: # %else14
+; RV64ZVE32F-NEXT: .LBB105_9: # %else14
; RV64ZVE32F-NEXT: andi a3, a2, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
-; RV64ZVE32F-NEXT: beqz a3, .LBB95_11
+; RV64ZVE32F-NEXT: beqz a3, .LBB105_11
; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 32
; RV64ZVE32F-NEXT: srli a3, a3, 29
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa6, 0(a3)
-; RV64ZVE32F-NEXT: .LBB95_11: # %else17
+; RV64ZVE32F-NEXT: .LBB105_11: # %else17
; RV64ZVE32F-NEXT: andi a2, a2, -128
-; RV64ZVE32F-NEXT: beqz a2, .LBB95_13
+; RV64ZVE32F-NEXT: beqz a2, .LBB105_13
; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -11613,7 +16757,7 @@ define <8 x double> @mgather_baseidx_zext_v8i32_v8f64(ptr %base, <8 x i32> %idxs
; RV64ZVE32F-NEXT: srli a2, a2, 29
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: fld fa7, 0(a1)
-; RV64ZVE32F-NEXT: .LBB95_13: # %else20
+; RV64ZVE32F-NEXT: .LBB105_13: # %else20
; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
@@ -11623,15 +16767,15 @@ define <8 x double> @mgather_baseidx_zext_v8i32_v8f64(ptr %base, <8 x i32> %idxs
; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB95_14: # %cond.load4
+; RV64ZVE32F-NEXT: .LBB105_14: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 32
; RV64ZVE32F-NEXT: srli a3, a3, 29
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa2, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 8
-; RV64ZVE32F-NEXT: beqz a3, .LBB95_6
-; RV64ZVE32F-NEXT: .LBB95_15: # %cond.load7
+; RV64ZVE32F-NEXT: beqz a3, .LBB105_6
+; RV64ZVE32F-NEXT: .LBB105_15: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 32
@@ -11639,16 +16783,16 @@ define <8 x double> @mgather_baseidx_zext_v8i32_v8f64(ptr %base, <8 x i32> %idxs
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa3, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 16
-; RV64ZVE32F-NEXT: beqz a3, .LBB95_7
-; RV64ZVE32F-NEXT: .LBB95_16: # %cond.load10
+; RV64ZVE32F-NEXT: beqz a3, .LBB105_7
+; RV64ZVE32F-NEXT: .LBB105_16: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-NEXT: slli a3, a3, 32
; RV64ZVE32F-NEXT: srli a3, a3, 29
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa4, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 32
-; RV64ZVE32F-NEXT: bnez a3, .LBB95_8
-; RV64ZVE32F-NEXT: j .LBB95_9
+; RV64ZVE32F-NEXT: bnez a3, .LBB105_8
+; RV64ZVE32F-NEXT: j .LBB105_9
%eidxs = zext <8 x i32> %idxs to <8 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
%v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
@@ -11699,34 +16843,34 @@ define <8 x double> @mgather_baseidx_v8f64(ptr %base, <8 x i64> %idxs, <8 x i1>
; RV32ZVE32F-NEXT: andi a3, a2, 1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
-; RV32ZVE32F-NEXT: bnez a3, .LBB96_10
+; RV32ZVE32F-NEXT: bnez a3, .LBB106_10
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a1, a2, 2
-; RV32ZVE32F-NEXT: bnez a1, .LBB96_11
-; RV32ZVE32F-NEXT: .LBB96_2: # %else2
+; RV32ZVE32F-NEXT: bnez a1, .LBB106_11
+; RV32ZVE32F-NEXT: .LBB106_2: # %else2
; RV32ZVE32F-NEXT: andi a1, a2, 4
-; RV32ZVE32F-NEXT: bnez a1, .LBB96_12
-; RV32ZVE32F-NEXT: .LBB96_3: # %else5
+; RV32ZVE32F-NEXT: bnez a1, .LBB106_12
+; RV32ZVE32F-NEXT: .LBB106_3: # %else5
; RV32ZVE32F-NEXT: andi a1, a2, 8
-; RV32ZVE32F-NEXT: bnez a1, .LBB96_13
-; RV32ZVE32F-NEXT: .LBB96_4: # %else8
+; RV32ZVE32F-NEXT: bnez a1, .LBB106_13
+; RV32ZVE32F-NEXT: .LBB106_4: # %else8
; RV32ZVE32F-NEXT: andi a1, a2, 16
-; RV32ZVE32F-NEXT: bnez a1, .LBB96_14
-; RV32ZVE32F-NEXT: .LBB96_5: # %else11
+; RV32ZVE32F-NEXT: bnez a1, .LBB106_14
+; RV32ZVE32F-NEXT: .LBB106_5: # %else11
; RV32ZVE32F-NEXT: andi a1, a2, 32
-; RV32ZVE32F-NEXT: bnez a1, .LBB96_15
-; RV32ZVE32F-NEXT: .LBB96_6: # %else14
+; RV32ZVE32F-NEXT: bnez a1, .LBB106_15
+; RV32ZVE32F-NEXT: .LBB106_6: # %else14
; RV32ZVE32F-NEXT: andi a1, a2, 64
-; RV32ZVE32F-NEXT: bnez a1, .LBB96_16
-; RV32ZVE32F-NEXT: .LBB96_7: # %else17
+; RV32ZVE32F-NEXT: bnez a1, .LBB106_16
+; RV32ZVE32F-NEXT: .LBB106_7: # %else17
; RV32ZVE32F-NEXT: andi a1, a2, -128
-; RV32ZVE32F-NEXT: beqz a1, .LBB96_9
-; RV32ZVE32F-NEXT: .LBB96_8: # %cond.load19
+; RV32ZVE32F-NEXT: beqz a1, .LBB106_9
+; RV32ZVE32F-NEXT: .LBB106_8: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa7, 0(a1)
-; RV32ZVE32F-NEXT: .LBB96_9: # %else20
+; RV32ZVE32F-NEXT: .LBB106_9: # %else20
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
@@ -11736,88 +16880,88 @@ define <8 x double> @mgather_baseidx_v8f64(ptr %base, <8 x i64> %idxs, <8 x i1>
; RV32ZVE32F-NEXT: fsd fa6, 48(a0)
; RV32ZVE32F-NEXT: fsd fa7, 56(a0)
; RV32ZVE32F-NEXT: ret
-; RV32ZVE32F-NEXT: .LBB96_10: # %cond.load
+; RV32ZVE32F-NEXT: .LBB106_10: # %cond.load
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, 2
-; RV32ZVE32F-NEXT: beqz a1, .LBB96_2
-; RV32ZVE32F-NEXT: .LBB96_11: # %cond.load1
+; RV32ZVE32F-NEXT: beqz a1, .LBB106_2
+; RV32ZVE32F-NEXT: .LBB106_11: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa1, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, 4
-; RV32ZVE32F-NEXT: beqz a1, .LBB96_3
-; RV32ZVE32F-NEXT: .LBB96_12: # %cond.load4
+; RV32ZVE32F-NEXT: beqz a1, .LBB106_3
+; RV32ZVE32F-NEXT: .LBB106_12: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa2, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, 8
-; RV32ZVE32F-NEXT: beqz a1, .LBB96_4
-; RV32ZVE32F-NEXT: .LBB96_13: # %cond.load7
+; RV32ZVE32F-NEXT: beqz a1, .LBB106_4
+; RV32ZVE32F-NEXT: .LBB106_13: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa3, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, 16
-; RV32ZVE32F-NEXT: beqz a1, .LBB96_5
-; RV32ZVE32F-NEXT: .LBB96_14: # %cond.load10
+; RV32ZVE32F-NEXT: beqz a1, .LBB106_5
+; RV32ZVE32F-NEXT: .LBB106_14: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa4, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, 32
-; RV32ZVE32F-NEXT: beqz a1, .LBB96_6
-; RV32ZVE32F-NEXT: .LBB96_15: # %cond.load13
+; RV32ZVE32F-NEXT: beqz a1, .LBB106_6
+; RV32ZVE32F-NEXT: .LBB106_15: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa5, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, 64
-; RV32ZVE32F-NEXT: beqz a1, .LBB96_7
-; RV32ZVE32F-NEXT: .LBB96_16: # %cond.load16
+; RV32ZVE32F-NEXT: beqz a1, .LBB106_7
+; RV32ZVE32F-NEXT: .LBB106_16: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fld fa6, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a2, -128
-; RV32ZVE32F-NEXT: bnez a1, .LBB96_8
-; RV32ZVE32F-NEXT: j .LBB96_9
+; RV32ZVE32F-NEXT: bnez a1, .LBB106_8
+; RV32ZVE32F-NEXT: j .LBB106_9
;
; RV64ZVE32F-LABEL: mgather_baseidx_v8f64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v0
; RV64ZVE32F-NEXT: andi a4, a3, 1
-; RV64ZVE32F-NEXT: bnez a4, .LBB96_10
+; RV64ZVE32F-NEXT: bnez a4, .LBB106_10
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a4, a3, 2
-; RV64ZVE32F-NEXT: bnez a4, .LBB96_11
-; RV64ZVE32F-NEXT: .LBB96_2: # %else2
+; RV64ZVE32F-NEXT: bnez a4, .LBB106_11
+; RV64ZVE32F-NEXT: .LBB106_2: # %else2
; RV64ZVE32F-NEXT: andi a4, a3, 4
-; RV64ZVE32F-NEXT: bnez a4, .LBB96_12
-; RV64ZVE32F-NEXT: .LBB96_3: # %else5
+; RV64ZVE32F-NEXT: bnez a4, .LBB106_12
+; RV64ZVE32F-NEXT: .LBB106_3: # %else5
; RV64ZVE32F-NEXT: andi a4, a3, 8
-; RV64ZVE32F-NEXT: bnez a4, .LBB96_13
-; RV64ZVE32F-NEXT: .LBB96_4: # %else8
+; RV64ZVE32F-NEXT: bnez a4, .LBB106_13
+; RV64ZVE32F-NEXT: .LBB106_4: # %else8
; RV64ZVE32F-NEXT: andi a4, a3, 16
-; RV64ZVE32F-NEXT: bnez a4, .LBB96_14
-; RV64ZVE32F-NEXT: .LBB96_5: # %else11
+; RV64ZVE32F-NEXT: bnez a4, .LBB106_14
+; RV64ZVE32F-NEXT: .LBB106_5: # %else11
; RV64ZVE32F-NEXT: andi a4, a3, 32
-; RV64ZVE32F-NEXT: bnez a4, .LBB96_15
-; RV64ZVE32F-NEXT: .LBB96_6: # %else14
+; RV64ZVE32F-NEXT: bnez a4, .LBB106_15
+; RV64ZVE32F-NEXT: .LBB106_6: # %else14
; RV64ZVE32F-NEXT: andi a4, a3, 64
-; RV64ZVE32F-NEXT: bnez a4, .LBB96_16
-; RV64ZVE32F-NEXT: .LBB96_7: # %else17
+; RV64ZVE32F-NEXT: bnez a4, .LBB106_16
+; RV64ZVE32F-NEXT: .LBB106_7: # %else17
; RV64ZVE32F-NEXT: andi a3, a3, -128
-; RV64ZVE32F-NEXT: beqz a3, .LBB96_9
-; RV64ZVE32F-NEXT: .LBB96_8: # %cond.load19
+; RV64ZVE32F-NEXT: beqz a3, .LBB106_9
+; RV64ZVE32F-NEXT: .LBB106_8: # %cond.load19
; RV64ZVE32F-NEXT: ld a2, 56(a2)
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: fld fa7, 0(a1)
-; RV64ZVE32F-NEXT: .LBB96_9: # %else20
+; RV64ZVE32F-NEXT: .LBB106_9: # %else20
; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
@@ -11827,56 +16971,56 @@ define <8 x double> @mgather_baseidx_v8f64(ptr %base, <8 x i64> %idxs, <8 x i1>
; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB96_10: # %cond.load
+; RV64ZVE32F-NEXT: .LBB106_10: # %cond.load
; RV64ZVE32F-NEXT: ld a4, 0(a2)
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: fld fa0, 0(a4)
; RV64ZVE32F-NEXT: andi a4, a3, 2
-; RV64ZVE32F-NEXT: beqz a4, .LBB96_2
-; RV64ZVE32F-NEXT: .LBB96_11: # %cond.load1
+; RV64ZVE32F-NEXT: beqz a4, .LBB106_2
+; RV64ZVE32F-NEXT: .LBB106_11: # %cond.load1
; RV64ZVE32F-NEXT: ld a4, 8(a2)
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: fld fa1, 0(a4)
; RV64ZVE32F-NEXT: andi a4, a3, 4
-; RV64ZVE32F-NEXT: beqz a4, .LBB96_3
-; RV64ZVE32F-NEXT: .LBB96_12: # %cond.load4
+; RV64ZVE32F-NEXT: beqz a4, .LBB106_3
+; RV64ZVE32F-NEXT: .LBB106_12: # %cond.load4
; RV64ZVE32F-NEXT: ld a4, 16(a2)
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: fld fa2, 0(a4)
; RV64ZVE32F-NEXT: andi a4, a3, 8
-; RV64ZVE32F-NEXT: beqz a4, .LBB96_4
-; RV64ZVE32F-NEXT: .LBB96_13: # %cond.load7
+; RV64ZVE32F-NEXT: beqz a4, .LBB106_4
+; RV64ZVE32F-NEXT: .LBB106_13: # %cond.load7
; RV64ZVE32F-NEXT: ld a4, 24(a2)
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: fld fa3, 0(a4)
; RV64ZVE32F-NEXT: andi a4, a3, 16
-; RV64ZVE32F-NEXT: beqz a4, .LBB96_5
-; RV64ZVE32F-NEXT: .LBB96_14: # %cond.load10
+; RV64ZVE32F-NEXT: beqz a4, .LBB106_5
+; RV64ZVE32F-NEXT: .LBB106_14: # %cond.load10
; RV64ZVE32F-NEXT: ld a4, 32(a2)
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: fld fa4, 0(a4)
; RV64ZVE32F-NEXT: andi a4, a3, 32
-; RV64ZVE32F-NEXT: beqz a4, .LBB96_6
-; RV64ZVE32F-NEXT: .LBB96_15: # %cond.load13
+; RV64ZVE32F-NEXT: beqz a4, .LBB106_6
+; RV64ZVE32F-NEXT: .LBB106_15: # %cond.load13
; RV64ZVE32F-NEXT: ld a4, 40(a2)
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: fld fa5, 0(a4)
; RV64ZVE32F-NEXT: andi a4, a3, 64
-; RV64ZVE32F-NEXT: beqz a4, .LBB96_7
-; RV64ZVE32F-NEXT: .LBB96_16: # %cond.load16
+; RV64ZVE32F-NEXT: beqz a4, .LBB106_7
+; RV64ZVE32F-NEXT: .LBB106_16: # %cond.load16
; RV64ZVE32F-NEXT: ld a4, 48(a2)
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: fld fa6, 0(a4)
; RV64ZVE32F-NEXT: andi a3, a3, -128
-; RV64ZVE32F-NEXT: bnez a3, .LBB96_8
-; RV64ZVE32F-NEXT: j .LBB96_9
+; RV64ZVE32F-NEXT: bnez a3, .LBB106_8
+; RV64ZVE32F-NEXT: j .LBB106_9
%ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %idxs
%v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
ret <8 x double> %v
@@ -11908,16 +17052,16 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
-; RV64ZVE32F-NEXT: beqz a2, .LBB97_2
+; RV64ZVE32F-NEXT: beqz a2, .LBB107_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
-; RV64ZVE32F-NEXT: .LBB97_2: # %else
+; RV64ZVE32F-NEXT: .LBB107_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB97_4
+; RV64ZVE32F-NEXT: beqz a2, .LBB107_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
@@ -11927,31 +17071,31 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1
-; RV64ZVE32F-NEXT: .LBB97_4: # %else2
+; RV64ZVE32F-NEXT: .LBB107_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB97_25
+; RV64ZVE32F-NEXT: bnez a2, .LBB107_25
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB97_26
-; RV64ZVE32F-NEXT: .LBB97_6: # %else8
+; RV64ZVE32F-NEXT: bnez a2, .LBB107_26
+; RV64ZVE32F-NEXT: .LBB107_6: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB97_8
-; RV64ZVE32F-NEXT: .LBB97_7: # %cond.load10
+; RV64ZVE32F-NEXT: beqz a2, .LBB107_8
+; RV64ZVE32F-NEXT: .LBB107_7: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 5, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v11, a2
; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 4
-; RV64ZVE32F-NEXT: .LBB97_8: # %else11
+; RV64ZVE32F-NEXT: .LBB107_8: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB97_10
+; RV64ZVE32F-NEXT: beqz a2, .LBB107_10
; RV64ZVE32F-NEXT: # %bb.9: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
@@ -11961,21 +17105,21 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m
; RV64ZVE32F-NEXT: vmv.s.x v11, a2
; RV64ZVE32F-NEXT: vsetivli zero, 6, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 5
-; RV64ZVE32F-NEXT: .LBB97_10: # %else14
+; RV64ZVE32F-NEXT: .LBB107_10: # %else14
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB97_27
+; RV64ZVE32F-NEXT: bnez a2, .LBB107_27
; RV64ZVE32F-NEXT: # %bb.11: # %else17
; RV64ZVE32F-NEXT: andi a2, a1, 128
-; RV64ZVE32F-NEXT: bnez a2, .LBB97_28
-; RV64ZVE32F-NEXT: .LBB97_12: # %else20
+; RV64ZVE32F-NEXT: bnez a2, .LBB107_28
+; RV64ZVE32F-NEXT: .LBB107_12: # %else20
; RV64ZVE32F-NEXT: andi a2, a1, 256
-; RV64ZVE32F-NEXT: bnez a2, .LBB97_29
-; RV64ZVE32F-NEXT: .LBB97_13: # %else23
+; RV64ZVE32F-NEXT: bnez a2, .LBB107_29
+; RV64ZVE32F-NEXT: .LBB107_13: # %else23
; RV64ZVE32F-NEXT: andi a2, a1, 512
-; RV64ZVE32F-NEXT: beqz a2, .LBB97_15
-; RV64ZVE32F-NEXT: .LBB97_14: # %cond.load25
+; RV64ZVE32F-NEXT: beqz a2, .LBB107_15
+; RV64ZVE32F-NEXT: .LBB107_14: # %cond.load25
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -11984,23 +17128,23 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vsetivli zero, 10, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 9
-; RV64ZVE32F-NEXT: .LBB97_15: # %else26
+; RV64ZVE32F-NEXT: .LBB107_15: # %else26
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 1024
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB97_30
+; RV64ZVE32F-NEXT: bnez a2, .LBB107_30
; RV64ZVE32F-NEXT: # %bb.16: # %else29
; RV64ZVE32F-NEXT: slli a2, a1, 52
-; RV64ZVE32F-NEXT: bltz a2, .LBB97_31
-; RV64ZVE32F-NEXT: .LBB97_17: # %else32
+; RV64ZVE32F-NEXT: bltz a2, .LBB107_31
+; RV64ZVE32F-NEXT: .LBB107_17: # %else32
; RV64ZVE32F-NEXT: slli a2, a1, 51
-; RV64ZVE32F-NEXT: bltz a2, .LBB97_32
-; RV64ZVE32F-NEXT: .LBB97_18: # %else35
+; RV64ZVE32F-NEXT: bltz a2, .LBB107_32
+; RV64ZVE32F-NEXT: .LBB107_18: # %else35
; RV64ZVE32F-NEXT: slli a2, a1, 50
-; RV64ZVE32F-NEXT: bgez a2, .LBB97_20
-; RV64ZVE32F-NEXT: .LBB97_19: # %cond.load37
+; RV64ZVE32F-NEXT: bgez a2, .LBB107_20
+; RV64ZVE32F-NEXT: .LBB107_19: # %cond.load37
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -12009,11 +17153,11 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 14, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 13
-; RV64ZVE32F-NEXT: .LBB97_20: # %else38
+; RV64ZVE32F-NEXT: .LBB107_20: # %else38
; RV64ZVE32F-NEXT: slli a2, a1, 49
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
-; RV64ZVE32F-NEXT: bgez a2, .LBB97_22
+; RV64ZVE32F-NEXT: bgez a2, .LBB107_22
; RV64ZVE32F-NEXT: # %bb.21: # %cond.load40
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -12021,10 +17165,10 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vsetivli zero, 15, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 14
-; RV64ZVE32F-NEXT: .LBB97_22: # %else41
+; RV64ZVE32F-NEXT: .LBB107_22: # %else41
; RV64ZVE32F-NEXT: lui a2, 1048568
; RV64ZVE32F-NEXT: and a1, a1, a2
-; RV64ZVE32F-NEXT: beqz a1, .LBB97_24
+; RV64ZVE32F-NEXT: beqz a1, .LBB107_24
; RV64ZVE32F-NEXT: # %bb.23: # %cond.load43
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -12034,10 +17178,10 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 15
-; RV64ZVE32F-NEXT: .LBB97_24: # %else44
+; RV64ZVE32F-NEXT: .LBB107_24: # %else44
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB97_25: # %cond.load4
+; RV64ZVE32F-NEXT: .LBB107_25: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
@@ -12045,8 +17189,8 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m
; RV64ZVE32F-NEXT: vsetivli zero, 3, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v12, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB97_6
-; RV64ZVE32F-NEXT: .LBB97_26: # %cond.load7
+; RV64ZVE32F-NEXT: beqz a2, .LBB107_6
+; RV64ZVE32F-NEXT: .LBB107_26: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
@@ -12056,9 +17200,9 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB97_7
-; RV64ZVE32F-NEXT: j .LBB97_8
-; RV64ZVE32F-NEXT: .LBB97_27: # %cond.load16
+; RV64ZVE32F-NEXT: bnez a2, .LBB107_7
+; RV64ZVE32F-NEXT: j .LBB107_8
+; RV64ZVE32F-NEXT: .LBB107_27: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
@@ -12066,8 +17210,8 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m
; RV64ZVE32F-NEXT: vsetivli zero, 7, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 6
; RV64ZVE32F-NEXT: andi a2, a1, 128
-; RV64ZVE32F-NEXT: beqz a2, .LBB97_12
-; RV64ZVE32F-NEXT: .LBB97_28: # %cond.load19
+; RV64ZVE32F-NEXT: beqz a2, .LBB107_12
+; RV64ZVE32F-NEXT: .LBB107_28: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -12077,8 +17221,8 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 7
; RV64ZVE32F-NEXT: andi a2, a1, 256
-; RV64ZVE32F-NEXT: beqz a2, .LBB97_13
-; RV64ZVE32F-NEXT: .LBB97_29: # %cond.load22
+; RV64ZVE32F-NEXT: beqz a2, .LBB107_13
+; RV64ZVE32F-NEXT: .LBB107_29: # %cond.load22
; RV64ZVE32F-NEXT: vsetivli zero, 9, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -12086,9 +17230,9 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 8
; RV64ZVE32F-NEXT: andi a2, a1, 512
-; RV64ZVE32F-NEXT: bnez a2, .LBB97_14
-; RV64ZVE32F-NEXT: j .LBB97_15
-; RV64ZVE32F-NEXT: .LBB97_30: # %cond.load28
+; RV64ZVE32F-NEXT: bnez a2, .LBB107_14
+; RV64ZVE32F-NEXT: j .LBB107_15
+; RV64ZVE32F-NEXT: .LBB107_30: # %cond.load28
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
@@ -12096,8 +17240,8 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m
; RV64ZVE32F-NEXT: vsetivli zero, 11, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 10
; RV64ZVE32F-NEXT: slli a2, a1, 52
-; RV64ZVE32F-NEXT: bgez a2, .LBB97_17
-; RV64ZVE32F-NEXT: .LBB97_31: # %cond.load31
+; RV64ZVE32F-NEXT: bgez a2, .LBB107_17
+; RV64ZVE32F-NEXT: .LBB107_31: # %cond.load31
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -12107,8 +17251,8 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m
; RV64ZVE32F-NEXT: vsetivli zero, 12, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 11
; RV64ZVE32F-NEXT: slli a2, a1, 51
-; RV64ZVE32F-NEXT: bgez a2, .LBB97_18
-; RV64ZVE32F-NEXT: .LBB97_32: # %cond.load34
+; RV64ZVE32F-NEXT: bgez a2, .LBB107_18
+; RV64ZVE32F-NEXT: .LBB107_32: # %cond.load34
; RV64ZVE32F-NEXT: vsetivli zero, 13, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -12116,8 +17260,8 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 12
; RV64ZVE32F-NEXT: slli a2, a1, 50
-; RV64ZVE32F-NEXT: bltz a2, .LBB97_19
-; RV64ZVE32F-NEXT: j .LBB97_20
+; RV64ZVE32F-NEXT: bltz a2, .LBB107_19
+; RV64ZVE32F-NEXT: j .LBB107_20
%ptrs = getelementptr inbounds i8, ptr %base, <16 x i8> %idxs
%v = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %ptrs, i32 2, <16 x i1> %m, <16 x i8> %passthru)
ret <16 x i8> %v
@@ -12163,16 +17307,16 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
-; RV64ZVE32F-NEXT: beqz a2, .LBB98_2
+; RV64ZVE32F-NEXT: beqz a2, .LBB108_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf4, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
-; RV64ZVE32F-NEXT: .LBB98_2: # %else
+; RV64ZVE32F-NEXT: .LBB108_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB98_4
+; RV64ZVE32F-NEXT: beqz a2, .LBB108_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1
@@ -12182,31 +17326,31 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
-; RV64ZVE32F-NEXT: .LBB98_4: # %else2
+; RV64ZVE32F-NEXT: .LBB108_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v13, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB98_49
+; RV64ZVE32F-NEXT: bnez a2, .LBB108_49
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB98_50
-; RV64ZVE32F-NEXT: .LBB98_6: # %else8
+; RV64ZVE32F-NEXT: bnez a2, .LBB108_50
+; RV64ZVE32F-NEXT: .LBB108_6: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB98_8
-; RV64ZVE32F-NEXT: .LBB98_7: # %cond.load10
+; RV64ZVE32F-NEXT: beqz a2, .LBB108_8
+; RV64ZVE32F-NEXT: .LBB108_7: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 5, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
-; RV64ZVE32F-NEXT: .LBB98_8: # %else11
+; RV64ZVE32F-NEXT: .LBB108_8: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB98_10
+; RV64ZVE32F-NEXT: beqz a2, .LBB108_10
; RV64ZVE32F-NEXT: # %bb.9: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v14, v13, 1
@@ -12216,21 +17360,21 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vmv.s.x v14, a2
; RV64ZVE32F-NEXT: vsetivli zero, 6, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 5
-; RV64ZVE32F-NEXT: .LBB98_10: # %else14
+; RV64ZVE32F-NEXT: .LBB108_10: # %else14
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB98_51
+; RV64ZVE32F-NEXT: bnez a2, .LBB108_51
; RV64ZVE32F-NEXT: # %bb.11: # %else17
; RV64ZVE32F-NEXT: andi a2, a1, 128
-; RV64ZVE32F-NEXT: bnez a2, .LBB98_52
-; RV64ZVE32F-NEXT: .LBB98_12: # %else20
+; RV64ZVE32F-NEXT: bnez a2, .LBB108_52
+; RV64ZVE32F-NEXT: .LBB108_12: # %else20
; RV64ZVE32F-NEXT: andi a2, a1, 256
-; RV64ZVE32F-NEXT: bnez a2, .LBB98_53
-; RV64ZVE32F-NEXT: .LBB98_13: # %else23
+; RV64ZVE32F-NEXT: bnez a2, .LBB108_53
+; RV64ZVE32F-NEXT: .LBB108_13: # %else23
; RV64ZVE32F-NEXT: andi a2, a1, 512
-; RV64ZVE32F-NEXT: beqz a2, .LBB98_15
-; RV64ZVE32F-NEXT: .LBB98_14: # %cond.load25
+; RV64ZVE32F-NEXT: beqz a2, .LBB108_15
+; RV64ZVE32F-NEXT: .LBB108_14: # %cond.load25
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
@@ -12239,13 +17383,13 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vmv.s.x v13, a2
; RV64ZVE32F-NEXT: vsetivli zero, 10, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v13, 9
-; RV64ZVE32F-NEXT: .LBB98_15: # %else26
+; RV64ZVE32F-NEXT: .LBB108_15: # %else26
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 4
; RV64ZVE32F-NEXT: andi a2, a1, 1024
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB98_17
+; RV64ZVE32F-NEXT: beqz a2, .LBB108_17
; RV64ZVE32F-NEXT: # %bb.16: # %cond.load28
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -12253,9 +17397,9 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vmv.s.x v14, a2
; RV64ZVE32F-NEXT: vsetivli zero, 11, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 10
-; RV64ZVE32F-NEXT: .LBB98_17: # %else29
+; RV64ZVE32F-NEXT: .LBB108_17: # %else29
; RV64ZVE32F-NEXT: slli a2, a1, 52
-; RV64ZVE32F-NEXT: bgez a2, .LBB98_19
+; RV64ZVE32F-NEXT: bgez a2, .LBB108_19
; RV64ZVE32F-NEXT: # %bb.18: # %cond.load31
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
@@ -12265,11 +17409,11 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 12, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 11
-; RV64ZVE32F-NEXT: .LBB98_19: # %else32
+; RV64ZVE32F-NEXT: .LBB108_19: # %else32
; RV64ZVE32F-NEXT: slli a2, a1, 51
; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 16
-; RV64ZVE32F-NEXT: bgez a2, .LBB98_21
+; RV64ZVE32F-NEXT: bgez a2, .LBB108_21
; RV64ZVE32F-NEXT: # %bb.20: # %cond.load34
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -12277,9 +17421,9 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vsetivli zero, 13, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 12
-; RV64ZVE32F-NEXT: .LBB98_21: # %else35
+; RV64ZVE32F-NEXT: .LBB108_21: # %else35
; RV64ZVE32F-NEXT: slli a2, a1, 50
-; RV64ZVE32F-NEXT: bgez a2, .LBB98_23
+; RV64ZVE32F-NEXT: bgez a2, .LBB108_23
; RV64ZVE32F-NEXT: # %bb.22: # %cond.load37
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v13, 1
@@ -12289,21 +17433,21 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vsetivli zero, 14, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 13
-; RV64ZVE32F-NEXT: .LBB98_23: # %else38
+; RV64ZVE32F-NEXT: .LBB108_23: # %else38
; RV64ZVE32F-NEXT: slli a2, a1, 49
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v13, 2
-; RV64ZVE32F-NEXT: bltz a2, .LBB98_54
+; RV64ZVE32F-NEXT: bltz a2, .LBB108_54
; RV64ZVE32F-NEXT: # %bb.24: # %else41
; RV64ZVE32F-NEXT: slli a2, a1, 48
-; RV64ZVE32F-NEXT: bltz a2, .LBB98_55
-; RV64ZVE32F-NEXT: .LBB98_25: # %else44
+; RV64ZVE32F-NEXT: bltz a2, .LBB108_55
+; RV64ZVE32F-NEXT: .LBB108_25: # %else44
; RV64ZVE32F-NEXT: slli a2, a1, 47
-; RV64ZVE32F-NEXT: bltz a2, .LBB98_56
-; RV64ZVE32F-NEXT: .LBB98_26: # %else47
+; RV64ZVE32F-NEXT: bltz a2, .LBB108_56
+; RV64ZVE32F-NEXT: .LBB108_26: # %else47
; RV64ZVE32F-NEXT: slli a2, a1, 46
-; RV64ZVE32F-NEXT: bgez a2, .LBB98_28
-; RV64ZVE32F-NEXT: .LBB98_27: # %cond.load49
+; RV64ZVE32F-NEXT: bgez a2, .LBB108_28
+; RV64ZVE32F-NEXT: .LBB108_27: # %cond.load49
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -12312,31 +17456,31 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 18, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 17
-; RV64ZVE32F-NEXT: .LBB98_28: # %else50
+; RV64ZVE32F-NEXT: .LBB108_28: # %else50
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: slli a2, a1, 45
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
-; RV64ZVE32F-NEXT: bltz a2, .LBB98_57
+; RV64ZVE32F-NEXT: bltz a2, .LBB108_57
; RV64ZVE32F-NEXT: # %bb.29: # %else53
; RV64ZVE32F-NEXT: slli a2, a1, 44
-; RV64ZVE32F-NEXT: bltz a2, .LBB98_58
-; RV64ZVE32F-NEXT: .LBB98_30: # %else56
+; RV64ZVE32F-NEXT: bltz a2, .LBB108_58
+; RV64ZVE32F-NEXT: .LBB108_30: # %else56
; RV64ZVE32F-NEXT: slli a2, a1, 43
-; RV64ZVE32F-NEXT: bgez a2, .LBB98_32
-; RV64ZVE32F-NEXT: .LBB98_31: # %cond.load58
+; RV64ZVE32F-NEXT: bgez a2, .LBB108_32
+; RV64ZVE32F-NEXT: .LBB108_31: # %cond.load58
; RV64ZVE32F-NEXT: vsetivli zero, 21, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 20
-; RV64ZVE32F-NEXT: .LBB98_32: # %else59
+; RV64ZVE32F-NEXT: .LBB108_32: # %else59
; RV64ZVE32F-NEXT: slli a2, a1, 42
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 8
-; RV64ZVE32F-NEXT: bgez a2, .LBB98_34
+; RV64ZVE32F-NEXT: bgez a2, .LBB108_34
; RV64ZVE32F-NEXT: # %bb.33: # %cond.load61
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v9, 1
@@ -12346,21 +17490,21 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 22, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 21
-; RV64ZVE32F-NEXT: .LBB98_34: # %else62
+; RV64ZVE32F-NEXT: .LBB108_34: # %else62
; RV64ZVE32F-NEXT: slli a2, a1, 41
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
-; RV64ZVE32F-NEXT: bltz a2, .LBB98_59
+; RV64ZVE32F-NEXT: bltz a2, .LBB108_59
; RV64ZVE32F-NEXT: # %bb.35: # %else65
; RV64ZVE32F-NEXT: slli a2, a1, 40
-; RV64ZVE32F-NEXT: bltz a2, .LBB98_60
-; RV64ZVE32F-NEXT: .LBB98_36: # %else68
+; RV64ZVE32F-NEXT: bltz a2, .LBB108_60
+; RV64ZVE32F-NEXT: .LBB108_36: # %else68
; RV64ZVE32F-NEXT: slli a2, a1, 39
-; RV64ZVE32F-NEXT: bltz a2, .LBB98_61
-; RV64ZVE32F-NEXT: .LBB98_37: # %else71
+; RV64ZVE32F-NEXT: bltz a2, .LBB108_61
+; RV64ZVE32F-NEXT: .LBB108_37: # %else71
; RV64ZVE32F-NEXT: slli a2, a1, 38
-; RV64ZVE32F-NEXT: bgez a2, .LBB98_39
-; RV64ZVE32F-NEXT: .LBB98_38: # %cond.load73
+; RV64ZVE32F-NEXT: bgez a2, .LBB108_39
+; RV64ZVE32F-NEXT: .LBB108_38: # %cond.load73
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -12369,23 +17513,23 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 26, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 25
-; RV64ZVE32F-NEXT: .LBB98_39: # %else74
+; RV64ZVE32F-NEXT: .LBB108_39: # %else74
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: slli a2, a1, 37
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bltz a2, .LBB98_62
+; RV64ZVE32F-NEXT: bltz a2, .LBB108_62
; RV64ZVE32F-NEXT: # %bb.40: # %else77
; RV64ZVE32F-NEXT: slli a2, a1, 36
-; RV64ZVE32F-NEXT: bltz a2, .LBB98_63
-; RV64ZVE32F-NEXT: .LBB98_41: # %else80
+; RV64ZVE32F-NEXT: bltz a2, .LBB108_63
+; RV64ZVE32F-NEXT: .LBB108_41: # %else80
; RV64ZVE32F-NEXT: slli a2, a1, 35
-; RV64ZVE32F-NEXT: bltz a2, .LBB98_64
-; RV64ZVE32F-NEXT: .LBB98_42: # %else83
+; RV64ZVE32F-NEXT: bltz a2, .LBB108_64
+; RV64ZVE32F-NEXT: .LBB108_42: # %else83
; RV64ZVE32F-NEXT: slli a2, a1, 34
-; RV64ZVE32F-NEXT: bgez a2, .LBB98_44
-; RV64ZVE32F-NEXT: .LBB98_43: # %cond.load85
+; RV64ZVE32F-NEXT: bgez a2, .LBB108_44
+; RV64ZVE32F-NEXT: .LBB108_43: # %cond.load85
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -12394,11 +17538,11 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 30, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 29
-; RV64ZVE32F-NEXT: .LBB98_44: # %else86
+; RV64ZVE32F-NEXT: .LBB108_44: # %else86
; RV64ZVE32F-NEXT: slli a2, a1, 33
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
-; RV64ZVE32F-NEXT: bgez a2, .LBB98_46
+; RV64ZVE32F-NEXT: bgez a2, .LBB108_46
; RV64ZVE32F-NEXT: # %bb.45: # %cond.load88
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -12406,10 +17550,10 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 31, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 30
-; RV64ZVE32F-NEXT: .LBB98_46: # %else89
+; RV64ZVE32F-NEXT: .LBB108_46: # %else89
; RV64ZVE32F-NEXT: lui a2, 524288
; RV64ZVE32F-NEXT: and a1, a1, a2
-; RV64ZVE32F-NEXT: beqz a1, .LBB98_48
+; RV64ZVE32F-NEXT: beqz a1, .LBB108_48
; RV64ZVE32F-NEXT: # %bb.47: # %cond.load91
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
@@ -12420,10 +17564,10 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 31
-; RV64ZVE32F-NEXT: .LBB98_48: # %else92
+; RV64ZVE32F-NEXT: .LBB108_48: # %else92
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB98_49: # %cond.load4
+; RV64ZVE32F-NEXT: .LBB108_49: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
@@ -12431,8 +17575,8 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vsetivli zero, 3, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB98_6
-; RV64ZVE32F-NEXT: .LBB98_50: # %cond.load7
+; RV64ZVE32F-NEXT: beqz a2, .LBB108_6
+; RV64ZVE32F-NEXT: .LBB108_50: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
@@ -12442,9 +17586,9 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB98_7
-; RV64ZVE32F-NEXT: j .LBB98_8
-; RV64ZVE32F-NEXT: .LBB98_51: # %cond.load16
+; RV64ZVE32F-NEXT: bnez a2, .LBB108_7
+; RV64ZVE32F-NEXT: j .LBB108_8
+; RV64ZVE32F-NEXT: .LBB108_51: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
@@ -12452,8 +17596,8 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vsetivli zero, 7, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 6
; RV64ZVE32F-NEXT: andi a2, a1, 128
-; RV64ZVE32F-NEXT: beqz a2, .LBB98_12
-; RV64ZVE32F-NEXT: .LBB98_52: # %cond.load19
+; RV64ZVE32F-NEXT: beqz a2, .LBB108_12
+; RV64ZVE32F-NEXT: .LBB108_52: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
@@ -12463,8 +17607,8 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v13, 7
; RV64ZVE32F-NEXT: andi a2, a1, 256
-; RV64ZVE32F-NEXT: beqz a2, .LBB98_13
-; RV64ZVE32F-NEXT: .LBB98_53: # %cond.load22
+; RV64ZVE32F-NEXT: beqz a2, .LBB108_13
+; RV64ZVE32F-NEXT: .LBB108_53: # %cond.load22
; RV64ZVE32F-NEXT: vsetivli zero, 9, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -12472,9 +17616,9 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vmv.s.x v13, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v13, 8
; RV64ZVE32F-NEXT: andi a2, a1, 512
-; RV64ZVE32F-NEXT: bnez a2, .LBB98_14
-; RV64ZVE32F-NEXT: j .LBB98_15
-; RV64ZVE32F-NEXT: .LBB98_54: # %cond.load40
+; RV64ZVE32F-NEXT: bnez a2, .LBB108_14
+; RV64ZVE32F-NEXT: j .LBB108_15
+; RV64ZVE32F-NEXT: .LBB108_54: # %cond.load40
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
@@ -12482,8 +17626,8 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vsetivli zero, 15, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 14
; RV64ZVE32F-NEXT: slli a2, a1, 48
-; RV64ZVE32F-NEXT: bgez a2, .LBB98_25
-; RV64ZVE32F-NEXT: .LBB98_55: # %cond.load43
+; RV64ZVE32F-NEXT: bgez a2, .LBB108_25
+; RV64ZVE32F-NEXT: .LBB108_55: # %cond.load43
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -12493,8 +17637,8 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 15
; RV64ZVE32F-NEXT: slli a2, a1, 47
-; RV64ZVE32F-NEXT: bgez a2, .LBB98_26
-; RV64ZVE32F-NEXT: .LBB98_56: # %cond.load46
+; RV64ZVE32F-NEXT: bgez a2, .LBB108_26
+; RV64ZVE32F-NEXT: .LBB108_56: # %cond.load46
; RV64ZVE32F-NEXT: vsetivli zero, 17, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -12502,9 +17646,9 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 16
; RV64ZVE32F-NEXT: slli a2, a1, 46
-; RV64ZVE32F-NEXT: bltz a2, .LBB98_27
-; RV64ZVE32F-NEXT: j .LBB98_28
-; RV64ZVE32F-NEXT: .LBB98_57: # %cond.load52
+; RV64ZVE32F-NEXT: bltz a2, .LBB108_27
+; RV64ZVE32F-NEXT: j .LBB108_28
+; RV64ZVE32F-NEXT: .LBB108_57: # %cond.load52
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
@@ -12512,8 +17656,8 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vsetivli zero, 19, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 18
; RV64ZVE32F-NEXT: slli a2, a1, 44
-; RV64ZVE32F-NEXT: bgez a2, .LBB98_30
-; RV64ZVE32F-NEXT: .LBB98_58: # %cond.load55
+; RV64ZVE32F-NEXT: bgez a2, .LBB108_30
+; RV64ZVE32F-NEXT: .LBB108_58: # %cond.load55
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
@@ -12523,9 +17667,9 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vsetivli zero, 20, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 19
; RV64ZVE32F-NEXT: slli a2, a1, 43
-; RV64ZVE32F-NEXT: bltz a2, .LBB98_31
-; RV64ZVE32F-NEXT: j .LBB98_32
-; RV64ZVE32F-NEXT: .LBB98_59: # %cond.load64
+; RV64ZVE32F-NEXT: bltz a2, .LBB108_31
+; RV64ZVE32F-NEXT: j .LBB108_32
+; RV64ZVE32F-NEXT: .LBB108_59: # %cond.load64
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
@@ -12533,8 +17677,8 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vsetivli zero, 23, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 22
; RV64ZVE32F-NEXT: slli a2, a1, 40
-; RV64ZVE32F-NEXT: bgez a2, .LBB98_36
-; RV64ZVE32F-NEXT: .LBB98_60: # %cond.load67
+; RV64ZVE32F-NEXT: bgez a2, .LBB108_36
+; RV64ZVE32F-NEXT: .LBB108_60: # %cond.load67
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -12544,8 +17688,8 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vsetivli zero, 24, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 23
; RV64ZVE32F-NEXT: slli a2, a1, 39
-; RV64ZVE32F-NEXT: bgez a2, .LBB98_37
-; RV64ZVE32F-NEXT: .LBB98_61: # %cond.load70
+; RV64ZVE32F-NEXT: bgez a2, .LBB108_37
+; RV64ZVE32F-NEXT: .LBB108_61: # %cond.load70
; RV64ZVE32F-NEXT: vsetivli zero, 25, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -12553,9 +17697,9 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 24
; RV64ZVE32F-NEXT: slli a2, a1, 38
-; RV64ZVE32F-NEXT: bltz a2, .LBB98_38
-; RV64ZVE32F-NEXT: j .LBB98_39
-; RV64ZVE32F-NEXT: .LBB98_62: # %cond.load76
+; RV64ZVE32F-NEXT: bltz a2, .LBB108_38
+; RV64ZVE32F-NEXT: j .LBB108_39
+; RV64ZVE32F-NEXT: .LBB108_62: # %cond.load76
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
@@ -12563,8 +17707,8 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vsetivli zero, 27, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 26
; RV64ZVE32F-NEXT: slli a2, a1, 36
-; RV64ZVE32F-NEXT: bgez a2, .LBB98_41
-; RV64ZVE32F-NEXT: .LBB98_63: # %cond.load79
+; RV64ZVE32F-NEXT: bgez a2, .LBB108_41
+; RV64ZVE32F-NEXT: .LBB108_63: # %cond.load79
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -12574,8 +17718,8 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vsetivli zero, 28, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 27
; RV64ZVE32F-NEXT: slli a2, a1, 35
-; RV64ZVE32F-NEXT: bgez a2, .LBB98_42
-; RV64ZVE32F-NEXT: .LBB98_64: # %cond.load82
+; RV64ZVE32F-NEXT: bgez a2, .LBB108_42
+; RV64ZVE32F-NEXT: .LBB108_64: # %cond.load82
; RV64ZVE32F-NEXT: vsetivli zero, 29, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -12583,8 +17727,8 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 28
; RV64ZVE32F-NEXT: slli a2, a1, 34
-; RV64ZVE32F-NEXT: bltz a2, .LBB98_43
-; RV64ZVE32F-NEXT: j .LBB98_44
+; RV64ZVE32F-NEXT: bltz a2, .LBB108_43
+; RV64ZVE32F-NEXT: j .LBB108_44
%ptrs = getelementptr inbounds i8, ptr %base, <32 x i8> %idxs
%v = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> %ptrs, i32 2, <32 x i1> %m, <32 x i8> %passthru)
ret <32 x i8> %v
@@ -13136,8 +18280,8 @@ define <8 x i16> @mgather_gather_2xSEW(ptr %base) {
define <8 x i16> @mgather_gather_2xSEW_unaligned(ptr %base) {
; RV32-LABEL: mgather_gather_2xSEW_unaligned:
; RV32: # %bb.0:
-; RV32-NEXT: lui a1, %hi(.LCPI113_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI113_0)
+; RV32-NEXT: lui a1, %hi(.LCPI123_0)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI123_0)
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vle8.v v9, (a1)
; RV32-NEXT: vluxei8.v v8, (a0), v9
@@ -13145,8 +18289,8 @@ define <8 x i16> @mgather_gather_2xSEW_unaligned(ptr %base) {
;
; RV64V-LABEL: mgather_gather_2xSEW_unaligned:
; RV64V: # %bb.0:
-; RV64V-NEXT: lui a1, %hi(.LCPI113_0)
-; RV64V-NEXT: addi a1, a1, %lo(.LCPI113_0)
+; RV64V-NEXT: lui a1, %hi(.LCPI123_0)
+; RV64V-NEXT: addi a1, a1, %lo(.LCPI123_0)
; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64V-NEXT: vle8.v v9, (a1)
; RV64V-NEXT: vluxei8.v v8, (a0), v9
@@ -13184,8 +18328,8 @@ define <8 x i16> @mgather_gather_2xSEW_unaligned(ptr %base) {
define <8 x i16> @mgather_gather_2xSEW_unaligned2(ptr %base) {
; RV32-LABEL: mgather_gather_2xSEW_unaligned2:
; RV32: # %bb.0:
-; RV32-NEXT: lui a1, %hi(.LCPI114_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI114_0)
+; RV32-NEXT: lui a1, %hi(.LCPI124_0)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI124_0)
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vle8.v v9, (a1)
; RV32-NEXT: vluxei8.v v8, (a0), v9
@@ -13193,8 +18337,8 @@ define <8 x i16> @mgather_gather_2xSEW_unaligned2(ptr %base) {
;
; RV64V-LABEL: mgather_gather_2xSEW_unaligned2:
; RV64V: # %bb.0:
-; RV64V-NEXT: lui a1, %hi(.LCPI114_0)
-; RV64V-NEXT: addi a1, a1, %lo(.LCPI114_0)
+; RV64V-NEXT: lui a1, %hi(.LCPI124_0)
+; RV64V-NEXT: addi a1, a1, %lo(.LCPI124_0)
; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64V-NEXT: vle8.v v9, (a1)
; RV64V-NEXT: vluxei8.v v8, (a0), v9
@@ -13385,8 +18529,8 @@ define <8 x i16> @mgather_shuffle_rotate(ptr %base) {
define <8 x i16> @mgather_shuffle_vrgather(ptr %base) {
; RV32-LABEL: mgather_shuffle_vrgather:
; RV32: # %bb.0:
-; RV32-NEXT: lui a1, %hi(.LCPI119_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI119_0)
+; RV32-NEXT: lui a1, %hi(.LCPI129_0)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI129_0)
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vle16.v v9, (a1)
; RV32-NEXT: vle16.v v10, (a0)
@@ -13395,8 +18539,8 @@ define <8 x i16> @mgather_shuffle_vrgather(ptr %base) {
;
; RV64V-LABEL: mgather_shuffle_vrgather:
; RV64V: # %bb.0:
-; RV64V-NEXT: lui a1, %hi(.LCPI119_0)
-; RV64V-NEXT: addi a1, a1, %lo(.LCPI119_0)
+; RV64V-NEXT: lui a1, %hi(.LCPI129_0)
+; RV64V-NEXT: addi a1, a1, %lo(.LCPI129_0)
; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64V-NEXT: vle16.v v9, (a1)
; RV64V-NEXT: vle16.v v10, (a0)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
index a445c8fe081725..3b642f1678e469 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
@@ -1,12 +1,21 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zvfh,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32V
-; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfh,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
-; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zvfh,+zve32f,+zvl128b -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZVE32F
-; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfh,+zve32f,+zvl128b -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64ZVE32F
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32V,RV32V-ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64V,RV64V-ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32V,RV32V-ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64V,RV64V-ZVFHMIN
+
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zvfh,+zfbfmin,+zvfbfmin,+zve32f,+zvl128b -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZVE32F,RV32ZVE32F-ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfh,+zfbfmin,+zvfbfmin,+zve32f,+zvl128b -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVE32F,RV64ZVE32F-ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+zve32f,+zvl128b -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZVE32F,RV32ZVE32F-ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+zve32f,+zvl128b -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVE32F,RV64ZVE32F-ZVFHMIN
declare void @llvm.masked.scatter.v1i8.v1p0(<1 x i8>, <1 x ptr>, i32, <1 x i1>)
@@ -17,11 +26,11 @@ define void @mscatter_v1i8(<1 x i8> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT: ret
;
-; RV64-LABEL: mscatter_v1i8:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_v1i8:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
+; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t
+; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_v1i8:
; RV32ZVE32F: # %bb.0:
@@ -52,11 +61,11 @@ define void @mscatter_v2i8(<2 x i8> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT: ret
;
-; RV64-LABEL: mscatter_v2i8:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
-; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_v2i8:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
+; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t
+; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_v2i8:
; RV32ZVE32F: # %bb.0:
@@ -97,12 +106,12 @@ define void @mscatter_v2i16_truncstore_v2i8(<2 x i16> %val, <2 x ptr> %ptrs, <2
; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT: ret
;
-; RV64-LABEL: mscatter_v2i16_truncstore_v2i8:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
-; RV64-NEXT: vnsrl.wi v8, v8, 0
-; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_v2i16_truncstore_v2i8:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
+; RV64V-NEXT: vnsrl.wi v8, v8, 0
+; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t
+; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_v2i16_truncstore_v2i8:
; RV32ZVE32F: # %bb.0:
@@ -148,14 +157,14 @@ define void @mscatter_v2i32_truncstore_v2i8(<2 x i32> %val, <2 x ptr> %ptrs, <2
; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT: ret
;
-; RV64-LABEL: mscatter_v2i32_truncstore_v2i8:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; RV64-NEXT: vnsrl.wi v8, v8, 0
-; RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
-; RV64-NEXT: vnsrl.wi v8, v8, 0
-; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_v2i32_truncstore_v2i8:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV64V-NEXT: vnsrl.wi v8, v8, 0
+; RV64V-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
+; RV64V-NEXT: vnsrl.wi v8, v8, 0
+; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t
+; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_v2i32_truncstore_v2i8:
; RV32ZVE32F: # %bb.0:
@@ -207,16 +216,16 @@ define void @mscatter_v2i64_truncstore_v2i8(<2 x i64> %val, <2 x ptr> %ptrs, <2
; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT: ret
;
-; RV64-LABEL: mscatter_v2i64_truncstore_v2i8:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV64-NEXT: vnsrl.wi v8, v8, 0
-; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; RV64-NEXT: vnsrl.wi v8, v8, 0
-; RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
-; RV64-NEXT: vnsrl.wi v8, v8, 0
-; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_v2i64_truncstore_v2i8:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; RV64V-NEXT: vnsrl.wi v8, v8, 0
+; RV64V-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; RV64V-NEXT: vnsrl.wi v8, v8, 0
+; RV64V-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
+; RV64V-NEXT: vnsrl.wi v8, v8, 0
+; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t
+; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i8:
; RV32ZVE32F: # %bb.0:
@@ -267,11 +276,11 @@ define void @mscatter_v4i8(<4 x i8> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT: ret
;
-; RV64-LABEL: mscatter_v4i8:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_v4i8:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; RV64V-NEXT: vsoxei64.v v8, (zero), v10, v0.t
+; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_v4i8:
; RV64ZVE32F: # %bb.0:
@@ -327,11 +336,11 @@ define void @mscatter_truemask_v4i8(<4 x i8> %val, <4 x ptr> %ptrs) {
; RV32-NEXT: vsoxei32.v v8, (zero), v9
; RV32-NEXT: ret
;
-; RV64-LABEL: mscatter_truemask_v4i8:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; RV64-NEXT: vsoxei64.v v8, (zero), v10
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_truemask_v4i8:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; RV64V-NEXT: vsoxei64.v v8, (zero), v10
+; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_truemask_v4i8:
; RV64ZVE32F: # %bb.0:
@@ -369,11 +378,11 @@ define void @mscatter_v8i8(<8 x i8> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT: ret
;
-; RV64-LABEL: mscatter_v8i8:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_v8i8:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64V-NEXT: vsoxei64.v v8, (zero), v12, v0.t
+; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_v8i8:
; RV64ZVE32F: # %bb.0:
@@ -471,13 +480,13 @@ define void @mscatter_baseidx_v8i8(<8 x i8> %val, ptr %base, <8 x i8> %idxs, <8
; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
-; RV64-LABEL: mscatter_baseidx_v8i8:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vsext.vf8 v12, v9
-; RV64-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_baseidx_v8i8:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vsext.vf8 v12, v9
+; RV64V-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
+; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_v8i8:
; RV64ZVE32F: # %bb.0:
@@ -593,11 +602,11 @@ define void @mscatter_v1i16(<1 x i16> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT: ret
;
-; RV64-LABEL: mscatter_v1i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_v1i16:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t
+; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_v1i16:
; RV32ZVE32F: # %bb.0:
@@ -628,11 +637,11 @@ define void @mscatter_v2i16(<2 x i16> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT: ret
;
-; RV64-LABEL: mscatter_v2i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_v2i16:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t
+; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_v2i16:
; RV32ZVE32F: # %bb.0:
@@ -673,12 +682,12 @@ define void @mscatter_v2i32_truncstore_v2i16(<2 x i32> %val, <2 x ptr> %ptrs, <2
; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT: ret
;
-; RV64-LABEL: mscatter_v2i32_truncstore_v2i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; RV64-NEXT: vnsrl.wi v8, v8, 0
-; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_v2i32_truncstore_v2i16:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV64V-NEXT: vnsrl.wi v8, v8, 0
+; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t
+; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_v2i32_truncstore_v2i16:
; RV32ZVE32F: # %bb.0:
@@ -725,14 +734,14 @@ define void @mscatter_v2i64_truncstore_v2i16(<2 x i64> %val, <2 x ptr> %ptrs, <2
; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT: ret
;
-; RV64-LABEL: mscatter_v2i64_truncstore_v2i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV64-NEXT: vnsrl.wi v8, v8, 0
-; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; RV64-NEXT: vnsrl.wi v8, v8, 0
-; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_v2i64_truncstore_v2i16:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; RV64V-NEXT: vnsrl.wi v8, v8, 0
+; RV64V-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; RV64V-NEXT: vnsrl.wi v8, v8, 0
+; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t
+; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i16:
; RV32ZVE32F: # %bb.0:
@@ -785,11 +794,11 @@ define void @mscatter_v4i16(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT: ret
;
-; RV64-LABEL: mscatter_v4i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_v4i16:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV64V-NEXT: vsoxei64.v v8, (zero), v10, v0.t
+; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_v4i16:
; RV64ZVE32F: # %bb.0:
@@ -845,11 +854,11 @@ define void @mscatter_truemask_v4i16(<4 x i16> %val, <4 x ptr> %ptrs) {
; RV32-NEXT: vsoxei32.v v8, (zero), v9
; RV32-NEXT: ret
;
-; RV64-LABEL: mscatter_truemask_v4i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV64-NEXT: vsoxei64.v v8, (zero), v10
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_truemask_v4i16:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV64V-NEXT: vsoxei64.v v8, (zero), v10
+; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_truemask_v4i16:
; RV64ZVE32F: # %bb.0:
@@ -887,11 +896,11 @@ define void @mscatter_v8i16(<8 x i16> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT: ret
;
-; RV64-LABEL: mscatter_v8i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_v8i16:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64V-NEXT: vsoxei64.v v8, (zero), v12, v0.t
+; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_v8i16:
; RV64ZVE32F: # %bb.0:
@@ -990,14 +999,14 @@ define void @mscatter_baseidx_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8> %id
; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
-; RV64-LABEL: mscatter_baseidx_v8i8_v8i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vsext.vf8 v12, v9
-; RV64-NEXT: vadd.vv v12, v12, v12
-; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_baseidx_v8i8_v8i16:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vsext.vf8 v12, v9
+; RV64V-NEXT: vadd.vv v12, v12, v12
+; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
+; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_v8i8_v8i16:
; RV64ZVE32F: # %bb.0:
@@ -1123,14 +1132,14 @@ define void @mscatter_baseidx_sext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8
; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
-; RV64-LABEL: mscatter_baseidx_sext_v8i8_v8i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vsext.vf8 v12, v9
-; RV64-NEXT: vadd.vv v12, v12, v12
-; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_baseidx_sext_v8i8_v8i16:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vsext.vf8 v12, v9
+; RV64V-NEXT: vadd.vv v12, v12, v12
+; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
+; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8i16:
; RV64ZVE32F: # %bb.0:
@@ -1256,13 +1265,13 @@ define void @mscatter_baseidx_zext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8
; RV32-NEXT: vsoxei16.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
-; RV64-LABEL: mscatter_baseidx_zext_v8i8_v8i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-NEXT: vwaddu.vv v10, v9, v9
-; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV64-NEXT: vsoxei16.v v8, (a0), v10, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_baseidx_zext_v8i8_v8i16:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64V-NEXT: vwaddu.vv v10, v9, v9
+; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV64V-NEXT: vsoxei16.v v8, (a0), v10, v0.t
+; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8i16:
; RV64ZVE32F: # %bb.0:
@@ -1395,14 +1404,14 @@ define void @mscatter_baseidx_v8i16(<8 x i16> %val, ptr %base, <8 x i16> %idxs,
; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
-; RV64-LABEL: mscatter_baseidx_v8i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vsext.vf4 v12, v9
-; RV64-NEXT: vadd.vv v12, v12, v12
-; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_baseidx_v8i16:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vsext.vf4 v12, v9
+; RV64V-NEXT: vadd.vv v12, v12, v12
+; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
+; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_v8i16:
; RV64ZVE32F: # %bb.0:
@@ -1526,11 +1535,11 @@ define void @mscatter_v1i32(<1 x i32> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT: ret
;
-; RV64-LABEL: mscatter_v1i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_v1i32:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t
+; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_v1i32:
; RV32ZVE32F: # %bb.0:
@@ -1561,11 +1570,11 @@ define void @mscatter_v2i32(<2 x i32> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT: ret
;
-; RV64-LABEL: mscatter_v2i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_v2i32:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t
+; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_v2i32:
; RV32ZVE32F: # %bb.0:
@@ -1606,12 +1615,12 @@ define void @mscatter_v2i64_truncstore_v2i32(<2 x i64> %val, <2 x ptr> %ptrs, <2
; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT: ret
;
-; RV64-LABEL: mscatter_v2i64_truncstore_v2i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV64-NEXT: vnsrl.wi v8, v8, 0
-; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_v2i64_truncstore_v2i32:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; RV64V-NEXT: vnsrl.wi v8, v8, 0
+; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t
+; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i32:
; RV32ZVE32F: # %bb.0:
@@ -1662,11 +1671,11 @@ define void @mscatter_v4i32(<4 x i32> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT: ret
;
-; RV64-LABEL: mscatter_v4i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_v4i32:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64V-NEXT: vsoxei64.v v8, (zero), v10, v0.t
+; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_v4i32:
; RV64ZVE32F: # %bb.0:
@@ -1722,11 +1731,11 @@ define void @mscatter_truemask_v4i32(<4 x i32> %val, <4 x ptr> %ptrs) {
; RV32-NEXT: vsoxei32.v v8, (zero), v9
; RV32-NEXT: ret
;
-; RV64-LABEL: mscatter_truemask_v4i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT: vsoxei64.v v8, (zero), v10
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_truemask_v4i32:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64V-NEXT: vsoxei64.v v8, (zero), v10
+; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_truemask_v4i32:
; RV64ZVE32F: # %bb.0:
@@ -1764,11 +1773,11 @@ define void @mscatter_v8i32(<8 x i32> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT: ret
;
-; RV64-LABEL: mscatter_v8i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_v8i32:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64V-NEXT: vsoxei64.v v8, (zero), v12, v0.t
+; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_v8i32:
; RV64ZVE32F: # %bb.0:
@@ -1870,14 +1879,14 @@ define void @mscatter_baseidx_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8> %id
; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
-; RV64-LABEL: mscatter_baseidx_v8i8_v8i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vsext.vf8 v12, v10
-; RV64-NEXT: vsll.vi v12, v12, 2
-; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_baseidx_v8i8_v8i32:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vsext.vf8 v12, v10
+; RV64V-NEXT: vsll.vi v12, v12, 2
+; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
+; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_v8i8_v8i32:
; RV64ZVE32F: # %bb.0:
@@ -2007,14 +2016,14 @@ define void @mscatter_baseidx_sext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8
; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
-; RV64-LABEL: mscatter_baseidx_sext_v8i8_v8i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vsext.vf8 v12, v10
-; RV64-NEXT: vsll.vi v12, v12, 2
-; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_baseidx_sext_v8i8_v8i32:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vsext.vf8 v12, v10
+; RV64V-NEXT: vsll.vi v12, v12, 2
+; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
+; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8i32:
; RV64ZVE32F: # %bb.0:
@@ -2146,14 +2155,14 @@ define void @mscatter_baseidx_zext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8
; RV32-NEXT: vsoxei16.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
-; RV64-LABEL: mscatter_baseidx_zext_v8i8_v8i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT: vzext.vf2 v11, v10
-; RV64-NEXT: vsll.vi v10, v11, 2
-; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT: vsoxei16.v v8, (a0), v10, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_baseidx_zext_v8i8_v8i32:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64V-NEXT: vzext.vf2 v11, v10
+; RV64V-NEXT: vsll.vi v10, v11, 2
+; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV64V-NEXT: vsoxei16.v v8, (a0), v10, v0.t
+; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8i32:
; RV64ZVE32F: # %bb.0:
@@ -2292,14 +2301,14 @@ define void @mscatter_baseidx_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> %
; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
-; RV64-LABEL: mscatter_baseidx_v8i16_v8i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vsext.vf4 v12, v10
-; RV64-NEXT: vsll.vi v12, v12, 2
-; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_baseidx_v8i16_v8i32:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vsext.vf4 v12, v10
+; RV64V-NEXT: vsll.vi v12, v12, 2
+; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
+; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_v8i16_v8i32:
; RV64ZVE32F: # %bb.0:
@@ -2430,14 +2439,14 @@ define void @mscatter_baseidx_sext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i
; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
-; RV64-LABEL: mscatter_baseidx_sext_v8i16_v8i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vsext.vf4 v12, v10
-; RV64-NEXT: vsll.vi v12, v12, 2
-; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_baseidx_sext_v8i16_v8i32:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vsext.vf4 v12, v10
+; RV64V-NEXT: vsll.vi v12, v12, 2
+; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
+; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i16_v8i32:
; RV64ZVE32F: # %bb.0:
@@ -2569,13 +2578,13 @@ define void @mscatter_baseidx_zext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i
; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
-; RV64-LABEL: mscatter_baseidx_zext_v8i16_v8i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV64-NEXT: vzext.vf2 v12, v10
-; RV64-NEXT: vsll.vi v10, v12, 2
-; RV64-NEXT: vsoxei32.v v8, (a0), v10, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_baseidx_zext_v8i16_v8i32:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64V-NEXT: vzext.vf2 v12, v10
+; RV64V-NEXT: vsll.vi v10, v12, 2
+; RV64V-NEXT: vsoxei32.v v8, (a0), v10, v0.t
+; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8i32:
; RV64ZVE32F: # %bb.0:
@@ -2716,14 +2725,14 @@ define void @mscatter_baseidx_v8i32(<8 x i32> %val, ptr %base, <8 x i32> %idxs,
; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
-; RV64-LABEL: mscatter_baseidx_v8i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vsext.vf2 v12, v10
-; RV64-NEXT: vsll.vi v12, v12, 2
-; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_baseidx_v8i32:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vsext.vf2 v12, v10
+; RV64V-NEXT: vsll.vi v12, v12, 2
+; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
+; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_v8i32:
; RV64ZVE32F: # %bb.0:
@@ -2849,11 +2858,11 @@ define void @mscatter_v1i64(<1 x i64> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT: ret
;
-; RV64-LABEL: mscatter_v1i64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_v1i64:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t
+; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_v1i64:
; RV32ZVE32F: # %bb.0:
@@ -2890,11 +2899,11 @@ define void @mscatter_v2i64(<2 x i64> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT: ret
;
-; RV64-LABEL: mscatter_v2i64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_v2i64:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t
+; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_v2i64:
; RV32ZVE32F: # %bb.0:
@@ -2957,11 +2966,11 @@ define void @mscatter_v4i64(<4 x i64> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV32V-NEXT: vsoxei32.v v8, (zero), v10, v0.t
; RV32V-NEXT: ret
;
-; RV64-LABEL: mscatter_v4i64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_v4i64:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64V-NEXT: vsoxei64.v v8, (zero), v10, v0.t
+; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_v4i64:
; RV32ZVE32F: # %bb.0:
@@ -3070,11 +3079,11 @@ define void @mscatter_truemask_v4i64(<4 x i64> %val, <4 x ptr> %ptrs) {
; RV32V-NEXT: vsoxei32.v v8, (zero), v10
; RV32V-NEXT: ret
;
-; RV64-LABEL: mscatter_truemask_v4i64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV64-NEXT: vsoxei64.v v8, (zero), v10
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_truemask_v4i64:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64V-NEXT: vsoxei64.v v8, (zero), v10
+; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_truemask_v4i64:
; RV32ZVE32F: # %bb.0:
@@ -3140,11 +3149,11 @@ define void @mscatter_v8i64(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV32V-NEXT: vsoxei32.v v8, (zero), v12, v0.t
; RV32V-NEXT: ret
;
-; RV64-LABEL: mscatter_v8i64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_v8i64:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vsoxei64.v v8, (zero), v12, v0.t
+; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_v8i64:
; RV32ZVE32F: # %bb.0:
@@ -3368,13 +3377,13 @@ define void @mscatter_baseidx_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %id
; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32V-NEXT: ret
;
-; RV64-LABEL: mscatter_baseidx_v8i8_v8i64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vsext.vf8 v16, v12
-; RV64-NEXT: vsll.vi v12, v16, 3
-; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_baseidx_v8i8_v8i64:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vsext.vf8 v16, v12
+; RV64V-NEXT: vsll.vi v12, v16, 3
+; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
+; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_baseidx_v8i8_v8i64:
; RV32ZVE32F: # %bb.0:
@@ -3612,13 +3621,13 @@ define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8
; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32V-NEXT: ret
;
-; RV64-LABEL: mscatter_baseidx_sext_v8i8_v8i64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vsext.vf8 v16, v12
-; RV64-NEXT: vsll.vi v12, v16, 3
-; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_baseidx_sext_v8i8_v8i64:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vsext.vf8 v16, v12
+; RV64V-NEXT: vsll.vi v12, v16, 3
+; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
+; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8i64:
; RV32ZVE32F: # %bb.0:
@@ -3857,14 +3866,14 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8
; RV32V-NEXT: vsoxei16.v v8, (a0), v12, v0.t
; RV32V-NEXT: ret
;
-; RV64-LABEL: mscatter_baseidx_zext_v8i8_v8i64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT: vzext.vf2 v13, v12
-; RV64-NEXT: vsll.vi v12, v13, 3
-; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; RV64-NEXT: vsoxei16.v v8, (a0), v12, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_baseidx_zext_v8i8_v8i64:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64V-NEXT: vzext.vf2 v13, v12
+; RV64V-NEXT: vsll.vi v12, v13, 3
+; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; RV64V-NEXT: vsoxei16.v v8, (a0), v12, v0.t
+; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8i64:
; RV32ZVE32F: # %bb.0:
@@ -4111,13 +4120,13 @@ define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> %
; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32V-NEXT: ret
;
-; RV64-LABEL: mscatter_baseidx_v8i16_v8i64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vsext.vf4 v16, v12
-; RV64-NEXT: vsll.vi v12, v16, 3
-; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_baseidx_v8i16_v8i64:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vsext.vf4 v16, v12
+; RV64V-NEXT: vsll.vi v12, v16, 3
+; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
+; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_baseidx_v8i16_v8i64:
; RV32ZVE32F: # %bb.0:
@@ -4356,13 +4365,13 @@ define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32V-NEXT: ret
;
-; RV64-LABEL: mscatter_baseidx_sext_v8i16_v8i64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vsext.vf4 v16, v12
-; RV64-NEXT: vsll.vi v12, v16, 3
-; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_baseidx_sext_v8i16_v8i64:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vsext.vf4 v16, v12
+; RV64V-NEXT: vsll.vi v12, v16, 3
+; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
+; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_baseidx_sext_v8i16_v8i64:
; RV32ZVE32F: # %bb.0:
@@ -4602,14 +4611,14 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32V-NEXT: ret
;
-; RV64-LABEL: mscatter_baseidx_zext_v8i16_v8i64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV64-NEXT: vzext.vf2 v14, v12
-; RV64-NEXT: vsll.vi v12, v14, 3
-; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; RV64-NEXT: vsoxei32.v v8, (a0), v12, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_baseidx_zext_v8i16_v8i64:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64V-NEXT: vzext.vf2 v14, v12
+; RV64V-NEXT: vsll.vi v12, v14, 3
+; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; RV64V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
+; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8i64:
; RV32ZVE32F: # %bb.0:
@@ -4858,13 +4867,13 @@ define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> %
; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32V-NEXT: ret
;
-; RV64-LABEL: mscatter_baseidx_v8i32_v8i64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vsext.vf2 v16, v12
-; RV64-NEXT: vsll.vi v12, v16, 3
-; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_baseidx_v8i32_v8i64:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vsext.vf2 v16, v12
+; RV64V-NEXT: vsll.vi v12, v16, 3
+; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
+; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_baseidx_v8i32_v8i64:
; RV32ZVE32F: # %bb.0:
@@ -5101,13 +5110,13 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32V-NEXT: ret
;
-; RV64-LABEL: mscatter_baseidx_sext_v8i32_v8i64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vsext.vf2 v16, v12
-; RV64-NEXT: vsll.vi v12, v16, 3
-; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_baseidx_sext_v8i32_v8i64:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vsext.vf2 v16, v12
+; RV64V-NEXT: vsll.vi v12, v16, 3
+; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
+; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_baseidx_sext_v8i32_v8i64:
; RV32ZVE32F: # %bb.0:
@@ -5345,13 +5354,13 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i
; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32V-NEXT: ret
;
-; RV64-LABEL: mscatter_baseidx_zext_v8i32_v8i64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vzext.vf2 v16, v12
-; RV64-NEXT: vsll.vi v12, v16, 3
-; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_baseidx_zext_v8i32_v8i64:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vzext.vf2 v16, v12
+; RV64V-NEXT: vsll.vi v12, v16, 3
+; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
+; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_baseidx_zext_v8i32_v8i64:
; RV32ZVE32F: # %bb.0:
@@ -5598,12 +5607,12 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs,
; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32V-NEXT: ret
;
-; RV64-LABEL: mscatter_baseidx_v8i64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vsll.vi v12, v12, 3
-; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_baseidx_v8i64:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vsll.vi v12, v12, 3
+; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
+; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_baseidx_v8i64:
; RV32ZVE32F: # %bb.0:
@@ -5874,63 +5883,167 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs,
ret void
}
-declare void @llvm.masked.scatter.v1f16.v1p0(<1 x half>, <1 x ptr>, i32, <1 x i1>)
+declare void @llvm.masked.scatter.v1bf16.v1p0(<1 x bfloat>, <1 x ptr>, i32, <1 x i1>)
-define void @mscatter_v1f16(<1 x half> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
-; RV32V-LABEL: mscatter_v1f16:
+define void @mscatter_v1bf16(<1 x bfloat> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
+; RV32V-LABEL: mscatter_v1bf16:
; RV32V: # %bb.0:
-; RV32V-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
+; RV32V-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
+; RV32V-NEXT: vfirst.m a0, v0
+; RV32V-NEXT: bnez a0, .LBB52_2
+; RV32V-NEXT: # %bb.1: # %cond.store
+; RV32V-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; RV32V-NEXT: vmv.x.s a0, v8
+; RV32V-NEXT: fmv.h.x fa5, a0
+; RV32V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; RV32V-NEXT: vmv.x.s a0, v9
+; RV32V-NEXT: fsh fa5, 0(a0)
+; RV32V-NEXT: .LBB52_2: # %else
; RV32V-NEXT: ret
;
-; RV64-LABEL: mscatter_v1f16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_v1bf16:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
+; RV64V-NEXT: vfirst.m a0, v0
+; RV64V-NEXT: bnez a0, .LBB52_2
+; RV64V-NEXT: # %bb.1: # %cond.store
+; RV64V-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; RV64V-NEXT: vmv.x.s a0, v8
+; RV64V-NEXT: fmv.h.x fa5, a0
+; RV64V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
+; RV64V-NEXT: vmv.x.s a0, v9
+; RV64V-NEXT: fsh fa5, 0(a0)
+; RV64V-NEXT: .LBB52_2: # %else
+; RV64V-NEXT: ret
;
-; RV32ZVE32F-LABEL: mscatter_v1f16:
+; RV32ZVE32F-LABEL: mscatter_v1bf16:
; RV32ZVE32F: # %bb.0:
-; RV32ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t
+; RV32ZVE32F-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
+; RV32ZVE32F-NEXT: vfirst.m a0, v0
+; RV32ZVE32F-NEXT: bnez a0, .LBB52_2
+; RV32ZVE32F-NEXT: # %bb.1: # %cond.store
+; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV32ZVE32F-NEXT: vmv.x.s a0, v8
+; RV32ZVE32F-NEXT: fmv.h.x fa5, a0
+; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV32ZVE32F-NEXT: vmv.x.s a0, v9
+; RV32ZVE32F-NEXT: fsh fa5, 0(a0)
+; RV32ZVE32F-NEXT: .LBB52_2: # %else
; RV32ZVE32F-NEXT: ret
;
-; RV64ZVE32F-LABEL: mscatter_v1f16:
+; RV64ZVE32F-LABEL: mscatter_v1bf16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vfirst.m a1, v0
; RV64ZVE32F-NEXT: bnez a1, .LBB52_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vse16.v v8, (a0)
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT: vmv.x.s a1, v8
+; RV64ZVE32F-NEXT: fmv.h.x fa5, a1
+; RV64ZVE32F-NEXT: fsh fa5, 0(a0)
; RV64ZVE32F-NEXT: .LBB52_2: # %else
; RV64ZVE32F-NEXT: ret
- call void @llvm.masked.scatter.v1f16.v1p0(<1 x half> %val, <1 x ptr> %ptrs, i32 2, <1 x i1> %m)
+ call void @llvm.masked.scatter.v1bf16.v1p0(<1 x bfloat> %val, <1 x ptr> %ptrs, i32 2, <1 x i1> %m)
ret void
}
-declare void @llvm.masked.scatter.v2f16.v2p0(<2 x half>, <2 x ptr>, i32, <2 x i1>)
+declare void @llvm.masked.scatter.v2bf16.v2p0(<2 x bfloat>, <2 x ptr>, i32, <2 x i1>)
-define void @mscatter_v2f16(<2 x half> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
-; RV32V-LABEL: mscatter_v2f16:
+define void @mscatter_v2bf16(<2 x bfloat> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
+; RV32V-LABEL: mscatter_v2bf16:
; RV32V: # %bb.0:
-; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
+; RV32V-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32V-NEXT: vmv.x.s a0, v0
+; RV32V-NEXT: andi a1, a0, 1
+; RV32V-NEXT: bnez a1, .LBB53_3
+; RV32V-NEXT: # %bb.1: # %else
+; RV32V-NEXT: andi a0, a0, 2
+; RV32V-NEXT: bnez a0, .LBB53_4
+; RV32V-NEXT: .LBB53_2: # %else2
+; RV32V-NEXT: ret
+; RV32V-NEXT: .LBB53_3: # %cond.store
+; RV32V-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV32V-NEXT: vmv.x.s a1, v8
+; RV32V-NEXT: fmv.h.x fa5, a1
+; RV32V-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; RV32V-NEXT: vmv.x.s a1, v9
+; RV32V-NEXT: fsh fa5, 0(a1)
+; RV32V-NEXT: andi a0, a0, 2
+; RV32V-NEXT: beqz a0, .LBB53_2
+; RV32V-NEXT: .LBB53_4: # %cond.store1
+; RV32V-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; RV32V-NEXT: vslidedown.vi v8, v8, 1
+; RV32V-NEXT: vmv.x.s a0, v8
+; RV32V-NEXT: fmv.h.x fa5, a0
+; RV32V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; RV32V-NEXT: vslidedown.vi v8, v9, 1
+; RV32V-NEXT: vmv.x.s a0, v8
+; RV32V-NEXT: fsh fa5, 0(a0)
; RV32V-NEXT: ret
;
-; RV64-LABEL: mscatter_v2f16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_v2bf16:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64V-NEXT: vmv.x.s a0, v0
+; RV64V-NEXT: andi a1, a0, 1
+; RV64V-NEXT: bnez a1, .LBB53_3
+; RV64V-NEXT: # %bb.1: # %else
+; RV64V-NEXT: andi a0, a0, 2
+; RV64V-NEXT: bnez a0, .LBB53_4
+; RV64V-NEXT: .LBB53_2: # %else2
+; RV64V-NEXT: ret
+; RV64V-NEXT: .LBB53_3: # %cond.store
+; RV64V-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV64V-NEXT: vmv.x.s a1, v8
+; RV64V-NEXT: fmv.h.x fa5, a1
+; RV64V-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; RV64V-NEXT: vmv.x.s a1, v9
+; RV64V-NEXT: fsh fa5, 0(a1)
+; RV64V-NEXT: andi a0, a0, 2
+; RV64V-NEXT: beqz a0, .LBB53_2
+; RV64V-NEXT: .LBB53_4: # %cond.store1
+; RV64V-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; RV64V-NEXT: vslidedown.vi v8, v8, 1
+; RV64V-NEXT: vmv.x.s a0, v8
+; RV64V-NEXT: fmv.h.x fa5, a0
+; RV64V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
+; RV64V-NEXT: vslidedown.vi v8, v9, 1
+; RV64V-NEXT: vmv.x.s a0, v8
+; RV64V-NEXT: fsh fa5, 0(a0)
+; RV64V-NEXT: ret
;
-; RV32ZVE32F-LABEL: mscatter_v2f16:
+; RV32ZVE32F-LABEL: mscatter_v2bf16:
; RV32ZVE32F: # %bb.0:
-; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
-; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t
+; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32ZVE32F-NEXT: vmv.x.s a0, v0
+; RV32ZVE32F-NEXT: andi a1, a0, 1
+; RV32ZVE32F-NEXT: bnez a1, .LBB53_3
+; RV32ZVE32F-NEXT: # %bb.1: # %else
+; RV32ZVE32F-NEXT: andi a0, a0, 2
+; RV32ZVE32F-NEXT: bnez a0, .LBB53_4
+; RV32ZVE32F-NEXT: .LBB53_2: # %else2
+; RV32ZVE32F-NEXT: ret
+; RV32ZVE32F-NEXT: .LBB53_3: # %cond.store
+; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV32ZVE32F-NEXT: vmv.x.s a1, v8
+; RV32ZVE32F-NEXT: fmv.h.x fa5, a1
+; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; RV32ZVE32F-NEXT: vmv.x.s a1, v9
+; RV32ZVE32F-NEXT: fsh fa5, 0(a1)
+; RV32ZVE32F-NEXT: andi a0, a0, 2
+; RV32ZVE32F-NEXT: beqz a0, .LBB53_2
+; RV32ZVE32F-NEXT: .LBB53_4: # %cond.store1
+; RV32ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 1
+; RV32ZVE32F-NEXT: vmv.x.s a0, v8
+; RV32ZVE32F-NEXT: fmv.h.x fa5, a0
+; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV32ZVE32F-NEXT: vslidedown.vi v8, v9, 1
+; RV32ZVE32F-NEXT: vmv.x.s a0, v8
+; RV32ZVE32F-NEXT: fsh fa5, 0(a0)
; RV32ZVE32F-NEXT: ret
;
-; RV64ZVE32F-LABEL: mscatter_v2f16:
+; RV64ZVE32F-LABEL: mscatter_v2bf16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
@@ -5942,35 +6055,145 @@ define void @mscatter_v2f16(<2 x half> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV64ZVE32F-NEXT: .LBB53_2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB53_3: # %cond.store
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vse16.v v8, (a0)
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV64ZVE32F-NEXT: vmv.x.s a3, v8
+; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
+; RV64ZVE32F-NEXT: fsh fa5, 0(a0)
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB53_2
; RV64ZVE32F-NEXT: .LBB53_4: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vse16.v v8, (a1)
+; RV64ZVE32F-NEXT: vmv.x.s a0, v8
+; RV64ZVE32F-NEXT: fmv.h.x fa5, a0
+; RV64ZVE32F-NEXT: fsh fa5, 0(a1)
; RV64ZVE32F-NEXT: ret
- call void @llvm.masked.scatter.v2f16.v2p0(<2 x half> %val, <2 x ptr> %ptrs, i32 2, <2 x i1> %m)
+ call void @llvm.masked.scatter.v2bf16.v2p0(<2 x bfloat> %val, <2 x ptr> %ptrs, i32 2, <2 x i1> %m)
ret void
}
-declare void @llvm.masked.scatter.v4f16.v4p0(<4 x half>, <4 x ptr>, i32, <4 x i1>)
+declare void @llvm.masked.scatter.v4bf16.v4p0(<4 x bfloat>, <4 x ptr>, i32, <4 x i1>)
-define void @mscatter_v4f16(<4 x half> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
-; RV32-LABEL: mscatter_v4f16:
+define void @mscatter_v4bf16(<4 x bfloat> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
+; RV32-LABEL: mscatter_v4bf16:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vmv.x.s a0, v0
+; RV32-NEXT: andi a1, a0, 1
+; RV32-NEXT: bnez a1, .LBB54_5
+; RV32-NEXT: # %bb.1: # %else
+; RV32-NEXT: andi a1, a0, 2
+; RV32-NEXT: bnez a1, .LBB54_6
+; RV32-NEXT: .LBB54_2: # %else2
+; RV32-NEXT: andi a1, a0, 4
+; RV32-NEXT: bnez a1, .LBB54_7
+; RV32-NEXT: .LBB54_3: # %else4
+; RV32-NEXT: andi a0, a0, 8
+; RV32-NEXT: bnez a0, .LBB54_8
+; RV32-NEXT: .LBB54_4: # %else6
+; RV32-NEXT: ret
+; RV32-NEXT: .LBB54_5: # %cond.store
+; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV32-NEXT: vmv.x.s a1, v8
+; RV32-NEXT: fmv.h.x fa5, a1
+; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.x.s a1, v9
+; RV32-NEXT: fsh fa5, 0(a1)
+; RV32-NEXT: andi a1, a0, 2
+; RV32-NEXT: beqz a1, .LBB54_2
+; RV32-NEXT: .LBB54_6: # %cond.store1
+; RV32-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV32-NEXT: vslidedown.vi v10, v8, 1
+; RV32-NEXT: vmv.x.s a1, v10
+; RV32-NEXT: fmv.h.x fa5, a1
+; RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v10, v9, 1
+; RV32-NEXT: vmv.x.s a1, v10
+; RV32-NEXT: fsh fa5, 0(a1)
+; RV32-NEXT: andi a1, a0, 4
+; RV32-NEXT: beqz a1, .LBB54_3
+; RV32-NEXT: .LBB54_7: # %cond.store3
+; RV32-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV32-NEXT: vslidedown.vi v10, v8, 2
+; RV32-NEXT: vmv.x.s a1, v10
+; RV32-NEXT: fmv.h.x fa5, a1
+; RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v10, v9, 2
+; RV32-NEXT: vmv.x.s a1, v10
+; RV32-NEXT: fsh fa5, 0(a1)
+; RV32-NEXT: andi a0, a0, 8
+; RV32-NEXT: beqz a0, .LBB54_4
+; RV32-NEXT: .LBB54_8: # %cond.store5
+; RV32-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v8, 3
+; RV32-NEXT: vmv.x.s a0, v8
+; RV32-NEXT: fmv.h.x fa5, a0
+; RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v9, 3
+; RV32-NEXT: vmv.x.s a0, v8
+; RV32-NEXT: fsh fa5, 0(a0)
; RV32-NEXT: ret
;
-; RV64-LABEL: mscatter_v4f16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_v4bf16:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64V-NEXT: vmv.x.s a0, v0
+; RV64V-NEXT: andi a1, a0, 1
+; RV64V-NEXT: bnez a1, .LBB54_5
+; RV64V-NEXT: # %bb.1: # %else
+; RV64V-NEXT: andi a1, a0, 2
+; RV64V-NEXT: bnez a1, .LBB54_6
+; RV64V-NEXT: .LBB54_2: # %else2
+; RV64V-NEXT: andi a1, a0, 4
+; RV64V-NEXT: bnez a1, .LBB54_7
+; RV64V-NEXT: .LBB54_3: # %else4
+; RV64V-NEXT: andi a0, a0, 8
+; RV64V-NEXT: bnez a0, .LBB54_8
+; RV64V-NEXT: .LBB54_4: # %else6
+; RV64V-NEXT: ret
+; RV64V-NEXT: .LBB54_5: # %cond.store
+; RV64V-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV64V-NEXT: vmv.x.s a1, v8
+; RV64V-NEXT: fmv.h.x fa5, a1
+; RV64V-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; RV64V-NEXT: vmv.x.s a1, v10
+; RV64V-NEXT: fsh fa5, 0(a1)
+; RV64V-NEXT: andi a1, a0, 2
+; RV64V-NEXT: beqz a1, .LBB54_2
+; RV64V-NEXT: .LBB54_6: # %cond.store1
+; RV64V-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64V-NEXT: vslidedown.vi v9, v8, 1
+; RV64V-NEXT: vmv.x.s a1, v9
+; RV64V-NEXT: fmv.h.x fa5, a1
+; RV64V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64V-NEXT: vslidedown.vi v9, v10, 1
+; RV64V-NEXT: vmv.x.s a1, v9
+; RV64V-NEXT: fsh fa5, 0(a1)
+; RV64V-NEXT: andi a1, a0, 4
+; RV64V-NEXT: beqz a1, .LBB54_3
+; RV64V-NEXT: .LBB54_7: # %cond.store3
+; RV64V-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64V-NEXT: vslidedown.vi v9, v8, 2
+; RV64V-NEXT: vmv.x.s a1, v9
+; RV64V-NEXT: fmv.h.x fa5, a1
+; RV64V-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; RV64V-NEXT: vslidedown.vi v12, v10, 2
+; RV64V-NEXT: vmv.x.s a1, v12
+; RV64V-NEXT: fsh fa5, 0(a1)
+; RV64V-NEXT: andi a0, a0, 8
+; RV64V-NEXT: beqz a0, .LBB54_4
+; RV64V-NEXT: .LBB54_8: # %cond.store5
+; RV64V-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64V-NEXT: vslidedown.vi v8, v8, 3
+; RV64V-NEXT: vmv.x.s a0, v8
+; RV64V-NEXT: fmv.h.x fa5, a0
+; RV64V-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; RV64V-NEXT: vslidedown.vi v8, v10, 3
+; RV64V-NEXT: vmv.x.s a0, v8
+; RV64V-NEXT: fsh fa5, 0(a0)
+; RV64V-NEXT: ret
;
-; RV64ZVE32F-LABEL: mscatter_v4f16:
+; RV64ZVE32F-LABEL: mscatter_v4bf16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ld a4, 8(a0)
; RV64ZVE32F-NEXT: ld a2, 16(a0)
@@ -5992,87 +6215,389 @@ define void @mscatter_v4f16(<4 x half> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB54_5: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vse16.v v8, (a0)
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV64ZVE32F-NEXT: vmv.x.s a5, v8
+; RV64ZVE32F-NEXT: fmv.h.x fa5, a5
+; RV64ZVE32F-NEXT: fsh fa5, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a3, 2
; RV64ZVE32F-NEXT: beqz a0, .LBB54_2
; RV64ZVE32F-NEXT: .LBB54_6: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
-; RV64ZVE32F-NEXT: vse16.v v9, (a4)
+; RV64ZVE32F-NEXT: vmv.x.s a0, v9
+; RV64ZVE32F-NEXT: fmv.h.x fa5, a0
+; RV64ZVE32F-NEXT: fsh fa5, 0(a4)
; RV64ZVE32F-NEXT: andi a0, a3, 4
; RV64ZVE32F-NEXT: beqz a0, .LBB54_3
; RV64ZVE32F-NEXT: .LBB54_7: # %cond.store3
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
-; RV64ZVE32F-NEXT: vse16.v v9, (a2)
+; RV64ZVE32F-NEXT: vmv.x.s a0, v9
+; RV64ZVE32F-NEXT: fmv.h.x fa5, a0
+; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: andi a3, a3, 8
; RV64ZVE32F-NEXT: beqz a3, .LBB54_4
; RV64ZVE32F-NEXT: .LBB54_8: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3
-; RV64ZVE32F-NEXT: vse16.v v8, (a1)
+; RV64ZVE32F-NEXT: vmv.x.s a0, v8
+; RV64ZVE32F-NEXT: fmv.h.x fa5, a0
+; RV64ZVE32F-NEXT: fsh fa5, 0(a1)
; RV64ZVE32F-NEXT: ret
- call void @llvm.masked.scatter.v4f16.v4p0(<4 x half> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> %m)
+ call void @llvm.masked.scatter.v4bf16.v4p0(<4 x bfloat> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> %m)
ret void
}
-define void @mscatter_truemask_v4f16(<4 x half> %val, <4 x ptr> %ptrs) {
-; RV32-LABEL: mscatter_truemask_v4f16:
+define void @mscatter_truemask_v4bf16(<4 x bfloat> %val, <4 x ptr> %ptrs) {
+; RV32-LABEL: mscatter_truemask_v4bf16:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT: vsoxei32.v v8, (zero), v9
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vmv.x.s a0, v8
+; RV32-NEXT: fmv.h.x fa5, a0
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.x.s a0, v9
+; RV32-NEXT: fsh fa5, 0(a0)
+; RV32-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV32-NEXT: vslidedown.vi v10, v8, 1
+; RV32-NEXT: vmv.x.s a0, v10
+; RV32-NEXT: fmv.h.x fa5, a0
+; RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v10, v9, 1
+; RV32-NEXT: vmv.x.s a0, v10
+; RV32-NEXT: fsh fa5, 0(a0)
+; RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV32-NEXT: vslidedown.vi v10, v8, 2
+; RV32-NEXT: vmv.x.s a0, v10
+; RV32-NEXT: fmv.h.x fa5, a0
+; RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v10, v9, 2
+; RV32-NEXT: vmv.x.s a0, v10
+; RV32-NEXT: fsh fa5, 0(a0)
+; RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v8, 3
+; RV32-NEXT: vmv.x.s a0, v8
+; RV32-NEXT: fmv.h.x fa5, a0
+; RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v9, 3
+; RV32-NEXT: vmv.x.s a0, v8
+; RV32-NEXT: fsh fa5, 0(a0)
; RV32-NEXT: ret
;
-; RV64-LABEL: mscatter_truemask_v4f16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV64-NEXT: vsoxei64.v v8, (zero), v10
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_truemask_v4bf16:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64V-NEXT: vmv.x.s a0, v8
+; RV64V-NEXT: fmv.h.x fa5, a0
+; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; RV64V-NEXT: vmv.x.s a0, v10
+; RV64V-NEXT: fsh fa5, 0(a0)
+; RV64V-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64V-NEXT: vslidedown.vi v9, v8, 1
+; RV64V-NEXT: vmv.x.s a0, v9
+; RV64V-NEXT: fmv.h.x fa5, a0
+; RV64V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64V-NEXT: vslidedown.vi v9, v10, 1
+; RV64V-NEXT: vmv.x.s a0, v9
+; RV64V-NEXT: fsh fa5, 0(a0)
+; RV64V-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64V-NEXT: vslidedown.vi v9, v8, 2
+; RV64V-NEXT: vmv.x.s a0, v9
+; RV64V-NEXT: fmv.h.x fa5, a0
+; RV64V-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; RV64V-NEXT: vslidedown.vi v12, v10, 2
+; RV64V-NEXT: vmv.x.s a0, v12
+; RV64V-NEXT: fsh fa5, 0(a0)
+; RV64V-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV64V-NEXT: vslidedown.vi v8, v8, 3
+; RV64V-NEXT: vmv.x.s a0, v8
+; RV64V-NEXT: fmv.h.x fa5, a0
+; RV64V-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; RV64V-NEXT: vslidedown.vi v8, v10, 3
+; RV64V-NEXT: vmv.x.s a0, v8
+; RV64V-NEXT: fsh fa5, 0(a0)
+; RV64V-NEXT: ret
;
-; RV64ZVE32F-LABEL: mscatter_truemask_v4f16:
+; RV64ZVE32F-LABEL: mscatter_truemask_v4bf16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ld a1, 0(a0)
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: ld a3, 16(a0)
; RV64ZVE32F-NEXT: ld a0, 24(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vse16.v v8, (a1)
+; RV64ZVE32F-NEXT: vmv.x.s a4, v8
+; RV64ZVE32F-NEXT: fmv.h.x fa5, a4
+; RV64ZVE32F-NEXT: fsh fa5, 0(a1)
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
-; RV64ZVE32F-NEXT: vse16.v v9, (a2)
+; RV64ZVE32F-NEXT: vmv.x.s a1, v9
+; RV64ZVE32F-NEXT: fmv.h.x fa5, a1
+; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
-; RV64ZVE32F-NEXT: vse16.v v9, (a3)
+; RV64ZVE32F-NEXT: vmv.x.s a1, v9
+; RV64ZVE32F-NEXT: fmv.h.x fa5, a1
+; RV64ZVE32F-NEXT: fsh fa5, 0(a3)
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3
-; RV64ZVE32F-NEXT: vse16.v v8, (a0)
+; RV64ZVE32F-NEXT: vmv.x.s a1, v8
+; RV64ZVE32F-NEXT: fmv.h.x fa5, a1
+; RV64ZVE32F-NEXT: fsh fa5, 0(a0)
; RV64ZVE32F-NEXT: ret
- call void @llvm.masked.scatter.v4f16.v4p0(<4 x half> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> splat (i1 1))
+ call void @llvm.masked.scatter.v4bf16.v4p0(<4 x bfloat> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> splat (i1 1))
ret void
}
-define void @mscatter_falsemask_v4f16(<4 x half> %val, <4 x ptr> %ptrs) {
-; CHECK-LABEL: mscatter_falsemask_v4f16:
+define void @mscatter_falsemask_v4bf16(<4 x bfloat> %val, <4 x ptr> %ptrs) {
+; CHECK-LABEL: mscatter_falsemask_v4bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: ret
- call void @llvm.masked.scatter.v4f16.v4p0(<4 x half> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> zeroinitializer)
+ call void @llvm.masked.scatter.v4bf16.v4p0(<4 x bfloat> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> zeroinitializer)
ret void
}
-declare void @llvm.masked.scatter.v8f16.v8p0(<8 x half>, <8 x ptr>, i32, <8 x i1>)
+declare void @llvm.masked.scatter.v8bf16.v8p0(<8 x bfloat>, <8 x ptr>, i32, <8 x i1>)
-define void @mscatter_v8f16(<8 x half> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
-; RV32-LABEL: mscatter_v8f16:
+define void @mscatter_v8bf16(<8 x bfloat> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
+; RV32-LABEL: mscatter_v8bf16:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vmv.x.s a0, v0
+; RV32-NEXT: andi a1, a0, 1
+; RV32-NEXT: bnez a1, .LBB57_9
+; RV32-NEXT: # %bb.1: # %else
+; RV32-NEXT: andi a1, a0, 2
+; RV32-NEXT: bnez a1, .LBB57_10
+; RV32-NEXT: .LBB57_2: # %else2
+; RV32-NEXT: andi a1, a0, 4
+; RV32-NEXT: bnez a1, .LBB57_11
+; RV32-NEXT: .LBB57_3: # %else4
+; RV32-NEXT: andi a1, a0, 8
+; RV32-NEXT: bnez a1, .LBB57_12
+; RV32-NEXT: .LBB57_4: # %else6
+; RV32-NEXT: andi a1, a0, 16
+; RV32-NEXT: bnez a1, .LBB57_13
+; RV32-NEXT: .LBB57_5: # %else8
+; RV32-NEXT: andi a1, a0, 32
+; RV32-NEXT: bnez a1, .LBB57_14
+; RV32-NEXT: .LBB57_6: # %else10
+; RV32-NEXT: andi a1, a0, 64
+; RV32-NEXT: bnez a1, .LBB57_15
+; RV32-NEXT: .LBB57_7: # %else12
+; RV32-NEXT: andi a0, a0, -128
+; RV32-NEXT: bnez a0, .LBB57_16
+; RV32-NEXT: .LBB57_8: # %else14
+; RV32-NEXT: ret
+; RV32-NEXT: .LBB57_9: # %cond.store
+; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV32-NEXT: vmv.x.s a1, v8
+; RV32-NEXT: fmv.h.x fa5, a1
+; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; RV32-NEXT: vmv.x.s a1, v10
+; RV32-NEXT: fsh fa5, 0(a1)
+; RV32-NEXT: andi a1, a0, 2
+; RV32-NEXT: beqz a1, .LBB57_2
+; RV32-NEXT: .LBB57_10: # %cond.store1
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v8, 1
+; RV32-NEXT: vmv.x.s a1, v9
+; RV32-NEXT: fmv.h.x fa5, a1
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v10, 1
+; RV32-NEXT: vmv.x.s a1, v9
+; RV32-NEXT: fsh fa5, 0(a1)
+; RV32-NEXT: andi a1, a0, 4
+; RV32-NEXT: beqz a1, .LBB57_3
+; RV32-NEXT: .LBB57_11: # %cond.store3
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v8, 2
+; RV32-NEXT: vmv.x.s a1, v9
+; RV32-NEXT: fmv.h.x fa5, a1
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v10, 2
+; RV32-NEXT: vmv.x.s a1, v9
+; RV32-NEXT: fsh fa5, 0(a1)
+; RV32-NEXT: andi a1, a0, 8
+; RV32-NEXT: beqz a1, .LBB57_4
+; RV32-NEXT: .LBB57_12: # %cond.store5
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v8, 3
+; RV32-NEXT: vmv.x.s a1, v9
+; RV32-NEXT: fmv.h.x fa5, a1
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v10, 3
+; RV32-NEXT: vmv.x.s a1, v9
+; RV32-NEXT: fsh fa5, 0(a1)
+; RV32-NEXT: andi a1, a0, 16
+; RV32-NEXT: beqz a1, .LBB57_5
+; RV32-NEXT: .LBB57_13: # %cond.store7
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v8, 4
+; RV32-NEXT: vmv.x.s a1, v9
+; RV32-NEXT: fmv.h.x fa5, a1
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v12, v10, 4
+; RV32-NEXT: vmv.x.s a1, v12
+; RV32-NEXT: fsh fa5, 0(a1)
+; RV32-NEXT: andi a1, a0, 32
+; RV32-NEXT: beqz a1, .LBB57_6
+; RV32-NEXT: .LBB57_14: # %cond.store9
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v8, 5
+; RV32-NEXT: vmv.x.s a1, v9
+; RV32-NEXT: fmv.h.x fa5, a1
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v12, v10, 5
+; RV32-NEXT: vmv.x.s a1, v12
+; RV32-NEXT: fsh fa5, 0(a1)
+; RV32-NEXT: andi a1, a0, 64
+; RV32-NEXT: beqz a1, .LBB57_7
+; RV32-NEXT: .LBB57_15: # %cond.store11
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v8, 6
+; RV32-NEXT: vmv.x.s a1, v9
+; RV32-NEXT: fmv.h.x fa5, a1
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v12, v10, 6
+; RV32-NEXT: vmv.x.s a1, v12
+; RV32-NEXT: fsh fa5, 0(a1)
+; RV32-NEXT: andi a0, a0, -128
+; RV32-NEXT: beqz a0, .LBB57_8
+; RV32-NEXT: .LBB57_16: # %cond.store13
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v8, 7
+; RV32-NEXT: vmv.x.s a0, v8
+; RV32-NEXT: fmv.h.x fa5, a0
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v10, 7
+; RV32-NEXT: vmv.x.s a0, v8
+; RV32-NEXT: fsh fa5, 0(a0)
; RV32-NEXT: ret
;
-; RV64-LABEL: mscatter_v8f16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_v8bf16:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64V-NEXT: vmv.x.s a0, v0
+; RV64V-NEXT: andi a1, a0, 1
+; RV64V-NEXT: bnez a1, .LBB57_11
+; RV64V-NEXT: # %bb.1: # %else
+; RV64V-NEXT: andi a1, a0, 2
+; RV64V-NEXT: bnez a1, .LBB57_12
+; RV64V-NEXT: .LBB57_2: # %else2
+; RV64V-NEXT: andi a1, a0, 4
+; RV64V-NEXT: bnez a1, .LBB57_13
+; RV64V-NEXT: .LBB57_3: # %else4
+; RV64V-NEXT: andi a1, a0, 8
+; RV64V-NEXT: beqz a1, .LBB57_5
+; RV64V-NEXT: .LBB57_4: # %cond.store5
+; RV64V-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64V-NEXT: vslidedown.vi v9, v8, 3
+; RV64V-NEXT: vmv.x.s a1, v9
+; RV64V-NEXT: fmv.h.x fa5, a1
+; RV64V-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64V-NEXT: vslidedown.vi v10, v12, 3
+; RV64V-NEXT: vmv.x.s a1, v10
+; RV64V-NEXT: fsh fa5, 0(a1)
+; RV64V-NEXT: .LBB57_5: # %else6
+; RV64V-NEXT: addi sp, sp, -320
+; RV64V-NEXT: .cfi_def_cfa_offset 320
+; RV64V-NEXT: sd ra, 312(sp) # 8-byte Folded Spill
+; RV64V-NEXT: sd s0, 304(sp) # 8-byte Folded Spill
+; RV64V-NEXT: .cfi_offset ra, -8
+; RV64V-NEXT: .cfi_offset s0, -16
+; RV64V-NEXT: addi s0, sp, 320
+; RV64V-NEXT: .cfi_def_cfa s0, 0
+; RV64V-NEXT: andi sp, sp, -64
+; RV64V-NEXT: andi a1, a0, 16
+; RV64V-NEXT: bnez a1, .LBB57_14
+; RV64V-NEXT: # %bb.6: # %else8
+; RV64V-NEXT: andi a1, a0, 32
+; RV64V-NEXT: bnez a1, .LBB57_15
+; RV64V-NEXT: .LBB57_7: # %else10
+; RV64V-NEXT: andi a1, a0, 64
+; RV64V-NEXT: bnez a1, .LBB57_16
+; RV64V-NEXT: .LBB57_8: # %else12
+; RV64V-NEXT: andi a0, a0, -128
+; RV64V-NEXT: beqz a0, .LBB57_10
+; RV64V-NEXT: .LBB57_9: # %cond.store13
+; RV64V-NEXT: mv a0, sp
+; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64V-NEXT: vse64.v v12, (a0)
+; RV64V-NEXT: ld a0, 56(sp)
+; RV64V-NEXT: vslidedown.vi v8, v8, 7
+; RV64V-NEXT: vmv.x.s a1, v8
+; RV64V-NEXT: fmv.h.x fa5, a1
+; RV64V-NEXT: fsh fa5, 0(a0)
+; RV64V-NEXT: .LBB57_10: # %else14
+; RV64V-NEXT: addi sp, s0, -320
+; RV64V-NEXT: ld ra, 312(sp) # 8-byte Folded Reload
+; RV64V-NEXT: ld s0, 304(sp) # 8-byte Folded Reload
+; RV64V-NEXT: addi sp, sp, 320
+; RV64V-NEXT: ret
+; RV64V-NEXT: .LBB57_11: # %cond.store
+; RV64V-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV64V-NEXT: vmv.x.s a1, v8
+; RV64V-NEXT: fmv.h.x fa5, a1
+; RV64V-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; RV64V-NEXT: vmv.x.s a1, v12
+; RV64V-NEXT: fsh fa5, 0(a1)
+; RV64V-NEXT: andi a1, a0, 2
+; RV64V-NEXT: beqz a1, .LBB57_2
+; RV64V-NEXT: .LBB57_12: # %cond.store1
+; RV64V-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64V-NEXT: vslidedown.vi v9, v8, 1
+; RV64V-NEXT: vmv.x.s a1, v9
+; RV64V-NEXT: fmv.h.x fa5, a1
+; RV64V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64V-NEXT: vslidedown.vi v9, v12, 1
+; RV64V-NEXT: vmv.x.s a1, v9
+; RV64V-NEXT: fsh fa5, 0(a1)
+; RV64V-NEXT: andi a1, a0, 4
+; RV64V-NEXT: beqz a1, .LBB57_3
+; RV64V-NEXT: .LBB57_13: # %cond.store3
+; RV64V-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64V-NEXT: vslidedown.vi v9, v8, 2
+; RV64V-NEXT: vmv.x.s a1, v9
+; RV64V-NEXT: fmv.h.x fa5, a1
+; RV64V-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64V-NEXT: vslidedown.vi v10, v12, 2
+; RV64V-NEXT: vmv.x.s a1, v10
+; RV64V-NEXT: fsh fa5, 0(a1)
+; RV64V-NEXT: andi a1, a0, 8
+; RV64V-NEXT: bnez a1, .LBB57_4
+; RV64V-NEXT: j .LBB57_5
+; RV64V-NEXT: .LBB57_14: # %cond.store7
+; RV64V-NEXT: addi a1, sp, 192
+; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64V-NEXT: vse64.v v12, (a1)
+; RV64V-NEXT: ld a1, 224(sp)
+; RV64V-NEXT: vslidedown.vi v9, v8, 4
+; RV64V-NEXT: vmv.x.s a2, v9
+; RV64V-NEXT: fmv.h.x fa5, a2
+; RV64V-NEXT: fsh fa5, 0(a1)
+; RV64V-NEXT: andi a1, a0, 32
+; RV64V-NEXT: beqz a1, .LBB57_7
+; RV64V-NEXT: .LBB57_15: # %cond.store9
+; RV64V-NEXT: addi a1, sp, 128
+; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64V-NEXT: vse64.v v12, (a1)
+; RV64V-NEXT: ld a1, 168(sp)
+; RV64V-NEXT: vslidedown.vi v9, v8, 5
+; RV64V-NEXT: vmv.x.s a2, v9
+; RV64V-NEXT: fmv.h.x fa5, a2
+; RV64V-NEXT: fsh fa5, 0(a1)
+; RV64V-NEXT: andi a1, a0, 64
+; RV64V-NEXT: beqz a1, .LBB57_8
+; RV64V-NEXT: .LBB57_16: # %cond.store11
+; RV64V-NEXT: addi a1, sp, 64
+; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64V-NEXT: vse64.v v12, (a1)
+; RV64V-NEXT: ld a1, 112(sp)
+; RV64V-NEXT: vslidedown.vi v9, v8, 6
+; RV64V-NEXT: vmv.x.s a2, v9
+; RV64V-NEXT: fmv.h.x fa5, a2
+; RV64V-NEXT: fsh fa5, 0(a1)
+; RV64V-NEXT: andi a0, a0, -128
+; RV64V-NEXT: bnez a0, .LBB57_9
+; RV64V-NEXT: j .LBB57_10
;
-; RV64ZVE32F-LABEL: mscatter_v8f16:
+; RV64ZVE32F-LABEL: mscatter_v8bf16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ld a3, 40(a0)
; RV64ZVE32F-NEXT: ld a2, 48(a0)
@@ -6110,75 +6635,325 @@ define void @mscatter_v8f16(<8 x half> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB57_9: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vse16.v v8, (a0)
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV64ZVE32F-NEXT: vmv.x.s t1, v8
+; RV64ZVE32F-NEXT: fmv.h.x fa5, t1
+; RV64ZVE32F-NEXT: fsh fa5, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a4, 2
; RV64ZVE32F-NEXT: beqz a0, .LBB57_2
; RV64ZVE32F-NEXT: .LBB57_10: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
-; RV64ZVE32F-NEXT: vse16.v v9, (t0)
+; RV64ZVE32F-NEXT: vmv.x.s a0, v9
+; RV64ZVE32F-NEXT: fmv.h.x fa5, a0
+; RV64ZVE32F-NEXT: fsh fa5, 0(t0)
; RV64ZVE32F-NEXT: andi a0, a4, 4
; RV64ZVE32F-NEXT: beqz a0, .LBB57_3
; RV64ZVE32F-NEXT: .LBB57_11: # %cond.store3
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
-; RV64ZVE32F-NEXT: vse16.v v9, (a7)
+; RV64ZVE32F-NEXT: vmv.x.s a0, v9
+; RV64ZVE32F-NEXT: fmv.h.x fa5, a0
+; RV64ZVE32F-NEXT: fsh fa5, 0(a7)
; RV64ZVE32F-NEXT: andi a0, a4, 8
; RV64ZVE32F-NEXT: beqz a0, .LBB57_4
; RV64ZVE32F-NEXT: .LBB57_12: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
-; RV64ZVE32F-NEXT: vse16.v v9, (a6)
+; RV64ZVE32F-NEXT: vmv.x.s a0, v9
+; RV64ZVE32F-NEXT: fmv.h.x fa5, a0
+; RV64ZVE32F-NEXT: fsh fa5, 0(a6)
; RV64ZVE32F-NEXT: andi a0, a4, 16
; RV64ZVE32F-NEXT: beqz a0, .LBB57_5
; RV64ZVE32F-NEXT: .LBB57_13: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
-; RV64ZVE32F-NEXT: vse16.v v9, (a5)
+; RV64ZVE32F-NEXT: vmv.x.s a0, v9
+; RV64ZVE32F-NEXT: fmv.h.x fa5, a0
+; RV64ZVE32F-NEXT: fsh fa5, 0(a5)
; RV64ZVE32F-NEXT: andi a0, a4, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB57_6
; RV64ZVE32F-NEXT: .LBB57_14: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
-; RV64ZVE32F-NEXT: vse16.v v9, (a3)
+; RV64ZVE32F-NEXT: vmv.x.s a0, v9
+; RV64ZVE32F-NEXT: fmv.h.x fa5, a0
+; RV64ZVE32F-NEXT: fsh fa5, 0(a3)
; RV64ZVE32F-NEXT: andi a0, a4, 64
; RV64ZVE32F-NEXT: beqz a0, .LBB57_7
; RV64ZVE32F-NEXT: .LBB57_15: # %cond.store11
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 6
-; RV64ZVE32F-NEXT: vse16.v v9, (a2)
+; RV64ZVE32F-NEXT: vmv.x.s a0, v9
+; RV64ZVE32F-NEXT: fmv.h.x fa5, a0
+; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: andi a0, a4, -128
; RV64ZVE32F-NEXT: beqz a0, .LBB57_8
; RV64ZVE32F-NEXT: .LBB57_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
-; RV64ZVE32F-NEXT: vse16.v v8, (a1)
+; RV64ZVE32F-NEXT: vmv.x.s a0, v8
+; RV64ZVE32F-NEXT: fmv.h.x fa5, a0
+; RV64ZVE32F-NEXT: fsh fa5, 0(a1)
; RV64ZVE32F-NEXT: ret
- call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
+ call void @llvm.masked.scatter.v8bf16.v8p0(<8 x bfloat> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
ret void
}
-define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
-; RV32-LABEL: mscatter_baseidx_v8i8_v8f16:
+define void @mscatter_baseidx_v8i8_v8bf16(<8 x bfloat> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
+; RV32-LABEL: mscatter_baseidx_v8i8_v8bf16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsext.vf4 v10, v9
; RV32-NEXT: vadd.vv v10, v10, v10
+; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; RV32-NEXT: vmv.x.s a1, v0
+; RV32-NEXT: andi a2, a1, 1
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32-NEXT: vadd.vx v10, v10, a0
+; RV32-NEXT: bnez a2, .LBB58_9
+; RV32-NEXT: # %bb.1: # %else
+; RV32-NEXT: andi a0, a1, 2
+; RV32-NEXT: bnez a0, .LBB58_10
+; RV32-NEXT: .LBB58_2: # %else2
+; RV32-NEXT: andi a0, a1, 4
+; RV32-NEXT: bnez a0, .LBB58_11
+; RV32-NEXT: .LBB58_3: # %else4
+; RV32-NEXT: andi a0, a1, 8
+; RV32-NEXT: bnez a0, .LBB58_12
+; RV32-NEXT: .LBB58_4: # %else6
+; RV32-NEXT: andi a0, a1, 16
+; RV32-NEXT: bnez a0, .LBB58_13
+; RV32-NEXT: .LBB58_5: # %else8
+; RV32-NEXT: andi a0, a1, 32
+; RV32-NEXT: bnez a0, .LBB58_14
+; RV32-NEXT: .LBB58_6: # %else10
+; RV32-NEXT: andi a0, a1, 64
+; RV32-NEXT: bnez a0, .LBB58_15
+; RV32-NEXT: .LBB58_7: # %else12
+; RV32-NEXT: andi a0, a1, -128
+; RV32-NEXT: bnez a0, .LBB58_16
+; RV32-NEXT: .LBB58_8: # %else14
+; RV32-NEXT: ret
+; RV32-NEXT: .LBB58_9: # %cond.store
; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
+; RV32-NEXT: vmv.x.s a0, v8
+; RV32-NEXT: fmv.h.x fa5, a0
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.x.s a0, v10
+; RV32-NEXT: fsh fa5, 0(a0)
+; RV32-NEXT: andi a0, a1, 2
+; RV32-NEXT: beqz a0, .LBB58_2
+; RV32-NEXT: .LBB58_10: # %cond.store1
+; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v8, 1
+; RV32-NEXT: vmv.x.s a0, v9
+; RV32-NEXT: fmv.h.x fa5, a0
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v10, 1
+; RV32-NEXT: vmv.x.s a0, v9
+; RV32-NEXT: fsh fa5, 0(a0)
+; RV32-NEXT: andi a0, a1, 4
+; RV32-NEXT: beqz a0, .LBB58_3
+; RV32-NEXT: .LBB58_11: # %cond.store3
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v8, 2
+; RV32-NEXT: vmv.x.s a0, v9
+; RV32-NEXT: fmv.h.x fa5, a0
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v10, 2
+; RV32-NEXT: vmv.x.s a0, v9
+; RV32-NEXT: fsh fa5, 0(a0)
+; RV32-NEXT: andi a0, a1, 8
+; RV32-NEXT: beqz a0, .LBB58_4
+; RV32-NEXT: .LBB58_12: # %cond.store5
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v8, 3
+; RV32-NEXT: vmv.x.s a0, v9
+; RV32-NEXT: fmv.h.x fa5, a0
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v10, 3
+; RV32-NEXT: vmv.x.s a0, v9
+; RV32-NEXT: fsh fa5, 0(a0)
+; RV32-NEXT: andi a0, a1, 16
+; RV32-NEXT: beqz a0, .LBB58_5
+; RV32-NEXT: .LBB58_13: # %cond.store7
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v8, 4
+; RV32-NEXT: vmv.x.s a0, v9
+; RV32-NEXT: fmv.h.x fa5, a0
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v12, v10, 4
+; RV32-NEXT: vmv.x.s a0, v12
+; RV32-NEXT: fsh fa5, 0(a0)
+; RV32-NEXT: andi a0, a1, 32
+; RV32-NEXT: beqz a0, .LBB58_6
+; RV32-NEXT: .LBB58_14: # %cond.store9
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v8, 5
+; RV32-NEXT: vmv.x.s a0, v9
+; RV32-NEXT: fmv.h.x fa5, a0
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v12, v10, 5
+; RV32-NEXT: vmv.x.s a0, v12
+; RV32-NEXT: fsh fa5, 0(a0)
+; RV32-NEXT: andi a0, a1, 64
+; RV32-NEXT: beqz a0, .LBB58_7
+; RV32-NEXT: .LBB58_15: # %cond.store11
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v8, 6
+; RV32-NEXT: vmv.x.s a0, v9
+; RV32-NEXT: fmv.h.x fa5, a0
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v12, v10, 6
+; RV32-NEXT: vmv.x.s a0, v12
+; RV32-NEXT: fsh fa5, 0(a0)
+; RV32-NEXT: andi a0, a1, -128
+; RV32-NEXT: beqz a0, .LBB58_8
+; RV32-NEXT: .LBB58_16: # %cond.store13
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v8, 7
+; RV32-NEXT: vmv.x.s a0, v8
+; RV32-NEXT: fmv.h.x fa5, a0
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v10, 7
+; RV32-NEXT: vmv.x.s a0, v8
+; RV32-NEXT: fsh fa5, 0(a0)
; RV32-NEXT: ret
;
-; RV64-LABEL: mscatter_baseidx_v8i8_v8f16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vsext.vf8 v12, v9
-; RV64-NEXT: vadd.vv v12, v12, v12
-; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_baseidx_v8i8_v8bf16:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vsext.vf8 v12, v9
+; RV64V-NEXT: vadd.vv v12, v12, v12
+; RV64V-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; RV64V-NEXT: vmv.x.s a1, v0
+; RV64V-NEXT: andi a2, a1, 1
+; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; RV64V-NEXT: vadd.vx v12, v12, a0
+; RV64V-NEXT: bnez a2, .LBB58_11
+; RV64V-NEXT: # %bb.1: # %else
+; RV64V-NEXT: andi a0, a1, 2
+; RV64V-NEXT: bnez a0, .LBB58_12
+; RV64V-NEXT: .LBB58_2: # %else2
+; RV64V-NEXT: andi a0, a1, 4
+; RV64V-NEXT: bnez a0, .LBB58_13
+; RV64V-NEXT: .LBB58_3: # %else4
+; RV64V-NEXT: andi a0, a1, 8
+; RV64V-NEXT: beqz a0, .LBB58_5
+; RV64V-NEXT: .LBB58_4: # %cond.store5
+; RV64V-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64V-NEXT: vslidedown.vi v9, v8, 3
+; RV64V-NEXT: vmv.x.s a0, v9
+; RV64V-NEXT: fmv.h.x fa5, a0
+; RV64V-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64V-NEXT: vslidedown.vi v10, v12, 3
+; RV64V-NEXT: vmv.x.s a0, v10
+; RV64V-NEXT: fsh fa5, 0(a0)
+; RV64V-NEXT: .LBB58_5: # %else6
+; RV64V-NEXT: addi sp, sp, -320
+; RV64V-NEXT: .cfi_def_cfa_offset 320
+; RV64V-NEXT: sd ra, 312(sp) # 8-byte Folded Spill
+; RV64V-NEXT: sd s0, 304(sp) # 8-byte Folded Spill
+; RV64V-NEXT: .cfi_offset ra, -8
+; RV64V-NEXT: .cfi_offset s0, -16
+; RV64V-NEXT: addi s0, sp, 320
+; RV64V-NEXT: .cfi_def_cfa s0, 0
+; RV64V-NEXT: andi sp, sp, -64
+; RV64V-NEXT: andi a0, a1, 16
+; RV64V-NEXT: bnez a0, .LBB58_14
+; RV64V-NEXT: # %bb.6: # %else8
+; RV64V-NEXT: andi a0, a1, 32
+; RV64V-NEXT: bnez a0, .LBB58_15
+; RV64V-NEXT: .LBB58_7: # %else10
+; RV64V-NEXT: andi a0, a1, 64
+; RV64V-NEXT: bnez a0, .LBB58_16
+; RV64V-NEXT: .LBB58_8: # %else12
+; RV64V-NEXT: andi a0, a1, -128
+; RV64V-NEXT: beqz a0, .LBB58_10
+; RV64V-NEXT: .LBB58_9: # %cond.store13
+; RV64V-NEXT: mv a0, sp
+; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64V-NEXT: vse64.v v12, (a0)
+; RV64V-NEXT: ld a0, 56(sp)
+; RV64V-NEXT: vslidedown.vi v8, v8, 7
+; RV64V-NEXT: vmv.x.s a1, v8
+; RV64V-NEXT: fmv.h.x fa5, a1
+; RV64V-NEXT: fsh fa5, 0(a0)
+; RV64V-NEXT: .LBB58_10: # %else14
+; RV64V-NEXT: addi sp, s0, -320
+; RV64V-NEXT: ld ra, 312(sp) # 8-byte Folded Reload
+; RV64V-NEXT: ld s0, 304(sp) # 8-byte Folded Reload
+; RV64V-NEXT: addi sp, sp, 320
+; RV64V-NEXT: ret
+; RV64V-NEXT: .LBB58_11: # %cond.store
+; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV64V-NEXT: vmv.x.s a0, v8
+; RV64V-NEXT: fmv.h.x fa5, a0
+; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; RV64V-NEXT: vmv.x.s a0, v12
+; RV64V-NEXT: fsh fa5, 0(a0)
+; RV64V-NEXT: andi a0, a1, 2
+; RV64V-NEXT: beqz a0, .LBB58_2
+; RV64V-NEXT: .LBB58_12: # %cond.store1
+; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV64V-NEXT: vslidedown.vi v9, v8, 1
+; RV64V-NEXT: vmv.x.s a0, v9
+; RV64V-NEXT: fmv.h.x fa5, a0
+; RV64V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64V-NEXT: vslidedown.vi v9, v12, 1
+; RV64V-NEXT: vmv.x.s a0, v9
+; RV64V-NEXT: fsh fa5, 0(a0)
+; RV64V-NEXT: andi a0, a1, 4
+; RV64V-NEXT: beqz a0, .LBB58_3
+; RV64V-NEXT: .LBB58_13: # %cond.store3
+; RV64V-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64V-NEXT: vslidedown.vi v9, v8, 2
+; RV64V-NEXT: vmv.x.s a0, v9
+; RV64V-NEXT: fmv.h.x fa5, a0
+; RV64V-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64V-NEXT: vslidedown.vi v10, v12, 2
+; RV64V-NEXT: vmv.x.s a0, v10
+; RV64V-NEXT: fsh fa5, 0(a0)
+; RV64V-NEXT: andi a0, a1, 8
+; RV64V-NEXT: bnez a0, .LBB58_4
+; RV64V-NEXT: j .LBB58_5
+; RV64V-NEXT: .LBB58_14: # %cond.store7
+; RV64V-NEXT: addi a0, sp, 192
+; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64V-NEXT: vse64.v v12, (a0)
+; RV64V-NEXT: ld a0, 224(sp)
+; RV64V-NEXT: vslidedown.vi v9, v8, 4
+; RV64V-NEXT: vmv.x.s a2, v9
+; RV64V-NEXT: fmv.h.x fa5, a2
+; RV64V-NEXT: fsh fa5, 0(a0)
+; RV64V-NEXT: andi a0, a1, 32
+; RV64V-NEXT: beqz a0, .LBB58_7
+; RV64V-NEXT: .LBB58_15: # %cond.store9
+; RV64V-NEXT: addi a0, sp, 128
+; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64V-NEXT: vse64.v v12, (a0)
+; RV64V-NEXT: ld a0, 168(sp)
+; RV64V-NEXT: vslidedown.vi v9, v8, 5
+; RV64V-NEXT: vmv.x.s a2, v9
+; RV64V-NEXT: fmv.h.x fa5, a2
+; RV64V-NEXT: fsh fa5, 0(a0)
+; RV64V-NEXT: andi a0, a1, 64
+; RV64V-NEXT: beqz a0, .LBB58_8
+; RV64V-NEXT: .LBB58_16: # %cond.store11
+; RV64V-NEXT: addi a0, sp, 64
+; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64V-NEXT: vse64.v v12, (a0)
+; RV64V-NEXT: ld a0, 112(sp)
+; RV64V-NEXT: vslidedown.vi v9, v8, 6
+; RV64V-NEXT: vmv.x.s a2, v9
+; RV64V-NEXT: fmv.h.x fa5, a2
+; RV64V-NEXT: fsh fa5, 0(a0)
+; RV64V-NEXT: andi a0, a1, -128
+; RV64V-NEXT: bnez a0, .LBB58_9
+; RV64V-NEXT: j .LBB58_10
;
-; RV64ZVE32F-LABEL: mscatter_baseidx_v8i8_v8f16:
+; RV64ZVE32F-LABEL: mscatter_baseidx_v8i8_v8bf16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
@@ -6188,8 +6963,10 @@ define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %i
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vse16.v v8, (a2)
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV64ZVE32F-NEXT: vmv.x.s a3, v8
+; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
+; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: .LBB58_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB58_4
@@ -6201,7 +6978,9 @@ define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %i
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
-; RV64ZVE32F-NEXT: vse16.v v10, (a2)
+; RV64ZVE32F-NEXT: vmv.x.s a3, v10
+; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
+; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: .LBB58_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
@@ -6226,7 +7005,9 @@ define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %i
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
-; RV64ZVE32F-NEXT: vse16.v v9, (a2)
+; RV64ZVE32F-NEXT: vmv.x.s a3, v9
+; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
+; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: .LBB58_9: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
@@ -6243,7 +7024,9 @@ define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %i
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
-; RV64ZVE32F-NEXT: vse16.v v11, (a2)
+; RV64ZVE32F-NEXT: vmv.x.s a3, v11
+; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
+; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB58_6
; RV64ZVE32F-NEXT: .LBB58_13: # %cond.store5
@@ -6254,7 +7037,9 @@ define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %i
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
-; RV64ZVE32F-NEXT: vse16.v v9, (a2)
+; RV64ZVE32F-NEXT: vmv.x.s a3, v9
+; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
+; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB58_7
; RV64ZVE32F-NEXT: .LBB58_14: # %cond.store7
@@ -6264,7 +7049,9 @@ define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %i
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
-; RV64ZVE32F-NEXT: vse16.v v9, (a2)
+; RV64ZVE32F-NEXT: vmv.x.s a3, v9
+; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
+; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB58_8
; RV64ZVE32F-NEXT: j .LBB58_9
@@ -6274,7 +7061,9 @@ define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %i
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
-; RV64ZVE32F-NEXT: vse16.v v10, (a2)
+; RV64ZVE32F-NEXT: vmv.x.s a3, v10
+; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
+; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB58_11
; RV64ZVE32F-NEXT: .LBB58_16: # %cond.store13
@@ -6285,33 +7074,269 @@ define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %i
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
-; RV64ZVE32F-NEXT: vse16.v v8, (a0)
+; RV64ZVE32F-NEXT: vmv.x.s a1, v8
+; RV64ZVE32F-NEXT: fmv.h.x fa5, a1
+; RV64ZVE32F-NEXT: fsh fa5, 0(a0)
; RV64ZVE32F-NEXT: ret
- %ptrs = getelementptr inbounds half, ptr %base, <8 x i8> %idxs
- call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
+ %ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i8> %idxs
+ call void @llvm.masked.scatter.v8bf16.v8p0(<8 x bfloat> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
ret void
}
-define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
-; RV32-LABEL: mscatter_baseidx_sext_v8i8_v8f16:
+define void @mscatter_baseidx_sext_v8i8_v8bf16(<8 x bfloat> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
+; RV32-LABEL: mscatter_baseidx_sext_v8i8_v8bf16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsext.vf4 v10, v9
; RV32-NEXT: vadd.vv v10, v10, v10
+; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; RV32-NEXT: vmv.x.s a1, v0
+; RV32-NEXT: andi a2, a1, 1
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32-NEXT: vadd.vx v10, v10, a0
+; RV32-NEXT: bnez a2, .LBB59_9
+; RV32-NEXT: # %bb.1: # %else
+; RV32-NEXT: andi a0, a1, 2
+; RV32-NEXT: bnez a0, .LBB59_10
+; RV32-NEXT: .LBB59_2: # %else2
+; RV32-NEXT: andi a0, a1, 4
+; RV32-NEXT: bnez a0, .LBB59_11
+; RV32-NEXT: .LBB59_3: # %else4
+; RV32-NEXT: andi a0, a1, 8
+; RV32-NEXT: bnez a0, .LBB59_12
+; RV32-NEXT: .LBB59_4: # %else6
+; RV32-NEXT: andi a0, a1, 16
+; RV32-NEXT: bnez a0, .LBB59_13
+; RV32-NEXT: .LBB59_5: # %else8
+; RV32-NEXT: andi a0, a1, 32
+; RV32-NEXT: bnez a0, .LBB59_14
+; RV32-NEXT: .LBB59_6: # %else10
+; RV32-NEXT: andi a0, a1, 64
+; RV32-NEXT: bnez a0, .LBB59_15
+; RV32-NEXT: .LBB59_7: # %else12
+; RV32-NEXT: andi a0, a1, -128
+; RV32-NEXT: bnez a0, .LBB59_16
+; RV32-NEXT: .LBB59_8: # %else14
+; RV32-NEXT: ret
+; RV32-NEXT: .LBB59_9: # %cond.store
; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
+; RV32-NEXT: vmv.x.s a0, v8
+; RV32-NEXT: fmv.h.x fa5, a0
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.x.s a0, v10
+; RV32-NEXT: fsh fa5, 0(a0)
+; RV32-NEXT: andi a0, a1, 2
+; RV32-NEXT: beqz a0, .LBB59_2
+; RV32-NEXT: .LBB59_10: # %cond.store1
+; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v8, 1
+; RV32-NEXT: vmv.x.s a0, v9
+; RV32-NEXT: fmv.h.x fa5, a0
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v10, 1
+; RV32-NEXT: vmv.x.s a0, v9
+; RV32-NEXT: fsh fa5, 0(a0)
+; RV32-NEXT: andi a0, a1, 4
+; RV32-NEXT: beqz a0, .LBB59_3
+; RV32-NEXT: .LBB59_11: # %cond.store3
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v8, 2
+; RV32-NEXT: vmv.x.s a0, v9
+; RV32-NEXT: fmv.h.x fa5, a0
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v10, 2
+; RV32-NEXT: vmv.x.s a0, v9
+; RV32-NEXT: fsh fa5, 0(a0)
+; RV32-NEXT: andi a0, a1, 8
+; RV32-NEXT: beqz a0, .LBB59_4
+; RV32-NEXT: .LBB59_12: # %cond.store5
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v8, 3
+; RV32-NEXT: vmv.x.s a0, v9
+; RV32-NEXT: fmv.h.x fa5, a0
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v10, 3
+; RV32-NEXT: vmv.x.s a0, v9
+; RV32-NEXT: fsh fa5, 0(a0)
+; RV32-NEXT: andi a0, a1, 16
+; RV32-NEXT: beqz a0, .LBB59_5
+; RV32-NEXT: .LBB59_13: # %cond.store7
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v8, 4
+; RV32-NEXT: vmv.x.s a0, v9
+; RV32-NEXT: fmv.h.x fa5, a0
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v12, v10, 4
+; RV32-NEXT: vmv.x.s a0, v12
+; RV32-NEXT: fsh fa5, 0(a0)
+; RV32-NEXT: andi a0, a1, 32
+; RV32-NEXT: beqz a0, .LBB59_6
+; RV32-NEXT: .LBB59_14: # %cond.store9
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v8, 5
+; RV32-NEXT: vmv.x.s a0, v9
+; RV32-NEXT: fmv.h.x fa5, a0
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v12, v10, 5
+; RV32-NEXT: vmv.x.s a0, v12
+; RV32-NEXT: fsh fa5, 0(a0)
+; RV32-NEXT: andi a0, a1, 64
+; RV32-NEXT: beqz a0, .LBB59_7
+; RV32-NEXT: .LBB59_15: # %cond.store11
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v8, 6
+; RV32-NEXT: vmv.x.s a0, v9
+; RV32-NEXT: fmv.h.x fa5, a0
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v12, v10, 6
+; RV32-NEXT: vmv.x.s a0, v12
+; RV32-NEXT: fsh fa5, 0(a0)
+; RV32-NEXT: andi a0, a1, -128
+; RV32-NEXT: beqz a0, .LBB59_8
+; RV32-NEXT: .LBB59_16: # %cond.store13
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v8, 7
+; RV32-NEXT: vmv.x.s a0, v8
+; RV32-NEXT: fmv.h.x fa5, a0
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v10, 7
+; RV32-NEXT: vmv.x.s a0, v8
+; RV32-NEXT: fsh fa5, 0(a0)
; RV32-NEXT: ret
;
-; RV64-LABEL: mscatter_baseidx_sext_v8i8_v8f16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vsext.vf8 v12, v9
-; RV64-NEXT: vadd.vv v12, v12, v12
-; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_baseidx_sext_v8i8_v8bf16:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vsext.vf8 v12, v9
+; RV64V-NEXT: vadd.vv v12, v12, v12
+; RV64V-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; RV64V-NEXT: vmv.x.s a1, v0
+; RV64V-NEXT: andi a2, a1, 1
+; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; RV64V-NEXT: vadd.vx v12, v12, a0
+; RV64V-NEXT: bnez a2, .LBB59_11
+; RV64V-NEXT: # %bb.1: # %else
+; RV64V-NEXT: andi a0, a1, 2
+; RV64V-NEXT: bnez a0, .LBB59_12
+; RV64V-NEXT: .LBB59_2: # %else2
+; RV64V-NEXT: andi a0, a1, 4
+; RV64V-NEXT: bnez a0, .LBB59_13
+; RV64V-NEXT: .LBB59_3: # %else4
+; RV64V-NEXT: andi a0, a1, 8
+; RV64V-NEXT: beqz a0, .LBB59_5
+; RV64V-NEXT: .LBB59_4: # %cond.store5
+; RV64V-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64V-NEXT: vslidedown.vi v9, v8, 3
+; RV64V-NEXT: vmv.x.s a0, v9
+; RV64V-NEXT: fmv.h.x fa5, a0
+; RV64V-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64V-NEXT: vslidedown.vi v10, v12, 3
+; RV64V-NEXT: vmv.x.s a0, v10
+; RV64V-NEXT: fsh fa5, 0(a0)
+; RV64V-NEXT: .LBB59_5: # %else6
+; RV64V-NEXT: addi sp, sp, -320
+; RV64V-NEXT: .cfi_def_cfa_offset 320
+; RV64V-NEXT: sd ra, 312(sp) # 8-byte Folded Spill
+; RV64V-NEXT: sd s0, 304(sp) # 8-byte Folded Spill
+; RV64V-NEXT: .cfi_offset ra, -8
+; RV64V-NEXT: .cfi_offset s0, -16
+; RV64V-NEXT: addi s0, sp, 320
+; RV64V-NEXT: .cfi_def_cfa s0, 0
+; RV64V-NEXT: andi sp, sp, -64
+; RV64V-NEXT: andi a0, a1, 16
+; RV64V-NEXT: bnez a0, .LBB59_14
+; RV64V-NEXT: # %bb.6: # %else8
+; RV64V-NEXT: andi a0, a1, 32
+; RV64V-NEXT: bnez a0, .LBB59_15
+; RV64V-NEXT: .LBB59_7: # %else10
+; RV64V-NEXT: andi a0, a1, 64
+; RV64V-NEXT: bnez a0, .LBB59_16
+; RV64V-NEXT: .LBB59_8: # %else12
+; RV64V-NEXT: andi a0, a1, -128
+; RV64V-NEXT: beqz a0, .LBB59_10
+; RV64V-NEXT: .LBB59_9: # %cond.store13
+; RV64V-NEXT: mv a0, sp
+; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64V-NEXT: vse64.v v12, (a0)
+; RV64V-NEXT: ld a0, 56(sp)
+; RV64V-NEXT: vslidedown.vi v8, v8, 7
+; RV64V-NEXT: vmv.x.s a1, v8
+; RV64V-NEXT: fmv.h.x fa5, a1
+; RV64V-NEXT: fsh fa5, 0(a0)
+; RV64V-NEXT: .LBB59_10: # %else14
+; RV64V-NEXT: addi sp, s0, -320
+; RV64V-NEXT: ld ra, 312(sp) # 8-byte Folded Reload
+; RV64V-NEXT: ld s0, 304(sp) # 8-byte Folded Reload
+; RV64V-NEXT: addi sp, sp, 320
+; RV64V-NEXT: ret
+; RV64V-NEXT: .LBB59_11: # %cond.store
+; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV64V-NEXT: vmv.x.s a0, v8
+; RV64V-NEXT: fmv.h.x fa5, a0
+; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; RV64V-NEXT: vmv.x.s a0, v12
+; RV64V-NEXT: fsh fa5, 0(a0)
+; RV64V-NEXT: andi a0, a1, 2
+; RV64V-NEXT: beqz a0, .LBB59_2
+; RV64V-NEXT: .LBB59_12: # %cond.store1
+; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV64V-NEXT: vslidedown.vi v9, v8, 1
+; RV64V-NEXT: vmv.x.s a0, v9
+; RV64V-NEXT: fmv.h.x fa5, a0
+; RV64V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64V-NEXT: vslidedown.vi v9, v12, 1
+; RV64V-NEXT: vmv.x.s a0, v9
+; RV64V-NEXT: fsh fa5, 0(a0)
+; RV64V-NEXT: andi a0, a1, 4
+; RV64V-NEXT: beqz a0, .LBB59_3
+; RV64V-NEXT: .LBB59_13: # %cond.store3
+; RV64V-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64V-NEXT: vslidedown.vi v9, v8, 2
+; RV64V-NEXT: vmv.x.s a0, v9
+; RV64V-NEXT: fmv.h.x fa5, a0
+; RV64V-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64V-NEXT: vslidedown.vi v10, v12, 2
+; RV64V-NEXT: vmv.x.s a0, v10
+; RV64V-NEXT: fsh fa5, 0(a0)
+; RV64V-NEXT: andi a0, a1, 8
+; RV64V-NEXT: bnez a0, .LBB59_4
+; RV64V-NEXT: j .LBB59_5
+; RV64V-NEXT: .LBB59_14: # %cond.store7
+; RV64V-NEXT: addi a0, sp, 192
+; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64V-NEXT: vse64.v v12, (a0)
+; RV64V-NEXT: ld a0, 224(sp)
+; RV64V-NEXT: vslidedown.vi v9, v8, 4
+; RV64V-NEXT: vmv.x.s a2, v9
+; RV64V-NEXT: fmv.h.x fa5, a2
+; RV64V-NEXT: fsh fa5, 0(a0)
+; RV64V-NEXT: andi a0, a1, 32
+; RV64V-NEXT: beqz a0, .LBB59_7
+; RV64V-NEXT: .LBB59_15: # %cond.store9
+; RV64V-NEXT: addi a0, sp, 128
+; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64V-NEXT: vse64.v v12, (a0)
+; RV64V-NEXT: ld a0, 168(sp)
+; RV64V-NEXT: vslidedown.vi v9, v8, 5
+; RV64V-NEXT: vmv.x.s a2, v9
+; RV64V-NEXT: fmv.h.x fa5, a2
+; RV64V-NEXT: fsh fa5, 0(a0)
+; RV64V-NEXT: andi a0, a1, 64
+; RV64V-NEXT: beqz a0, .LBB59_8
+; RV64V-NEXT: .LBB59_16: # %cond.store11
+; RV64V-NEXT: addi a0, sp, 64
+; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64V-NEXT: vse64.v v12, (a0)
+; RV64V-NEXT: ld a0, 112(sp)
+; RV64V-NEXT: vslidedown.vi v9, v8, 6
+; RV64V-NEXT: vmv.x.s a2, v9
+; RV64V-NEXT: fmv.h.x fa5, a2
+; RV64V-NEXT: fsh fa5, 0(a0)
+; RV64V-NEXT: andi a0, a1, -128
+; RV64V-NEXT: bnez a0, .LBB59_9
+; RV64V-NEXT: j .LBB59_10
;
-; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8f16:
+; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8bf16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
@@ -6321,8 +7346,10 @@ define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vse16.v v8, (a2)
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV64ZVE32F-NEXT: vmv.x.s a3, v8
+; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
+; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: .LBB59_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB59_4
@@ -6334,7 +7361,9 @@ define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
-; RV64ZVE32F-NEXT: vse16.v v10, (a2)
+; RV64ZVE32F-NEXT: vmv.x.s a3, v10
+; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
+; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: .LBB59_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
@@ -6359,7 +7388,9 @@ define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
-; RV64ZVE32F-NEXT: vse16.v v9, (a2)
+; RV64ZVE32F-NEXT: vmv.x.s a3, v9
+; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
+; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: .LBB59_9: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
@@ -6376,7 +7407,9 @@ define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
-; RV64ZVE32F-NEXT: vse16.v v11, (a2)
+; RV64ZVE32F-NEXT: vmv.x.s a3, v11
+; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
+; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB59_6
; RV64ZVE32F-NEXT: .LBB59_13: # %cond.store5
@@ -6387,7 +7420,9 @@ define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
-; RV64ZVE32F-NEXT: vse16.v v9, (a2)
+; RV64ZVE32F-NEXT: vmv.x.s a3, v9
+; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
+; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB59_7
; RV64ZVE32F-NEXT: .LBB59_14: # %cond.store7
@@ -6397,7 +7432,9 @@ define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
-; RV64ZVE32F-NEXT: vse16.v v9, (a2)
+; RV64ZVE32F-NEXT: vmv.x.s a3, v9
+; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
+; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB59_8
; RV64ZVE32F-NEXT: j .LBB59_9
@@ -6407,7 +7444,9 @@ define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
-; RV64ZVE32F-NEXT: vse16.v v10, (a2)
+; RV64ZVE32F-NEXT: vmv.x.s a3, v10
+; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
+; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB59_11
; RV64ZVE32F-NEXT: .LBB59_16: # %cond.store13
@@ -6418,32 +7457,270 @@ define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
-; RV64ZVE32F-NEXT: vse16.v v8, (a0)
+; RV64ZVE32F-NEXT: vmv.x.s a1, v8
+; RV64ZVE32F-NEXT: fmv.h.x fa5, a1
+; RV64ZVE32F-NEXT: fsh fa5, 0(a0)
; RV64ZVE32F-NEXT: ret
%eidxs = sext <8 x i8> %idxs to <8 x i16>
- %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs
- call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
+ %ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i16> %eidxs
+ call void @llvm.masked.scatter.v8bf16.v8p0(<8 x bfloat> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
ret void
}
-define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
-; RV32-LABEL: mscatter_baseidx_zext_v8i8_v8f16:
+define void @mscatter_baseidx_zext_v8i8_v8bf16(<8 x bfloat> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
+; RV32-LABEL: mscatter_baseidx_zext_v8i8_v8bf16:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT: vwaddu.vv v10, v9, v9
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: vzext.vf4 v10, v9
+; RV32-NEXT: vadd.vv v10, v10, v10
+; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; RV32-NEXT: vmv.x.s a1, v0
+; RV32-NEXT: andi a2, a1, 1
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32-NEXT: vadd.vx v10, v10, a0
+; RV32-NEXT: bnez a2, .LBB60_9
+; RV32-NEXT: # %bb.1: # %else
+; RV32-NEXT: andi a0, a1, 2
+; RV32-NEXT: bnez a0, .LBB60_10
+; RV32-NEXT: .LBB60_2: # %else2
+; RV32-NEXT: andi a0, a1, 4
+; RV32-NEXT: bnez a0, .LBB60_11
+; RV32-NEXT: .LBB60_3: # %else4
+; RV32-NEXT: andi a0, a1, 8
+; RV32-NEXT: bnez a0, .LBB60_12
+; RV32-NEXT: .LBB60_4: # %else6
+; RV32-NEXT: andi a0, a1, 16
+; RV32-NEXT: bnez a0, .LBB60_13
+; RV32-NEXT: .LBB60_5: # %else8
+; RV32-NEXT: andi a0, a1, 32
+; RV32-NEXT: bnez a0, .LBB60_14
+; RV32-NEXT: .LBB60_6: # %else10
+; RV32-NEXT: andi a0, a1, 64
+; RV32-NEXT: bnez a0, .LBB60_15
+; RV32-NEXT: .LBB60_7: # %else12
+; RV32-NEXT: andi a0, a1, -128
+; RV32-NEXT: bnez a0, .LBB60_16
+; RV32-NEXT: .LBB60_8: # %else14
+; RV32-NEXT: ret
+; RV32-NEXT: .LBB60_9: # %cond.store
; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV32-NEXT: vsoxei16.v v8, (a0), v10, v0.t
+; RV32-NEXT: vmv.x.s a0, v8
+; RV32-NEXT: fmv.h.x fa5, a0
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.x.s a0, v10
+; RV32-NEXT: fsh fa5, 0(a0)
+; RV32-NEXT: andi a0, a1, 2
+; RV32-NEXT: beqz a0, .LBB60_2
+; RV32-NEXT: .LBB60_10: # %cond.store1
+; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v8, 1
+; RV32-NEXT: vmv.x.s a0, v9
+; RV32-NEXT: fmv.h.x fa5, a0
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v10, 1
+; RV32-NEXT: vmv.x.s a0, v9
+; RV32-NEXT: fsh fa5, 0(a0)
+; RV32-NEXT: andi a0, a1, 4
+; RV32-NEXT: beqz a0, .LBB60_3
+; RV32-NEXT: .LBB60_11: # %cond.store3
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v8, 2
+; RV32-NEXT: vmv.x.s a0, v9
+; RV32-NEXT: fmv.h.x fa5, a0
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v10, 2
+; RV32-NEXT: vmv.x.s a0, v9
+; RV32-NEXT: fsh fa5, 0(a0)
+; RV32-NEXT: andi a0, a1, 8
+; RV32-NEXT: beqz a0, .LBB60_4
+; RV32-NEXT: .LBB60_12: # %cond.store5
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v8, 3
+; RV32-NEXT: vmv.x.s a0, v9
+; RV32-NEXT: fmv.h.x fa5, a0
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v10, 3
+; RV32-NEXT: vmv.x.s a0, v9
+; RV32-NEXT: fsh fa5, 0(a0)
+; RV32-NEXT: andi a0, a1, 16
+; RV32-NEXT: beqz a0, .LBB60_5
+; RV32-NEXT: .LBB60_13: # %cond.store7
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v8, 4
+; RV32-NEXT: vmv.x.s a0, v9
+; RV32-NEXT: fmv.h.x fa5, a0
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v12, v10, 4
+; RV32-NEXT: vmv.x.s a0, v12
+; RV32-NEXT: fsh fa5, 0(a0)
+; RV32-NEXT: andi a0, a1, 32
+; RV32-NEXT: beqz a0, .LBB60_6
+; RV32-NEXT: .LBB60_14: # %cond.store9
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v8, 5
+; RV32-NEXT: vmv.x.s a0, v9
+; RV32-NEXT: fmv.h.x fa5, a0
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v12, v10, 5
+; RV32-NEXT: vmv.x.s a0, v12
+; RV32-NEXT: fsh fa5, 0(a0)
+; RV32-NEXT: andi a0, a1, 64
+; RV32-NEXT: beqz a0, .LBB60_7
+; RV32-NEXT: .LBB60_15: # %cond.store11
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v8, 6
+; RV32-NEXT: vmv.x.s a0, v9
+; RV32-NEXT: fmv.h.x fa5, a0
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v12, v10, 6
+; RV32-NEXT: vmv.x.s a0, v12
+; RV32-NEXT: fsh fa5, 0(a0)
+; RV32-NEXT: andi a0, a1, -128
+; RV32-NEXT: beqz a0, .LBB60_8
+; RV32-NEXT: .LBB60_16: # %cond.store13
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v8, 7
+; RV32-NEXT: vmv.x.s a0, v8
+; RV32-NEXT: fmv.h.x fa5, a0
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v10, 7
+; RV32-NEXT: vmv.x.s a0, v8
+; RV32-NEXT: fsh fa5, 0(a0)
; RV32-NEXT: ret
;
-; RV64-LABEL: mscatter_baseidx_zext_v8i8_v8f16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-NEXT: vwaddu.vv v10, v9, v9
-; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV64-NEXT: vsoxei16.v v8, (a0), v10, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_baseidx_zext_v8i8_v8bf16:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vzext.vf8 v12, v9
+; RV64V-NEXT: vadd.vv v12, v12, v12
+; RV64V-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; RV64V-NEXT: vmv.x.s a1, v0
+; RV64V-NEXT: andi a2, a1, 1
+; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; RV64V-NEXT: vadd.vx v12, v12, a0
+; RV64V-NEXT: bnez a2, .LBB60_11
+; RV64V-NEXT: # %bb.1: # %else
+; RV64V-NEXT: andi a0, a1, 2
+; RV64V-NEXT: bnez a0, .LBB60_12
+; RV64V-NEXT: .LBB60_2: # %else2
+; RV64V-NEXT: andi a0, a1, 4
+; RV64V-NEXT: bnez a0, .LBB60_13
+; RV64V-NEXT: .LBB60_3: # %else4
+; RV64V-NEXT: andi a0, a1, 8
+; RV64V-NEXT: beqz a0, .LBB60_5
+; RV64V-NEXT: .LBB60_4: # %cond.store5
+; RV64V-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64V-NEXT: vslidedown.vi v9, v8, 3
+; RV64V-NEXT: vmv.x.s a0, v9
+; RV64V-NEXT: fmv.h.x fa5, a0
+; RV64V-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64V-NEXT: vslidedown.vi v10, v12, 3
+; RV64V-NEXT: vmv.x.s a0, v10
+; RV64V-NEXT: fsh fa5, 0(a0)
+; RV64V-NEXT: .LBB60_5: # %else6
+; RV64V-NEXT: addi sp, sp, -320
+; RV64V-NEXT: .cfi_def_cfa_offset 320
+; RV64V-NEXT: sd ra, 312(sp) # 8-byte Folded Spill
+; RV64V-NEXT: sd s0, 304(sp) # 8-byte Folded Spill
+; RV64V-NEXT: .cfi_offset ra, -8
+; RV64V-NEXT: .cfi_offset s0, -16
+; RV64V-NEXT: addi s0, sp, 320
+; RV64V-NEXT: .cfi_def_cfa s0, 0
+; RV64V-NEXT: andi sp, sp, -64
+; RV64V-NEXT: andi a0, a1, 16
+; RV64V-NEXT: bnez a0, .LBB60_14
+; RV64V-NEXT: # %bb.6: # %else8
+; RV64V-NEXT: andi a0, a1, 32
+; RV64V-NEXT: bnez a0, .LBB60_15
+; RV64V-NEXT: .LBB60_7: # %else10
+; RV64V-NEXT: andi a0, a1, 64
+; RV64V-NEXT: bnez a0, .LBB60_16
+; RV64V-NEXT: .LBB60_8: # %else12
+; RV64V-NEXT: andi a0, a1, -128
+; RV64V-NEXT: beqz a0, .LBB60_10
+; RV64V-NEXT: .LBB60_9: # %cond.store13
+; RV64V-NEXT: mv a0, sp
+; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64V-NEXT: vse64.v v12, (a0)
+; RV64V-NEXT: ld a0, 56(sp)
+; RV64V-NEXT: vslidedown.vi v8, v8, 7
+; RV64V-NEXT: vmv.x.s a1, v8
+; RV64V-NEXT: fmv.h.x fa5, a1
+; RV64V-NEXT: fsh fa5, 0(a0)
+; RV64V-NEXT: .LBB60_10: # %else14
+; RV64V-NEXT: addi sp, s0, -320
+; RV64V-NEXT: ld ra, 312(sp) # 8-byte Folded Reload
+; RV64V-NEXT: ld s0, 304(sp) # 8-byte Folded Reload
+; RV64V-NEXT: addi sp, sp, 320
+; RV64V-NEXT: ret
+; RV64V-NEXT: .LBB60_11: # %cond.store
+; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV64V-NEXT: vmv.x.s a0, v8
+; RV64V-NEXT: fmv.h.x fa5, a0
+; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; RV64V-NEXT: vmv.x.s a0, v12
+; RV64V-NEXT: fsh fa5, 0(a0)
+; RV64V-NEXT: andi a0, a1, 2
+; RV64V-NEXT: beqz a0, .LBB60_2
+; RV64V-NEXT: .LBB60_12: # %cond.store1
+; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV64V-NEXT: vslidedown.vi v9, v8, 1
+; RV64V-NEXT: vmv.x.s a0, v9
+; RV64V-NEXT: fmv.h.x fa5, a0
+; RV64V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64V-NEXT: vslidedown.vi v9, v12, 1
+; RV64V-NEXT: vmv.x.s a0, v9
+; RV64V-NEXT: fsh fa5, 0(a0)
+; RV64V-NEXT: andi a0, a1, 4
+; RV64V-NEXT: beqz a0, .LBB60_3
+; RV64V-NEXT: .LBB60_13: # %cond.store3
+; RV64V-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64V-NEXT: vslidedown.vi v9, v8, 2
+; RV64V-NEXT: vmv.x.s a0, v9
+; RV64V-NEXT: fmv.h.x fa5, a0
+; RV64V-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64V-NEXT: vslidedown.vi v10, v12, 2
+; RV64V-NEXT: vmv.x.s a0, v10
+; RV64V-NEXT: fsh fa5, 0(a0)
+; RV64V-NEXT: andi a0, a1, 8
+; RV64V-NEXT: bnez a0, .LBB60_4
+; RV64V-NEXT: j .LBB60_5
+; RV64V-NEXT: .LBB60_14: # %cond.store7
+; RV64V-NEXT: addi a0, sp, 192
+; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64V-NEXT: vse64.v v12, (a0)
+; RV64V-NEXT: ld a0, 224(sp)
+; RV64V-NEXT: vslidedown.vi v9, v8, 4
+; RV64V-NEXT: vmv.x.s a2, v9
+; RV64V-NEXT: fmv.h.x fa5, a2
+; RV64V-NEXT: fsh fa5, 0(a0)
+; RV64V-NEXT: andi a0, a1, 32
+; RV64V-NEXT: beqz a0, .LBB60_7
+; RV64V-NEXT: .LBB60_15: # %cond.store9
+; RV64V-NEXT: addi a0, sp, 128
+; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64V-NEXT: vse64.v v12, (a0)
+; RV64V-NEXT: ld a0, 168(sp)
+; RV64V-NEXT: vslidedown.vi v9, v8, 5
+; RV64V-NEXT: vmv.x.s a2, v9
+; RV64V-NEXT: fmv.h.x fa5, a2
+; RV64V-NEXT: fsh fa5, 0(a0)
+; RV64V-NEXT: andi a0, a1, 64
+; RV64V-NEXT: beqz a0, .LBB60_8
+; RV64V-NEXT: .LBB60_16: # %cond.store11
+; RV64V-NEXT: addi a0, sp, 64
+; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64V-NEXT: vse64.v v12, (a0)
+; RV64V-NEXT: ld a0, 112(sp)
+; RV64V-NEXT: vslidedown.vi v9, v8, 6
+; RV64V-NEXT: vmv.x.s a2, v9
+; RV64V-NEXT: fmv.h.x fa5, a2
+; RV64V-NEXT: fsh fa5, 0(a0)
+; RV64V-NEXT: andi a0, a1, -128
+; RV64V-NEXT: bnez a0, .LBB60_9
+; RV64V-NEXT: j .LBB60_10
;
-; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8f16:
+; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8bf16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
@@ -6454,8 +7731,10 @@ define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vse16.v v8, (a2)
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV64ZVE32F-NEXT: vmv.x.s a3, v8
+; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
+; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: .LBB60_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB60_4
@@ -6468,7 +7747,9 @@ define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
-; RV64ZVE32F-NEXT: vse16.v v10, (a2)
+; RV64ZVE32F-NEXT: vmv.x.s a3, v10
+; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
+; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: .LBB60_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
@@ -6494,7 +7775,9 @@ define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
-; RV64ZVE32F-NEXT: vse16.v v9, (a2)
+; RV64ZVE32F-NEXT: vmv.x.s a3, v9
+; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
+; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: .LBB60_9: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
@@ -6512,7 +7795,9 @@ define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
-; RV64ZVE32F-NEXT: vse16.v v11, (a2)
+; RV64ZVE32F-NEXT: vmv.x.s a3, v11
+; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
+; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB60_6
; RV64ZVE32F-NEXT: .LBB60_13: # %cond.store5
@@ -6524,7 +7809,9 @@ define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
-; RV64ZVE32F-NEXT: vse16.v v9, (a2)
+; RV64ZVE32F-NEXT: vmv.x.s a3, v9
+; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
+; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB60_7
; RV64ZVE32F-NEXT: .LBB60_14: # %cond.store7
@@ -6535,7 +7822,9 @@ define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
-; RV64ZVE32F-NEXT: vse16.v v9, (a2)
+; RV64ZVE32F-NEXT: vmv.x.s a3, v9
+; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
+; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB60_8
; RV64ZVE32F-NEXT: j .LBB60_9
@@ -6546,7 +7835,9 @@ define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
-; RV64ZVE32F-NEXT: vse16.v v10, (a2)
+; RV64ZVE32F-NEXT: vmv.x.s a3, v10
+; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
+; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB60_11
; RV64ZVE32F-NEXT: .LBB60_16: # %cond.store13
@@ -6558,43 +7849,282 @@ define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
-; RV64ZVE32F-NEXT: vse16.v v8, (a0)
+; RV64ZVE32F-NEXT: vmv.x.s a1, v8
+; RV64ZVE32F-NEXT: fmv.h.x fa5, a1
+; RV64ZVE32F-NEXT: fsh fa5, 0(a0)
; RV64ZVE32F-NEXT: ret
%eidxs = zext <8 x i8> %idxs to <8 x i16>
- %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs
- call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
+ %ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i16> %eidxs
+ call void @llvm.masked.scatter.v8bf16.v8p0(<8 x bfloat> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
ret void
}
-define void @mscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
-; RV32-LABEL: mscatter_baseidx_v8f16:
+define void @mscatter_baseidx_v8bf16(<8 x bfloat> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
+; RV32-LABEL: mscatter_baseidx_v8bf16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vwadd.vv v10, v9, v9
-; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
+; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; RV32-NEXT: vmv.x.s a1, v0
+; RV32-NEXT: andi a2, a1, 1
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32-NEXT: vadd.vx v10, v10, a0
+; RV32-NEXT: bnez a2, .LBB61_9
+; RV32-NEXT: # %bb.1: # %else
+; RV32-NEXT: andi a0, a1, 2
+; RV32-NEXT: bnez a0, .LBB61_10
+; RV32-NEXT: .LBB61_2: # %else2
+; RV32-NEXT: andi a0, a1, 4
+; RV32-NEXT: bnez a0, .LBB61_11
+; RV32-NEXT: .LBB61_3: # %else4
+; RV32-NEXT: andi a0, a1, 8
+; RV32-NEXT: bnez a0, .LBB61_12
+; RV32-NEXT: .LBB61_4: # %else6
+; RV32-NEXT: andi a0, a1, 16
+; RV32-NEXT: bnez a0, .LBB61_13
+; RV32-NEXT: .LBB61_5: # %else8
+; RV32-NEXT: andi a0, a1, 32
+; RV32-NEXT: bnez a0, .LBB61_14
+; RV32-NEXT: .LBB61_6: # %else10
+; RV32-NEXT: andi a0, a1, 64
+; RV32-NEXT: bnez a0, .LBB61_15
+; RV32-NEXT: .LBB61_7: # %else12
+; RV32-NEXT: andi a0, a1, -128
+; RV32-NEXT: bnez a0, .LBB61_16
+; RV32-NEXT: .LBB61_8: # %else14
+; RV32-NEXT: ret
+; RV32-NEXT: .LBB61_9: # %cond.store
+; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV32-NEXT: vmv.x.s a0, v8
+; RV32-NEXT: fmv.h.x fa5, a0
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.x.s a0, v10
+; RV32-NEXT: fsh fa5, 0(a0)
+; RV32-NEXT: andi a0, a1, 2
+; RV32-NEXT: beqz a0, .LBB61_2
+; RV32-NEXT: .LBB61_10: # %cond.store1
+; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v8, 1
+; RV32-NEXT: vmv.x.s a0, v9
+; RV32-NEXT: fmv.h.x fa5, a0
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v10, 1
+; RV32-NEXT: vmv.x.s a0, v9
+; RV32-NEXT: fsh fa5, 0(a0)
+; RV32-NEXT: andi a0, a1, 4
+; RV32-NEXT: beqz a0, .LBB61_3
+; RV32-NEXT: .LBB61_11: # %cond.store3
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v8, 2
+; RV32-NEXT: vmv.x.s a0, v9
+; RV32-NEXT: fmv.h.x fa5, a0
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v10, 2
+; RV32-NEXT: vmv.x.s a0, v9
+; RV32-NEXT: fsh fa5, 0(a0)
+; RV32-NEXT: andi a0, a1, 8
+; RV32-NEXT: beqz a0, .LBB61_4
+; RV32-NEXT: .LBB61_12: # %cond.store5
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v8, 3
+; RV32-NEXT: vmv.x.s a0, v9
+; RV32-NEXT: fmv.h.x fa5, a0
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v10, 3
+; RV32-NEXT: vmv.x.s a0, v9
+; RV32-NEXT: fsh fa5, 0(a0)
+; RV32-NEXT: andi a0, a1, 16
+; RV32-NEXT: beqz a0, .LBB61_5
+; RV32-NEXT: .LBB61_13: # %cond.store7
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v8, 4
+; RV32-NEXT: vmv.x.s a0, v9
+; RV32-NEXT: fmv.h.x fa5, a0
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v12, v10, 4
+; RV32-NEXT: vmv.x.s a0, v12
+; RV32-NEXT: fsh fa5, 0(a0)
+; RV32-NEXT: andi a0, a1, 32
+; RV32-NEXT: beqz a0, .LBB61_6
+; RV32-NEXT: .LBB61_14: # %cond.store9
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v8, 5
+; RV32-NEXT: vmv.x.s a0, v9
+; RV32-NEXT: fmv.h.x fa5, a0
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v12, v10, 5
+; RV32-NEXT: vmv.x.s a0, v12
+; RV32-NEXT: fsh fa5, 0(a0)
+; RV32-NEXT: andi a0, a1, 64
+; RV32-NEXT: beqz a0, .LBB61_7
+; RV32-NEXT: .LBB61_15: # %cond.store11
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v8, 6
+; RV32-NEXT: vmv.x.s a0, v9
+; RV32-NEXT: fmv.h.x fa5, a0
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v12, v10, 6
+; RV32-NEXT: vmv.x.s a0, v12
+; RV32-NEXT: fsh fa5, 0(a0)
+; RV32-NEXT: andi a0, a1, -128
+; RV32-NEXT: beqz a0, .LBB61_8
+; RV32-NEXT: .LBB61_16: # %cond.store13
+; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v8, 7
+; RV32-NEXT: vmv.x.s a0, v8
+; RV32-NEXT: fmv.h.x fa5, a0
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v8, v10, 7
+; RV32-NEXT: vmv.x.s a0, v8
+; RV32-NEXT: fsh fa5, 0(a0)
; RV32-NEXT: ret
;
-; RV64-LABEL: mscatter_baseidx_v8f16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vsext.vf4 v12, v9
-; RV64-NEXT: vadd.vv v12, v12, v12
-; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_baseidx_v8bf16:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vsext.vf4 v12, v9
+; RV64V-NEXT: vadd.vv v12, v12, v12
+; RV64V-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; RV64V-NEXT: vmv.x.s a1, v0
+; RV64V-NEXT: andi a2, a1, 1
+; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; RV64V-NEXT: vadd.vx v12, v12, a0
+; RV64V-NEXT: bnez a2, .LBB61_11
+; RV64V-NEXT: # %bb.1: # %else
+; RV64V-NEXT: andi a0, a1, 2
+; RV64V-NEXT: bnez a0, .LBB61_12
+; RV64V-NEXT: .LBB61_2: # %else2
+; RV64V-NEXT: andi a0, a1, 4
+; RV64V-NEXT: bnez a0, .LBB61_13
+; RV64V-NEXT: .LBB61_3: # %else4
+; RV64V-NEXT: andi a0, a1, 8
+; RV64V-NEXT: beqz a0, .LBB61_5
+; RV64V-NEXT: .LBB61_4: # %cond.store5
+; RV64V-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64V-NEXT: vslidedown.vi v9, v8, 3
+; RV64V-NEXT: vmv.x.s a0, v9
+; RV64V-NEXT: fmv.h.x fa5, a0
+; RV64V-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64V-NEXT: vslidedown.vi v10, v12, 3
+; RV64V-NEXT: vmv.x.s a0, v10
+; RV64V-NEXT: fsh fa5, 0(a0)
+; RV64V-NEXT: .LBB61_5: # %else6
+; RV64V-NEXT: addi sp, sp, -320
+; RV64V-NEXT: .cfi_def_cfa_offset 320
+; RV64V-NEXT: sd ra, 312(sp) # 8-byte Folded Spill
+; RV64V-NEXT: sd s0, 304(sp) # 8-byte Folded Spill
+; RV64V-NEXT: .cfi_offset ra, -8
+; RV64V-NEXT: .cfi_offset s0, -16
+; RV64V-NEXT: addi s0, sp, 320
+; RV64V-NEXT: .cfi_def_cfa s0, 0
+; RV64V-NEXT: andi sp, sp, -64
+; RV64V-NEXT: andi a0, a1, 16
+; RV64V-NEXT: bnez a0, .LBB61_14
+; RV64V-NEXT: # %bb.6: # %else8
+; RV64V-NEXT: andi a0, a1, 32
+; RV64V-NEXT: bnez a0, .LBB61_15
+; RV64V-NEXT: .LBB61_7: # %else10
+; RV64V-NEXT: andi a0, a1, 64
+; RV64V-NEXT: bnez a0, .LBB61_16
+; RV64V-NEXT: .LBB61_8: # %else12
+; RV64V-NEXT: andi a0, a1, -128
+; RV64V-NEXT: beqz a0, .LBB61_10
+; RV64V-NEXT: .LBB61_9: # %cond.store13
+; RV64V-NEXT: mv a0, sp
+; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64V-NEXT: vse64.v v12, (a0)
+; RV64V-NEXT: ld a0, 56(sp)
+; RV64V-NEXT: vslidedown.vi v8, v8, 7
+; RV64V-NEXT: vmv.x.s a1, v8
+; RV64V-NEXT: fmv.h.x fa5, a1
+; RV64V-NEXT: fsh fa5, 0(a0)
+; RV64V-NEXT: .LBB61_10: # %else14
+; RV64V-NEXT: addi sp, s0, -320
+; RV64V-NEXT: ld ra, 312(sp) # 8-byte Folded Reload
+; RV64V-NEXT: ld s0, 304(sp) # 8-byte Folded Reload
+; RV64V-NEXT: addi sp, sp, 320
+; RV64V-NEXT: ret
+; RV64V-NEXT: .LBB61_11: # %cond.store
+; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV64V-NEXT: vmv.x.s a0, v8
+; RV64V-NEXT: fmv.h.x fa5, a0
+; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; RV64V-NEXT: vmv.x.s a0, v12
+; RV64V-NEXT: fsh fa5, 0(a0)
+; RV64V-NEXT: andi a0, a1, 2
+; RV64V-NEXT: beqz a0, .LBB61_2
+; RV64V-NEXT: .LBB61_12: # %cond.store1
+; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV64V-NEXT: vslidedown.vi v9, v8, 1
+; RV64V-NEXT: vmv.x.s a0, v9
+; RV64V-NEXT: fmv.h.x fa5, a0
+; RV64V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64V-NEXT: vslidedown.vi v9, v12, 1
+; RV64V-NEXT: vmv.x.s a0, v9
+; RV64V-NEXT: fsh fa5, 0(a0)
+; RV64V-NEXT: andi a0, a1, 4
+; RV64V-NEXT: beqz a0, .LBB61_3
+; RV64V-NEXT: .LBB61_13: # %cond.store3
+; RV64V-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64V-NEXT: vslidedown.vi v9, v8, 2
+; RV64V-NEXT: vmv.x.s a0, v9
+; RV64V-NEXT: fmv.h.x fa5, a0
+; RV64V-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64V-NEXT: vslidedown.vi v10, v12, 2
+; RV64V-NEXT: vmv.x.s a0, v10
+; RV64V-NEXT: fsh fa5, 0(a0)
+; RV64V-NEXT: andi a0, a1, 8
+; RV64V-NEXT: bnez a0, .LBB61_4
+; RV64V-NEXT: j .LBB61_5
+; RV64V-NEXT: .LBB61_14: # %cond.store7
+; RV64V-NEXT: addi a0, sp, 192
+; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64V-NEXT: vse64.v v12, (a0)
+; RV64V-NEXT: ld a0, 224(sp)
+; RV64V-NEXT: vslidedown.vi v9, v8, 4
+; RV64V-NEXT: vmv.x.s a2, v9
+; RV64V-NEXT: fmv.h.x fa5, a2
+; RV64V-NEXT: fsh fa5, 0(a0)
+; RV64V-NEXT: andi a0, a1, 32
+; RV64V-NEXT: beqz a0, .LBB61_7
+; RV64V-NEXT: .LBB61_15: # %cond.store9
+; RV64V-NEXT: addi a0, sp, 128
+; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64V-NEXT: vse64.v v12, (a0)
+; RV64V-NEXT: ld a0, 168(sp)
+; RV64V-NEXT: vslidedown.vi v9, v8, 5
+; RV64V-NEXT: vmv.x.s a2, v9
+; RV64V-NEXT: fmv.h.x fa5, a2
+; RV64V-NEXT: fsh fa5, 0(a0)
+; RV64V-NEXT: andi a0, a1, 64
+; RV64V-NEXT: beqz a0, .LBB61_8
+; RV64V-NEXT: .LBB61_16: # %cond.store11
+; RV64V-NEXT: addi a0, sp, 64
+; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64V-NEXT: vse64.v v12, (a0)
+; RV64V-NEXT: ld a0, 112(sp)
+; RV64V-NEXT: vslidedown.vi v9, v8, 6
+; RV64V-NEXT: vmv.x.s a2, v9
+; RV64V-NEXT: fmv.h.x fa5, a2
+; RV64V-NEXT: fsh fa5, 0(a0)
+; RV64V-NEXT: andi a0, a1, -128
+; RV64V-NEXT: bnez a0, .LBB61_9
+; RV64V-NEXT: j .LBB61_10
;
-; RV64ZVE32F-LABEL: mscatter_baseidx_v8f16:
+; RV64ZVE32F-LABEL: mscatter_baseidx_v8bf16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB61_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vse16.v v8, (a2)
+; RV64ZVE32F-NEXT: vmv.x.s a3, v8
+; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
+; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: .LBB61_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB61_4
@@ -6606,7 +8136,9 @@ define void @mscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
-; RV64ZVE32F-NEXT: vse16.v v10, (a2)
+; RV64ZVE32F-NEXT: vmv.x.s a3, v10
+; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
+; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: .LBB61_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
@@ -6631,7 +8163,9 @@ define void @mscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
-; RV64ZVE32F-NEXT: vse16.v v9, (a2)
+; RV64ZVE32F-NEXT: vmv.x.s a3, v9
+; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
+; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: .LBB61_9: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
@@ -6648,7 +8182,9 @@ define void @mscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
-; RV64ZVE32F-NEXT: vse16.v v11, (a2)
+; RV64ZVE32F-NEXT: vmv.x.s a3, v11
+; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
+; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB61_6
; RV64ZVE32F-NEXT: .LBB61_13: # %cond.store5
@@ -6659,7 +8195,9 @@ define void @mscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
-; RV64ZVE32F-NEXT: vse16.v v9, (a2)
+; RV64ZVE32F-NEXT: vmv.x.s a3, v9
+; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
+; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB61_7
; RV64ZVE32F-NEXT: .LBB61_14: # %cond.store7
@@ -6668,7 +8206,9 @@ define void @mscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
-; RV64ZVE32F-NEXT: vse16.v v9, (a2)
+; RV64ZVE32F-NEXT: vmv.x.s a3, v9
+; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
+; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB61_8
; RV64ZVE32F-NEXT: j .LBB61_9
@@ -6678,7 +8218,9 @@ define void @mscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
-; RV64ZVE32F-NEXT: vse16.v v10, (a2)
+; RV64ZVE32F-NEXT: vmv.x.s a3, v10
+; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
+; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB61_11
; RV64ZVE32F-NEXT: .LBB61_16: # %cond.store13
@@ -6689,43 +8231,3904 @@ define void @mscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
-; RV64ZVE32F-NEXT: vse16.v v8, (a0)
+; RV64ZVE32F-NEXT: vmv.x.s a1, v8
+; RV64ZVE32F-NEXT: fmv.h.x fa5, a1
+; RV64ZVE32F-NEXT: fsh fa5, 0(a0)
; RV64ZVE32F-NEXT: ret
- %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %idxs
- call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
+ %ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i16> %idxs
+ call void @llvm.masked.scatter.v8bf16.v8p0(<8 x bfloat> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
ret void
}
-declare void @llvm.masked.scatter.v1f32.v1p0(<1 x float>, <1 x ptr>, i32, <1 x i1>)
+declare void @llvm.masked.scatter.v1f16.v1p0(<1 x half>, <1 x ptr>, i32, <1 x i1>)
-define void @mscatter_v1f32(<1 x float> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
-; RV32V-LABEL: mscatter_v1f32:
-; RV32V: # %bb.0:
-; RV32V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
-; RV32V-NEXT: ret
+define void @mscatter_v1f16(<1 x half> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
+; RV32V-ZVFH-LABEL: mscatter_v1f16:
+; RV32V-ZVFH: # %bb.0:
+; RV32V-ZVFH-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; RV32V-ZVFH-NEXT: vsoxei32.v v8, (zero), v9, v0.t
+; RV32V-ZVFH-NEXT: ret
;
-; RV64-LABEL: mscatter_v1f32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
-; RV64-NEXT: ret
+; RV64V-ZVFH-LABEL: mscatter_v1f16:
+; RV64V-ZVFH: # %bb.0:
+; RV64V-ZVFH-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; RV64V-ZVFH-NEXT: vsoxei64.v v8, (zero), v9, v0.t
+; RV64V-ZVFH-NEXT: ret
;
-; RV32ZVE32F-LABEL: mscatter_v1f32:
-; RV32ZVE32F: # %bb.0:
-; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t
-; RV32ZVE32F-NEXT: ret
+; RV32V-ZVFHMIN-LABEL: mscatter_v1f16:
+; RV32V-ZVFHMIN: # %bb.0:
+; RV32V-ZVFHMIN-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
+; RV32V-ZVFHMIN-NEXT: vfirst.m a0, v0
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB62_2
+; RV32V-ZVFHMIN-NEXT: # %bb.1: # %cond.store
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32V-ZVFHMIN-NEXT: .LBB62_2: # %else
+; RV32V-ZVFHMIN-NEXT: ret
;
-; RV64ZVE32F-LABEL: mscatter_v1f32:
-; RV64ZVE32F: # %bb.0:
-; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
+; RV64V-ZVFHMIN-LABEL: mscatter_v1f16:
+; RV64V-ZVFHMIN: # %bb.0:
+; RV64V-ZVFHMIN-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
+; RV64V-ZVFHMIN-NEXT: vfirst.m a0, v0
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB62_2
+; RV64V-ZVFHMIN-NEXT: # %bb.1: # %cond.store
+; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV64V-ZVFHMIN-NEXT: .LBB62_2: # %else
+; RV64V-ZVFHMIN-NEXT: ret
+;
+; RV32ZVE32F-ZVFH-LABEL: mscatter_v1f16:
+; RV32ZVE32F-ZVFH: # %bb.0:
+; RV32ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV32ZVE32F-ZVFH-NEXT: vsoxei32.v v8, (zero), v9, v0.t
+; RV32ZVE32F-ZVFH-NEXT: ret
+;
+; RV64ZVE32F-ZVFH-LABEL: mscatter_v1f16:
+; RV64ZVE32F-ZVFH: # %bb.0:
+; RV64ZVE32F-ZVFH-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vfirst.m a1, v0
+; RV64ZVE32F-ZVFH-NEXT: bnez a1, .LBB62_2
+; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %cond.store
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a0)
+; RV64ZVE32F-ZVFH-NEXT: .LBB62_2: # %else
+; RV64ZVE32F-ZVFH-NEXT: ret
+;
+; RV32ZVE32F-ZVFHMIN-LABEL: mscatter_v1f16:
+; RV32ZVE32F-ZVFHMIN: # %bb.0:
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vfirst.m a0, v0
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB62_2
+; RV32ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %cond.store
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB62_2: # %else
+; RV32ZVE32F-ZVFHMIN-NEXT: ret
+;
+; RV64ZVE32F-ZVFHMIN-LABEL: mscatter_v1f16:
+; RV64ZVE32F-ZVFHMIN: # %bb.0:
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vfirst.m a1, v0
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB62_2
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %cond.store
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v8
+; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a1
+; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB62_2: # %else
+; RV64ZVE32F-ZVFHMIN-NEXT: ret
+ call void @llvm.masked.scatter.v1f16.v1p0(<1 x half> %val, <1 x ptr> %ptrs, i32 2, <1 x i1> %m)
+ ret void
+}
+
+declare void @llvm.masked.scatter.v2f16.v2p0(<2 x half>, <2 x ptr>, i32, <2 x i1>)
+
+define void @mscatter_v2f16(<2 x half> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
+; RV32V-ZVFH-LABEL: mscatter_v2f16:
+; RV32V-ZVFH: # %bb.0:
+; RV32V-ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV32V-ZVFH-NEXT: vsoxei32.v v8, (zero), v9, v0.t
+; RV32V-ZVFH-NEXT: ret
+;
+; RV64V-ZVFH-LABEL: mscatter_v2f16:
+; RV64V-ZVFH: # %bb.0:
+; RV64V-ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV64V-ZVFH-NEXT: vsoxei64.v v8, (zero), v9, v0.t
+; RV64V-ZVFH-NEXT: ret
+;
+; RV32V-ZVFHMIN-LABEL: mscatter_v2f16:
+; RV32V-ZVFHMIN: # %bb.0:
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v0
+; RV32V-ZVFHMIN-NEXT: andi a1, a0, 1
+; RV32V-ZVFHMIN-NEXT: bnez a1, .LBB63_3
+; RV32V-ZVFHMIN-NEXT: # %bb.1: # %else
+; RV32V-ZVFHMIN-NEXT: andi a0, a0, 2
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB63_4
+; RV32V-ZVFHMIN-NEXT: .LBB63_2: # %else2
+; RV32V-ZVFHMIN-NEXT: ret
+; RV32V-ZVFHMIN-NEXT: .LBB63_3: # %cond.store
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v8
+; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a1
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v9
+; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a1)
+; RV32V-ZVFHMIN-NEXT: andi a0, a0, 2
+; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB63_2
+; RV32V-ZVFHMIN-NEXT: .LBB63_4: # %cond.store1
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v9, 1
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32V-ZVFHMIN-NEXT: ret
+;
+; RV64V-ZVFHMIN-LABEL: mscatter_v2f16:
+; RV64V-ZVFHMIN: # %bb.0:
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v0
+; RV64V-ZVFHMIN-NEXT: andi a1, a0, 1
+; RV64V-ZVFHMIN-NEXT: bnez a1, .LBB63_3
+; RV64V-ZVFHMIN-NEXT: # %bb.1: # %else
+; RV64V-ZVFHMIN-NEXT: andi a0, a0, 2
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB63_4
+; RV64V-ZVFHMIN-NEXT: .LBB63_2: # %else2
+; RV64V-ZVFHMIN-NEXT: ret
+; RV64V-ZVFHMIN-NEXT: .LBB63_3: # %cond.store
+; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v8
+; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a1
+; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v9
+; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a1)
+; RV64V-ZVFHMIN-NEXT: andi a0, a0, 2
+; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB63_2
+; RV64V-ZVFHMIN-NEXT: .LBB63_4: # %cond.store1
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v8, v9, 1
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV64V-ZVFHMIN-NEXT: ret
+;
+; RV32ZVE32F-ZVFH-LABEL: mscatter_v2f16:
+; RV32ZVE32F-ZVFH: # %bb.0:
+; RV32ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
+; RV32ZVE32F-ZVFH-NEXT: vsoxei32.v v8, (zero), v9, v0.t
+; RV32ZVE32F-ZVFH-NEXT: ret
+;
+; RV64ZVE32F-ZVFH-LABEL: mscatter_v2f16:
+; RV64ZVE32F-ZVFH: # %bb.0:
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v0
+; RV64ZVE32F-ZVFH-NEXT: andi a3, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: bnez a3, .LBB63_3
+; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %else
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 2
+; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB63_4
+; RV64ZVE32F-ZVFH-NEXT: .LBB63_2: # %else2
+; RV64ZVE32F-ZVFH-NEXT: ret
+; RV64ZVE32F-ZVFH-NEXT: .LBB63_3: # %cond.store
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a0)
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 2
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB63_2
+; RV64ZVE32F-ZVFH-NEXT: .LBB63_4: # %cond.store1
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 1
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a1)
+; RV64ZVE32F-ZVFH-NEXT: ret
+;
+; RV32ZVE32F-ZVFHMIN-LABEL: mscatter_v2f16:
+; RV32ZVE32F-ZVFHMIN: # %bb.0:
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v0
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 1
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB63_3
+; RV32ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a0, 2
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB63_4
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB63_2: # %else2
+; RV32ZVE32F-ZVFHMIN-NEXT: ret
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB63_3: # %cond.store
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v8
+; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a1
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a1)
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a0, 2
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB63_2
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB63_4: # %cond.store1
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v9, 1
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: ret
+;
+; RV64ZVE32F-ZVFHMIN-LABEL: mscatter_v2f16:
+; RV64ZVE32F-ZVFHMIN: # %bb.0:
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v0
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a3, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a3, .LBB63_3
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB63_4
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB63_2: # %else2
+; RV64ZVE32F-ZVFHMIN-NEXT: ret
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB63_3: # %cond.store
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v8
+; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
+; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB63_2
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB63_4: # %cond.store1
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a1)
+; RV64ZVE32F-ZVFHMIN-NEXT: ret
+ call void @llvm.masked.scatter.v2f16.v2p0(<2 x half> %val, <2 x ptr> %ptrs, i32 2, <2 x i1> %m)
+ ret void
+}
+
+declare void @llvm.masked.scatter.v4f16.v4p0(<4 x half>, <4 x ptr>, i32, <4 x i1>)
+
+define void @mscatter_v4f16(<4 x half> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
+; RV32V-ZVFH-LABEL: mscatter_v4f16:
+; RV32V-ZVFH: # %bb.0:
+; RV32V-ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV32V-ZVFH-NEXT: vsoxei32.v v8, (zero), v9, v0.t
+; RV32V-ZVFH-NEXT: ret
+;
+; RV64V-ZVFH-LABEL: mscatter_v4f16:
+; RV64V-ZVFH: # %bb.0:
+; RV64V-ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV64V-ZVFH-NEXT: vsoxei64.v v8, (zero), v10, v0.t
+; RV64V-ZVFH-NEXT: ret
+;
+; RV32V-ZVFHMIN-LABEL: mscatter_v4f16:
+; RV32V-ZVFHMIN: # %bb.0:
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v0
+; RV32V-ZVFHMIN-NEXT: andi a1, a0, 1
+; RV32V-ZVFHMIN-NEXT: bnez a1, .LBB64_5
+; RV32V-ZVFHMIN-NEXT: # %bb.1: # %else
+; RV32V-ZVFHMIN-NEXT: andi a1, a0, 2
+; RV32V-ZVFHMIN-NEXT: bnez a1, .LBB64_6
+; RV32V-ZVFHMIN-NEXT: .LBB64_2: # %else2
+; RV32V-ZVFHMIN-NEXT: andi a1, a0, 4
+; RV32V-ZVFHMIN-NEXT: bnez a1, .LBB64_7
+; RV32V-ZVFHMIN-NEXT: .LBB64_3: # %else4
+; RV32V-ZVFHMIN-NEXT: andi a0, a0, 8
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB64_8
+; RV32V-ZVFHMIN-NEXT: .LBB64_4: # %else6
+; RV32V-ZVFHMIN-NEXT: ret
+; RV32V-ZVFHMIN-NEXT: .LBB64_5: # %cond.store
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v8
+; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a1
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v9
+; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a1)
+; RV32V-ZVFHMIN-NEXT: andi a1, a0, 2
+; RV32V-ZVFHMIN-NEXT: beqz a1, .LBB64_2
+; RV32V-ZVFHMIN-NEXT: .LBB64_6: # %cond.store1
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 1
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v10
+; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a1
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v10, v9, 1
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v10
+; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a1)
+; RV32V-ZVFHMIN-NEXT: andi a1, a0, 4
+; RV32V-ZVFHMIN-NEXT: beqz a1, .LBB64_3
+; RV32V-ZVFHMIN-NEXT: .LBB64_7: # %cond.store3
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 2
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v10
+; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a1
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v10, v9, 2
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v10
+; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a1)
+; RV32V-ZVFHMIN-NEXT: andi a0, a0, 8
+; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB64_4
+; RV32V-ZVFHMIN-NEXT: .LBB64_8: # %cond.store5
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 3
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v9, 3
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32V-ZVFHMIN-NEXT: ret
+;
+; RV64V-ZVFHMIN-LABEL: mscatter_v4f16:
+; RV64V-ZVFHMIN: # %bb.0:
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v0
+; RV64V-ZVFHMIN-NEXT: andi a1, a0, 1
+; RV64V-ZVFHMIN-NEXT: bnez a1, .LBB64_5
+; RV64V-ZVFHMIN-NEXT: # %bb.1: # %else
+; RV64V-ZVFHMIN-NEXT: andi a1, a0, 2
+; RV64V-ZVFHMIN-NEXT: bnez a1, .LBB64_6
+; RV64V-ZVFHMIN-NEXT: .LBB64_2: # %else2
+; RV64V-ZVFHMIN-NEXT: andi a1, a0, 4
+; RV64V-ZVFHMIN-NEXT: bnez a1, .LBB64_7
+; RV64V-ZVFHMIN-NEXT: .LBB64_3: # %else4
+; RV64V-ZVFHMIN-NEXT: andi a0, a0, 8
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB64_8
+; RV64V-ZVFHMIN-NEXT: .LBB64_4: # %else6
+; RV64V-ZVFHMIN-NEXT: ret
+; RV64V-ZVFHMIN-NEXT: .LBB64_5: # %cond.store
+; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v8
+; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a1
+; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v10
+; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a1)
+; RV64V-ZVFHMIN-NEXT: andi a1, a0, 2
+; RV64V-ZVFHMIN-NEXT: beqz a1, .LBB64_2
+; RV64V-ZVFHMIN-NEXT: .LBB64_6: # %cond.store1
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 1
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v9
+; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a1
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 1
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v9
+; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a1)
+; RV64V-ZVFHMIN-NEXT: andi a1, a0, 4
+; RV64V-ZVFHMIN-NEXT: beqz a1, .LBB64_3
+; RV64V-ZVFHMIN-NEXT: .LBB64_7: # %cond.store3
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 2
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v9
+; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a1
+; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 2
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v12
+; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a1)
+; RV64V-ZVFHMIN-NEXT: andi a0, a0, 8
+; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB64_4
+; RV64V-ZVFHMIN-NEXT: .LBB64_8: # %cond.store5
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 3
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 3
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV64V-ZVFHMIN-NEXT: ret
+;
+; RV32ZVE32F-ZVFH-LABEL: mscatter_v4f16:
+; RV32ZVE32F-ZVFH: # %bb.0:
+; RV32ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV32ZVE32F-ZVFH-NEXT: vsoxei32.v v8, (zero), v9, v0.t
+; RV32ZVE32F-ZVFH-NEXT: ret
+;
+; RV64ZVE32F-ZVFH-LABEL: mscatter_v4f16:
+; RV64ZVE32F-ZVFH: # %bb.0:
+; RV64ZVE32F-ZVFH-NEXT: ld a4, 8(a0)
+; RV64ZVE32F-ZVFH-NEXT: ld a2, 16(a0)
+; RV64ZVE32F-ZVFH-NEXT: ld a1, 24(a0)
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a3, v0
+; RV64ZVE32F-ZVFH-NEXT: andi a5, a3, 1
+; RV64ZVE32F-ZVFH-NEXT: bnez a5, .LBB64_5
+; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %else
+; RV64ZVE32F-ZVFH-NEXT: andi a0, a3, 2
+; RV64ZVE32F-ZVFH-NEXT: bnez a0, .LBB64_6
+; RV64ZVE32F-ZVFH-NEXT: .LBB64_2: # %else2
+; RV64ZVE32F-ZVFH-NEXT: andi a0, a3, 4
+; RV64ZVE32F-ZVFH-NEXT: bnez a0, .LBB64_7
+; RV64ZVE32F-ZVFH-NEXT: .LBB64_3: # %else4
+; RV64ZVE32F-ZVFH-NEXT: andi a3, a3, 8
+; RV64ZVE32F-ZVFH-NEXT: bnez a3, .LBB64_8
+; RV64ZVE32F-ZVFH-NEXT: .LBB64_4: # %else6
+; RV64ZVE32F-ZVFH-NEXT: ret
+; RV64ZVE32F-ZVFH-NEXT: .LBB64_5: # %cond.store
+; RV64ZVE32F-ZVFH-NEXT: ld a0, 0(a0)
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a0)
+; RV64ZVE32F-ZVFH-NEXT: andi a0, a3, 2
+; RV64ZVE32F-ZVFH-NEXT: beqz a0, .LBB64_2
+; RV64ZVE32F-ZVFH-NEXT: .LBB64_6: # %cond.store1
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 1
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a4)
+; RV64ZVE32F-ZVFH-NEXT: andi a0, a3, 4
+; RV64ZVE32F-ZVFH-NEXT: beqz a0, .LBB64_3
+; RV64ZVE32F-ZVFH-NEXT: .LBB64_7: # %cond.store3
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 2
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
+; RV64ZVE32F-ZVFH-NEXT: andi a3, a3, 8
+; RV64ZVE32F-ZVFH-NEXT: beqz a3, .LBB64_4
+; RV64ZVE32F-ZVFH-NEXT: .LBB64_8: # %cond.store5
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 3
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a1)
+; RV64ZVE32F-ZVFH-NEXT: ret
+;
+; RV32ZVE32F-ZVFHMIN-LABEL: mscatter_v4f16:
+; RV32ZVE32F-ZVFHMIN: # %bb.0:
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v0
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 1
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB64_5
+; RV32ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 2
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB64_6
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB64_2: # %else2
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 4
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB64_7
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB64_3: # %else4
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a0, 8
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB64_8
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB64_4: # %else6
+; RV32ZVE32F-ZVFHMIN-NEXT: ret
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB64_5: # %cond.store
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v8
+; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a1
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a1)
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 2
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB64_2
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB64_6: # %cond.store1
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 1
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v10
+; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a1
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v9, 1
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v10
+; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a1)
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 4
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB64_3
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB64_7: # %cond.store3
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 2
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v10
+; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a1
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v9, 2
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v10
+; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a1)
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a0, 8
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB64_4
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB64_8: # %cond.store5
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 3
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v9, 3
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: ret
+;
+; RV64ZVE32F-ZVFHMIN-LABEL: mscatter_v4f16:
+; RV64ZVE32F-ZVFHMIN: # %bb.0:
+; RV64ZVE32F-ZVFHMIN-NEXT: ld a4, 8(a0)
+; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 16(a0)
+; RV64ZVE32F-ZVFHMIN-NEXT: ld a1, 24(a0)
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v0
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a5, a3, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a5, .LBB64_5
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a3, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB64_6
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB64_2: # %else2
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a3, 4
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB64_7
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB64_3: # %else4
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a3, a3, 8
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a3, .LBB64_8
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB64_4: # %else6
+; RV64ZVE32F-ZVFHMIN-NEXT: ret
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB64_5: # %cond.store
+; RV64ZVE32F-ZVFHMIN-NEXT: ld a0, 0(a0)
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a5, v8
+; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a5
+; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a3, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB64_2
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB64_6: # %cond.store1
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a4)
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a3, 4
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB64_3
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB64_7: # %cond.store3
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a3, a3, 8
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a3, .LBB64_4
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB64_8: # %cond.store5
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 3
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a1)
+; RV64ZVE32F-ZVFHMIN-NEXT: ret
+ call void @llvm.masked.scatter.v4f16.v4p0(<4 x half> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> %m)
+ ret void
+}
+
+define void @mscatter_truemask_v4f16(<4 x half> %val, <4 x ptr> %ptrs) {
+; RV32V-ZVFH-LABEL: mscatter_truemask_v4f16:
+; RV32V-ZVFH: # %bb.0:
+; RV32V-ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV32V-ZVFH-NEXT: vsoxei32.v v8, (zero), v9
+; RV32V-ZVFH-NEXT: ret
+;
+; RV64V-ZVFH-LABEL: mscatter_truemask_v4f16:
+; RV64V-ZVFH: # %bb.0:
+; RV64V-ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV64V-ZVFH-NEXT: vsoxei64.v v8, (zero), v10
+; RV64V-ZVFH-NEXT: ret
+;
+; RV32V-ZVFHMIN-LABEL: mscatter_truemask_v4f16:
+; RV32V-ZVFHMIN: # %bb.0:
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 1
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v10
+; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v10, v9, 1
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v10
+; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 2
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v10
+; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v10, v9, 2
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v10
+; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 3
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v9, 3
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32V-ZVFHMIN-NEXT: ret
+;
+; RV64V-ZVFHMIN-LABEL: mscatter_truemask_v4f16:
+; RV64V-ZVFHMIN: # %bb.0:
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v10
+; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 1
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 1
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 2
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 2
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v12
+; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 3
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 3
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV64V-ZVFHMIN-NEXT: ret
+;
+; RV32ZVE32F-ZVFH-LABEL: mscatter_truemask_v4f16:
+; RV32ZVE32F-ZVFH: # %bb.0:
+; RV32ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV32ZVE32F-ZVFH-NEXT: vsoxei32.v v8, (zero), v9
+; RV32ZVE32F-ZVFH-NEXT: ret
+;
+; RV64ZVE32F-ZVFH-LABEL: mscatter_truemask_v4f16:
+; RV64ZVE32F-ZVFH: # %bb.0:
+; RV64ZVE32F-ZVFH-NEXT: ld a1, 0(a0)
+; RV64ZVE32F-ZVFH-NEXT: ld a2, 8(a0)
+; RV64ZVE32F-ZVFH-NEXT: ld a3, 16(a0)
+; RV64ZVE32F-ZVFH-NEXT: ld a0, 24(a0)
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a1)
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 1
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 2
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a3)
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 3
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a0)
+; RV64ZVE32F-ZVFH-NEXT: ret
+;
+; RV32ZVE32F-ZVFHMIN-LABEL: mscatter_truemask_v4f16:
+; RV32ZVE32F-ZVFHMIN: # %bb.0:
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 1
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v10
+; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v9, 1
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v10
+; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 2
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v10
+; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v9, 2
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v10
+; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 3
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v9, 3
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: ret
+;
+; RV64ZVE32F-ZVFHMIN-LABEL: mscatter_truemask_v4f16:
+; RV64ZVE32F-ZVFHMIN: # %bb.0:
+; RV64ZVE32F-ZVFHMIN-NEXT: ld a1, 0(a0)
+; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 8(a0)
+; RV64ZVE32F-ZVFHMIN-NEXT: ld a3, 16(a0)
+; RV64ZVE32F-ZVFHMIN-NEXT: ld a0, 24(a0)
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a4, v8
+; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a4
+; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a1)
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a1
+; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a1
+; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a3)
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 3
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v8
+; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a1
+; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV64ZVE32F-ZVFHMIN-NEXT: ret
+ call void @llvm.masked.scatter.v4f16.v4p0(<4 x half> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> splat (i1 1))
+ ret void
+}
+
+define void @mscatter_falsemask_v4f16(<4 x half> %val, <4 x ptr> %ptrs) {
+; CHECK-LABEL: mscatter_falsemask_v4f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ret
+ call void @llvm.masked.scatter.v4f16.v4p0(<4 x half> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> zeroinitializer)
+ ret void
+}
+
+declare void @llvm.masked.scatter.v8f16.v8p0(<8 x half>, <8 x ptr>, i32, <8 x i1>)
+
+define void @mscatter_v8f16(<8 x half> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
+; RV32V-ZVFH-LABEL: mscatter_v8f16:
+; RV32V-ZVFH: # %bb.0:
+; RV32V-ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV32V-ZVFH-NEXT: vsoxei32.v v8, (zero), v10, v0.t
+; RV32V-ZVFH-NEXT: ret
+;
+; RV64V-ZVFH-LABEL: mscatter_v8f16:
+; RV64V-ZVFH: # %bb.0:
+; RV64V-ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64V-ZVFH-NEXT: vsoxei64.v v8, (zero), v12, v0.t
+; RV64V-ZVFH-NEXT: ret
+;
+; RV32V-ZVFHMIN-LABEL: mscatter_v8f16:
+; RV32V-ZVFHMIN: # %bb.0:
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v0
+; RV32V-ZVFHMIN-NEXT: andi a1, a0, 1
+; RV32V-ZVFHMIN-NEXT: bnez a1, .LBB67_9
+; RV32V-ZVFHMIN-NEXT: # %bb.1: # %else
+; RV32V-ZVFHMIN-NEXT: andi a1, a0, 2
+; RV32V-ZVFHMIN-NEXT: bnez a1, .LBB67_10
+; RV32V-ZVFHMIN-NEXT: .LBB67_2: # %else2
+; RV32V-ZVFHMIN-NEXT: andi a1, a0, 4
+; RV32V-ZVFHMIN-NEXT: bnez a1, .LBB67_11
+; RV32V-ZVFHMIN-NEXT: .LBB67_3: # %else4
+; RV32V-ZVFHMIN-NEXT: andi a1, a0, 8
+; RV32V-ZVFHMIN-NEXT: bnez a1, .LBB67_12
+; RV32V-ZVFHMIN-NEXT: .LBB67_4: # %else6
+; RV32V-ZVFHMIN-NEXT: andi a1, a0, 16
+; RV32V-ZVFHMIN-NEXT: bnez a1, .LBB67_13
+; RV32V-ZVFHMIN-NEXT: .LBB67_5: # %else8
+; RV32V-ZVFHMIN-NEXT: andi a1, a0, 32
+; RV32V-ZVFHMIN-NEXT: bnez a1, .LBB67_14
+; RV32V-ZVFHMIN-NEXT: .LBB67_6: # %else10
+; RV32V-ZVFHMIN-NEXT: andi a1, a0, 64
+; RV32V-ZVFHMIN-NEXT: bnez a1, .LBB67_15
+; RV32V-ZVFHMIN-NEXT: .LBB67_7: # %else12
+; RV32V-ZVFHMIN-NEXT: andi a0, a0, -128
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB67_16
+; RV32V-ZVFHMIN-NEXT: .LBB67_8: # %else14
+; RV32V-ZVFHMIN-NEXT: ret
+; RV32V-ZVFHMIN-NEXT: .LBB67_9: # %cond.store
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v8
+; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a1
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v10
+; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a1)
+; RV32V-ZVFHMIN-NEXT: andi a1, a0, 2
+; RV32V-ZVFHMIN-NEXT: beqz a1, .LBB67_2
+; RV32V-ZVFHMIN-NEXT: .LBB67_10: # %cond.store1
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 1
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v9
+; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a1
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 1
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v9
+; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a1)
+; RV32V-ZVFHMIN-NEXT: andi a1, a0, 4
+; RV32V-ZVFHMIN-NEXT: beqz a1, .LBB67_3
+; RV32V-ZVFHMIN-NEXT: .LBB67_11: # %cond.store3
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 2
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v9
+; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a1
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 2
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v9
+; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a1)
+; RV32V-ZVFHMIN-NEXT: andi a1, a0, 8
+; RV32V-ZVFHMIN-NEXT: beqz a1, .LBB67_4
+; RV32V-ZVFHMIN-NEXT: .LBB67_12: # %cond.store5
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 3
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v9
+; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a1
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 3
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v9
+; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a1)
+; RV32V-ZVFHMIN-NEXT: andi a1, a0, 16
+; RV32V-ZVFHMIN-NEXT: beqz a1, .LBB67_5
+; RV32V-ZVFHMIN-NEXT: .LBB67_13: # %cond.store7
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 4
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v9
+; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a1
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 4
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v12
+; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a1)
+; RV32V-ZVFHMIN-NEXT: andi a1, a0, 32
+; RV32V-ZVFHMIN-NEXT: beqz a1, .LBB67_6
+; RV32V-ZVFHMIN-NEXT: .LBB67_14: # %cond.store9
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 5
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v9
+; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a1
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 5
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v12
+; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a1)
+; RV32V-ZVFHMIN-NEXT: andi a1, a0, 64
+; RV32V-ZVFHMIN-NEXT: beqz a1, .LBB67_7
+; RV32V-ZVFHMIN-NEXT: .LBB67_15: # %cond.store11
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 6
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v9
+; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a1
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 6
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v12
+; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a1)
+; RV32V-ZVFHMIN-NEXT: andi a0, a0, -128
+; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB67_8
+; RV32V-ZVFHMIN-NEXT: .LBB67_16: # %cond.store13
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 7
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32V-ZVFHMIN-NEXT: ret
+;
+; RV64V-ZVFHMIN-LABEL: mscatter_v8f16:
+; RV64V-ZVFHMIN: # %bb.0:
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v0
+; RV64V-ZVFHMIN-NEXT: andi a1, a0, 1
+; RV64V-ZVFHMIN-NEXT: bnez a1, .LBB67_11
+; RV64V-ZVFHMIN-NEXT: # %bb.1: # %else
+; RV64V-ZVFHMIN-NEXT: andi a1, a0, 2
+; RV64V-ZVFHMIN-NEXT: bnez a1, .LBB67_12
+; RV64V-ZVFHMIN-NEXT: .LBB67_2: # %else2
+; RV64V-ZVFHMIN-NEXT: andi a1, a0, 4
+; RV64V-ZVFHMIN-NEXT: bnez a1, .LBB67_13
+; RV64V-ZVFHMIN-NEXT: .LBB67_3: # %else4
+; RV64V-ZVFHMIN-NEXT: andi a1, a0, 8
+; RV64V-ZVFHMIN-NEXT: beqz a1, .LBB67_5
+; RV64V-ZVFHMIN-NEXT: .LBB67_4: # %cond.store5
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 3
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v9
+; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a1
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v10, v12, 3
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v10
+; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a1)
+; RV64V-ZVFHMIN-NEXT: .LBB67_5: # %else6
+; RV64V-ZVFHMIN-NEXT: addi sp, sp, -320
+; RV64V-ZVFHMIN-NEXT: .cfi_def_cfa_offset 320
+; RV64V-ZVFHMIN-NEXT: sd ra, 312(sp) # 8-byte Folded Spill
+; RV64V-ZVFHMIN-NEXT: sd s0, 304(sp) # 8-byte Folded Spill
+; RV64V-ZVFHMIN-NEXT: .cfi_offset ra, -8
+; RV64V-ZVFHMIN-NEXT: .cfi_offset s0, -16
+; RV64V-ZVFHMIN-NEXT: addi s0, sp, 320
+; RV64V-ZVFHMIN-NEXT: .cfi_def_cfa s0, 0
+; RV64V-ZVFHMIN-NEXT: andi sp, sp, -64
+; RV64V-ZVFHMIN-NEXT: andi a1, a0, 16
+; RV64V-ZVFHMIN-NEXT: bnez a1, .LBB67_14
+; RV64V-ZVFHMIN-NEXT: # %bb.6: # %else8
+; RV64V-ZVFHMIN-NEXT: andi a1, a0, 32
+; RV64V-ZVFHMIN-NEXT: bnez a1, .LBB67_15
+; RV64V-ZVFHMIN-NEXT: .LBB67_7: # %else10
+; RV64V-ZVFHMIN-NEXT: andi a1, a0, 64
+; RV64V-ZVFHMIN-NEXT: bnez a1, .LBB67_16
+; RV64V-ZVFHMIN-NEXT: .LBB67_8: # %else12
+; RV64V-ZVFHMIN-NEXT: andi a0, a0, -128
+; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB67_10
+; RV64V-ZVFHMIN-NEXT: .LBB67_9: # %cond.store13
+; RV64V-ZVFHMIN-NEXT: mv a0, sp
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
+; RV64V-ZVFHMIN-NEXT: ld a0, 56(sp)
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v8
+; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a1
+; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV64V-ZVFHMIN-NEXT: .LBB67_10: # %else14
+; RV64V-ZVFHMIN-NEXT: addi sp, s0, -320
+; RV64V-ZVFHMIN-NEXT: ld ra, 312(sp) # 8-byte Folded Reload
+; RV64V-ZVFHMIN-NEXT: ld s0, 304(sp) # 8-byte Folded Reload
+; RV64V-ZVFHMIN-NEXT: addi sp, sp, 320
+; RV64V-ZVFHMIN-NEXT: ret
+; RV64V-ZVFHMIN-NEXT: .LBB67_11: # %cond.store
+; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v8
+; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a1
+; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v12
+; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a1)
+; RV64V-ZVFHMIN-NEXT: andi a1, a0, 2
+; RV64V-ZVFHMIN-NEXT: beqz a1, .LBB67_2
+; RV64V-ZVFHMIN-NEXT: .LBB67_12: # %cond.store1
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 1
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v9
+; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a1
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v12, 1
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v9
+; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a1)
+; RV64V-ZVFHMIN-NEXT: andi a1, a0, 4
+; RV64V-ZVFHMIN-NEXT: beqz a1, .LBB67_3
+; RV64V-ZVFHMIN-NEXT: .LBB67_13: # %cond.store3
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 2
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v9
+; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a1
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v10, v12, 2
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v10
+; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a1)
+; RV64V-ZVFHMIN-NEXT: andi a1, a0, 8
+; RV64V-ZVFHMIN-NEXT: bnez a1, .LBB67_4
+; RV64V-ZVFHMIN-NEXT: j .LBB67_5
+; RV64V-ZVFHMIN-NEXT: .LBB67_14: # %cond.store7
+; RV64V-ZVFHMIN-NEXT: addi a1, sp, 192
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a1)
+; RV64V-ZVFHMIN-NEXT: ld a1, 224(sp)
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 4
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a2, v9
+; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a2
+; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a1)
+; RV64V-ZVFHMIN-NEXT: andi a1, a0, 32
+; RV64V-ZVFHMIN-NEXT: beqz a1, .LBB67_7
+; RV64V-ZVFHMIN-NEXT: .LBB67_15: # %cond.store9
+; RV64V-ZVFHMIN-NEXT: addi a1, sp, 128
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a1)
+; RV64V-ZVFHMIN-NEXT: ld a1, 168(sp)
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 5
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a2, v9
+; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a2
+; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a1)
+; RV64V-ZVFHMIN-NEXT: andi a1, a0, 64
+; RV64V-ZVFHMIN-NEXT: beqz a1, .LBB67_8
+; RV64V-ZVFHMIN-NEXT: .LBB67_16: # %cond.store11
+; RV64V-ZVFHMIN-NEXT: addi a1, sp, 64
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a1)
+; RV64V-ZVFHMIN-NEXT: ld a1, 112(sp)
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 6
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a2, v9
+; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a2
+; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a1)
+; RV64V-ZVFHMIN-NEXT: andi a0, a0, -128
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB67_9
+; RV64V-ZVFHMIN-NEXT: j .LBB67_10
+;
+; RV32ZVE32F-ZVFH-LABEL: mscatter_v8f16:
+; RV32ZVE32F-ZVFH: # %bb.0:
+; RV32ZVE32F-ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV32ZVE32F-ZVFH-NEXT: vsoxei32.v v8, (zero), v10, v0.t
+; RV32ZVE32F-ZVFH-NEXT: ret
+;
+; RV64ZVE32F-ZVFH-LABEL: mscatter_v8f16:
+; RV64ZVE32F-ZVFH: # %bb.0:
+; RV64ZVE32F-ZVFH-NEXT: ld a3, 40(a0)
+; RV64ZVE32F-ZVFH-NEXT: ld a2, 48(a0)
+; RV64ZVE32F-ZVFH-NEXT: ld a1, 56(a0)
+; RV64ZVE32F-ZVFH-NEXT: ld t0, 8(a0)
+; RV64ZVE32F-ZVFH-NEXT: ld a7, 16(a0)
+; RV64ZVE32F-ZVFH-NEXT: ld a6, 24(a0)
+; RV64ZVE32F-ZVFH-NEXT: ld a5, 32(a0)
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a4, v0
+; RV64ZVE32F-ZVFH-NEXT: andi t1, a4, 1
+; RV64ZVE32F-ZVFH-NEXT: bnez t1, .LBB67_9
+; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %else
+; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 2
+; RV64ZVE32F-ZVFH-NEXT: bnez a0, .LBB67_10
+; RV64ZVE32F-ZVFH-NEXT: .LBB67_2: # %else2
+; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 4
+; RV64ZVE32F-ZVFH-NEXT: bnez a0, .LBB67_11
+; RV64ZVE32F-ZVFH-NEXT: .LBB67_3: # %else4
+; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 8
+; RV64ZVE32F-ZVFH-NEXT: bnez a0, .LBB67_12
+; RV64ZVE32F-ZVFH-NEXT: .LBB67_4: # %else6
+; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 16
+; RV64ZVE32F-ZVFH-NEXT: bnez a0, .LBB67_13
+; RV64ZVE32F-ZVFH-NEXT: .LBB67_5: # %else8
+; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 32
+; RV64ZVE32F-ZVFH-NEXT: bnez a0, .LBB67_14
+; RV64ZVE32F-ZVFH-NEXT: .LBB67_6: # %else10
+; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 64
+; RV64ZVE32F-ZVFH-NEXT: bnez a0, .LBB67_15
+; RV64ZVE32F-ZVFH-NEXT: .LBB67_7: # %else12
+; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, -128
+; RV64ZVE32F-ZVFH-NEXT: bnez a0, .LBB67_16
+; RV64ZVE32F-ZVFH-NEXT: .LBB67_8: # %else14
+; RV64ZVE32F-ZVFH-NEXT: ret
+; RV64ZVE32F-ZVFH-NEXT: .LBB67_9: # %cond.store
+; RV64ZVE32F-ZVFH-NEXT: ld a0, 0(a0)
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a0)
+; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 2
+; RV64ZVE32F-ZVFH-NEXT: beqz a0, .LBB67_2
+; RV64ZVE32F-ZVFH-NEXT: .LBB67_10: # %cond.store1
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 1
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (t0)
+; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 4
+; RV64ZVE32F-ZVFH-NEXT: beqz a0, .LBB67_3
+; RV64ZVE32F-ZVFH-NEXT: .LBB67_11: # %cond.store3
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 2
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a7)
+; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 8
+; RV64ZVE32F-ZVFH-NEXT: beqz a0, .LBB67_4
+; RV64ZVE32F-ZVFH-NEXT: .LBB67_12: # %cond.store5
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 3
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a6)
+; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 16
+; RV64ZVE32F-ZVFH-NEXT: beqz a0, .LBB67_5
+; RV64ZVE32F-ZVFH-NEXT: .LBB67_13: # %cond.store7
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 4
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a5)
+; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 32
+; RV64ZVE32F-ZVFH-NEXT: beqz a0, .LBB67_6
+; RV64ZVE32F-ZVFH-NEXT: .LBB67_14: # %cond.store9
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 5
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a3)
+; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 64
+; RV64ZVE32F-ZVFH-NEXT: beqz a0, .LBB67_7
+; RV64ZVE32F-ZVFH-NEXT: .LBB67_15: # %cond.store11
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 6
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
+; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, -128
+; RV64ZVE32F-ZVFH-NEXT: beqz a0, .LBB67_8
+; RV64ZVE32F-ZVFH-NEXT: .LBB67_16: # %cond.store13
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 7
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a1)
+; RV64ZVE32F-ZVFH-NEXT: ret
+;
+; RV32ZVE32F-ZVFHMIN-LABEL: mscatter_v8f16:
+; RV32ZVE32F-ZVFHMIN: # %bb.0:
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v0
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 1
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB67_9
+; RV32ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 2
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB67_10
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB67_2: # %else2
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 4
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB67_11
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB67_3: # %else4
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 8
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB67_12
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB67_4: # %else6
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 16
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB67_13
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB67_5: # %else8
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 32
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB67_14
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB67_6: # %else10
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 64
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB67_15
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB67_7: # %else12
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a0, -128
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB67_16
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB67_8: # %else14
+; RV32ZVE32F-ZVFHMIN-NEXT: ret
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB67_9: # %cond.store
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v8
+; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a1
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v10
+; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a1)
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 2
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB67_2
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB67_10: # %cond.store1
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 1
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a1
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 1
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a1)
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 4
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB67_3
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB67_11: # %cond.store3
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 2
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a1
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 2
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a1)
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 8
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB67_4
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB67_12: # %cond.store5
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 3
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a1
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 3
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a1)
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 16
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB67_5
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB67_13: # %cond.store7
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 4
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a1
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 4
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v12
+; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a1)
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 32
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB67_6
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB67_14: # %cond.store9
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 5
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a1
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 5
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v12
+; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a1)
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 64
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB67_7
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB67_15: # %cond.store11
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 6
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a1
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 6
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v12
+; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a1)
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a0, -128
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB67_8
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB67_16: # %cond.store13
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 7
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: ret
+;
+; RV64ZVE32F-ZVFHMIN-LABEL: mscatter_v8f16:
+; RV64ZVE32F-ZVFHMIN: # %bb.0:
+; RV64ZVE32F-ZVFHMIN-NEXT: ld a3, 40(a0)
+; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 48(a0)
+; RV64ZVE32F-ZVFHMIN-NEXT: ld a1, 56(a0)
+; RV64ZVE32F-ZVFHMIN-NEXT: ld t0, 8(a0)
+; RV64ZVE32F-ZVFHMIN-NEXT: ld a7, 16(a0)
+; RV64ZVE32F-ZVFHMIN-NEXT: ld a6, 24(a0)
+; RV64ZVE32F-ZVFHMIN-NEXT: ld a5, 32(a0)
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a4, v0
+; RV64ZVE32F-ZVFHMIN-NEXT: andi t1, a4, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez t1, .LBB67_9
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB67_10
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_2: # %else2
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, 4
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB67_11
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_3: # %else4
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, 8
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB67_12
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_4: # %else6
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, 16
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB67_13
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_5: # %else8
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, 32
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB67_14
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_6: # %else10
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, 64
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB67_15
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_7: # %else12
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, -128
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB67_16
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_8: # %else14
+; RV64ZVE32F-ZVFHMIN-NEXT: ret
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_9: # %cond.store
+; RV64ZVE32F-ZVFHMIN-NEXT: ld a0, 0(a0)
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s t1, v8
+; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, t1
+; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB67_2
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_10: # %cond.store1
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(t0)
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, 4
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB67_3
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_11: # %cond.store3
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a7)
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, 8
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB67_4
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_12: # %cond.store5
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 3
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a6)
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, 16
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB67_5
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_13: # %cond.store7
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 4
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a5)
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, 32
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB67_6
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_14: # %cond.store9
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 5
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a3)
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, 64
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB67_7
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_15: # %cond.store11
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 6
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, -128
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB67_8
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_16: # %cond.store13
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a1)
+; RV64ZVE32F-ZVFHMIN-NEXT: ret
+ call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
+ ret void
+}
+
+define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
+; RV32V-ZVFH-LABEL: mscatter_baseidx_v8i8_v8f16:
+; RV32V-ZVFH: # %bb.0:
+; RV32V-ZVFH-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32V-ZVFH-NEXT: vsext.vf4 v10, v9
+; RV32V-ZVFH-NEXT: vadd.vv v10, v10, v10
+; RV32V-ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV32V-ZVFH-NEXT: vsoxei32.v v8, (a0), v10, v0.t
+; RV32V-ZVFH-NEXT: ret
+;
+; RV64V-ZVFH-LABEL: mscatter_baseidx_v8i8_v8f16:
+; RV64V-ZVFH: # %bb.0:
+; RV64V-ZVFH-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-ZVFH-NEXT: vsext.vf8 v12, v9
+; RV64V-ZVFH-NEXT: vadd.vv v12, v12, v12
+; RV64V-ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV64V-ZVFH-NEXT: vsoxei64.v v8, (a0), v12, v0.t
+; RV64V-ZVFH-NEXT: ret
+;
+; RV32V-ZVFHMIN-LABEL: mscatter_baseidx_v8i8_v8f16:
+; RV32V-ZVFHMIN: # %bb.0:
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vsext.vf4 v10, v9
+; RV32V-ZVFHMIN-NEXT: vadd.vv v10, v10, v10
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v0
+; RV32V-ZVFHMIN-NEXT: andi a2, a1, 1
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vadd.vx v10, v10, a0
+; RV32V-ZVFHMIN-NEXT: bnez a2, .LBB68_9
+; RV32V-ZVFHMIN-NEXT: # %bb.1: # %else
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 2
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB68_10
+; RV32V-ZVFHMIN-NEXT: .LBB68_2: # %else2
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 4
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB68_11
+; RV32V-ZVFHMIN-NEXT: .LBB68_3: # %else4
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 8
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB68_12
+; RV32V-ZVFHMIN-NEXT: .LBB68_4: # %else6
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 16
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB68_13
+; RV32V-ZVFHMIN-NEXT: .LBB68_5: # %else8
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 32
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB68_14
+; RV32V-ZVFHMIN-NEXT: .LBB68_6: # %else10
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 64
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB68_15
+; RV32V-ZVFHMIN-NEXT: .LBB68_7: # %else12
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, -128
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB68_16
+; RV32V-ZVFHMIN-NEXT: .LBB68_8: # %else14
+; RV32V-ZVFHMIN-NEXT: ret
+; RV32V-ZVFHMIN-NEXT: .LBB68_9: # %cond.store
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v10
+; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 2
+; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB68_2
+; RV32V-ZVFHMIN-NEXT: .LBB68_10: # %cond.store1
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 1
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 1
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 4
+; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB68_3
+; RV32V-ZVFHMIN-NEXT: .LBB68_11: # %cond.store3
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 2
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 2
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 8
+; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB68_4
+; RV32V-ZVFHMIN-NEXT: .LBB68_12: # %cond.store5
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 3
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 3
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 16
+; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB68_5
+; RV32V-ZVFHMIN-NEXT: .LBB68_13: # %cond.store7
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 4
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 4
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v12
+; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 32
+; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB68_6
+; RV32V-ZVFHMIN-NEXT: .LBB68_14: # %cond.store9
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 5
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 5
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v12
+; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 64
+; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB68_7
+; RV32V-ZVFHMIN-NEXT: .LBB68_15: # %cond.store11
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 6
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 6
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v12
+; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, -128
+; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB68_8
+; RV32V-ZVFHMIN-NEXT: .LBB68_16: # %cond.store13
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 7
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32V-ZVFHMIN-NEXT: ret
+;
+; RV64V-ZVFHMIN-LABEL: mscatter_baseidx_v8i8_v8f16:
+; RV64V-ZVFHMIN: # %bb.0:
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-ZVFHMIN-NEXT: vsext.vf8 v12, v9
+; RV64V-ZVFHMIN-NEXT: vadd.vv v12, v12, v12
+; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v0
+; RV64V-ZVFHMIN-NEXT: andi a2, a1, 1
+; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; RV64V-ZVFHMIN-NEXT: vadd.vx v12, v12, a0
+; RV64V-ZVFHMIN-NEXT: bnez a2, .LBB68_11
+; RV64V-ZVFHMIN-NEXT: # %bb.1: # %else
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 2
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB68_12
+; RV64V-ZVFHMIN-NEXT: .LBB68_2: # %else2
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 4
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB68_13
+; RV64V-ZVFHMIN-NEXT: .LBB68_3: # %else4
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 8
+; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB68_5
+; RV64V-ZVFHMIN-NEXT: .LBB68_4: # %cond.store5
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 3
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v10, v12, 3
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v10
+; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV64V-ZVFHMIN-NEXT: .LBB68_5: # %else6
+; RV64V-ZVFHMIN-NEXT: addi sp, sp, -320
+; RV64V-ZVFHMIN-NEXT: .cfi_def_cfa_offset 320
+; RV64V-ZVFHMIN-NEXT: sd ra, 312(sp) # 8-byte Folded Spill
+; RV64V-ZVFHMIN-NEXT: sd s0, 304(sp) # 8-byte Folded Spill
+; RV64V-ZVFHMIN-NEXT: .cfi_offset ra, -8
+; RV64V-ZVFHMIN-NEXT: .cfi_offset s0, -16
+; RV64V-ZVFHMIN-NEXT: addi s0, sp, 320
+; RV64V-ZVFHMIN-NEXT: .cfi_def_cfa s0, 0
+; RV64V-ZVFHMIN-NEXT: andi sp, sp, -64
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 16
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB68_14
+; RV64V-ZVFHMIN-NEXT: # %bb.6: # %else8
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 32
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB68_15
+; RV64V-ZVFHMIN-NEXT: .LBB68_7: # %else10
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 64
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB68_16
+; RV64V-ZVFHMIN-NEXT: .LBB68_8: # %else12
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, -128
+; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB68_10
+; RV64V-ZVFHMIN-NEXT: .LBB68_9: # %cond.store13
+; RV64V-ZVFHMIN-NEXT: mv a0, sp
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
+; RV64V-ZVFHMIN-NEXT: ld a0, 56(sp)
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v8
+; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a1
+; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV64V-ZVFHMIN-NEXT: .LBB68_10: # %else14
+; RV64V-ZVFHMIN-NEXT: addi sp, s0, -320
+; RV64V-ZVFHMIN-NEXT: ld ra, 312(sp) # 8-byte Folded Reload
+; RV64V-ZVFHMIN-NEXT: ld s0, 304(sp) # 8-byte Folded Reload
+; RV64V-ZVFHMIN-NEXT: addi sp, sp, 320
+; RV64V-ZVFHMIN-NEXT: ret
+; RV64V-ZVFHMIN-NEXT: .LBB68_11: # %cond.store
+; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v12
+; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 2
+; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB68_2
+; RV64V-ZVFHMIN-NEXT: .LBB68_12: # %cond.store1
+; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 1
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v12, 1
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 4
+; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB68_3
+; RV64V-ZVFHMIN-NEXT: .LBB68_13: # %cond.store3
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 2
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v10, v12, 2
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v10
+; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 8
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB68_4
+; RV64V-ZVFHMIN-NEXT: j .LBB68_5
+; RV64V-ZVFHMIN-NEXT: .LBB68_14: # %cond.store7
+; RV64V-ZVFHMIN-NEXT: addi a0, sp, 192
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
+; RV64V-ZVFHMIN-NEXT: ld a0, 224(sp)
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 4
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a2, v9
+; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a2
+; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 32
+; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB68_7
+; RV64V-ZVFHMIN-NEXT: .LBB68_15: # %cond.store9
+; RV64V-ZVFHMIN-NEXT: addi a0, sp, 128
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
+; RV64V-ZVFHMIN-NEXT: ld a0, 168(sp)
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 5
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a2, v9
+; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a2
+; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 64
+; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB68_8
+; RV64V-ZVFHMIN-NEXT: .LBB68_16: # %cond.store11
+; RV64V-ZVFHMIN-NEXT: addi a0, sp, 64
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
+; RV64V-ZVFHMIN-NEXT: ld a0, 112(sp)
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 6
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a2, v9
+; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a2
+; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, -128
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB68_9
+; RV64V-ZVFHMIN-NEXT: j .LBB68_10
+;
+; RV32ZVE32F-ZVFH-LABEL: mscatter_baseidx_v8i8_v8f16:
+; RV32ZVE32F-ZVFH: # %bb.0:
+; RV32ZVE32F-ZVFH-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32ZVE32F-ZVFH-NEXT: vsext.vf4 v10, v9
+; RV32ZVE32F-ZVFH-NEXT: vadd.vv v10, v10, v10
+; RV32ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV32ZVE32F-ZVFH-NEXT: vsoxei32.v v8, (a0), v10, v0.t
+; RV32ZVE32F-ZVFH-NEXT: ret
+;
+; RV64ZVE32F-ZVFH-LABEL: mscatter_baseidx_v8i8_v8f16:
+; RV64ZVE32F-ZVFH: # %bb.0:
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v0
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 1
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB68_2
+; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %cond.store
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a2)
+; RV64ZVE32F-ZVFH-NEXT: .LBB68_2: # %else
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 2
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB68_4
+; RV64ZVE32F-ZVFH-NEXT: # %bb.3: # %cond.store1
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v9, 1
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 1
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v10, (a2)
+; RV64ZVE32F-ZVFH-NEXT: .LBB68_4: # %else2
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v9, 4
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 2
+; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB68_12
+; RV64ZVE32F-ZVFH-NEXT: # %bb.5: # %else4
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
+; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB68_13
+; RV64ZVE32F-ZVFH-NEXT: .LBB68_6: # %else6
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
+; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB68_14
+; RV64ZVE32F-ZVFH-NEXT: .LBB68_7: # %else8
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB68_9
+; RV64ZVE32F-ZVFH-NEXT: .LBB68_8: # %cond.store9
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v10, 1
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 5
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
+; RV64ZVE32F-ZVFH-NEXT: .LBB68_9: # %else10
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v10, 2
+; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB68_15
+; RV64ZVE32F-ZVFH-NEXT: # %bb.10: # %else12
+; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
+; RV64ZVE32F-ZVFH-NEXT: bnez a1, .LBB68_16
+; RV64ZVE32F-ZVFH-NEXT: .LBB68_11: # %else14
+; RV64ZVE32F-ZVFH-NEXT: ret
+; RV64ZVE32F-ZVFH-NEXT: .LBB68_12: # %cond.store3
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v11, v8, 2
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v11, (a2)
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB68_6
+; RV64ZVE32F-ZVFH-NEXT: .LBB68_13: # %cond.store5
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 3
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB68_7
+; RV64ZVE32F-ZVFH-NEXT: .LBB68_14: # %cond.store7
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 4
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
+; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB68_8
+; RV64ZVE32F-ZVFH-NEXT: j .LBB68_9
+; RV64ZVE32F-ZVFH-NEXT: .LBB68_15: # %cond.store11
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 6
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v10, (a2)
+; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
+; RV64ZVE32F-ZVFH-NEXT: beqz a1, .LBB68_11
+; RV64ZVE32F-ZVFH-NEXT: .LBB68_16: # %cond.store13
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v9
+; RV64ZVE32F-ZVFH-NEXT: slli a1, a1, 1
+; RV64ZVE32F-ZVFH-NEXT: add a0, a0, a1
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 7
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a0)
+; RV64ZVE32F-ZVFH-NEXT: ret
+;
+; RV32ZVE32F-ZVFHMIN-LABEL: mscatter_baseidx_v8i8_v8f16:
+; RV32ZVE32F-ZVFHMIN: # %bb.0:
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vsext.vf4 v10, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: vadd.vv v10, v10, v10
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v0
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 1
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vadd.vx v10, v10, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB68_9
+; RV32ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 2
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB68_10
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB68_2: # %else2
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 4
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB68_11
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB68_3: # %else4
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 8
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB68_12
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB68_4: # %else6
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 16
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB68_13
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB68_5: # %else8
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 32
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB68_14
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB68_6: # %else10
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 64
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB68_15
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB68_7: # %else12
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, -128
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB68_16
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB68_8: # %else14
+; RV32ZVE32F-ZVFHMIN-NEXT: ret
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB68_9: # %cond.store
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v10
+; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 2
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB68_2
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB68_10: # %cond.store1
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 1
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 1
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 4
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB68_3
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB68_11: # %cond.store3
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 2
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 2
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 8
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB68_4
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB68_12: # %cond.store5
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 3
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 3
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 16
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB68_5
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB68_13: # %cond.store7
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 4
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 4
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v12
+; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 32
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB68_6
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB68_14: # %cond.store9
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 5
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 5
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v12
+; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 64
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB68_7
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB68_15: # %cond.store11
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 6
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 6
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v12
+; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, -128
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB68_8
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB68_16: # %cond.store13
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 7
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: ret
+;
+; RV64ZVE32F-ZVFHMIN-LABEL: mscatter_baseidx_v8i8_v8f16:
+; RV64ZVE32F-ZVFHMIN: # %bb.0:
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v0
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB68_2
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %cond.store
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v8
+; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
+; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_2: # %else
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB68_4
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.3: # %cond.store1
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v9, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v10
+; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
+; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_4: # %else2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v9, 4
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB68_12
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.5: # %else4
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB68_13
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_6: # %else6
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB68_14
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_7: # %else8
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB68_9
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_8: # %cond.store9
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 5
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
+; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_9: # %else10
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB68_15
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.10: # %else12
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB68_16
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_11: # %else14
+; RV64ZVE32F-ZVFHMIN-NEXT: ret
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_12: # %cond.store3
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v11, v8, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v11
+; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
+; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB68_6
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_13: # %cond.store5
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 3
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
+; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB68_7
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_14: # %cond.store7
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 4
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
+; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB68_8
+; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB68_9
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_15: # %cond.store11
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 6
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v10
+; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
+; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB68_11
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_16: # %cond.store13
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a1, a1, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a0, a0, a1
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v8
+; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a1
+; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV64ZVE32F-ZVFHMIN-NEXT: ret
+ %ptrs = getelementptr inbounds half, ptr %base, <8 x i8> %idxs
+ call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
+ ret void
+}
+
+define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
+; RV32V-ZVFH-LABEL: mscatter_baseidx_sext_v8i8_v8f16:
+; RV32V-ZVFH: # %bb.0:
+; RV32V-ZVFH-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32V-ZVFH-NEXT: vsext.vf4 v10, v9
+; RV32V-ZVFH-NEXT: vadd.vv v10, v10, v10
+; RV32V-ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV32V-ZVFH-NEXT: vsoxei32.v v8, (a0), v10, v0.t
+; RV32V-ZVFH-NEXT: ret
+;
+; RV64V-ZVFH-LABEL: mscatter_baseidx_sext_v8i8_v8f16:
+; RV64V-ZVFH: # %bb.0:
+; RV64V-ZVFH-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-ZVFH-NEXT: vsext.vf8 v12, v9
+; RV64V-ZVFH-NEXT: vadd.vv v12, v12, v12
+; RV64V-ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV64V-ZVFH-NEXT: vsoxei64.v v8, (a0), v12, v0.t
+; RV64V-ZVFH-NEXT: ret
+;
+; RV32V-ZVFHMIN-LABEL: mscatter_baseidx_sext_v8i8_v8f16:
+; RV32V-ZVFHMIN: # %bb.0:
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vsext.vf4 v10, v9
+; RV32V-ZVFHMIN-NEXT: vadd.vv v10, v10, v10
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v0
+; RV32V-ZVFHMIN-NEXT: andi a2, a1, 1
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vadd.vx v10, v10, a0
+; RV32V-ZVFHMIN-NEXT: bnez a2, .LBB69_9
+; RV32V-ZVFHMIN-NEXT: # %bb.1: # %else
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 2
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB69_10
+; RV32V-ZVFHMIN-NEXT: .LBB69_2: # %else2
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 4
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB69_11
+; RV32V-ZVFHMIN-NEXT: .LBB69_3: # %else4
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 8
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB69_12
+; RV32V-ZVFHMIN-NEXT: .LBB69_4: # %else6
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 16
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB69_13
+; RV32V-ZVFHMIN-NEXT: .LBB69_5: # %else8
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 32
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB69_14
+; RV32V-ZVFHMIN-NEXT: .LBB69_6: # %else10
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 64
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB69_15
+; RV32V-ZVFHMIN-NEXT: .LBB69_7: # %else12
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, -128
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB69_16
+; RV32V-ZVFHMIN-NEXT: .LBB69_8: # %else14
+; RV32V-ZVFHMIN-NEXT: ret
+; RV32V-ZVFHMIN-NEXT: .LBB69_9: # %cond.store
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v10
+; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 2
+; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB69_2
+; RV32V-ZVFHMIN-NEXT: .LBB69_10: # %cond.store1
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 1
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 1
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 4
+; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB69_3
+; RV32V-ZVFHMIN-NEXT: .LBB69_11: # %cond.store3
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 2
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 2
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 8
+; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB69_4
+; RV32V-ZVFHMIN-NEXT: .LBB69_12: # %cond.store5
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 3
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 3
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 16
+; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB69_5
+; RV32V-ZVFHMIN-NEXT: .LBB69_13: # %cond.store7
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 4
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 4
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v12
+; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 32
+; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB69_6
+; RV32V-ZVFHMIN-NEXT: .LBB69_14: # %cond.store9
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 5
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 5
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v12
+; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 64
+; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB69_7
+; RV32V-ZVFHMIN-NEXT: .LBB69_15: # %cond.store11
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 6
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 6
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v12
+; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, -128
+; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB69_8
+; RV32V-ZVFHMIN-NEXT: .LBB69_16: # %cond.store13
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 7
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32V-ZVFHMIN-NEXT: ret
+;
+; RV64V-ZVFHMIN-LABEL: mscatter_baseidx_sext_v8i8_v8f16:
+; RV64V-ZVFHMIN: # %bb.0:
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-ZVFHMIN-NEXT: vsext.vf8 v12, v9
+; RV64V-ZVFHMIN-NEXT: vadd.vv v12, v12, v12
+; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v0
+; RV64V-ZVFHMIN-NEXT: andi a2, a1, 1
+; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; RV64V-ZVFHMIN-NEXT: vadd.vx v12, v12, a0
+; RV64V-ZVFHMIN-NEXT: bnez a2, .LBB69_11
+; RV64V-ZVFHMIN-NEXT: # %bb.1: # %else
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 2
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB69_12
+; RV64V-ZVFHMIN-NEXT: .LBB69_2: # %else2
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 4
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB69_13
+; RV64V-ZVFHMIN-NEXT: .LBB69_3: # %else4
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 8
+; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB69_5
+; RV64V-ZVFHMIN-NEXT: .LBB69_4: # %cond.store5
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 3
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v10, v12, 3
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v10
+; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV64V-ZVFHMIN-NEXT: .LBB69_5: # %else6
+; RV64V-ZVFHMIN-NEXT: addi sp, sp, -320
+; RV64V-ZVFHMIN-NEXT: .cfi_def_cfa_offset 320
+; RV64V-ZVFHMIN-NEXT: sd ra, 312(sp) # 8-byte Folded Spill
+; RV64V-ZVFHMIN-NEXT: sd s0, 304(sp) # 8-byte Folded Spill
+; RV64V-ZVFHMIN-NEXT: .cfi_offset ra, -8
+; RV64V-ZVFHMIN-NEXT: .cfi_offset s0, -16
+; RV64V-ZVFHMIN-NEXT: addi s0, sp, 320
+; RV64V-ZVFHMIN-NEXT: .cfi_def_cfa s0, 0
+; RV64V-ZVFHMIN-NEXT: andi sp, sp, -64
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 16
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB69_14
+; RV64V-ZVFHMIN-NEXT: # %bb.6: # %else8
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 32
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB69_15
+; RV64V-ZVFHMIN-NEXT: .LBB69_7: # %else10
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 64
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB69_16
+; RV64V-ZVFHMIN-NEXT: .LBB69_8: # %else12
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, -128
+; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB69_10
+; RV64V-ZVFHMIN-NEXT: .LBB69_9: # %cond.store13
+; RV64V-ZVFHMIN-NEXT: mv a0, sp
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
+; RV64V-ZVFHMIN-NEXT: ld a0, 56(sp)
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v8
+; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a1
+; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV64V-ZVFHMIN-NEXT: .LBB69_10: # %else14
+; RV64V-ZVFHMIN-NEXT: addi sp, s0, -320
+; RV64V-ZVFHMIN-NEXT: ld ra, 312(sp) # 8-byte Folded Reload
+; RV64V-ZVFHMIN-NEXT: ld s0, 304(sp) # 8-byte Folded Reload
+; RV64V-ZVFHMIN-NEXT: addi sp, sp, 320
+; RV64V-ZVFHMIN-NEXT: ret
+; RV64V-ZVFHMIN-NEXT: .LBB69_11: # %cond.store
+; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v12
+; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 2
+; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB69_2
+; RV64V-ZVFHMIN-NEXT: .LBB69_12: # %cond.store1
+; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 1
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v12, 1
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 4
+; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB69_3
+; RV64V-ZVFHMIN-NEXT: .LBB69_13: # %cond.store3
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 2
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v10, v12, 2
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v10
+; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 8
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB69_4
+; RV64V-ZVFHMIN-NEXT: j .LBB69_5
+; RV64V-ZVFHMIN-NEXT: .LBB69_14: # %cond.store7
+; RV64V-ZVFHMIN-NEXT: addi a0, sp, 192
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
+; RV64V-ZVFHMIN-NEXT: ld a0, 224(sp)
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 4
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a2, v9
+; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a2
+; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 32
+; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB69_7
+; RV64V-ZVFHMIN-NEXT: .LBB69_15: # %cond.store9
+; RV64V-ZVFHMIN-NEXT: addi a0, sp, 128
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
+; RV64V-ZVFHMIN-NEXT: ld a0, 168(sp)
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 5
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a2, v9
+; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a2
+; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 64
+; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB69_8
+; RV64V-ZVFHMIN-NEXT: .LBB69_16: # %cond.store11
+; RV64V-ZVFHMIN-NEXT: addi a0, sp, 64
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
+; RV64V-ZVFHMIN-NEXT: ld a0, 112(sp)
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 6
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a2, v9
+; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a2
+; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, -128
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB69_9
+; RV64V-ZVFHMIN-NEXT: j .LBB69_10
+;
+; RV32ZVE32F-ZVFH-LABEL: mscatter_baseidx_sext_v8i8_v8f16:
+; RV32ZVE32F-ZVFH: # %bb.0:
+; RV32ZVE32F-ZVFH-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32ZVE32F-ZVFH-NEXT: vsext.vf4 v10, v9
+; RV32ZVE32F-ZVFH-NEXT: vadd.vv v10, v10, v10
+; RV32ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV32ZVE32F-ZVFH-NEXT: vsoxei32.v v8, (a0), v10, v0.t
+; RV32ZVE32F-ZVFH-NEXT: ret
+;
+; RV64ZVE32F-ZVFH-LABEL: mscatter_baseidx_sext_v8i8_v8f16:
+; RV64ZVE32F-ZVFH: # %bb.0:
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v0
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 1
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB69_2
+; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %cond.store
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a2)
+; RV64ZVE32F-ZVFH-NEXT: .LBB69_2: # %else
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 2
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB69_4
+; RV64ZVE32F-ZVFH-NEXT: # %bb.3: # %cond.store1
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v9, 1
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 1
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v10, (a2)
+; RV64ZVE32F-ZVFH-NEXT: .LBB69_4: # %else2
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v9, 4
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 2
+; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB69_12
+; RV64ZVE32F-ZVFH-NEXT: # %bb.5: # %else4
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
+; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB69_13
+; RV64ZVE32F-ZVFH-NEXT: .LBB69_6: # %else6
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
+; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB69_14
+; RV64ZVE32F-ZVFH-NEXT: .LBB69_7: # %else8
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB69_9
+; RV64ZVE32F-ZVFH-NEXT: .LBB69_8: # %cond.store9
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v10, 1
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 5
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
+; RV64ZVE32F-ZVFH-NEXT: .LBB69_9: # %else10
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v10, 2
+; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB69_15
+; RV64ZVE32F-ZVFH-NEXT: # %bb.10: # %else12
+; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
+; RV64ZVE32F-ZVFH-NEXT: bnez a1, .LBB69_16
+; RV64ZVE32F-ZVFH-NEXT: .LBB69_11: # %else14
+; RV64ZVE32F-ZVFH-NEXT: ret
+; RV64ZVE32F-ZVFH-NEXT: .LBB69_12: # %cond.store3
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v11, v8, 2
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v11, (a2)
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB69_6
+; RV64ZVE32F-ZVFH-NEXT: .LBB69_13: # %cond.store5
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 3
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB69_7
+; RV64ZVE32F-ZVFH-NEXT: .LBB69_14: # %cond.store7
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 4
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
+; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB69_8
+; RV64ZVE32F-ZVFH-NEXT: j .LBB69_9
+; RV64ZVE32F-ZVFH-NEXT: .LBB69_15: # %cond.store11
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 6
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v10, (a2)
+; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
+; RV64ZVE32F-ZVFH-NEXT: beqz a1, .LBB69_11
+; RV64ZVE32F-ZVFH-NEXT: .LBB69_16: # %cond.store13
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v9
+; RV64ZVE32F-ZVFH-NEXT: slli a1, a1, 1
+; RV64ZVE32F-ZVFH-NEXT: add a0, a0, a1
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 7
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a0)
+; RV64ZVE32F-ZVFH-NEXT: ret
+;
+; RV32ZVE32F-ZVFHMIN-LABEL: mscatter_baseidx_sext_v8i8_v8f16:
+; RV32ZVE32F-ZVFHMIN: # %bb.0:
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vsext.vf4 v10, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: vadd.vv v10, v10, v10
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v0
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 1
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vadd.vx v10, v10, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB69_9
+; RV32ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 2
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB69_10
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB69_2: # %else2
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 4
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB69_11
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB69_3: # %else4
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 8
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB69_12
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB69_4: # %else6
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 16
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB69_13
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB69_5: # %else8
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 32
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB69_14
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB69_6: # %else10
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 64
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB69_15
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB69_7: # %else12
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, -128
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB69_16
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB69_8: # %else14
+; RV32ZVE32F-ZVFHMIN-NEXT: ret
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB69_9: # %cond.store
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v10
+; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 2
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB69_2
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB69_10: # %cond.store1
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 1
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 1
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 4
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB69_3
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB69_11: # %cond.store3
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 2
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 2
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 8
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB69_4
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB69_12: # %cond.store5
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 3
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 3
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 16
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB69_5
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB69_13: # %cond.store7
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 4
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 4
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v12
+; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 32
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB69_6
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB69_14: # %cond.store9
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 5
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 5
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v12
+; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 64
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB69_7
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB69_15: # %cond.store11
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 6
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 6
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v12
+; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, -128
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB69_8
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB69_16: # %cond.store13
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 7
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: ret
+;
+; RV64ZVE32F-ZVFHMIN-LABEL: mscatter_baseidx_sext_v8i8_v8f16:
+; RV64ZVE32F-ZVFHMIN: # %bb.0:
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v0
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB69_2
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %cond.store
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v8
+; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
+; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_2: # %else
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB69_4
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.3: # %cond.store1
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v9, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v10
+; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
+; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_4: # %else2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v9, 4
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB69_12
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.5: # %else4
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB69_13
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_6: # %else6
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB69_14
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_7: # %else8
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB69_9
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_8: # %cond.store9
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 5
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
+; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_9: # %else10
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB69_15
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.10: # %else12
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB69_16
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_11: # %else14
+; RV64ZVE32F-ZVFHMIN-NEXT: ret
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_12: # %cond.store3
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v11, v8, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v11
+; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
+; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB69_6
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_13: # %cond.store5
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 3
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
+; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB69_7
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_14: # %cond.store7
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 4
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
+; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB69_8
+; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB69_9
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_15: # %cond.store11
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 6
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v10
+; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
+; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB69_11
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_16: # %cond.store13
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a1, a1, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a0, a0, a1
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v8
+; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a1
+; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV64ZVE32F-ZVFHMIN-NEXT: ret
+ %eidxs = sext <8 x i8> %idxs to <8 x i16>
+ %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs
+ call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
+ ret void
+}
+
+define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
+; RV32V-ZVFH-LABEL: mscatter_baseidx_zext_v8i8_v8f16:
+; RV32V-ZVFH: # %bb.0:
+; RV32V-ZVFH-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV32V-ZVFH-NEXT: vwaddu.vv v10, v9, v9
+; RV32V-ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV32V-ZVFH-NEXT: vsoxei16.v v8, (a0), v10, v0.t
+; RV32V-ZVFH-NEXT: ret
+;
+; RV64V-ZVFH-LABEL: mscatter_baseidx_zext_v8i8_v8f16:
+; RV64V-ZVFH: # %bb.0:
+; RV64V-ZVFH-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64V-ZVFH-NEXT: vwaddu.vv v10, v9, v9
+; RV64V-ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV64V-ZVFH-NEXT: vsoxei16.v v8, (a0), v10, v0.t
+; RV64V-ZVFH-NEXT: ret
+;
+; RV32V-ZVFHMIN-LABEL: mscatter_baseidx_zext_v8i8_v8f16:
+; RV32V-ZVFHMIN: # %bb.0:
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vzext.vf4 v10, v9
+; RV32V-ZVFHMIN-NEXT: vadd.vv v10, v10, v10
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v0
+; RV32V-ZVFHMIN-NEXT: andi a2, a1, 1
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vadd.vx v10, v10, a0
+; RV32V-ZVFHMIN-NEXT: bnez a2, .LBB70_9
+; RV32V-ZVFHMIN-NEXT: # %bb.1: # %else
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 2
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB70_10
+; RV32V-ZVFHMIN-NEXT: .LBB70_2: # %else2
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 4
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB70_11
+; RV32V-ZVFHMIN-NEXT: .LBB70_3: # %else4
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 8
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB70_12
+; RV32V-ZVFHMIN-NEXT: .LBB70_4: # %else6
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 16
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB70_13
+; RV32V-ZVFHMIN-NEXT: .LBB70_5: # %else8
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 32
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB70_14
+; RV32V-ZVFHMIN-NEXT: .LBB70_6: # %else10
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 64
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB70_15
+; RV32V-ZVFHMIN-NEXT: .LBB70_7: # %else12
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, -128
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB70_16
+; RV32V-ZVFHMIN-NEXT: .LBB70_8: # %else14
+; RV32V-ZVFHMIN-NEXT: ret
+; RV32V-ZVFHMIN-NEXT: .LBB70_9: # %cond.store
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v10
+; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 2
+; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB70_2
+; RV32V-ZVFHMIN-NEXT: .LBB70_10: # %cond.store1
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 1
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 1
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 4
+; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB70_3
+; RV32V-ZVFHMIN-NEXT: .LBB70_11: # %cond.store3
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 2
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 2
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 8
+; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB70_4
+; RV32V-ZVFHMIN-NEXT: .LBB70_12: # %cond.store5
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 3
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 3
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 16
+; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB70_5
+; RV32V-ZVFHMIN-NEXT: .LBB70_13: # %cond.store7
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 4
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 4
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v12
+; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 32
+; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB70_6
+; RV32V-ZVFHMIN-NEXT: .LBB70_14: # %cond.store9
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 5
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 5
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v12
+; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 64
+; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB70_7
+; RV32V-ZVFHMIN-NEXT: .LBB70_15: # %cond.store11
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 6
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 6
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v12
+; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, -128
+; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB70_8
+; RV32V-ZVFHMIN-NEXT: .LBB70_16: # %cond.store13
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 7
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32V-ZVFHMIN-NEXT: ret
+;
+; RV64V-ZVFHMIN-LABEL: mscatter_baseidx_zext_v8i8_v8f16:
+; RV64V-ZVFHMIN: # %bb.0:
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-ZVFHMIN-NEXT: vzext.vf8 v12, v9
+; RV64V-ZVFHMIN-NEXT: vadd.vv v12, v12, v12
+; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v0
+; RV64V-ZVFHMIN-NEXT: andi a2, a1, 1
+; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; RV64V-ZVFHMIN-NEXT: vadd.vx v12, v12, a0
+; RV64V-ZVFHMIN-NEXT: bnez a2, .LBB70_11
+; RV64V-ZVFHMIN-NEXT: # %bb.1: # %else
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 2
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB70_12
+; RV64V-ZVFHMIN-NEXT: .LBB70_2: # %else2
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 4
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB70_13
+; RV64V-ZVFHMIN-NEXT: .LBB70_3: # %else4
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 8
+; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB70_5
+; RV64V-ZVFHMIN-NEXT: .LBB70_4: # %cond.store5
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 3
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v10, v12, 3
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v10
+; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV64V-ZVFHMIN-NEXT: .LBB70_5: # %else6
+; RV64V-ZVFHMIN-NEXT: addi sp, sp, -320
+; RV64V-ZVFHMIN-NEXT: .cfi_def_cfa_offset 320
+; RV64V-ZVFHMIN-NEXT: sd ra, 312(sp) # 8-byte Folded Spill
+; RV64V-ZVFHMIN-NEXT: sd s0, 304(sp) # 8-byte Folded Spill
+; RV64V-ZVFHMIN-NEXT: .cfi_offset ra, -8
+; RV64V-ZVFHMIN-NEXT: .cfi_offset s0, -16
+; RV64V-ZVFHMIN-NEXT: addi s0, sp, 320
+; RV64V-ZVFHMIN-NEXT: .cfi_def_cfa s0, 0
+; RV64V-ZVFHMIN-NEXT: andi sp, sp, -64
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 16
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB70_14
+; RV64V-ZVFHMIN-NEXT: # %bb.6: # %else8
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 32
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB70_15
+; RV64V-ZVFHMIN-NEXT: .LBB70_7: # %else10
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 64
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB70_16
+; RV64V-ZVFHMIN-NEXT: .LBB70_8: # %else12
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, -128
+; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB70_10
+; RV64V-ZVFHMIN-NEXT: .LBB70_9: # %cond.store13
+; RV64V-ZVFHMIN-NEXT: mv a0, sp
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
+; RV64V-ZVFHMIN-NEXT: ld a0, 56(sp)
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v8
+; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a1
+; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV64V-ZVFHMIN-NEXT: .LBB70_10: # %else14
+; RV64V-ZVFHMIN-NEXT: addi sp, s0, -320
+; RV64V-ZVFHMIN-NEXT: ld ra, 312(sp) # 8-byte Folded Reload
+; RV64V-ZVFHMIN-NEXT: ld s0, 304(sp) # 8-byte Folded Reload
+; RV64V-ZVFHMIN-NEXT: addi sp, sp, 320
+; RV64V-ZVFHMIN-NEXT: ret
+; RV64V-ZVFHMIN-NEXT: .LBB70_11: # %cond.store
+; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v12
+; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 2
+; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB70_2
+; RV64V-ZVFHMIN-NEXT: .LBB70_12: # %cond.store1
+; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 1
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v12, 1
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 4
+; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB70_3
+; RV64V-ZVFHMIN-NEXT: .LBB70_13: # %cond.store3
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 2
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v10, v12, 2
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v10
+; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 8
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB70_4
+; RV64V-ZVFHMIN-NEXT: j .LBB70_5
+; RV64V-ZVFHMIN-NEXT: .LBB70_14: # %cond.store7
+; RV64V-ZVFHMIN-NEXT: addi a0, sp, 192
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
+; RV64V-ZVFHMIN-NEXT: ld a0, 224(sp)
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 4
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a2, v9
+; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a2
+; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 32
+; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB70_7
+; RV64V-ZVFHMIN-NEXT: .LBB70_15: # %cond.store9
+; RV64V-ZVFHMIN-NEXT: addi a0, sp, 128
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
+; RV64V-ZVFHMIN-NEXT: ld a0, 168(sp)
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 5
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a2, v9
+; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a2
+; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 64
+; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB70_8
+; RV64V-ZVFHMIN-NEXT: .LBB70_16: # %cond.store11
+; RV64V-ZVFHMIN-NEXT: addi a0, sp, 64
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
+; RV64V-ZVFHMIN-NEXT: ld a0, 112(sp)
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 6
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a2, v9
+; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a2
+; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, -128
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB70_9
+; RV64V-ZVFHMIN-NEXT: j .LBB70_10
+;
+; RV32ZVE32F-ZVFH-LABEL: mscatter_baseidx_zext_v8i8_v8f16:
+; RV32ZVE32F-ZVFH: # %bb.0:
+; RV32ZVE32F-ZVFH-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV32ZVE32F-ZVFH-NEXT: vwaddu.vv v10, v9, v9
+; RV32ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV32ZVE32F-ZVFH-NEXT: vsoxei16.v v8, (a0), v10, v0.t
+; RV32ZVE32F-ZVFH-NEXT: ret
+;
+; RV64ZVE32F-ZVFH-LABEL: mscatter_baseidx_zext_v8i8_v8f16:
+; RV64ZVE32F-ZVFH: # %bb.0:
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v0
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 1
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB70_2
+; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %cond.store
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a2)
+; RV64ZVE32F-ZVFH-NEXT: .LBB70_2: # %else
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 2
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB70_4
+; RV64ZVE32F-ZVFH-NEXT: # %bb.3: # %cond.store1
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v9, 1
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 1
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v10, (a2)
+; RV64ZVE32F-ZVFH-NEXT: .LBB70_4: # %else2
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v9, 4
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 2
+; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB70_12
+; RV64ZVE32F-ZVFH-NEXT: # %bb.5: # %else4
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
+; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB70_13
+; RV64ZVE32F-ZVFH-NEXT: .LBB70_6: # %else6
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
+; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB70_14
+; RV64ZVE32F-ZVFH-NEXT: .LBB70_7: # %else8
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB70_9
+; RV64ZVE32F-ZVFH-NEXT: .LBB70_8: # %cond.store9
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v10, 1
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 5
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
+; RV64ZVE32F-ZVFH-NEXT: .LBB70_9: # %else10
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v10, 2
+; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB70_15
+; RV64ZVE32F-ZVFH-NEXT: # %bb.10: # %else12
+; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
+; RV64ZVE32F-ZVFH-NEXT: bnez a1, .LBB70_16
+; RV64ZVE32F-ZVFH-NEXT: .LBB70_11: # %else14
+; RV64ZVE32F-ZVFH-NEXT: ret
+; RV64ZVE32F-ZVFH-NEXT: .LBB70_12: # %cond.store3
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v11, v8, 2
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v11, (a2)
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB70_6
+; RV64ZVE32F-ZVFH-NEXT: .LBB70_13: # %cond.store5
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 3
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB70_7
+; RV64ZVE32F-ZVFH-NEXT: .LBB70_14: # %cond.store7
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 4
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
+; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB70_8
+; RV64ZVE32F-ZVFH-NEXT: j .LBB70_9
+; RV64ZVE32F-ZVFH-NEXT: .LBB70_15: # %cond.store11
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 6
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v10, (a2)
+; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
+; RV64ZVE32F-ZVFH-NEXT: beqz a1, .LBB70_11
+; RV64ZVE32F-ZVFH-NEXT: .LBB70_16: # %cond.store13
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v9
+; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, 255
+; RV64ZVE32F-ZVFH-NEXT: slli a1, a1, 1
+; RV64ZVE32F-ZVFH-NEXT: add a0, a0, a1
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 7
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a0)
+; RV64ZVE32F-ZVFH-NEXT: ret
+;
+; RV32ZVE32F-ZVFHMIN-LABEL: mscatter_baseidx_zext_v8i8_v8f16:
+; RV32ZVE32F-ZVFHMIN: # %bb.0:
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vzext.vf4 v10, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: vadd.vv v10, v10, v10
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v0
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 1
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vadd.vx v10, v10, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB70_9
+; RV32ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 2
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB70_10
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB70_2: # %else2
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 4
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB70_11
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB70_3: # %else4
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 8
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB70_12
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB70_4: # %else6
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 16
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB70_13
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB70_5: # %else8
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 32
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB70_14
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB70_6: # %else10
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 64
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB70_15
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB70_7: # %else12
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, -128
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB70_16
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB70_8: # %else14
+; RV32ZVE32F-ZVFHMIN-NEXT: ret
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB70_9: # %cond.store
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v10
+; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 2
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB70_2
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB70_10: # %cond.store1
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 1
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 1
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 4
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB70_3
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB70_11: # %cond.store3
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 2
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 2
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 8
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB70_4
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB70_12: # %cond.store5
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 3
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 3
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 16
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB70_5
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB70_13: # %cond.store7
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 4
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 4
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v12
+; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 32
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB70_6
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB70_14: # %cond.store9
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 5
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 5
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v12
+; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 64
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB70_7
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB70_15: # %cond.store11
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 6
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 6
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v12
+; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, -128
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB70_8
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB70_16: # %cond.store13
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 7
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: ret
+;
+; RV64ZVE32F-ZVFHMIN-LABEL: mscatter_baseidx_zext_v8i8_v8f16:
+; RV64ZVE32F-ZVFHMIN: # %bb.0:
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v0
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB70_2
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %cond.store
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v8
+; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
+; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_2: # %else
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB70_4
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.3: # %cond.store1
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v9, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v10
+; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
+; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_4: # %else2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v9, 4
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB70_12
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.5: # %else4
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB70_13
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_6: # %else6
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB70_14
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_7: # %else8
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB70_9
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_8: # %cond.store9
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 5
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
+; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_9: # %else10
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB70_15
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.10: # %else12
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB70_16
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_11: # %else14
+; RV64ZVE32F-ZVFHMIN-NEXT: ret
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_12: # %cond.store3
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v11, v8, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v11
+; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
+; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB70_6
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_13: # %cond.store5
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 3
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
+; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB70_7
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_14: # %cond.store7
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 4
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
+; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB70_8
+; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB70_9
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_15: # %cond.store11
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 6
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v10
+; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
+; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB70_11
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_16: # %cond.store13
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, 255
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a1, a1, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a0, a0, a1
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v8
+; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a1
+; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV64ZVE32F-ZVFHMIN-NEXT: ret
+ %eidxs = zext <8 x i8> %idxs to <8 x i16>
+ %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs
+ call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
+ ret void
+}
+
+define void @mscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
+; RV32V-ZVFH-LABEL: mscatter_baseidx_v8f16:
+; RV32V-ZVFH: # %bb.0:
+; RV32V-ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV32V-ZVFH-NEXT: vwadd.vv v10, v9, v9
+; RV32V-ZVFH-NEXT: vsoxei32.v v8, (a0), v10, v0.t
+; RV32V-ZVFH-NEXT: ret
+;
+; RV64V-ZVFH-LABEL: mscatter_baseidx_v8f16:
+; RV64V-ZVFH: # %bb.0:
+; RV64V-ZVFH-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-ZVFH-NEXT: vsext.vf4 v12, v9
+; RV64V-ZVFH-NEXT: vadd.vv v12, v12, v12
+; RV64V-ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV64V-ZVFH-NEXT: vsoxei64.v v8, (a0), v12, v0.t
+; RV64V-ZVFH-NEXT: ret
+;
+; RV32V-ZVFHMIN-LABEL: mscatter_baseidx_v8f16:
+; RV32V-ZVFHMIN: # %bb.0:
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vwadd.vv v10, v9, v9
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v0
+; RV32V-ZVFHMIN-NEXT: andi a2, a1, 1
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vadd.vx v10, v10, a0
+; RV32V-ZVFHMIN-NEXT: bnez a2, .LBB71_9
+; RV32V-ZVFHMIN-NEXT: # %bb.1: # %else
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 2
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB71_10
+; RV32V-ZVFHMIN-NEXT: .LBB71_2: # %else2
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 4
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB71_11
+; RV32V-ZVFHMIN-NEXT: .LBB71_3: # %else4
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 8
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB71_12
+; RV32V-ZVFHMIN-NEXT: .LBB71_4: # %else6
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 16
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB71_13
+; RV32V-ZVFHMIN-NEXT: .LBB71_5: # %else8
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 32
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB71_14
+; RV32V-ZVFHMIN-NEXT: .LBB71_6: # %else10
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 64
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB71_15
+; RV32V-ZVFHMIN-NEXT: .LBB71_7: # %else12
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, -128
+; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB71_16
+; RV32V-ZVFHMIN-NEXT: .LBB71_8: # %else14
+; RV32V-ZVFHMIN-NEXT: ret
+; RV32V-ZVFHMIN-NEXT: .LBB71_9: # %cond.store
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v10
+; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 2
+; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB71_2
+; RV32V-ZVFHMIN-NEXT: .LBB71_10: # %cond.store1
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 1
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 1
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 4
+; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB71_3
+; RV32V-ZVFHMIN-NEXT: .LBB71_11: # %cond.store3
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 2
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 2
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 8
+; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB71_4
+; RV32V-ZVFHMIN-NEXT: .LBB71_12: # %cond.store5
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 3
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 3
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 16
+; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB71_5
+; RV32V-ZVFHMIN-NEXT: .LBB71_13: # %cond.store7
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 4
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 4
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v12
+; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 32
+; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB71_6
+; RV32V-ZVFHMIN-NEXT: .LBB71_14: # %cond.store9
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 5
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 5
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v12
+; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, 64
+; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB71_7
+; RV32V-ZVFHMIN-NEXT: .LBB71_15: # %cond.store11
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 6
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 6
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v12
+; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32V-ZVFHMIN-NEXT: andi a0, a1, -128
+; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB71_8
+; RV32V-ZVFHMIN-NEXT: .LBB71_16: # %cond.store13
+; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 7
+; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32V-ZVFHMIN-NEXT: ret
+;
+; RV64V-ZVFHMIN-LABEL: mscatter_baseidx_v8f16:
+; RV64V-ZVFHMIN: # %bb.0:
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-ZVFHMIN-NEXT: vsext.vf4 v12, v9
+; RV64V-ZVFHMIN-NEXT: vadd.vv v12, v12, v12
+; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v0
+; RV64V-ZVFHMIN-NEXT: andi a2, a1, 1
+; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; RV64V-ZVFHMIN-NEXT: vadd.vx v12, v12, a0
+; RV64V-ZVFHMIN-NEXT: bnez a2, .LBB71_11
+; RV64V-ZVFHMIN-NEXT: # %bb.1: # %else
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 2
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB71_12
+; RV64V-ZVFHMIN-NEXT: .LBB71_2: # %else2
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 4
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB71_13
+; RV64V-ZVFHMIN-NEXT: .LBB71_3: # %else4
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 8
+; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB71_5
+; RV64V-ZVFHMIN-NEXT: .LBB71_4: # %cond.store5
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 3
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v10, v12, 3
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v10
+; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV64V-ZVFHMIN-NEXT: .LBB71_5: # %else6
+; RV64V-ZVFHMIN-NEXT: addi sp, sp, -320
+; RV64V-ZVFHMIN-NEXT: .cfi_def_cfa_offset 320
+; RV64V-ZVFHMIN-NEXT: sd ra, 312(sp) # 8-byte Folded Spill
+; RV64V-ZVFHMIN-NEXT: sd s0, 304(sp) # 8-byte Folded Spill
+; RV64V-ZVFHMIN-NEXT: .cfi_offset ra, -8
+; RV64V-ZVFHMIN-NEXT: .cfi_offset s0, -16
+; RV64V-ZVFHMIN-NEXT: addi s0, sp, 320
+; RV64V-ZVFHMIN-NEXT: .cfi_def_cfa s0, 0
+; RV64V-ZVFHMIN-NEXT: andi sp, sp, -64
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 16
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB71_14
+; RV64V-ZVFHMIN-NEXT: # %bb.6: # %else8
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 32
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB71_15
+; RV64V-ZVFHMIN-NEXT: .LBB71_7: # %else10
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 64
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB71_16
+; RV64V-ZVFHMIN-NEXT: .LBB71_8: # %else12
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, -128
+; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB71_10
+; RV64V-ZVFHMIN-NEXT: .LBB71_9: # %cond.store13
+; RV64V-ZVFHMIN-NEXT: mv a0, sp
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
+; RV64V-ZVFHMIN-NEXT: ld a0, 56(sp)
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v8
+; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a1
+; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV64V-ZVFHMIN-NEXT: .LBB71_10: # %else14
+; RV64V-ZVFHMIN-NEXT: addi sp, s0, -320
+; RV64V-ZVFHMIN-NEXT: ld ra, 312(sp) # 8-byte Folded Reload
+; RV64V-ZVFHMIN-NEXT: ld s0, 304(sp) # 8-byte Folded Reload
+; RV64V-ZVFHMIN-NEXT: addi sp, sp, 320
+; RV64V-ZVFHMIN-NEXT: ret
+; RV64V-ZVFHMIN-NEXT: .LBB71_11: # %cond.store
+; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v12
+; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 2
+; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB71_2
+; RV64V-ZVFHMIN-NEXT: .LBB71_12: # %cond.store1
+; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 1
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v12, 1
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 4
+; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB71_3
+; RV64V-ZVFHMIN-NEXT: .LBB71_13: # %cond.store3
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 2
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m2, ta, ma
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v10, v12, 2
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v10
+; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 8
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB71_4
+; RV64V-ZVFHMIN-NEXT: j .LBB71_5
+; RV64V-ZVFHMIN-NEXT: .LBB71_14: # %cond.store7
+; RV64V-ZVFHMIN-NEXT: addi a0, sp, 192
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
+; RV64V-ZVFHMIN-NEXT: ld a0, 224(sp)
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 4
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a2, v9
+; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a2
+; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 32
+; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB71_7
+; RV64V-ZVFHMIN-NEXT: .LBB71_15: # %cond.store9
+; RV64V-ZVFHMIN-NEXT: addi a0, sp, 128
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
+; RV64V-ZVFHMIN-NEXT: ld a0, 168(sp)
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 5
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a2, v9
+; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a2
+; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, 64
+; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB71_8
+; RV64V-ZVFHMIN-NEXT: .LBB71_16: # %cond.store11
+; RV64V-ZVFHMIN-NEXT: addi a0, sp, 64
+; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
+; RV64V-ZVFHMIN-NEXT: ld a0, 112(sp)
+; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 6
+; RV64V-ZVFHMIN-NEXT: vmv.x.s a2, v9
+; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a2
+; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV64V-ZVFHMIN-NEXT: andi a0, a1, -128
+; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB71_9
+; RV64V-ZVFHMIN-NEXT: j .LBB71_10
+;
+; RV32ZVE32F-ZVFH-LABEL: mscatter_baseidx_v8f16:
+; RV32ZVE32F-ZVFH: # %bb.0:
+; RV32ZVE32F-ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV32ZVE32F-ZVFH-NEXT: vwadd.vv v10, v9, v9
+; RV32ZVE32F-ZVFH-NEXT: vsoxei32.v v8, (a0), v10, v0.t
+; RV32ZVE32F-ZVFH-NEXT: ret
+;
+; RV64ZVE32F-ZVFH-LABEL: mscatter_baseidx_v8f16:
+; RV64ZVE32F-ZVFH: # %bb.0:
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v0
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 1
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB71_2
+; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %cond.store
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a2)
+; RV64ZVE32F-ZVFH-NEXT: .LBB71_2: # %else
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 2
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB71_4
+; RV64ZVE32F-ZVFH-NEXT: # %bb.3: # %cond.store1
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v9, 1
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 1
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v10, (a2)
+; RV64ZVE32F-ZVFH-NEXT: .LBB71_4: # %else2
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e16, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v9, 4
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 2
+; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB71_12
+; RV64ZVE32F-ZVFH-NEXT: # %bb.5: # %else4
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
+; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB71_13
+; RV64ZVE32F-ZVFH-NEXT: .LBB71_6: # %else6
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
+; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB71_14
+; RV64ZVE32F-ZVFH-NEXT: .LBB71_7: # %else8
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB71_9
+; RV64ZVE32F-ZVFH-NEXT: .LBB71_8: # %cond.store9
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v10, 1
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 5
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
+; RV64ZVE32F-ZVFH-NEXT: .LBB71_9: # %else10
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v10, 2
+; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB71_15
+; RV64ZVE32F-ZVFH-NEXT: # %bb.10: # %else12
+; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
+; RV64ZVE32F-ZVFH-NEXT: bnez a1, .LBB71_16
+; RV64ZVE32F-ZVFH-NEXT: .LBB71_11: # %else14
+; RV64ZVE32F-ZVFH-NEXT: ret
+; RV64ZVE32F-ZVFH-NEXT: .LBB71_12: # %cond.store3
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v11, v8, 2
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v11, (a2)
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB71_6
+; RV64ZVE32F-ZVFH-NEXT: .LBB71_13: # %cond.store5
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 3
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
+; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB71_7
+; RV64ZVE32F-ZVFH-NEXT: .LBB71_14: # %cond.store7
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 4
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
+; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
+; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB71_8
+; RV64ZVE32F-ZVFH-NEXT: j .LBB71_9
+; RV64ZVE32F-ZVFH-NEXT: .LBB71_15: # %cond.store11
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 6
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v10, (a2)
+; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
+; RV64ZVE32F-ZVFH-NEXT: beqz a1, .LBB71_11
+; RV64ZVE32F-ZVFH-NEXT: .LBB71_16: # %cond.store13
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v9
+; RV64ZVE32F-ZVFH-NEXT: slli a1, a1, 1
+; RV64ZVE32F-ZVFH-NEXT: add a0, a0, a1
+; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 7
+; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a0)
+; RV64ZVE32F-ZVFH-NEXT: ret
+;
+; RV32ZVE32F-ZVFHMIN-LABEL: mscatter_baseidx_v8f16:
+; RV32ZVE32F-ZVFHMIN: # %bb.0:
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vwadd.vv v10, v9, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v0
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 1
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vadd.vx v10, v10, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB71_9
+; RV32ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 2
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB71_10
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB71_2: # %else2
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 4
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB71_11
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB71_3: # %else4
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 8
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB71_12
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB71_4: # %else6
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 16
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB71_13
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB71_5: # %else8
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 32
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB71_14
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB71_6: # %else10
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 64
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB71_15
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB71_7: # %else12
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, -128
+; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB71_16
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB71_8: # %else14
+; RV32ZVE32F-ZVFHMIN-NEXT: ret
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB71_9: # %cond.store
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v10
+; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 2
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB71_2
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB71_10: # %cond.store1
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 1
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 1
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 4
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB71_3
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB71_11: # %cond.store3
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 2
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 2
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 8
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB71_4
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB71_12: # %cond.store5
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 3
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 3
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 16
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB71_5
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB71_13: # %cond.store7
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 4
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 4
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v12
+; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 32
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB71_6
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB71_14: # %cond.store9
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 5
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 5
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v12
+; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 64
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB71_7
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB71_15: # %cond.store11
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 6
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
+; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 6
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v12
+; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, -128
+; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB71_8
+; RV32ZVE32F-ZVFHMIN-NEXT: .LBB71_16: # %cond.store13
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
+; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 7
+; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
+; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV32ZVE32F-ZVFHMIN-NEXT: ret
+;
+; RV64ZVE32F-ZVFHMIN-LABEL: mscatter_baseidx_v8f16:
+; RV64ZVE32F-ZVFHMIN: # %bb.0:
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v0
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB71_2
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %cond.store
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v8
+; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
+; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_2: # %else
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB71_4
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.3: # %cond.store1
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v9, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v10
+; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
+; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_4: # %else2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v9, 4
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB71_12
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.5: # %else4
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB71_13
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_6: # %else6
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB71_14
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_7: # %else8
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB71_9
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_8: # %cond.store9
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 5
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
+; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_9: # %else10
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB71_15
+; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.10: # %else12
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB71_16
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_11: # %else14
+; RV64ZVE32F-ZVFHMIN-NEXT: ret
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_12: # %cond.store3
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v11, v8, 2
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v11
+; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
+; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB71_6
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_13: # %cond.store5
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 3
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
+; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB71_7
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_14: # %cond.store7
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 4
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
+; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
+; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB71_8
+; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB71_9
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_15: # %cond.store11
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 6
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v10
+; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
+; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
+; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
+; RV64ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB71_11
+; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_16: # %cond.store13
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v9
+; RV64ZVE32F-ZVFHMIN-NEXT: slli a1, a1, 1
+; RV64ZVE32F-ZVFHMIN-NEXT: add a0, a0, a1
+; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7
+; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v8
+; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a1
+; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
+; RV64ZVE32F-ZVFHMIN-NEXT: ret
+ %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %idxs
+ call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
+ ret void
+}
+
+declare void @llvm.masked.scatter.v1f32.v1p0(<1 x float>, <1 x ptr>, i32, <1 x i1>)
+
+define void @mscatter_v1f32(<1 x float> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
+; RV32V-LABEL: mscatter_v1f32:
+; RV32V: # %bb.0:
+; RV32V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
+; RV32V-NEXT: ret
+;
+; RV64V-LABEL: mscatter_v1f32:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t
+; RV64V-NEXT: ret
+;
+; RV32ZVE32F-LABEL: mscatter_v1f32:
+; RV32ZVE32F: # %bb.0:
+; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t
+; RV32ZVE32F-NEXT: ret
+;
+; RV64ZVE32F-LABEL: mscatter_v1f32:
+; RV64ZVE32F: # %bb.0:
+; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vfirst.m a1, v0
-; RV64ZVE32F-NEXT: bnez a1, .LBB62_2
+; RV64ZVE32F-NEXT: bnez a1, .LBB72_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a0)
-; RV64ZVE32F-NEXT: .LBB62_2: # %else
+; RV64ZVE32F-NEXT: .LBB72_2: # %else
; RV64ZVE32F-NEXT: ret
call void @llvm.masked.scatter.v1f32.v1p0(<1 x float> %val, <1 x ptr> %ptrs, i32 4, <1 x i1> %m)
ret void
@@ -6740,11 +12143,11 @@ define void @mscatter_v2f32(<2 x float> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT: ret
;
-; RV64-LABEL: mscatter_v2f32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_v2f32:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t
+; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_v2f32:
; RV32ZVE32F: # %bb.0:
@@ -6757,18 +12160,18 @@ define void @mscatter_v2f32(<2 x float> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
-; RV64ZVE32F-NEXT: bnez a3, .LBB63_3
+; RV64ZVE32F-NEXT: bnez a3, .LBB73_3
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB63_4
-; RV64ZVE32F-NEXT: .LBB63_2: # %else2
+; RV64ZVE32F-NEXT: bnez a2, .LBB73_4
+; RV64ZVE32F-NEXT: .LBB73_2: # %else2
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB63_3: # %cond.store
+; RV64ZVE32F-NEXT: .LBB73_3: # %cond.store
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a0)
; RV64ZVE32F-NEXT: andi a2, a2, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB63_2
-; RV64ZVE32F-NEXT: .LBB63_4: # %cond.store1
+; RV64ZVE32F-NEXT: beqz a2, .LBB73_2
+; RV64ZVE32F-NEXT: .LBB73_4: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vse32.v v8, (a1)
@@ -6786,11 +12189,11 @@ define void @mscatter_v4f32(<4 x float> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT: ret
;
-; RV64-LABEL: mscatter_v4f32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_v4f32:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64V-NEXT: vsoxei64.v v8, (zero), v10, v0.t
+; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_v4f32:
; RV64ZVE32F: # %bb.0:
@@ -6800,37 +12203,37 @@ define void @mscatter_v4f32(<4 x float> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v0
; RV64ZVE32F-NEXT: andi a5, a3, 1
-; RV64ZVE32F-NEXT: bnez a5, .LBB64_5
+; RV64ZVE32F-NEXT: bnez a5, .LBB74_5
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a0, a3, 2
-; RV64ZVE32F-NEXT: bnez a0, .LBB64_6
-; RV64ZVE32F-NEXT: .LBB64_2: # %else2
+; RV64ZVE32F-NEXT: bnez a0, .LBB74_6
+; RV64ZVE32F-NEXT: .LBB74_2: # %else2
; RV64ZVE32F-NEXT: andi a0, a3, 4
-; RV64ZVE32F-NEXT: bnez a0, .LBB64_7
-; RV64ZVE32F-NEXT: .LBB64_3: # %else4
+; RV64ZVE32F-NEXT: bnez a0, .LBB74_7
+; RV64ZVE32F-NEXT: .LBB74_3: # %else4
; RV64ZVE32F-NEXT: andi a3, a3, 8
-; RV64ZVE32F-NEXT: bnez a3, .LBB64_8
-; RV64ZVE32F-NEXT: .LBB64_4: # %else6
+; RV64ZVE32F-NEXT: bnez a3, .LBB74_8
+; RV64ZVE32F-NEXT: .LBB74_4: # %else6
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB64_5: # %cond.store
+; RV64ZVE32F-NEXT: .LBB74_5: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a0)
; RV64ZVE32F-NEXT: andi a0, a3, 2
-; RV64ZVE32F-NEXT: beqz a0, .LBB64_2
-; RV64ZVE32F-NEXT: .LBB64_6: # %cond.store1
+; RV64ZVE32F-NEXT: beqz a0, .LBB74_2
+; RV64ZVE32F-NEXT: .LBB74_6: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vse32.v v9, (a4)
; RV64ZVE32F-NEXT: andi a0, a3, 4
-; RV64ZVE32F-NEXT: beqz a0, .LBB64_3
-; RV64ZVE32F-NEXT: .LBB64_7: # %cond.store3
+; RV64ZVE32F-NEXT: beqz a0, .LBB74_3
+; RV64ZVE32F-NEXT: .LBB74_7: # %cond.store3
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV64ZVE32F-NEXT: vse32.v v9, (a2)
; RV64ZVE32F-NEXT: andi a3, a3, 8
-; RV64ZVE32F-NEXT: beqz a3, .LBB64_4
-; RV64ZVE32F-NEXT: .LBB64_8: # %cond.store5
+; RV64ZVE32F-NEXT: beqz a3, .LBB74_4
+; RV64ZVE32F-NEXT: .LBB74_8: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3
; RV64ZVE32F-NEXT: vse32.v v8, (a1)
@@ -6846,11 +12249,11 @@ define void @mscatter_truemask_v4f32(<4 x float> %val, <4 x ptr> %ptrs) {
; RV32-NEXT: vsoxei32.v v8, (zero), v9
; RV32-NEXT: ret
;
-; RV64-LABEL: mscatter_truemask_v4f32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT: vsoxei64.v v8, (zero), v10
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_truemask_v4f32:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64V-NEXT: vsoxei64.v v8, (zero), v10
+; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_truemask_v4f32:
; RV64ZVE32F: # %bb.0:
@@ -6888,11 +12291,11 @@ define void @mscatter_v8f32(<8 x float> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT: ret
;
-; RV64-LABEL: mscatter_v8f32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_v8f32:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64V-NEXT: vsoxei64.v v8, (zero), v12, v0.t
+; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_v8f32:
; RV64ZVE32F: # %bb.0:
@@ -6906,76 +12309,76 @@ define void @mscatter_v8f32(<8 x float> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a4, v0
; RV64ZVE32F-NEXT: andi t1, a4, 1
-; RV64ZVE32F-NEXT: bnez t1, .LBB67_9
+; RV64ZVE32F-NEXT: bnez t1, .LBB77_9
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a0, a4, 2
-; RV64ZVE32F-NEXT: bnez a0, .LBB67_10
-; RV64ZVE32F-NEXT: .LBB67_2: # %else2
+; RV64ZVE32F-NEXT: bnez a0, .LBB77_10
+; RV64ZVE32F-NEXT: .LBB77_2: # %else2
; RV64ZVE32F-NEXT: andi a0, a4, 4
-; RV64ZVE32F-NEXT: bnez a0, .LBB67_11
-; RV64ZVE32F-NEXT: .LBB67_3: # %else4
+; RV64ZVE32F-NEXT: bnez a0, .LBB77_11
+; RV64ZVE32F-NEXT: .LBB77_3: # %else4
; RV64ZVE32F-NEXT: andi a0, a4, 8
-; RV64ZVE32F-NEXT: bnez a0, .LBB67_12
-; RV64ZVE32F-NEXT: .LBB67_4: # %else6
+; RV64ZVE32F-NEXT: bnez a0, .LBB77_12
+; RV64ZVE32F-NEXT: .LBB77_4: # %else6
; RV64ZVE32F-NEXT: andi a0, a4, 16
-; RV64ZVE32F-NEXT: bnez a0, .LBB67_13
-; RV64ZVE32F-NEXT: .LBB67_5: # %else8
+; RV64ZVE32F-NEXT: bnez a0, .LBB77_13
+; RV64ZVE32F-NEXT: .LBB77_5: # %else8
; RV64ZVE32F-NEXT: andi a0, a4, 32
-; RV64ZVE32F-NEXT: bnez a0, .LBB67_14
-; RV64ZVE32F-NEXT: .LBB67_6: # %else10
+; RV64ZVE32F-NEXT: bnez a0, .LBB77_14
+; RV64ZVE32F-NEXT: .LBB77_6: # %else10
; RV64ZVE32F-NEXT: andi a0, a4, 64
-; RV64ZVE32F-NEXT: bnez a0, .LBB67_15
-; RV64ZVE32F-NEXT: .LBB67_7: # %else12
+; RV64ZVE32F-NEXT: bnez a0, .LBB77_15
+; RV64ZVE32F-NEXT: .LBB77_7: # %else12
; RV64ZVE32F-NEXT: andi a0, a4, -128
-; RV64ZVE32F-NEXT: bnez a0, .LBB67_16
-; RV64ZVE32F-NEXT: .LBB67_8: # %else14
+; RV64ZVE32F-NEXT: bnez a0, .LBB77_16
+; RV64ZVE32F-NEXT: .LBB77_8: # %else14
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB67_9: # %cond.store
+; RV64ZVE32F-NEXT: .LBB77_9: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a0)
; RV64ZVE32F-NEXT: andi a0, a4, 2
-; RV64ZVE32F-NEXT: beqz a0, .LBB67_2
-; RV64ZVE32F-NEXT: .LBB67_10: # %cond.store1
+; RV64ZVE32F-NEXT: beqz a0, .LBB77_2
+; RV64ZVE32F-NEXT: .LBB77_10: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vse32.v v10, (t0)
; RV64ZVE32F-NEXT: andi a0, a4, 4
-; RV64ZVE32F-NEXT: beqz a0, .LBB67_3
-; RV64ZVE32F-NEXT: .LBB67_11: # %cond.store3
+; RV64ZVE32F-NEXT: beqz a0, .LBB77_3
+; RV64ZVE32F-NEXT: .LBB77_11: # %cond.store3
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV64ZVE32F-NEXT: vse32.v v10, (a7)
; RV64ZVE32F-NEXT: andi a0, a4, 8
-; RV64ZVE32F-NEXT: beqz a0, .LBB67_4
-; RV64ZVE32F-NEXT: .LBB67_12: # %cond.store5
+; RV64ZVE32F-NEXT: beqz a0, .LBB77_4
+; RV64ZVE32F-NEXT: .LBB77_12: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV64ZVE32F-NEXT: vse32.v v10, (a6)
; RV64ZVE32F-NEXT: andi a0, a4, 16
-; RV64ZVE32F-NEXT: beqz a0, .LBB67_5
-; RV64ZVE32F-NEXT: .LBB67_13: # %cond.store7
+; RV64ZVE32F-NEXT: beqz a0, .LBB77_5
+; RV64ZVE32F-NEXT: .LBB77_13: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v10, (a5)
; RV64ZVE32F-NEXT: andi a0, a4, 32
-; RV64ZVE32F-NEXT: beqz a0, .LBB67_6
-; RV64ZVE32F-NEXT: .LBB67_14: # %cond.store9
+; RV64ZVE32F-NEXT: beqz a0, .LBB77_6
+; RV64ZVE32F-NEXT: .LBB77_14: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v10, (a3)
; RV64ZVE32F-NEXT: andi a0, a4, 64
-; RV64ZVE32F-NEXT: beqz a0, .LBB67_7
-; RV64ZVE32F-NEXT: .LBB67_15: # %cond.store11
+; RV64ZVE32F-NEXT: beqz a0, .LBB77_7
+; RV64ZVE32F-NEXT: .LBB77_15: # %cond.store11
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v10, (a2)
; RV64ZVE32F-NEXT: andi a0, a4, -128
-; RV64ZVE32F-NEXT: beqz a0, .LBB67_8
-; RV64ZVE32F-NEXT: .LBB67_16: # %cond.store13
+; RV64ZVE32F-NEXT: beqz a0, .LBB77_8
+; RV64ZVE32F-NEXT: .LBB77_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
@@ -6994,30 +12397,30 @@ define void @mscatter_baseidx_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> %
; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
-; RV64-LABEL: mscatter_baseidx_v8i8_v8f32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vsext.vf8 v12, v10
-; RV64-NEXT: vsll.vi v12, v12, 2
-; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_baseidx_v8i8_v8f32:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vsext.vf8 v12, v10
+; RV64V-NEXT: vsll.vi v12, v12, 2
+; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
+; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_v8i8_v8f32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
-; RV64ZVE32F-NEXT: beqz a2, .LBB68_2
+; RV64ZVE32F-NEXT: beqz a2, .LBB78_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a2)
-; RV64ZVE32F-NEXT: .LBB68_2: # %else
+; RV64ZVE32F-NEXT: .LBB78_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB68_4
+; RV64ZVE32F-NEXT: beqz a2, .LBB78_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
@@ -7027,23 +12430,23 @@ define void @mscatter_baseidx_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> %
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1
; RV64ZVE32F-NEXT: vse32.v v11, (a2)
-; RV64ZVE32F-NEXT: .LBB68_4: # %else2
+; RV64ZVE32F-NEXT: .LBB78_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB68_12
+; RV64ZVE32F-NEXT: bnez a2, .LBB78_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB68_13
-; RV64ZVE32F-NEXT: .LBB68_6: # %else6
+; RV64ZVE32F-NEXT: bnez a2, .LBB78_13
+; RV64ZVE32F-NEXT: .LBB78_6: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB68_14
-; RV64ZVE32F-NEXT: .LBB68_7: # %else8
+; RV64ZVE32F-NEXT: bnez a2, .LBB78_14
+; RV64ZVE32F-NEXT: .LBB78_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB68_9
-; RV64ZVE32F-NEXT: .LBB68_8: # %cond.store9
+; RV64ZVE32F-NEXT: beqz a2, .LBB78_9
+; RV64ZVE32F-NEXT: .LBB78_8: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -7053,17 +12456,17 @@ define void @mscatter_baseidx_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> %
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: .LBB68_9: # %else10
+; RV64ZVE32F-NEXT: .LBB78_9: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB68_15
+; RV64ZVE32F-NEXT: bnez a2, .LBB78_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: bnez a1, .LBB68_16
-; RV64ZVE32F-NEXT: .LBB68_11: # %else14
+; RV64ZVE32F-NEXT: bnez a1, .LBB78_16
+; RV64ZVE32F-NEXT: .LBB78_11: # %else14
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB68_12: # %cond.store3
+; RV64ZVE32F-NEXT: .LBB78_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -7072,8 +12475,8 @@ define void @mscatter_baseidx_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> %
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB68_6
-; RV64ZVE32F-NEXT: .LBB68_13: # %cond.store5
+; RV64ZVE32F-NEXT: beqz a2, .LBB78_6
+; RV64ZVE32F-NEXT: .LBB78_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -7083,8 +12486,8 @@ define void @mscatter_baseidx_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> %
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV64ZVE32F-NEXT: vse32.v v10, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB68_7
-; RV64ZVE32F-NEXT: .LBB68_14: # %cond.store7
+; RV64ZVE32F-NEXT: beqz a2, .LBB78_7
+; RV64ZVE32F-NEXT: .LBB78_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
@@ -7094,9 +12497,9 @@ define void @mscatter_baseidx_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> %
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB68_8
-; RV64ZVE32F-NEXT: j .LBB68_9
-; RV64ZVE32F-NEXT: .LBB68_15: # %cond.store11
+; RV64ZVE32F-NEXT: bnez a2, .LBB78_8
+; RV64ZVE32F-NEXT: j .LBB78_9
+; RV64ZVE32F-NEXT: .LBB78_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -7105,8 +12508,8 @@ define void @mscatter_baseidx_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> %
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB68_11
-; RV64ZVE32F-NEXT: .LBB68_16: # %cond.store13
+; RV64ZVE32F-NEXT: beqz a1, .LBB78_11
+; RV64ZVE32F-NEXT: .LBB78_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v10
@@ -7131,30 +12534,30 @@ define void @mscatter_baseidx_sext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x
; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
-; RV64-LABEL: mscatter_baseidx_sext_v8i8_v8f32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vsext.vf8 v12, v10
-; RV64-NEXT: vsll.vi v12, v12, 2
-; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_baseidx_sext_v8i8_v8f32:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vsext.vf8 v12, v10
+; RV64V-NEXT: vsll.vi v12, v12, 2
+; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
+; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8f32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
-; RV64ZVE32F-NEXT: beqz a2, .LBB69_2
+; RV64ZVE32F-NEXT: beqz a2, .LBB79_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a2)
-; RV64ZVE32F-NEXT: .LBB69_2: # %else
+; RV64ZVE32F-NEXT: .LBB79_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB69_4
+; RV64ZVE32F-NEXT: beqz a2, .LBB79_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
@@ -7164,23 +12567,23 @@ define void @mscatter_baseidx_sext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1
; RV64ZVE32F-NEXT: vse32.v v11, (a2)
-; RV64ZVE32F-NEXT: .LBB69_4: # %else2
+; RV64ZVE32F-NEXT: .LBB79_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB69_12
+; RV64ZVE32F-NEXT: bnez a2, .LBB79_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB69_13
-; RV64ZVE32F-NEXT: .LBB69_6: # %else6
+; RV64ZVE32F-NEXT: bnez a2, .LBB79_13
+; RV64ZVE32F-NEXT: .LBB79_6: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB69_14
-; RV64ZVE32F-NEXT: .LBB69_7: # %else8
+; RV64ZVE32F-NEXT: bnez a2, .LBB79_14
+; RV64ZVE32F-NEXT: .LBB79_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB69_9
-; RV64ZVE32F-NEXT: .LBB69_8: # %cond.store9
+; RV64ZVE32F-NEXT: beqz a2, .LBB79_9
+; RV64ZVE32F-NEXT: .LBB79_8: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -7190,17 +12593,17 @@ define void @mscatter_baseidx_sext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: .LBB69_9: # %else10
+; RV64ZVE32F-NEXT: .LBB79_9: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB69_15
+; RV64ZVE32F-NEXT: bnez a2, .LBB79_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: bnez a1, .LBB69_16
-; RV64ZVE32F-NEXT: .LBB69_11: # %else14
+; RV64ZVE32F-NEXT: bnez a1, .LBB79_16
+; RV64ZVE32F-NEXT: .LBB79_11: # %else14
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB69_12: # %cond.store3
+; RV64ZVE32F-NEXT: .LBB79_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -7209,8 +12612,8 @@ define void @mscatter_baseidx_sext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB69_6
-; RV64ZVE32F-NEXT: .LBB69_13: # %cond.store5
+; RV64ZVE32F-NEXT: beqz a2, .LBB79_6
+; RV64ZVE32F-NEXT: .LBB79_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -7220,8 +12623,8 @@ define void @mscatter_baseidx_sext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV64ZVE32F-NEXT: vse32.v v10, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB69_7
-; RV64ZVE32F-NEXT: .LBB69_14: # %cond.store7
+; RV64ZVE32F-NEXT: beqz a2, .LBB79_7
+; RV64ZVE32F-NEXT: .LBB79_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
@@ -7231,9 +12634,9 @@ define void @mscatter_baseidx_sext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB69_8
-; RV64ZVE32F-NEXT: j .LBB69_9
-; RV64ZVE32F-NEXT: .LBB69_15: # %cond.store11
+; RV64ZVE32F-NEXT: bnez a2, .LBB79_8
+; RV64ZVE32F-NEXT: j .LBB79_9
+; RV64ZVE32F-NEXT: .LBB79_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -7242,8 +12645,8 @@ define void @mscatter_baseidx_sext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB69_11
-; RV64ZVE32F-NEXT: .LBB69_16: # %cond.store13
+; RV64ZVE32F-NEXT: beqz a1, .LBB79_11
+; RV64ZVE32F-NEXT: .LBB79_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v10
@@ -7270,21 +12673,21 @@ define void @mscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x
; RV32-NEXT: vsoxei16.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
-; RV64-LABEL: mscatter_baseidx_zext_v8i8_v8f32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT: vzext.vf2 v11, v10
-; RV64-NEXT: vsll.vi v10, v11, 2
-; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT: vsoxei16.v v8, (a0), v10, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_baseidx_zext_v8i8_v8f32:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64V-NEXT: vzext.vf2 v11, v10
+; RV64V-NEXT: vsll.vi v10, v11, 2
+; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV64V-NEXT: vsoxei16.v v8, (a0), v10, v0.t
+; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8f32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
-; RV64ZVE32F-NEXT: beqz a2, .LBB70_2
+; RV64ZVE32F-NEXT: beqz a2, .LBB80_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: andi a2, a2, 255
@@ -7292,9 +12695,9 @@ define void @mscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a2)
-; RV64ZVE32F-NEXT: .LBB70_2: # %else
+; RV64ZVE32F-NEXT: .LBB80_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB70_4
+; RV64ZVE32F-NEXT: beqz a2, .LBB80_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
@@ -7305,23 +12708,23 @@ define void @mscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1
; RV64ZVE32F-NEXT: vse32.v v11, (a2)
-; RV64ZVE32F-NEXT: .LBB70_4: # %else2
+; RV64ZVE32F-NEXT: .LBB80_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB70_12
+; RV64ZVE32F-NEXT: bnez a2, .LBB80_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB70_13
-; RV64ZVE32F-NEXT: .LBB70_6: # %else6
+; RV64ZVE32F-NEXT: bnez a2, .LBB80_13
+; RV64ZVE32F-NEXT: .LBB80_6: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB70_14
-; RV64ZVE32F-NEXT: .LBB70_7: # %else8
+; RV64ZVE32F-NEXT: bnez a2, .LBB80_14
+; RV64ZVE32F-NEXT: .LBB80_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB70_9
-; RV64ZVE32F-NEXT: .LBB70_8: # %cond.store9
+; RV64ZVE32F-NEXT: beqz a2, .LBB80_9
+; RV64ZVE32F-NEXT: .LBB80_8: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -7332,17 +12735,17 @@ define void @mscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: .LBB70_9: # %else10
+; RV64ZVE32F-NEXT: .LBB80_9: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB70_15
+; RV64ZVE32F-NEXT: bnez a2, .LBB80_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: bnez a1, .LBB70_16
-; RV64ZVE32F-NEXT: .LBB70_11: # %else14
+; RV64ZVE32F-NEXT: bnez a1, .LBB80_16
+; RV64ZVE32F-NEXT: .LBB80_11: # %else14
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB70_12: # %cond.store3
+; RV64ZVE32F-NEXT: .LBB80_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 2
@@ -7352,8 +12755,8 @@ define void @mscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB70_6
-; RV64ZVE32F-NEXT: .LBB70_13: # %cond.store5
+; RV64ZVE32F-NEXT: beqz a2, .LBB80_6
+; RV64ZVE32F-NEXT: .LBB80_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -7364,8 +12767,8 @@ define void @mscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV64ZVE32F-NEXT: vse32.v v10, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB70_7
-; RV64ZVE32F-NEXT: .LBB70_14: # %cond.store7
+; RV64ZVE32F-NEXT: beqz a2, .LBB80_7
+; RV64ZVE32F-NEXT: .LBB80_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: andi a2, a2, 255
@@ -7376,9 +12779,9 @@ define void @mscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB70_8
-; RV64ZVE32F-NEXT: j .LBB70_9
-; RV64ZVE32F-NEXT: .LBB70_15: # %cond.store11
+; RV64ZVE32F-NEXT: bnez a2, .LBB80_8
+; RV64ZVE32F-NEXT: j .LBB80_9
+; RV64ZVE32F-NEXT: .LBB80_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 2
@@ -7388,8 +12791,8 @@ define void @mscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB70_11
-; RV64ZVE32F-NEXT: .LBB70_16: # %cond.store13
+; RV64ZVE32F-NEXT: beqz a1, .LBB80_11
+; RV64ZVE32F-NEXT: .LBB80_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v10
@@ -7416,21 +12819,21 @@ define void @mscatter_baseidx_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16>
; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
-; RV64-LABEL: mscatter_baseidx_v8i16_v8f32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vsext.vf4 v12, v10
-; RV64-NEXT: vsll.vi v12, v12, 2
-; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_baseidx_v8i16_v8f32:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vsext.vf4 v12, v10
+; RV64V-NEXT: vsll.vi v12, v12, 2
+; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
+; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_v8i16_v8f32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
-; RV64ZVE32F-NEXT: beqz a2, .LBB71_2
+; RV64ZVE32F-NEXT: beqz a2, .LBB81_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -7438,9 +12841,9 @@ define void @mscatter_baseidx_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16>
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a2)
-; RV64ZVE32F-NEXT: .LBB71_2: # %else
+; RV64ZVE32F-NEXT: .LBB81_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB71_4
+; RV64ZVE32F-NEXT: beqz a2, .LBB81_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
@@ -7450,23 +12853,23 @@ define void @mscatter_baseidx_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16>
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1
; RV64ZVE32F-NEXT: vse32.v v11, (a2)
-; RV64ZVE32F-NEXT: .LBB71_4: # %else2
+; RV64ZVE32F-NEXT: .LBB81_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB71_12
+; RV64ZVE32F-NEXT: bnez a2, .LBB81_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB71_13
-; RV64ZVE32F-NEXT: .LBB71_6: # %else6
+; RV64ZVE32F-NEXT: bnez a2, .LBB81_13
+; RV64ZVE32F-NEXT: .LBB81_6: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB71_14
-; RV64ZVE32F-NEXT: .LBB71_7: # %else8
+; RV64ZVE32F-NEXT: bnez a2, .LBB81_14
+; RV64ZVE32F-NEXT: .LBB81_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB71_9
-; RV64ZVE32F-NEXT: .LBB71_8: # %cond.store9
+; RV64ZVE32F-NEXT: beqz a2, .LBB81_9
+; RV64ZVE32F-NEXT: .LBB81_8: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -7476,17 +12879,17 @@ define void @mscatter_baseidx_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16>
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: .LBB71_9: # %else10
+; RV64ZVE32F-NEXT: .LBB81_9: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB71_15
+; RV64ZVE32F-NEXT: bnez a2, .LBB81_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: bnez a1, .LBB71_16
-; RV64ZVE32F-NEXT: .LBB71_11: # %else14
+; RV64ZVE32F-NEXT: bnez a1, .LBB81_16
+; RV64ZVE32F-NEXT: .LBB81_11: # %else14
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB71_12: # %cond.store3
+; RV64ZVE32F-NEXT: .LBB81_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -7495,8 +12898,8 @@ define void @mscatter_baseidx_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16>
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB71_6
-; RV64ZVE32F-NEXT: .LBB71_13: # %cond.store5
+; RV64ZVE32F-NEXT: beqz a2, .LBB81_6
+; RV64ZVE32F-NEXT: .LBB81_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -7506,8 +12909,8 @@ define void @mscatter_baseidx_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16>
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV64ZVE32F-NEXT: vse32.v v10, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB71_7
-; RV64ZVE32F-NEXT: .LBB71_14: # %cond.store7
+; RV64ZVE32F-NEXT: beqz a2, .LBB81_7
+; RV64ZVE32F-NEXT: .LBB81_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
@@ -7517,9 +12920,9 @@ define void @mscatter_baseidx_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16>
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB71_8
-; RV64ZVE32F-NEXT: j .LBB71_9
-; RV64ZVE32F-NEXT: .LBB71_15: # %cond.store11
+; RV64ZVE32F-NEXT: bnez a2, .LBB81_8
+; RV64ZVE32F-NEXT: j .LBB81_9
+; RV64ZVE32F-NEXT: .LBB81_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -7528,8 +12931,8 @@ define void @mscatter_baseidx_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16>
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB71_11
-; RV64ZVE32F-NEXT: .LBB71_16: # %cond.store13
+; RV64ZVE32F-NEXT: beqz a1, .LBB81_11
+; RV64ZVE32F-NEXT: .LBB81_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v10
@@ -7554,21 +12957,21 @@ define void @mscatter_baseidx_sext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x
; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
-; RV64-LABEL: mscatter_baseidx_sext_v8i16_v8f32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vsext.vf4 v12, v10
-; RV64-NEXT: vsll.vi v12, v12, 2
-; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_baseidx_sext_v8i16_v8f32:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vsext.vf4 v12, v10
+; RV64V-NEXT: vsll.vi v12, v12, 2
+; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
+; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i16_v8f32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
-; RV64ZVE32F-NEXT: beqz a2, .LBB72_2
+; RV64ZVE32F-NEXT: beqz a2, .LBB82_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -7576,9 +12979,9 @@ define void @mscatter_baseidx_sext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a2)
-; RV64ZVE32F-NEXT: .LBB72_2: # %else
+; RV64ZVE32F-NEXT: .LBB82_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB72_4
+; RV64ZVE32F-NEXT: beqz a2, .LBB82_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
@@ -7588,23 +12991,23 @@ define void @mscatter_baseidx_sext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1
; RV64ZVE32F-NEXT: vse32.v v11, (a2)
-; RV64ZVE32F-NEXT: .LBB72_4: # %else2
+; RV64ZVE32F-NEXT: .LBB82_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB72_12
+; RV64ZVE32F-NEXT: bnez a2, .LBB82_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB72_13
-; RV64ZVE32F-NEXT: .LBB72_6: # %else6
+; RV64ZVE32F-NEXT: bnez a2, .LBB82_13
+; RV64ZVE32F-NEXT: .LBB82_6: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB72_14
-; RV64ZVE32F-NEXT: .LBB72_7: # %else8
+; RV64ZVE32F-NEXT: bnez a2, .LBB82_14
+; RV64ZVE32F-NEXT: .LBB82_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB72_9
-; RV64ZVE32F-NEXT: .LBB72_8: # %cond.store9
+; RV64ZVE32F-NEXT: beqz a2, .LBB82_9
+; RV64ZVE32F-NEXT: .LBB82_8: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -7614,17 +13017,17 @@ define void @mscatter_baseidx_sext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: .LBB72_9: # %else10
+; RV64ZVE32F-NEXT: .LBB82_9: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB72_15
+; RV64ZVE32F-NEXT: bnez a2, .LBB82_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: bnez a1, .LBB72_16
-; RV64ZVE32F-NEXT: .LBB72_11: # %else14
+; RV64ZVE32F-NEXT: bnez a1, .LBB82_16
+; RV64ZVE32F-NEXT: .LBB82_11: # %else14
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB72_12: # %cond.store3
+; RV64ZVE32F-NEXT: .LBB82_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -7633,8 +13036,8 @@ define void @mscatter_baseidx_sext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB72_6
-; RV64ZVE32F-NEXT: .LBB72_13: # %cond.store5
+; RV64ZVE32F-NEXT: beqz a2, .LBB82_6
+; RV64ZVE32F-NEXT: .LBB82_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -7644,8 +13047,8 @@ define void @mscatter_baseidx_sext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV64ZVE32F-NEXT: vse32.v v10, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB72_7
-; RV64ZVE32F-NEXT: .LBB72_14: # %cond.store7
+; RV64ZVE32F-NEXT: beqz a2, .LBB82_7
+; RV64ZVE32F-NEXT: .LBB82_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
@@ -7655,9 +13058,9 @@ define void @mscatter_baseidx_sext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB72_8
-; RV64ZVE32F-NEXT: j .LBB72_9
-; RV64ZVE32F-NEXT: .LBB72_15: # %cond.store11
+; RV64ZVE32F-NEXT: bnez a2, .LBB82_8
+; RV64ZVE32F-NEXT: j .LBB82_9
+; RV64ZVE32F-NEXT: .LBB82_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -7666,8 +13069,8 @@ define void @mscatter_baseidx_sext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB72_11
-; RV64ZVE32F-NEXT: .LBB72_16: # %cond.store13
+; RV64ZVE32F-NEXT: beqz a1, .LBB82_11
+; RV64ZVE32F-NEXT: .LBB82_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v10
@@ -7693,13 +13096,13 @@ define void @mscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x
; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
-; RV64-LABEL: mscatter_baseidx_zext_v8i16_v8f32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV64-NEXT: vzext.vf2 v12, v10
-; RV64-NEXT: vsll.vi v10, v12, 2
-; RV64-NEXT: vsoxei32.v v8, (a0), v10, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_baseidx_zext_v8i16_v8f32:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64V-NEXT: vzext.vf2 v12, v10
+; RV64V-NEXT: vsll.vi v10, v12, 2
+; RV64V-NEXT: vsoxei32.v v8, (a0), v10, v0.t
+; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8f32:
; RV64ZVE32F: # %bb.0:
@@ -7708,7 +13111,7 @@ define void @mscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: addiw a1, a1, -1
-; RV64ZVE32F-NEXT: beqz a3, .LBB73_2
+; RV64ZVE32F-NEXT: beqz a3, .LBB83_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v10
@@ -7717,9 +13120,9 @@ define void @mscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a3)
-; RV64ZVE32F-NEXT: .LBB73_2: # %else
+; RV64ZVE32F-NEXT: .LBB83_2: # %else
; RV64ZVE32F-NEXT: andi a3, a2, 2
-; RV64ZVE32F-NEXT: beqz a3, .LBB73_4
+; RV64ZVE32F-NEXT: beqz a3, .LBB83_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
@@ -7730,23 +13133,23 @@ define void @mscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1
; RV64ZVE32F-NEXT: vse32.v v11, (a3)
-; RV64ZVE32F-NEXT: .LBB73_4: # %else2
+; RV64ZVE32F-NEXT: .LBB83_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
; RV64ZVE32F-NEXT: andi a3, a2, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
-; RV64ZVE32F-NEXT: bnez a3, .LBB73_12
+; RV64ZVE32F-NEXT: bnez a3, .LBB83_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a3, a2, 8
-; RV64ZVE32F-NEXT: bnez a3, .LBB73_13
-; RV64ZVE32F-NEXT: .LBB73_6: # %else6
+; RV64ZVE32F-NEXT: bnez a3, .LBB83_13
+; RV64ZVE32F-NEXT: .LBB83_6: # %else6
; RV64ZVE32F-NEXT: andi a3, a2, 16
-; RV64ZVE32F-NEXT: bnez a3, .LBB73_14
-; RV64ZVE32F-NEXT: .LBB73_7: # %else8
+; RV64ZVE32F-NEXT: bnez a3, .LBB83_14
+; RV64ZVE32F-NEXT: .LBB83_7: # %else8
; RV64ZVE32F-NEXT: andi a3, a2, 32
-; RV64ZVE32F-NEXT: beqz a3, .LBB73_9
-; RV64ZVE32F-NEXT: .LBB73_8: # %cond.store9
+; RV64ZVE32F-NEXT: beqz a3, .LBB83_9
+; RV64ZVE32F-NEXT: .LBB83_8: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v10
@@ -7757,17 +13160,17 @@ define void @mscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a3)
-; RV64ZVE32F-NEXT: .LBB73_9: # %else10
+; RV64ZVE32F-NEXT: .LBB83_9: # %else10
; RV64ZVE32F-NEXT: andi a3, a2, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
-; RV64ZVE32F-NEXT: bnez a3, .LBB73_15
+; RV64ZVE32F-NEXT: bnez a3, .LBB83_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a2, a2, -128
-; RV64ZVE32F-NEXT: bnez a2, .LBB73_16
-; RV64ZVE32F-NEXT: .LBB73_11: # %else14
+; RV64ZVE32F-NEXT: bnez a2, .LBB83_16
+; RV64ZVE32F-NEXT: .LBB83_11: # %else14
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB73_12: # %cond.store3
+; RV64ZVE32F-NEXT: .LBB83_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-NEXT: and a3, a3, a1
; RV64ZVE32F-NEXT: slli a3, a3, 2
@@ -7777,8 +13180,8 @@ define void @mscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a3)
; RV64ZVE32F-NEXT: andi a3, a2, 8
-; RV64ZVE32F-NEXT: beqz a3, .LBB73_6
-; RV64ZVE32F-NEXT: .LBB73_13: # %cond.store5
+; RV64ZVE32F-NEXT: beqz a3, .LBB83_6
+; RV64ZVE32F-NEXT: .LBB83_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v10
@@ -7789,8 +13192,8 @@ define void @mscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV64ZVE32F-NEXT: vse32.v v10, (a3)
; RV64ZVE32F-NEXT: andi a3, a2, 16
-; RV64ZVE32F-NEXT: beqz a3, .LBB73_7
-; RV64ZVE32F-NEXT: .LBB73_14: # %cond.store7
+; RV64ZVE32F-NEXT: beqz a3, .LBB83_7
+; RV64ZVE32F-NEXT: .LBB83_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v11
; RV64ZVE32F-NEXT: and a3, a3, a1
@@ -7801,9 +13204,9 @@ define void @mscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a3)
; RV64ZVE32F-NEXT: andi a3, a2, 32
-; RV64ZVE32F-NEXT: bnez a3, .LBB73_8
-; RV64ZVE32F-NEXT: j .LBB73_9
-; RV64ZVE32F-NEXT: .LBB73_15: # %cond.store11
+; RV64ZVE32F-NEXT: bnez a3, .LBB83_8
+; RV64ZVE32F-NEXT: j .LBB83_9
+; RV64ZVE32F-NEXT: .LBB83_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-NEXT: and a3, a3, a1
; RV64ZVE32F-NEXT: slli a3, a3, 2
@@ -7813,8 +13216,8 @@ define void @mscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a3)
; RV64ZVE32F-NEXT: andi a2, a2, -128
-; RV64ZVE32F-NEXT: beqz a2, .LBB73_11
-; RV64ZVE32F-NEXT: .LBB73_16: # %cond.store13
+; RV64ZVE32F-NEXT: beqz a2, .LBB83_11
+; RV64ZVE32F-NEXT: .LBB83_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -7840,30 +13243,30 @@ define void @mscatter_baseidx_v8f32(<8 x float> %val, ptr %base, <8 x i32> %idxs
; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
-; RV64-LABEL: mscatter_baseidx_v8f32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vsext.vf2 v12, v10
-; RV64-NEXT: vsll.vi v12, v12, 2
-; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_baseidx_v8f32:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vsext.vf2 v12, v10
+; RV64V-NEXT: vsll.vi v12, v12, 2
+; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
+; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_v8f32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
-; RV64ZVE32F-NEXT: beqz a2, .LBB74_2
+; RV64ZVE32F-NEXT: beqz a2, .LBB84_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vse32.v v8, (a2)
-; RV64ZVE32F-NEXT: .LBB74_2: # %else
+; RV64ZVE32F-NEXT: .LBB84_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB74_4
+; RV64ZVE32F-NEXT: beqz a2, .LBB84_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 1
@@ -7872,23 +13275,23 @@ define void @mscatter_baseidx_v8f32(<8 x float> %val, ptr %base, <8 x i32> %idxs
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
-; RV64ZVE32F-NEXT: .LBB74_4: # %else2
+; RV64ZVE32F-NEXT: .LBB84_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB74_12
+; RV64ZVE32F-NEXT: bnez a2, .LBB84_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB74_13
-; RV64ZVE32F-NEXT: .LBB74_6: # %else6
+; RV64ZVE32F-NEXT: bnez a2, .LBB84_13
+; RV64ZVE32F-NEXT: .LBB84_6: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB74_14
-; RV64ZVE32F-NEXT: .LBB74_7: # %else8
+; RV64ZVE32F-NEXT: bnez a2, .LBB84_14
+; RV64ZVE32F-NEXT: .LBB84_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB74_9
-; RV64ZVE32F-NEXT: .LBB74_8: # %cond.store9
+; RV64ZVE32F-NEXT: beqz a2, .LBB84_9
+; RV64ZVE32F-NEXT: .LBB84_8: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -7898,17 +13301,17 @@ define void @mscatter_baseidx_v8f32(<8 x float> %val, ptr %base, <8 x i32> %idxs
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v10, (a2)
-; RV64ZVE32F-NEXT: .LBB74_9: # %else10
+; RV64ZVE32F-NEXT: .LBB84_9: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v12, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB74_15
+; RV64ZVE32F-NEXT: bnez a2, .LBB84_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: bnez a1, .LBB74_16
-; RV64ZVE32F-NEXT: .LBB74_11: # %else14
+; RV64ZVE32F-NEXT: bnez a1, .LBB84_16
+; RV64ZVE32F-NEXT: .LBB84_11: # %else14
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB74_12: # %cond.store3
+; RV64ZVE32F-NEXT: .LBB84_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -7916,8 +13319,8 @@ define void @mscatter_baseidx_v8f32(<8 x float> %val, ptr %base, <8 x i32> %idxs
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v11, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB74_6
-; RV64ZVE32F-NEXT: .LBB74_13: # %cond.store5
+; RV64ZVE32F-NEXT: beqz a2, .LBB84_6
+; RV64ZVE32F-NEXT: .LBB84_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -7926,8 +13329,8 @@ define void @mscatter_baseidx_v8f32(<8 x float> %val, ptr %base, <8 x i32> %idxs
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV64ZVE32F-NEXT: vse32.v v10, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB74_7
-; RV64ZVE32F-NEXT: .LBB74_14: # %cond.store7
+; RV64ZVE32F-NEXT: beqz a2, .LBB84_7
+; RV64ZVE32F-NEXT: .LBB84_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: slli a2, a2, 2
@@ -7936,9 +13339,9 @@ define void @mscatter_baseidx_v8f32(<8 x float> %val, ptr %base, <8 x i32> %idxs
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v10, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB74_8
-; RV64ZVE32F-NEXT: j .LBB74_9
-; RV64ZVE32F-NEXT: .LBB74_15: # %cond.store11
+; RV64ZVE32F-NEXT: bnez a2, .LBB84_8
+; RV64ZVE32F-NEXT: j .LBB84_9
+; RV64ZVE32F-NEXT: .LBB84_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -7947,8 +13350,8 @@ define void @mscatter_baseidx_v8f32(<8 x float> %val, ptr %base, <8 x i32> %idxs
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB74_11
-; RV64ZVE32F-NEXT: .LBB74_16: # %cond.store13
+; RV64ZVE32F-NEXT: beqz a1, .LBB84_11
+; RV64ZVE32F-NEXT: .LBB84_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v10
@@ -7973,32 +13376,32 @@ define void @mscatter_v1f64(<1 x double> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT: ret
;
-; RV64-LABEL: mscatter_v1f64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_v1f64:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t
+; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_v1f64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
; RV32ZVE32F-NEXT: vfirst.m a0, v0
-; RV32ZVE32F-NEXT: bnez a0, .LBB75_2
+; RV32ZVE32F-NEXT: bnez a0, .LBB85_2
; RV32ZVE32F-NEXT: # %bb.1: # %cond.store
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
-; RV32ZVE32F-NEXT: .LBB75_2: # %else
+; RV32ZVE32F-NEXT: .LBB85_2: # %else
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_v1f64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vfirst.m a1, v0
-; RV64ZVE32F-NEXT: bnez a1, .LBB75_2
+; RV64ZVE32F-NEXT: bnez a1, .LBB85_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
-; RV64ZVE32F-NEXT: .LBB75_2: # %else
+; RV64ZVE32F-NEXT: .LBB85_2: # %else
; RV64ZVE32F-NEXT: ret
call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> %val, <1 x ptr> %ptrs, i32 8, <1 x i1> %m)
ret void
@@ -8013,30 +13416,30 @@ define void @mscatter_v2f64(<2 x double> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT: ret
;
-; RV64-LABEL: mscatter_v2f64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_v2f64:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t
+; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_v2f64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a0, v0
; RV32ZVE32F-NEXT: andi a1, a0, 1
-; RV32ZVE32F-NEXT: bnez a1, .LBB76_3
+; RV32ZVE32F-NEXT: bnez a1, .LBB86_3
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, a0, 2
-; RV32ZVE32F-NEXT: bnez a0, .LBB76_4
-; RV32ZVE32F-NEXT: .LBB76_2: # %else2
+; RV32ZVE32F-NEXT: bnez a0, .LBB86_4
+; RV32ZVE32F-NEXT: .LBB86_2: # %else2
; RV32ZVE32F-NEXT: ret
-; RV32ZVE32F-NEXT: .LBB76_3: # %cond.store
+; RV32ZVE32F-NEXT: .LBB86_3: # %cond.store
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fsd fa0, 0(a1)
; RV32ZVE32F-NEXT: andi a0, a0, 2
-; RV32ZVE32F-NEXT: beqz a0, .LBB76_2
-; RV32ZVE32F-NEXT: .LBB76_4: # %cond.store1
+; RV32ZVE32F-NEXT: beqz a0, .LBB86_2
+; RV32ZVE32F-NEXT: .LBB86_4: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
@@ -8048,17 +13451,17 @@ define void @mscatter_v2f64(<2 x double> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
-; RV64ZVE32F-NEXT: bnez a3, .LBB76_3
+; RV64ZVE32F-NEXT: bnez a3, .LBB86_3
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB76_4
-; RV64ZVE32F-NEXT: .LBB76_2: # %else2
+; RV64ZVE32F-NEXT: bnez a2, .LBB86_4
+; RV64ZVE32F-NEXT: .LBB86_2: # %else2
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB76_3: # %cond.store
+; RV64ZVE32F-NEXT: .LBB86_3: # %cond.store
; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
; RV64ZVE32F-NEXT: andi a2, a2, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB76_2
-; RV64ZVE32F-NEXT: .LBB76_4: # %cond.store1
+; RV64ZVE32F-NEXT: beqz a2, .LBB86_2
+; RV64ZVE32F-NEXT: .LBB86_4: # %cond.store1
; RV64ZVE32F-NEXT: fsd fa1, 0(a1)
; RV64ZVE32F-NEXT: ret
call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> %val, <2 x ptr> %ptrs, i32 8, <2 x i1> %m)
@@ -8074,50 +13477,50 @@ define void @mscatter_v4f64(<4 x double> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV32V-NEXT: vsoxei32.v v8, (zero), v10, v0.t
; RV32V-NEXT: ret
;
-; RV64-LABEL: mscatter_v4f64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_v4f64:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64V-NEXT: vsoxei64.v v8, (zero), v10, v0.t
+; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_v4f64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a0, v0
; RV32ZVE32F-NEXT: andi a1, a0, 1
-; RV32ZVE32F-NEXT: bnez a1, .LBB77_5
+; RV32ZVE32F-NEXT: bnez a1, .LBB87_5
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a1, a0, 2
-; RV32ZVE32F-NEXT: bnez a1, .LBB77_6
-; RV32ZVE32F-NEXT: .LBB77_2: # %else2
+; RV32ZVE32F-NEXT: bnez a1, .LBB87_6
+; RV32ZVE32F-NEXT: .LBB87_2: # %else2
; RV32ZVE32F-NEXT: andi a1, a0, 4
-; RV32ZVE32F-NEXT: bnez a1, .LBB77_7
-; RV32ZVE32F-NEXT: .LBB77_3: # %else4
+; RV32ZVE32F-NEXT: bnez a1, .LBB87_7
+; RV32ZVE32F-NEXT: .LBB87_3: # %else4
; RV32ZVE32F-NEXT: andi a0, a0, 8
-; RV32ZVE32F-NEXT: bnez a0, .LBB77_8
-; RV32ZVE32F-NEXT: .LBB77_4: # %else6
+; RV32ZVE32F-NEXT: bnez a0, .LBB87_8
+; RV32ZVE32F-NEXT: .LBB87_4: # %else6
; RV32ZVE32F-NEXT: ret
-; RV32ZVE32F-NEXT: .LBB77_5: # %cond.store
+; RV32ZVE32F-NEXT: .LBB87_5: # %cond.store
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fsd fa0, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a0, 2
-; RV32ZVE32F-NEXT: beqz a1, .LBB77_2
-; RV32ZVE32F-NEXT: .LBB77_6: # %cond.store1
+; RV32ZVE32F-NEXT: beqz a1, .LBB87_2
+; RV32ZVE32F-NEXT: .LBB87_6: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v9
; RV32ZVE32F-NEXT: fsd fa1, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a0, 4
-; RV32ZVE32F-NEXT: beqz a1, .LBB77_3
-; RV32ZVE32F-NEXT: .LBB77_7: # %cond.store3
+; RV32ZVE32F-NEXT: beqz a1, .LBB87_3
+; RV32ZVE32F-NEXT: .LBB87_7: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a1, v9
; RV32ZVE32F-NEXT: fsd fa2, 0(a1)
; RV32ZVE32F-NEXT: andi a0, a0, 8
-; RV32ZVE32F-NEXT: beqz a0, .LBB77_4
-; RV32ZVE32F-NEXT: .LBB77_8: # %cond.store5
+; RV32ZVE32F-NEXT: beqz a0, .LBB87_4
+; RV32ZVE32F-NEXT: .LBB87_8: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
@@ -8132,32 +13535,32 @@ define void @mscatter_v4f64(<4 x double> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v0
; RV64ZVE32F-NEXT: andi a5, a3, 1
-; RV64ZVE32F-NEXT: bnez a5, .LBB77_5
+; RV64ZVE32F-NEXT: bnez a5, .LBB87_5
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a0, a3, 2
-; RV64ZVE32F-NEXT: bnez a0, .LBB77_6
-; RV64ZVE32F-NEXT: .LBB77_2: # %else2
+; RV64ZVE32F-NEXT: bnez a0, .LBB87_6
+; RV64ZVE32F-NEXT: .LBB87_2: # %else2
; RV64ZVE32F-NEXT: andi a0, a3, 4
-; RV64ZVE32F-NEXT: bnez a0, .LBB77_7
-; RV64ZVE32F-NEXT: .LBB77_3: # %else4
+; RV64ZVE32F-NEXT: bnez a0, .LBB87_7
+; RV64ZVE32F-NEXT: .LBB87_3: # %else4
; RV64ZVE32F-NEXT: andi a3, a3, 8
-; RV64ZVE32F-NEXT: bnez a3, .LBB77_8
-; RV64ZVE32F-NEXT: .LBB77_4: # %else6
+; RV64ZVE32F-NEXT: bnez a3, .LBB87_8
+; RV64ZVE32F-NEXT: .LBB87_4: # %else6
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB77_5: # %cond.store
+; RV64ZVE32F-NEXT: .LBB87_5: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a3, 2
-; RV64ZVE32F-NEXT: beqz a0, .LBB77_2
-; RV64ZVE32F-NEXT: .LBB77_6: # %cond.store1
+; RV64ZVE32F-NEXT: beqz a0, .LBB87_2
+; RV64ZVE32F-NEXT: .LBB87_6: # %cond.store1
; RV64ZVE32F-NEXT: fsd fa1, 0(a4)
; RV64ZVE32F-NEXT: andi a0, a3, 4
-; RV64ZVE32F-NEXT: beqz a0, .LBB77_3
-; RV64ZVE32F-NEXT: .LBB77_7: # %cond.store3
+; RV64ZVE32F-NEXT: beqz a0, .LBB87_3
+; RV64ZVE32F-NEXT: .LBB87_7: # %cond.store3
; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
; RV64ZVE32F-NEXT: andi a3, a3, 8
-; RV64ZVE32F-NEXT: beqz a3, .LBB77_4
-; RV64ZVE32F-NEXT: .LBB77_8: # %cond.store5
+; RV64ZVE32F-NEXT: beqz a3, .LBB87_4
+; RV64ZVE32F-NEXT: .LBB87_8: # %cond.store5
; RV64ZVE32F-NEXT: fsd fa3, 0(a1)
; RV64ZVE32F-NEXT: ret
call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> %val, <4 x ptr> %ptrs, i32 8, <4 x i1> %m)
@@ -8171,11 +13574,11 @@ define void @mscatter_truemask_v4f64(<4 x double> %val, <4 x ptr> %ptrs) {
; RV32V-NEXT: vsoxei32.v v8, (zero), v10
; RV32V-NEXT: ret
;
-; RV64-LABEL: mscatter_truemask_v4f64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV64-NEXT: vsoxei64.v v8, (zero), v10
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_truemask_v4f64:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64V-NEXT: vsoxei64.v v8, (zero), v10
+; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_truemask_v4f64:
; RV32ZVE32F: # %bb.0:
@@ -8225,90 +13628,90 @@ define void @mscatter_v8f64(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV32V-NEXT: vsoxei32.v v8, (zero), v12, v0.t
; RV32V-NEXT: ret
;
-; RV64-LABEL: mscatter_v8f64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_v8f64:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vsoxei64.v v8, (zero), v12, v0.t
+; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_v8f64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a0, v0
; RV32ZVE32F-NEXT: andi a1, a0, 1
-; RV32ZVE32F-NEXT: bnez a1, .LBB80_9
+; RV32ZVE32F-NEXT: bnez a1, .LBB90_9
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a1, a0, 2
-; RV32ZVE32F-NEXT: bnez a1, .LBB80_10
-; RV32ZVE32F-NEXT: .LBB80_2: # %else2
+; RV32ZVE32F-NEXT: bnez a1, .LBB90_10
+; RV32ZVE32F-NEXT: .LBB90_2: # %else2
; RV32ZVE32F-NEXT: andi a1, a0, 4
-; RV32ZVE32F-NEXT: bnez a1, .LBB80_11
-; RV32ZVE32F-NEXT: .LBB80_3: # %else4
+; RV32ZVE32F-NEXT: bnez a1, .LBB90_11
+; RV32ZVE32F-NEXT: .LBB90_3: # %else4
; RV32ZVE32F-NEXT: andi a1, a0, 8
-; RV32ZVE32F-NEXT: bnez a1, .LBB80_12
-; RV32ZVE32F-NEXT: .LBB80_4: # %else6
+; RV32ZVE32F-NEXT: bnez a1, .LBB90_12
+; RV32ZVE32F-NEXT: .LBB90_4: # %else6
; RV32ZVE32F-NEXT: andi a1, a0, 16
-; RV32ZVE32F-NEXT: bnez a1, .LBB80_13
-; RV32ZVE32F-NEXT: .LBB80_5: # %else8
+; RV32ZVE32F-NEXT: bnez a1, .LBB90_13
+; RV32ZVE32F-NEXT: .LBB90_5: # %else8
; RV32ZVE32F-NEXT: andi a1, a0, 32
-; RV32ZVE32F-NEXT: bnez a1, .LBB80_14
-; RV32ZVE32F-NEXT: .LBB80_6: # %else10
+; RV32ZVE32F-NEXT: bnez a1, .LBB90_14
+; RV32ZVE32F-NEXT: .LBB90_6: # %else10
; RV32ZVE32F-NEXT: andi a1, a0, 64
-; RV32ZVE32F-NEXT: bnez a1, .LBB80_15
-; RV32ZVE32F-NEXT: .LBB80_7: # %else12
+; RV32ZVE32F-NEXT: bnez a1, .LBB90_15
+; RV32ZVE32F-NEXT: .LBB90_7: # %else12
; RV32ZVE32F-NEXT: andi a0, a0, -128
-; RV32ZVE32F-NEXT: bnez a0, .LBB80_16
-; RV32ZVE32F-NEXT: .LBB80_8: # %else14
+; RV32ZVE32F-NEXT: bnez a0, .LBB90_16
+; RV32ZVE32F-NEXT: .LBB90_8: # %else14
; RV32ZVE32F-NEXT: ret
-; RV32ZVE32F-NEXT: .LBB80_9: # %cond.store
+; RV32ZVE32F-NEXT: .LBB90_9: # %cond.store
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fsd fa0, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a0, 2
-; RV32ZVE32F-NEXT: beqz a1, .LBB80_2
-; RV32ZVE32F-NEXT: .LBB80_10: # %cond.store1
+; RV32ZVE32F-NEXT: beqz a1, .LBB90_2
+; RV32ZVE32F-NEXT: .LBB90_10: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fsd fa1, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a0, 4
-; RV32ZVE32F-NEXT: beqz a1, .LBB80_3
-; RV32ZVE32F-NEXT: .LBB80_11: # %cond.store3
+; RV32ZVE32F-NEXT: beqz a1, .LBB90_3
+; RV32ZVE32F-NEXT: .LBB90_11: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fsd fa2, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a0, 8
-; RV32ZVE32F-NEXT: beqz a1, .LBB80_4
-; RV32ZVE32F-NEXT: .LBB80_12: # %cond.store5
+; RV32ZVE32F-NEXT: beqz a1, .LBB90_4
+; RV32ZVE32F-NEXT: .LBB90_12: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fsd fa3, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a0, 16
-; RV32ZVE32F-NEXT: beqz a1, .LBB80_5
-; RV32ZVE32F-NEXT: .LBB80_13: # %cond.store7
+; RV32ZVE32F-NEXT: beqz a1, .LBB90_5
+; RV32ZVE32F-NEXT: .LBB90_13: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fsd fa4, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a0, 32
-; RV32ZVE32F-NEXT: beqz a1, .LBB80_6
-; RV32ZVE32F-NEXT: .LBB80_14: # %cond.store9
+; RV32ZVE32F-NEXT: beqz a1, .LBB90_6
+; RV32ZVE32F-NEXT: .LBB90_14: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fsd fa5, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a0, 64
-; RV32ZVE32F-NEXT: beqz a1, .LBB80_7
-; RV32ZVE32F-NEXT: .LBB80_15: # %cond.store11
+; RV32ZVE32F-NEXT: beqz a1, .LBB90_7
+; RV32ZVE32F-NEXT: .LBB90_15: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fsd fa6, 0(a1)
; RV32ZVE32F-NEXT: andi a0, a0, -128
-; RV32ZVE32F-NEXT: beqz a0, .LBB80_8
-; RV32ZVE32F-NEXT: .LBB80_16: # %cond.store13
+; RV32ZVE32F-NEXT: beqz a0, .LBB90_8
+; RV32ZVE32F-NEXT: .LBB90_16: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
@@ -8327,60 +13730,60 @@ define void @mscatter_v8f64(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a4, v0
; RV64ZVE32F-NEXT: andi t1, a4, 1
-; RV64ZVE32F-NEXT: bnez t1, .LBB80_9
+; RV64ZVE32F-NEXT: bnez t1, .LBB90_9
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a0, a4, 2
-; RV64ZVE32F-NEXT: bnez a0, .LBB80_10
-; RV64ZVE32F-NEXT: .LBB80_2: # %else2
+; RV64ZVE32F-NEXT: bnez a0, .LBB90_10
+; RV64ZVE32F-NEXT: .LBB90_2: # %else2
; RV64ZVE32F-NEXT: andi a0, a4, 4
-; RV64ZVE32F-NEXT: bnez a0, .LBB80_11
-; RV64ZVE32F-NEXT: .LBB80_3: # %else4
+; RV64ZVE32F-NEXT: bnez a0, .LBB90_11
+; RV64ZVE32F-NEXT: .LBB90_3: # %else4
; RV64ZVE32F-NEXT: andi a0, a4, 8
-; RV64ZVE32F-NEXT: bnez a0, .LBB80_12
-; RV64ZVE32F-NEXT: .LBB80_4: # %else6
+; RV64ZVE32F-NEXT: bnez a0, .LBB90_12
+; RV64ZVE32F-NEXT: .LBB90_4: # %else6
; RV64ZVE32F-NEXT: andi a0, a4, 16
-; RV64ZVE32F-NEXT: bnez a0, .LBB80_13
-; RV64ZVE32F-NEXT: .LBB80_5: # %else8
+; RV64ZVE32F-NEXT: bnez a0, .LBB90_13
+; RV64ZVE32F-NEXT: .LBB90_5: # %else8
; RV64ZVE32F-NEXT: andi a0, a4, 32
-; RV64ZVE32F-NEXT: bnez a0, .LBB80_14
-; RV64ZVE32F-NEXT: .LBB80_6: # %else10
+; RV64ZVE32F-NEXT: bnez a0, .LBB90_14
+; RV64ZVE32F-NEXT: .LBB90_6: # %else10
; RV64ZVE32F-NEXT: andi a0, a4, 64
-; RV64ZVE32F-NEXT: bnez a0, .LBB80_15
-; RV64ZVE32F-NEXT: .LBB80_7: # %else12
+; RV64ZVE32F-NEXT: bnez a0, .LBB90_15
+; RV64ZVE32F-NEXT: .LBB90_7: # %else12
; RV64ZVE32F-NEXT: andi a0, a4, -128
-; RV64ZVE32F-NEXT: bnez a0, .LBB80_16
-; RV64ZVE32F-NEXT: .LBB80_8: # %else14
+; RV64ZVE32F-NEXT: bnez a0, .LBB90_16
+; RV64ZVE32F-NEXT: .LBB90_8: # %else14
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB80_9: # %cond.store
+; RV64ZVE32F-NEXT: .LBB90_9: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a4, 2
-; RV64ZVE32F-NEXT: beqz a0, .LBB80_2
-; RV64ZVE32F-NEXT: .LBB80_10: # %cond.store1
+; RV64ZVE32F-NEXT: beqz a0, .LBB90_2
+; RV64ZVE32F-NEXT: .LBB90_10: # %cond.store1
; RV64ZVE32F-NEXT: fsd fa1, 0(t0)
; RV64ZVE32F-NEXT: andi a0, a4, 4
-; RV64ZVE32F-NEXT: beqz a0, .LBB80_3
-; RV64ZVE32F-NEXT: .LBB80_11: # %cond.store3
+; RV64ZVE32F-NEXT: beqz a0, .LBB90_3
+; RV64ZVE32F-NEXT: .LBB90_11: # %cond.store3
; RV64ZVE32F-NEXT: fsd fa2, 0(a7)
; RV64ZVE32F-NEXT: andi a0, a4, 8
-; RV64ZVE32F-NEXT: beqz a0, .LBB80_4
-; RV64ZVE32F-NEXT: .LBB80_12: # %cond.store5
+; RV64ZVE32F-NEXT: beqz a0, .LBB90_4
+; RV64ZVE32F-NEXT: .LBB90_12: # %cond.store5
; RV64ZVE32F-NEXT: fsd fa3, 0(a6)
; RV64ZVE32F-NEXT: andi a0, a4, 16
-; RV64ZVE32F-NEXT: beqz a0, .LBB80_5
-; RV64ZVE32F-NEXT: .LBB80_13: # %cond.store7
+; RV64ZVE32F-NEXT: beqz a0, .LBB90_5
+; RV64ZVE32F-NEXT: .LBB90_13: # %cond.store7
; RV64ZVE32F-NEXT: fsd fa4, 0(a5)
; RV64ZVE32F-NEXT: andi a0, a4, 32
-; RV64ZVE32F-NEXT: beqz a0, .LBB80_6
-; RV64ZVE32F-NEXT: .LBB80_14: # %cond.store9
+; RV64ZVE32F-NEXT: beqz a0, .LBB90_6
+; RV64ZVE32F-NEXT: .LBB90_14: # %cond.store9
; RV64ZVE32F-NEXT: fsd fa5, 0(a3)
; RV64ZVE32F-NEXT: andi a0, a4, 64
-; RV64ZVE32F-NEXT: beqz a0, .LBB80_7
-; RV64ZVE32F-NEXT: .LBB80_15: # %cond.store11
+; RV64ZVE32F-NEXT: beqz a0, .LBB90_7
+; RV64ZVE32F-NEXT: .LBB90_15: # %cond.store11
; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
; RV64ZVE32F-NEXT: andi a0, a4, -128
-; RV64ZVE32F-NEXT: beqz a0, .LBB80_8
-; RV64ZVE32F-NEXT: .LBB80_16: # %cond.store13
+; RV64ZVE32F-NEXT: beqz a0, .LBB90_8
+; RV64ZVE32F-NEXT: .LBB90_16: # %cond.store13
; RV64ZVE32F-NEXT: fsd fa7, 0(a1)
; RV64ZVE32F-NEXT: ret
call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
@@ -8397,13 +13800,13 @@ define void @mscatter_baseidx_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x i8>
; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32V-NEXT: ret
;
-; RV64-LABEL: mscatter_baseidx_v8i8_v8f64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vsext.vf8 v16, v12
-; RV64-NEXT: vsll.vi v12, v16, 3
-; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_baseidx_v8i8_v8f64:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vsext.vf8 v16, v12
+; RV64V-NEXT: vsll.vi v12, v16, 3
+; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
+; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_baseidx_v8i8_v8f64:
; RV32ZVE32F: # %bb.0:
@@ -8415,78 +13818,78 @@ define void @mscatter_baseidx_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x i8>
; RV32ZVE32F-NEXT: andi a2, a1, 1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
-; RV32ZVE32F-NEXT: bnez a2, .LBB81_9
+; RV32ZVE32F-NEXT: bnez a2, .LBB91_9
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, a1, 2
-; RV32ZVE32F-NEXT: bnez a0, .LBB81_10
-; RV32ZVE32F-NEXT: .LBB81_2: # %else2
+; RV32ZVE32F-NEXT: bnez a0, .LBB91_10
+; RV32ZVE32F-NEXT: .LBB91_2: # %else2
; RV32ZVE32F-NEXT: andi a0, a1, 4
-; RV32ZVE32F-NEXT: bnez a0, .LBB81_11
-; RV32ZVE32F-NEXT: .LBB81_3: # %else4
+; RV32ZVE32F-NEXT: bnez a0, .LBB91_11
+; RV32ZVE32F-NEXT: .LBB91_3: # %else4
; RV32ZVE32F-NEXT: andi a0, a1, 8
-; RV32ZVE32F-NEXT: bnez a0, .LBB81_12
-; RV32ZVE32F-NEXT: .LBB81_4: # %else6
+; RV32ZVE32F-NEXT: bnez a0, .LBB91_12
+; RV32ZVE32F-NEXT: .LBB91_4: # %else6
; RV32ZVE32F-NEXT: andi a0, a1, 16
-; RV32ZVE32F-NEXT: bnez a0, .LBB81_13
-; RV32ZVE32F-NEXT: .LBB81_5: # %else8
+; RV32ZVE32F-NEXT: bnez a0, .LBB91_13
+; RV32ZVE32F-NEXT: .LBB91_5: # %else8
; RV32ZVE32F-NEXT: andi a0, a1, 32
-; RV32ZVE32F-NEXT: bnez a0, .LBB81_14
-; RV32ZVE32F-NEXT: .LBB81_6: # %else10
+; RV32ZVE32F-NEXT: bnez a0, .LBB91_14
+; RV32ZVE32F-NEXT: .LBB91_6: # %else10
; RV32ZVE32F-NEXT: andi a0, a1, 64
-; RV32ZVE32F-NEXT: bnez a0, .LBB81_15
-; RV32ZVE32F-NEXT: .LBB81_7: # %else12
+; RV32ZVE32F-NEXT: bnez a0, .LBB91_15
+; RV32ZVE32F-NEXT: .LBB91_7: # %else12
; RV32ZVE32F-NEXT: andi a0, a1, -128
-; RV32ZVE32F-NEXT: bnez a0, .LBB81_16
-; RV32ZVE32F-NEXT: .LBB81_8: # %else14
+; RV32ZVE32F-NEXT: bnez a0, .LBB91_16
+; RV32ZVE32F-NEXT: .LBB91_8: # %else14
; RV32ZVE32F-NEXT: ret
-; RV32ZVE32F-NEXT: .LBB81_9: # %cond.store
+; RV32ZVE32F-NEXT: .LBB91_9: # %cond.store
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 2
-; RV32ZVE32F-NEXT: beqz a0, .LBB81_2
-; RV32ZVE32F-NEXT: .LBB81_10: # %cond.store1
+; RV32ZVE32F-NEXT: beqz a0, .LBB91_2
+; RV32ZVE32F-NEXT: .LBB91_10: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa1, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 4
-; RV32ZVE32F-NEXT: beqz a0, .LBB81_3
-; RV32ZVE32F-NEXT: .LBB81_11: # %cond.store3
+; RV32ZVE32F-NEXT: beqz a0, .LBB91_3
+; RV32ZVE32F-NEXT: .LBB91_11: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa2, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 8
-; RV32ZVE32F-NEXT: beqz a0, .LBB81_4
-; RV32ZVE32F-NEXT: .LBB81_12: # %cond.store5
+; RV32ZVE32F-NEXT: beqz a0, .LBB91_4
+; RV32ZVE32F-NEXT: .LBB91_12: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa3, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 16
-; RV32ZVE32F-NEXT: beqz a0, .LBB81_5
-; RV32ZVE32F-NEXT: .LBB81_13: # %cond.store7
+; RV32ZVE32F-NEXT: beqz a0, .LBB91_5
+; RV32ZVE32F-NEXT: .LBB91_13: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa4, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 32
-; RV32ZVE32F-NEXT: beqz a0, .LBB81_6
-; RV32ZVE32F-NEXT: .LBB81_14: # %cond.store9
+; RV32ZVE32F-NEXT: beqz a0, .LBB91_6
+; RV32ZVE32F-NEXT: .LBB91_14: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa5, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 64
-; RV32ZVE32F-NEXT: beqz a0, .LBB81_7
-; RV32ZVE32F-NEXT: .LBB81_15: # %cond.store11
+; RV32ZVE32F-NEXT: beqz a0, .LBB91_7
+; RV32ZVE32F-NEXT: .LBB91_15: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa6, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, -128
-; RV32ZVE32F-NEXT: beqz a0, .LBB81_8
-; RV32ZVE32F-NEXT: .LBB81_16: # %cond.store13
+; RV32ZVE32F-NEXT: beqz a0, .LBB91_8
+; RV32ZVE32F-NEXT: .LBB91_16: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
@@ -8498,15 +13901,15 @@ define void @mscatter_baseidx_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x i8>
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
-; RV64ZVE32F-NEXT: beqz a2, .LBB81_2
+; RV64ZVE32F-NEXT: beqz a2, .LBB91_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa0, 0(a2)
-; RV64ZVE32F-NEXT: .LBB81_2: # %else
+; RV64ZVE32F-NEXT: .LBB91_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB81_4
+; RV64ZVE32F-NEXT: beqz a2, .LBB91_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
@@ -8514,68 +13917,68 @@ define void @mscatter_baseidx_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x i8>
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa1, 0(a2)
-; RV64ZVE32F-NEXT: .LBB81_4: # %else2
+; RV64ZVE32F-NEXT: .LBB91_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB81_12
+; RV64ZVE32F-NEXT: bnez a2, .LBB91_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB81_13
-; RV64ZVE32F-NEXT: .LBB81_6: # %else6
+; RV64ZVE32F-NEXT: bnez a2, .LBB91_13
+; RV64ZVE32F-NEXT: .LBB91_6: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB81_14
-; RV64ZVE32F-NEXT: .LBB81_7: # %else8
+; RV64ZVE32F-NEXT: bnez a2, .LBB91_14
+; RV64ZVE32F-NEXT: .LBB91_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB81_9
-; RV64ZVE32F-NEXT: .LBB81_8: # %cond.store9
+; RV64ZVE32F-NEXT: beqz a2, .LBB91_9
+; RV64ZVE32F-NEXT: .LBB91_8: # %cond.store9
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa5, 0(a2)
-; RV64ZVE32F-NEXT: .LBB81_9: # %else10
+; RV64ZVE32F-NEXT: .LBB91_9: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB81_15
+; RV64ZVE32F-NEXT: bnez a2, .LBB91_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: bnez a1, .LBB81_16
-; RV64ZVE32F-NEXT: .LBB81_11: # %else14
+; RV64ZVE32F-NEXT: bnez a1, .LBB91_16
+; RV64ZVE32F-NEXT: .LBB91_11: # %else14
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB81_12: # %cond.store3
+; RV64ZVE32F-NEXT: .LBB91_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB81_6
-; RV64ZVE32F-NEXT: .LBB81_13: # %cond.store5
+; RV64ZVE32F-NEXT: beqz a2, .LBB91_6
+; RV64ZVE32F-NEXT: .LBB91_13: # %cond.store5
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB81_7
-; RV64ZVE32F-NEXT: .LBB81_14: # %cond.store7
+; RV64ZVE32F-NEXT: beqz a2, .LBB91_7
+; RV64ZVE32F-NEXT: .LBB91_14: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB81_8
-; RV64ZVE32F-NEXT: j .LBB81_9
-; RV64ZVE32F-NEXT: .LBB81_15: # %cond.store11
+; RV64ZVE32F-NEXT: bnez a2, .LBB91_8
+; RV64ZVE32F-NEXT: j .LBB91_9
+; RV64ZVE32F-NEXT: .LBB91_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB81_11
-; RV64ZVE32F-NEXT: .LBB81_16: # %cond.store13
+; RV64ZVE32F-NEXT: beqz a1, .LBB91_11
+; RV64ZVE32F-NEXT: .LBB91_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: slli a1, a1, 3
@@ -8597,13 +14000,13 @@ define void @mscatter_baseidx_sext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x
; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32V-NEXT: ret
;
-; RV64-LABEL: mscatter_baseidx_sext_v8i8_v8f64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vsext.vf8 v16, v12
-; RV64-NEXT: vsll.vi v12, v16, 3
-; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_baseidx_sext_v8i8_v8f64:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vsext.vf8 v16, v12
+; RV64V-NEXT: vsll.vi v12, v16, 3
+; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
+; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8f64:
; RV32ZVE32F: # %bb.0:
@@ -8615,78 +14018,78 @@ define void @mscatter_baseidx_sext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x
; RV32ZVE32F-NEXT: andi a2, a1, 1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
-; RV32ZVE32F-NEXT: bnez a2, .LBB82_9
+; RV32ZVE32F-NEXT: bnez a2, .LBB92_9
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, a1, 2
-; RV32ZVE32F-NEXT: bnez a0, .LBB82_10
-; RV32ZVE32F-NEXT: .LBB82_2: # %else2
+; RV32ZVE32F-NEXT: bnez a0, .LBB92_10
+; RV32ZVE32F-NEXT: .LBB92_2: # %else2
; RV32ZVE32F-NEXT: andi a0, a1, 4
-; RV32ZVE32F-NEXT: bnez a0, .LBB82_11
-; RV32ZVE32F-NEXT: .LBB82_3: # %else4
+; RV32ZVE32F-NEXT: bnez a0, .LBB92_11
+; RV32ZVE32F-NEXT: .LBB92_3: # %else4
; RV32ZVE32F-NEXT: andi a0, a1, 8
-; RV32ZVE32F-NEXT: bnez a0, .LBB82_12
-; RV32ZVE32F-NEXT: .LBB82_4: # %else6
+; RV32ZVE32F-NEXT: bnez a0, .LBB92_12
+; RV32ZVE32F-NEXT: .LBB92_4: # %else6
; RV32ZVE32F-NEXT: andi a0, a1, 16
-; RV32ZVE32F-NEXT: bnez a0, .LBB82_13
-; RV32ZVE32F-NEXT: .LBB82_5: # %else8
+; RV32ZVE32F-NEXT: bnez a0, .LBB92_13
+; RV32ZVE32F-NEXT: .LBB92_5: # %else8
; RV32ZVE32F-NEXT: andi a0, a1, 32
-; RV32ZVE32F-NEXT: bnez a0, .LBB82_14
-; RV32ZVE32F-NEXT: .LBB82_6: # %else10
+; RV32ZVE32F-NEXT: bnez a0, .LBB92_14
+; RV32ZVE32F-NEXT: .LBB92_6: # %else10
; RV32ZVE32F-NEXT: andi a0, a1, 64
-; RV32ZVE32F-NEXT: bnez a0, .LBB82_15
-; RV32ZVE32F-NEXT: .LBB82_7: # %else12
+; RV32ZVE32F-NEXT: bnez a0, .LBB92_15
+; RV32ZVE32F-NEXT: .LBB92_7: # %else12
; RV32ZVE32F-NEXT: andi a0, a1, -128
-; RV32ZVE32F-NEXT: bnez a0, .LBB82_16
-; RV32ZVE32F-NEXT: .LBB82_8: # %else14
+; RV32ZVE32F-NEXT: bnez a0, .LBB92_16
+; RV32ZVE32F-NEXT: .LBB92_8: # %else14
; RV32ZVE32F-NEXT: ret
-; RV32ZVE32F-NEXT: .LBB82_9: # %cond.store
+; RV32ZVE32F-NEXT: .LBB92_9: # %cond.store
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 2
-; RV32ZVE32F-NEXT: beqz a0, .LBB82_2
-; RV32ZVE32F-NEXT: .LBB82_10: # %cond.store1
+; RV32ZVE32F-NEXT: beqz a0, .LBB92_2
+; RV32ZVE32F-NEXT: .LBB92_10: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa1, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 4
-; RV32ZVE32F-NEXT: beqz a0, .LBB82_3
-; RV32ZVE32F-NEXT: .LBB82_11: # %cond.store3
+; RV32ZVE32F-NEXT: beqz a0, .LBB92_3
+; RV32ZVE32F-NEXT: .LBB92_11: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa2, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 8
-; RV32ZVE32F-NEXT: beqz a0, .LBB82_4
-; RV32ZVE32F-NEXT: .LBB82_12: # %cond.store5
+; RV32ZVE32F-NEXT: beqz a0, .LBB92_4
+; RV32ZVE32F-NEXT: .LBB92_12: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa3, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 16
-; RV32ZVE32F-NEXT: beqz a0, .LBB82_5
-; RV32ZVE32F-NEXT: .LBB82_13: # %cond.store7
+; RV32ZVE32F-NEXT: beqz a0, .LBB92_5
+; RV32ZVE32F-NEXT: .LBB92_13: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa4, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 32
-; RV32ZVE32F-NEXT: beqz a0, .LBB82_6
-; RV32ZVE32F-NEXT: .LBB82_14: # %cond.store9
+; RV32ZVE32F-NEXT: beqz a0, .LBB92_6
+; RV32ZVE32F-NEXT: .LBB92_14: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa5, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 64
-; RV32ZVE32F-NEXT: beqz a0, .LBB82_7
-; RV32ZVE32F-NEXT: .LBB82_15: # %cond.store11
+; RV32ZVE32F-NEXT: beqz a0, .LBB92_7
+; RV32ZVE32F-NEXT: .LBB92_15: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa6, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, -128
-; RV32ZVE32F-NEXT: beqz a0, .LBB82_8
-; RV32ZVE32F-NEXT: .LBB82_16: # %cond.store13
+; RV32ZVE32F-NEXT: beqz a0, .LBB92_8
+; RV32ZVE32F-NEXT: .LBB92_16: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
@@ -8698,15 +14101,15 @@ define void @mscatter_baseidx_sext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
-; RV64ZVE32F-NEXT: beqz a2, .LBB82_2
+; RV64ZVE32F-NEXT: beqz a2, .LBB92_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa0, 0(a2)
-; RV64ZVE32F-NEXT: .LBB82_2: # %else
+; RV64ZVE32F-NEXT: .LBB92_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB82_4
+; RV64ZVE32F-NEXT: beqz a2, .LBB92_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
@@ -8714,68 +14117,68 @@ define void @mscatter_baseidx_sext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa1, 0(a2)
-; RV64ZVE32F-NEXT: .LBB82_4: # %else2
+; RV64ZVE32F-NEXT: .LBB92_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB82_12
+; RV64ZVE32F-NEXT: bnez a2, .LBB92_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB82_13
-; RV64ZVE32F-NEXT: .LBB82_6: # %else6
+; RV64ZVE32F-NEXT: bnez a2, .LBB92_13
+; RV64ZVE32F-NEXT: .LBB92_6: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB82_14
-; RV64ZVE32F-NEXT: .LBB82_7: # %else8
+; RV64ZVE32F-NEXT: bnez a2, .LBB92_14
+; RV64ZVE32F-NEXT: .LBB92_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB82_9
-; RV64ZVE32F-NEXT: .LBB82_8: # %cond.store9
+; RV64ZVE32F-NEXT: beqz a2, .LBB92_9
+; RV64ZVE32F-NEXT: .LBB92_8: # %cond.store9
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa5, 0(a2)
-; RV64ZVE32F-NEXT: .LBB82_9: # %else10
+; RV64ZVE32F-NEXT: .LBB92_9: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB82_15
+; RV64ZVE32F-NEXT: bnez a2, .LBB92_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: bnez a1, .LBB82_16
-; RV64ZVE32F-NEXT: .LBB82_11: # %else14
+; RV64ZVE32F-NEXT: bnez a1, .LBB92_16
+; RV64ZVE32F-NEXT: .LBB92_11: # %else14
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB82_12: # %cond.store3
+; RV64ZVE32F-NEXT: .LBB92_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB82_6
-; RV64ZVE32F-NEXT: .LBB82_13: # %cond.store5
+; RV64ZVE32F-NEXT: beqz a2, .LBB92_6
+; RV64ZVE32F-NEXT: .LBB92_13: # %cond.store5
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB82_7
-; RV64ZVE32F-NEXT: .LBB82_14: # %cond.store7
+; RV64ZVE32F-NEXT: beqz a2, .LBB92_7
+; RV64ZVE32F-NEXT: .LBB92_14: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB82_8
-; RV64ZVE32F-NEXT: j .LBB82_9
-; RV64ZVE32F-NEXT: .LBB82_15: # %cond.store11
+; RV64ZVE32F-NEXT: bnez a2, .LBB92_8
+; RV64ZVE32F-NEXT: j .LBB92_9
+; RV64ZVE32F-NEXT: .LBB92_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB82_11
-; RV64ZVE32F-NEXT: .LBB82_16: # %cond.store13
+; RV64ZVE32F-NEXT: beqz a1, .LBB92_11
+; RV64ZVE32F-NEXT: .LBB92_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: slli a1, a1, 3
@@ -8798,14 +14201,14 @@ define void @mscatter_baseidx_zext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x
; RV32V-NEXT: vsoxei16.v v8, (a0), v12, v0.t
; RV32V-NEXT: ret
;
-; RV64-LABEL: mscatter_baseidx_zext_v8i8_v8f64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT: vzext.vf2 v13, v12
-; RV64-NEXT: vsll.vi v12, v13, 3
-; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; RV64-NEXT: vsoxei16.v v8, (a0), v12, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_baseidx_zext_v8i8_v8f64:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64V-NEXT: vzext.vf2 v13, v12
+; RV64V-NEXT: vsll.vi v12, v13, 3
+; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; RV64V-NEXT: vsoxei16.v v8, (a0), v12, v0.t
+; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8f64:
; RV32ZVE32F: # %bb.0:
@@ -8817,78 +14220,78 @@ define void @mscatter_baseidx_zext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x
; RV32ZVE32F-NEXT: andi a2, a1, 1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
-; RV32ZVE32F-NEXT: bnez a2, .LBB83_9
+; RV32ZVE32F-NEXT: bnez a2, .LBB93_9
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, a1, 2
-; RV32ZVE32F-NEXT: bnez a0, .LBB83_10
-; RV32ZVE32F-NEXT: .LBB83_2: # %else2
+; RV32ZVE32F-NEXT: bnez a0, .LBB93_10
+; RV32ZVE32F-NEXT: .LBB93_2: # %else2
; RV32ZVE32F-NEXT: andi a0, a1, 4
-; RV32ZVE32F-NEXT: bnez a0, .LBB83_11
-; RV32ZVE32F-NEXT: .LBB83_3: # %else4
+; RV32ZVE32F-NEXT: bnez a0, .LBB93_11
+; RV32ZVE32F-NEXT: .LBB93_3: # %else4
; RV32ZVE32F-NEXT: andi a0, a1, 8
-; RV32ZVE32F-NEXT: bnez a0, .LBB83_12
-; RV32ZVE32F-NEXT: .LBB83_4: # %else6
+; RV32ZVE32F-NEXT: bnez a0, .LBB93_12
+; RV32ZVE32F-NEXT: .LBB93_4: # %else6
; RV32ZVE32F-NEXT: andi a0, a1, 16
-; RV32ZVE32F-NEXT: bnez a0, .LBB83_13
-; RV32ZVE32F-NEXT: .LBB83_5: # %else8
+; RV32ZVE32F-NEXT: bnez a0, .LBB93_13
+; RV32ZVE32F-NEXT: .LBB93_5: # %else8
; RV32ZVE32F-NEXT: andi a0, a1, 32
-; RV32ZVE32F-NEXT: bnez a0, .LBB83_14
-; RV32ZVE32F-NEXT: .LBB83_6: # %else10
+; RV32ZVE32F-NEXT: bnez a0, .LBB93_14
+; RV32ZVE32F-NEXT: .LBB93_6: # %else10
; RV32ZVE32F-NEXT: andi a0, a1, 64
-; RV32ZVE32F-NEXT: bnez a0, .LBB83_15
-; RV32ZVE32F-NEXT: .LBB83_7: # %else12
+; RV32ZVE32F-NEXT: bnez a0, .LBB93_15
+; RV32ZVE32F-NEXT: .LBB93_7: # %else12
; RV32ZVE32F-NEXT: andi a0, a1, -128
-; RV32ZVE32F-NEXT: bnez a0, .LBB83_16
-; RV32ZVE32F-NEXT: .LBB83_8: # %else14
+; RV32ZVE32F-NEXT: bnez a0, .LBB93_16
+; RV32ZVE32F-NEXT: .LBB93_8: # %else14
; RV32ZVE32F-NEXT: ret
-; RV32ZVE32F-NEXT: .LBB83_9: # %cond.store
+; RV32ZVE32F-NEXT: .LBB93_9: # %cond.store
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 2
-; RV32ZVE32F-NEXT: beqz a0, .LBB83_2
-; RV32ZVE32F-NEXT: .LBB83_10: # %cond.store1
+; RV32ZVE32F-NEXT: beqz a0, .LBB93_2
+; RV32ZVE32F-NEXT: .LBB93_10: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa1, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 4
-; RV32ZVE32F-NEXT: beqz a0, .LBB83_3
-; RV32ZVE32F-NEXT: .LBB83_11: # %cond.store3
+; RV32ZVE32F-NEXT: beqz a0, .LBB93_3
+; RV32ZVE32F-NEXT: .LBB93_11: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa2, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 8
-; RV32ZVE32F-NEXT: beqz a0, .LBB83_4
-; RV32ZVE32F-NEXT: .LBB83_12: # %cond.store5
+; RV32ZVE32F-NEXT: beqz a0, .LBB93_4
+; RV32ZVE32F-NEXT: .LBB93_12: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa3, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 16
-; RV32ZVE32F-NEXT: beqz a0, .LBB83_5
-; RV32ZVE32F-NEXT: .LBB83_13: # %cond.store7
+; RV32ZVE32F-NEXT: beqz a0, .LBB93_5
+; RV32ZVE32F-NEXT: .LBB93_13: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa4, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 32
-; RV32ZVE32F-NEXT: beqz a0, .LBB83_6
-; RV32ZVE32F-NEXT: .LBB83_14: # %cond.store9
+; RV32ZVE32F-NEXT: beqz a0, .LBB93_6
+; RV32ZVE32F-NEXT: .LBB93_14: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa5, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 64
-; RV32ZVE32F-NEXT: beqz a0, .LBB83_7
-; RV32ZVE32F-NEXT: .LBB83_15: # %cond.store11
+; RV32ZVE32F-NEXT: beqz a0, .LBB93_7
+; RV32ZVE32F-NEXT: .LBB93_15: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa6, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, -128
-; RV32ZVE32F-NEXT: beqz a0, .LBB83_8
-; RV32ZVE32F-NEXT: .LBB83_16: # %cond.store13
+; RV32ZVE32F-NEXT: beqz a0, .LBB93_8
+; RV32ZVE32F-NEXT: .LBB93_16: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
@@ -8900,16 +14303,16 @@ define void @mscatter_baseidx_zext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
-; RV64ZVE32F-NEXT: beqz a2, .LBB83_2
+; RV64ZVE32F-NEXT: beqz a2, .LBB93_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa0, 0(a2)
-; RV64ZVE32F-NEXT: .LBB83_2: # %else
+; RV64ZVE32F-NEXT: .LBB93_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB83_4
+; RV64ZVE32F-NEXT: beqz a2, .LBB93_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
@@ -8918,47 +14321,47 @@ define void @mscatter_baseidx_zext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa1, 0(a2)
-; RV64ZVE32F-NEXT: .LBB83_4: # %else2
+; RV64ZVE32F-NEXT: .LBB93_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB83_12
+; RV64ZVE32F-NEXT: bnez a2, .LBB93_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB83_13
-; RV64ZVE32F-NEXT: .LBB83_6: # %else6
+; RV64ZVE32F-NEXT: bnez a2, .LBB93_13
+; RV64ZVE32F-NEXT: .LBB93_6: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB83_14
-; RV64ZVE32F-NEXT: .LBB83_7: # %else8
+; RV64ZVE32F-NEXT: bnez a2, .LBB93_14
+; RV64ZVE32F-NEXT: .LBB93_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB83_9
-; RV64ZVE32F-NEXT: .LBB83_8: # %cond.store9
+; RV64ZVE32F-NEXT: beqz a2, .LBB93_9
+; RV64ZVE32F-NEXT: .LBB93_8: # %cond.store9
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa5, 0(a2)
-; RV64ZVE32F-NEXT: .LBB83_9: # %else10
+; RV64ZVE32F-NEXT: .LBB93_9: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB83_15
+; RV64ZVE32F-NEXT: bnez a2, .LBB93_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: bnez a1, .LBB83_16
-; RV64ZVE32F-NEXT: .LBB83_11: # %else14
+; RV64ZVE32F-NEXT: bnez a1, .LBB93_16
+; RV64ZVE32F-NEXT: .LBB93_11: # %else14
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB83_12: # %cond.store3
+; RV64ZVE32F-NEXT: .LBB93_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB83_6
-; RV64ZVE32F-NEXT: .LBB83_13: # %cond.store5
+; RV64ZVE32F-NEXT: beqz a2, .LBB93_6
+; RV64ZVE32F-NEXT: .LBB93_13: # %cond.store5
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
@@ -8966,25 +14369,25 @@ define void @mscatter_baseidx_zext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB83_7
-; RV64ZVE32F-NEXT: .LBB83_14: # %cond.store7
+; RV64ZVE32F-NEXT: beqz a2, .LBB93_7
+; RV64ZVE32F-NEXT: .LBB93_14: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB83_8
-; RV64ZVE32F-NEXT: j .LBB83_9
-; RV64ZVE32F-NEXT: .LBB83_15: # %cond.store11
+; RV64ZVE32F-NEXT: bnez a2, .LBB93_8
+; RV64ZVE32F-NEXT: j .LBB93_9
+; RV64ZVE32F-NEXT: .LBB93_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB83_11
-; RV64ZVE32F-NEXT: .LBB83_16: # %cond.store13
+; RV64ZVE32F-NEXT: beqz a1, .LBB93_11
+; RV64ZVE32F-NEXT: .LBB93_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: andi a1, a1, 255
@@ -9008,13 +14411,13 @@ define void @mscatter_baseidx_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16
; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32V-NEXT: ret
;
-; RV64-LABEL: mscatter_baseidx_v8i16_v8f64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vsext.vf4 v16, v12
-; RV64-NEXT: vsll.vi v12, v16, 3
-; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_baseidx_v8i16_v8f64:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vsext.vf4 v16, v12
+; RV64V-NEXT: vsll.vi v12, v16, 3
+; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
+; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_baseidx_v8i16_v8f64:
; RV32ZVE32F: # %bb.0:
@@ -9026,78 +14429,78 @@ define void @mscatter_baseidx_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16
; RV32ZVE32F-NEXT: andi a2, a1, 1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
-; RV32ZVE32F-NEXT: bnez a2, .LBB84_9
+; RV32ZVE32F-NEXT: bnez a2, .LBB94_9
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, a1, 2
-; RV32ZVE32F-NEXT: bnez a0, .LBB84_10
-; RV32ZVE32F-NEXT: .LBB84_2: # %else2
+; RV32ZVE32F-NEXT: bnez a0, .LBB94_10
+; RV32ZVE32F-NEXT: .LBB94_2: # %else2
; RV32ZVE32F-NEXT: andi a0, a1, 4
-; RV32ZVE32F-NEXT: bnez a0, .LBB84_11
-; RV32ZVE32F-NEXT: .LBB84_3: # %else4
+; RV32ZVE32F-NEXT: bnez a0, .LBB94_11
+; RV32ZVE32F-NEXT: .LBB94_3: # %else4
; RV32ZVE32F-NEXT: andi a0, a1, 8
-; RV32ZVE32F-NEXT: bnez a0, .LBB84_12
-; RV32ZVE32F-NEXT: .LBB84_4: # %else6
+; RV32ZVE32F-NEXT: bnez a0, .LBB94_12
+; RV32ZVE32F-NEXT: .LBB94_4: # %else6
; RV32ZVE32F-NEXT: andi a0, a1, 16
-; RV32ZVE32F-NEXT: bnez a0, .LBB84_13
-; RV32ZVE32F-NEXT: .LBB84_5: # %else8
+; RV32ZVE32F-NEXT: bnez a0, .LBB94_13
+; RV32ZVE32F-NEXT: .LBB94_5: # %else8
; RV32ZVE32F-NEXT: andi a0, a1, 32
-; RV32ZVE32F-NEXT: bnez a0, .LBB84_14
-; RV32ZVE32F-NEXT: .LBB84_6: # %else10
+; RV32ZVE32F-NEXT: bnez a0, .LBB94_14
+; RV32ZVE32F-NEXT: .LBB94_6: # %else10
; RV32ZVE32F-NEXT: andi a0, a1, 64
-; RV32ZVE32F-NEXT: bnez a0, .LBB84_15
-; RV32ZVE32F-NEXT: .LBB84_7: # %else12
+; RV32ZVE32F-NEXT: bnez a0, .LBB94_15
+; RV32ZVE32F-NEXT: .LBB94_7: # %else12
; RV32ZVE32F-NEXT: andi a0, a1, -128
-; RV32ZVE32F-NEXT: bnez a0, .LBB84_16
-; RV32ZVE32F-NEXT: .LBB84_8: # %else14
+; RV32ZVE32F-NEXT: bnez a0, .LBB94_16
+; RV32ZVE32F-NEXT: .LBB94_8: # %else14
; RV32ZVE32F-NEXT: ret
-; RV32ZVE32F-NEXT: .LBB84_9: # %cond.store
+; RV32ZVE32F-NEXT: .LBB94_9: # %cond.store
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 2
-; RV32ZVE32F-NEXT: beqz a0, .LBB84_2
-; RV32ZVE32F-NEXT: .LBB84_10: # %cond.store1
+; RV32ZVE32F-NEXT: beqz a0, .LBB94_2
+; RV32ZVE32F-NEXT: .LBB94_10: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa1, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 4
-; RV32ZVE32F-NEXT: beqz a0, .LBB84_3
-; RV32ZVE32F-NEXT: .LBB84_11: # %cond.store3
+; RV32ZVE32F-NEXT: beqz a0, .LBB94_3
+; RV32ZVE32F-NEXT: .LBB94_11: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa2, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 8
-; RV32ZVE32F-NEXT: beqz a0, .LBB84_4
-; RV32ZVE32F-NEXT: .LBB84_12: # %cond.store5
+; RV32ZVE32F-NEXT: beqz a0, .LBB94_4
+; RV32ZVE32F-NEXT: .LBB94_12: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa3, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 16
-; RV32ZVE32F-NEXT: beqz a0, .LBB84_5
-; RV32ZVE32F-NEXT: .LBB84_13: # %cond.store7
+; RV32ZVE32F-NEXT: beqz a0, .LBB94_5
+; RV32ZVE32F-NEXT: .LBB94_13: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa4, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 32
-; RV32ZVE32F-NEXT: beqz a0, .LBB84_6
-; RV32ZVE32F-NEXT: .LBB84_14: # %cond.store9
+; RV32ZVE32F-NEXT: beqz a0, .LBB94_6
+; RV32ZVE32F-NEXT: .LBB94_14: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa5, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 64
-; RV32ZVE32F-NEXT: beqz a0, .LBB84_7
-; RV32ZVE32F-NEXT: .LBB84_15: # %cond.store11
+; RV32ZVE32F-NEXT: beqz a0, .LBB94_7
+; RV32ZVE32F-NEXT: .LBB94_15: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa6, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, -128
-; RV32ZVE32F-NEXT: beqz a0, .LBB84_8
-; RV32ZVE32F-NEXT: .LBB84_16: # %cond.store13
+; RV32ZVE32F-NEXT: beqz a0, .LBB94_8
+; RV32ZVE32F-NEXT: .LBB94_16: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
@@ -9109,16 +14512,16 @@ define void @mscatter_baseidx_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
-; RV64ZVE32F-NEXT: beqz a2, .LBB84_2
+; RV64ZVE32F-NEXT: beqz a2, .LBB94_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa0, 0(a2)
-; RV64ZVE32F-NEXT: .LBB84_2: # %else
+; RV64ZVE32F-NEXT: .LBB94_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB84_4
+; RV64ZVE32F-NEXT: beqz a2, .LBB94_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
@@ -9126,68 +14529,68 @@ define void @mscatter_baseidx_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa1, 0(a2)
-; RV64ZVE32F-NEXT: .LBB84_4: # %else2
+; RV64ZVE32F-NEXT: .LBB94_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB84_12
+; RV64ZVE32F-NEXT: bnez a2, .LBB94_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB84_13
-; RV64ZVE32F-NEXT: .LBB84_6: # %else6
+; RV64ZVE32F-NEXT: bnez a2, .LBB94_13
+; RV64ZVE32F-NEXT: .LBB94_6: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB84_14
-; RV64ZVE32F-NEXT: .LBB84_7: # %else8
+; RV64ZVE32F-NEXT: bnez a2, .LBB94_14
+; RV64ZVE32F-NEXT: .LBB94_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB84_9
-; RV64ZVE32F-NEXT: .LBB84_8: # %cond.store9
+; RV64ZVE32F-NEXT: beqz a2, .LBB94_9
+; RV64ZVE32F-NEXT: .LBB94_8: # %cond.store9
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa5, 0(a2)
-; RV64ZVE32F-NEXT: .LBB84_9: # %else10
+; RV64ZVE32F-NEXT: .LBB94_9: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB84_15
+; RV64ZVE32F-NEXT: bnez a2, .LBB94_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: bnez a1, .LBB84_16
-; RV64ZVE32F-NEXT: .LBB84_11: # %else14
+; RV64ZVE32F-NEXT: bnez a1, .LBB94_16
+; RV64ZVE32F-NEXT: .LBB94_11: # %else14
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB84_12: # %cond.store3
+; RV64ZVE32F-NEXT: .LBB94_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB84_6
-; RV64ZVE32F-NEXT: .LBB84_13: # %cond.store5
+; RV64ZVE32F-NEXT: beqz a2, .LBB94_6
+; RV64ZVE32F-NEXT: .LBB94_13: # %cond.store5
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB84_7
-; RV64ZVE32F-NEXT: .LBB84_14: # %cond.store7
+; RV64ZVE32F-NEXT: beqz a2, .LBB94_7
+; RV64ZVE32F-NEXT: .LBB94_14: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB84_8
-; RV64ZVE32F-NEXT: j .LBB84_9
-; RV64ZVE32F-NEXT: .LBB84_15: # %cond.store11
+; RV64ZVE32F-NEXT: bnez a2, .LBB94_8
+; RV64ZVE32F-NEXT: j .LBB94_9
+; RV64ZVE32F-NEXT: .LBB94_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB84_11
-; RV64ZVE32F-NEXT: .LBB84_16: # %cond.store13
+; RV64ZVE32F-NEXT: beqz a1, .LBB94_11
+; RV64ZVE32F-NEXT: .LBB94_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: slli a1, a1, 3
@@ -9209,13 +14612,13 @@ define void @mscatter_baseidx_sext_v8i16_v8f64(<8 x double> %val, ptr %base, <8
; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32V-NEXT: ret
;
-; RV64-LABEL: mscatter_baseidx_sext_v8i16_v8f64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vsext.vf4 v16, v12
-; RV64-NEXT: vsll.vi v12, v16, 3
-; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_baseidx_sext_v8i16_v8f64:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vsext.vf4 v16, v12
+; RV64V-NEXT: vsll.vi v12, v16, 3
+; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
+; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_baseidx_sext_v8i16_v8f64:
; RV32ZVE32F: # %bb.0:
@@ -9227,78 +14630,78 @@ define void @mscatter_baseidx_sext_v8i16_v8f64(<8 x double> %val, ptr %base, <8
; RV32ZVE32F-NEXT: andi a2, a1, 1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
-; RV32ZVE32F-NEXT: bnez a2, .LBB85_9
+; RV32ZVE32F-NEXT: bnez a2, .LBB95_9
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, a1, 2
-; RV32ZVE32F-NEXT: bnez a0, .LBB85_10
-; RV32ZVE32F-NEXT: .LBB85_2: # %else2
+; RV32ZVE32F-NEXT: bnez a0, .LBB95_10
+; RV32ZVE32F-NEXT: .LBB95_2: # %else2
; RV32ZVE32F-NEXT: andi a0, a1, 4
-; RV32ZVE32F-NEXT: bnez a0, .LBB85_11
-; RV32ZVE32F-NEXT: .LBB85_3: # %else4
+; RV32ZVE32F-NEXT: bnez a0, .LBB95_11
+; RV32ZVE32F-NEXT: .LBB95_3: # %else4
; RV32ZVE32F-NEXT: andi a0, a1, 8
-; RV32ZVE32F-NEXT: bnez a0, .LBB85_12
-; RV32ZVE32F-NEXT: .LBB85_4: # %else6
+; RV32ZVE32F-NEXT: bnez a0, .LBB95_12
+; RV32ZVE32F-NEXT: .LBB95_4: # %else6
; RV32ZVE32F-NEXT: andi a0, a1, 16
-; RV32ZVE32F-NEXT: bnez a0, .LBB85_13
-; RV32ZVE32F-NEXT: .LBB85_5: # %else8
+; RV32ZVE32F-NEXT: bnez a0, .LBB95_13
+; RV32ZVE32F-NEXT: .LBB95_5: # %else8
; RV32ZVE32F-NEXT: andi a0, a1, 32
-; RV32ZVE32F-NEXT: bnez a0, .LBB85_14
-; RV32ZVE32F-NEXT: .LBB85_6: # %else10
+; RV32ZVE32F-NEXT: bnez a0, .LBB95_14
+; RV32ZVE32F-NEXT: .LBB95_6: # %else10
; RV32ZVE32F-NEXT: andi a0, a1, 64
-; RV32ZVE32F-NEXT: bnez a0, .LBB85_15
-; RV32ZVE32F-NEXT: .LBB85_7: # %else12
+; RV32ZVE32F-NEXT: bnez a0, .LBB95_15
+; RV32ZVE32F-NEXT: .LBB95_7: # %else12
; RV32ZVE32F-NEXT: andi a0, a1, -128
-; RV32ZVE32F-NEXT: bnez a0, .LBB85_16
-; RV32ZVE32F-NEXT: .LBB85_8: # %else14
+; RV32ZVE32F-NEXT: bnez a0, .LBB95_16
+; RV32ZVE32F-NEXT: .LBB95_8: # %else14
; RV32ZVE32F-NEXT: ret
-; RV32ZVE32F-NEXT: .LBB85_9: # %cond.store
+; RV32ZVE32F-NEXT: .LBB95_9: # %cond.store
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 2
-; RV32ZVE32F-NEXT: beqz a0, .LBB85_2
-; RV32ZVE32F-NEXT: .LBB85_10: # %cond.store1
+; RV32ZVE32F-NEXT: beqz a0, .LBB95_2
+; RV32ZVE32F-NEXT: .LBB95_10: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa1, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 4
-; RV32ZVE32F-NEXT: beqz a0, .LBB85_3
-; RV32ZVE32F-NEXT: .LBB85_11: # %cond.store3
+; RV32ZVE32F-NEXT: beqz a0, .LBB95_3
+; RV32ZVE32F-NEXT: .LBB95_11: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa2, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 8
-; RV32ZVE32F-NEXT: beqz a0, .LBB85_4
-; RV32ZVE32F-NEXT: .LBB85_12: # %cond.store5
+; RV32ZVE32F-NEXT: beqz a0, .LBB95_4
+; RV32ZVE32F-NEXT: .LBB95_12: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa3, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 16
-; RV32ZVE32F-NEXT: beqz a0, .LBB85_5
-; RV32ZVE32F-NEXT: .LBB85_13: # %cond.store7
+; RV32ZVE32F-NEXT: beqz a0, .LBB95_5
+; RV32ZVE32F-NEXT: .LBB95_13: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa4, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 32
-; RV32ZVE32F-NEXT: beqz a0, .LBB85_6
-; RV32ZVE32F-NEXT: .LBB85_14: # %cond.store9
+; RV32ZVE32F-NEXT: beqz a0, .LBB95_6
+; RV32ZVE32F-NEXT: .LBB95_14: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa5, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 64
-; RV32ZVE32F-NEXT: beqz a0, .LBB85_7
-; RV32ZVE32F-NEXT: .LBB85_15: # %cond.store11
+; RV32ZVE32F-NEXT: beqz a0, .LBB95_7
+; RV32ZVE32F-NEXT: .LBB95_15: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa6, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, -128
-; RV32ZVE32F-NEXT: beqz a0, .LBB85_8
-; RV32ZVE32F-NEXT: .LBB85_16: # %cond.store13
+; RV32ZVE32F-NEXT: beqz a0, .LBB95_8
+; RV32ZVE32F-NEXT: .LBB95_16: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
@@ -9310,16 +14713,16 @@ define void @mscatter_baseidx_sext_v8i16_v8f64(<8 x double> %val, ptr %base, <8
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
-; RV64ZVE32F-NEXT: beqz a2, .LBB85_2
+; RV64ZVE32F-NEXT: beqz a2, .LBB95_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa0, 0(a2)
-; RV64ZVE32F-NEXT: .LBB85_2: # %else
+; RV64ZVE32F-NEXT: .LBB95_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB85_4
+; RV64ZVE32F-NEXT: beqz a2, .LBB95_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
@@ -9327,68 +14730,68 @@ define void @mscatter_baseidx_sext_v8i16_v8f64(<8 x double> %val, ptr %base, <8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa1, 0(a2)
-; RV64ZVE32F-NEXT: .LBB85_4: # %else2
+; RV64ZVE32F-NEXT: .LBB95_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB85_12
+; RV64ZVE32F-NEXT: bnez a2, .LBB95_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB85_13
-; RV64ZVE32F-NEXT: .LBB85_6: # %else6
+; RV64ZVE32F-NEXT: bnez a2, .LBB95_13
+; RV64ZVE32F-NEXT: .LBB95_6: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB85_14
-; RV64ZVE32F-NEXT: .LBB85_7: # %else8
+; RV64ZVE32F-NEXT: bnez a2, .LBB95_14
+; RV64ZVE32F-NEXT: .LBB95_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB85_9
-; RV64ZVE32F-NEXT: .LBB85_8: # %cond.store9
+; RV64ZVE32F-NEXT: beqz a2, .LBB95_9
+; RV64ZVE32F-NEXT: .LBB95_8: # %cond.store9
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa5, 0(a2)
-; RV64ZVE32F-NEXT: .LBB85_9: # %else10
+; RV64ZVE32F-NEXT: .LBB95_9: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB85_15
+; RV64ZVE32F-NEXT: bnez a2, .LBB95_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: bnez a1, .LBB85_16
-; RV64ZVE32F-NEXT: .LBB85_11: # %else14
+; RV64ZVE32F-NEXT: bnez a1, .LBB95_16
+; RV64ZVE32F-NEXT: .LBB95_11: # %else14
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB85_12: # %cond.store3
+; RV64ZVE32F-NEXT: .LBB95_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB85_6
-; RV64ZVE32F-NEXT: .LBB85_13: # %cond.store5
+; RV64ZVE32F-NEXT: beqz a2, .LBB95_6
+; RV64ZVE32F-NEXT: .LBB95_13: # %cond.store5
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB85_7
-; RV64ZVE32F-NEXT: .LBB85_14: # %cond.store7
+; RV64ZVE32F-NEXT: beqz a2, .LBB95_7
+; RV64ZVE32F-NEXT: .LBB95_14: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB85_8
-; RV64ZVE32F-NEXT: j .LBB85_9
-; RV64ZVE32F-NEXT: .LBB85_15: # %cond.store11
+; RV64ZVE32F-NEXT: bnez a2, .LBB95_8
+; RV64ZVE32F-NEXT: j .LBB95_9
+; RV64ZVE32F-NEXT: .LBB95_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB85_11
-; RV64ZVE32F-NEXT: .LBB85_16: # %cond.store13
+; RV64ZVE32F-NEXT: beqz a1, .LBB95_11
+; RV64ZVE32F-NEXT: .LBB95_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: slli a1, a1, 3
@@ -9411,14 +14814,14 @@ define void @mscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, ptr %base, <8
; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32V-NEXT: ret
;
-; RV64-LABEL: mscatter_baseidx_zext_v8i16_v8f64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV64-NEXT: vzext.vf2 v14, v12
-; RV64-NEXT: vsll.vi v12, v14, 3
-; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; RV64-NEXT: vsoxei32.v v8, (a0), v12, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_baseidx_zext_v8i16_v8f64:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64V-NEXT: vzext.vf2 v14, v12
+; RV64V-NEXT: vsll.vi v12, v14, 3
+; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; RV64V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
+; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8f64:
; RV32ZVE32F: # %bb.0:
@@ -9430,78 +14833,78 @@ define void @mscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, ptr %base, <8
; RV32ZVE32F-NEXT: andi a2, a1, 1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
-; RV32ZVE32F-NEXT: bnez a2, .LBB86_9
+; RV32ZVE32F-NEXT: bnez a2, .LBB96_9
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, a1, 2
-; RV32ZVE32F-NEXT: bnez a0, .LBB86_10
-; RV32ZVE32F-NEXT: .LBB86_2: # %else2
+; RV32ZVE32F-NEXT: bnez a0, .LBB96_10
+; RV32ZVE32F-NEXT: .LBB96_2: # %else2
; RV32ZVE32F-NEXT: andi a0, a1, 4
-; RV32ZVE32F-NEXT: bnez a0, .LBB86_11
-; RV32ZVE32F-NEXT: .LBB86_3: # %else4
+; RV32ZVE32F-NEXT: bnez a0, .LBB96_11
+; RV32ZVE32F-NEXT: .LBB96_3: # %else4
; RV32ZVE32F-NEXT: andi a0, a1, 8
-; RV32ZVE32F-NEXT: bnez a0, .LBB86_12
-; RV32ZVE32F-NEXT: .LBB86_4: # %else6
+; RV32ZVE32F-NEXT: bnez a0, .LBB96_12
+; RV32ZVE32F-NEXT: .LBB96_4: # %else6
; RV32ZVE32F-NEXT: andi a0, a1, 16
-; RV32ZVE32F-NEXT: bnez a0, .LBB86_13
-; RV32ZVE32F-NEXT: .LBB86_5: # %else8
+; RV32ZVE32F-NEXT: bnez a0, .LBB96_13
+; RV32ZVE32F-NEXT: .LBB96_5: # %else8
; RV32ZVE32F-NEXT: andi a0, a1, 32
-; RV32ZVE32F-NEXT: bnez a0, .LBB86_14
-; RV32ZVE32F-NEXT: .LBB86_6: # %else10
+; RV32ZVE32F-NEXT: bnez a0, .LBB96_14
+; RV32ZVE32F-NEXT: .LBB96_6: # %else10
; RV32ZVE32F-NEXT: andi a0, a1, 64
-; RV32ZVE32F-NEXT: bnez a0, .LBB86_15
-; RV32ZVE32F-NEXT: .LBB86_7: # %else12
+; RV32ZVE32F-NEXT: bnez a0, .LBB96_15
+; RV32ZVE32F-NEXT: .LBB96_7: # %else12
; RV32ZVE32F-NEXT: andi a0, a1, -128
-; RV32ZVE32F-NEXT: bnez a0, .LBB86_16
-; RV32ZVE32F-NEXT: .LBB86_8: # %else14
+; RV32ZVE32F-NEXT: bnez a0, .LBB96_16
+; RV32ZVE32F-NEXT: .LBB96_8: # %else14
; RV32ZVE32F-NEXT: ret
-; RV32ZVE32F-NEXT: .LBB86_9: # %cond.store
+; RV32ZVE32F-NEXT: .LBB96_9: # %cond.store
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 2
-; RV32ZVE32F-NEXT: beqz a0, .LBB86_2
-; RV32ZVE32F-NEXT: .LBB86_10: # %cond.store1
+; RV32ZVE32F-NEXT: beqz a0, .LBB96_2
+; RV32ZVE32F-NEXT: .LBB96_10: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa1, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 4
-; RV32ZVE32F-NEXT: beqz a0, .LBB86_3
-; RV32ZVE32F-NEXT: .LBB86_11: # %cond.store3
+; RV32ZVE32F-NEXT: beqz a0, .LBB96_3
+; RV32ZVE32F-NEXT: .LBB96_11: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa2, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 8
-; RV32ZVE32F-NEXT: beqz a0, .LBB86_4
-; RV32ZVE32F-NEXT: .LBB86_12: # %cond.store5
+; RV32ZVE32F-NEXT: beqz a0, .LBB96_4
+; RV32ZVE32F-NEXT: .LBB96_12: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa3, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 16
-; RV32ZVE32F-NEXT: beqz a0, .LBB86_5
-; RV32ZVE32F-NEXT: .LBB86_13: # %cond.store7
+; RV32ZVE32F-NEXT: beqz a0, .LBB96_5
+; RV32ZVE32F-NEXT: .LBB96_13: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa4, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 32
-; RV32ZVE32F-NEXT: beqz a0, .LBB86_6
-; RV32ZVE32F-NEXT: .LBB86_14: # %cond.store9
+; RV32ZVE32F-NEXT: beqz a0, .LBB96_6
+; RV32ZVE32F-NEXT: .LBB96_14: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa5, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 64
-; RV32ZVE32F-NEXT: beqz a0, .LBB86_7
-; RV32ZVE32F-NEXT: .LBB86_15: # %cond.store11
+; RV32ZVE32F-NEXT: beqz a0, .LBB96_7
+; RV32ZVE32F-NEXT: .LBB96_15: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa6, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, -128
-; RV32ZVE32F-NEXT: beqz a0, .LBB86_8
-; RV32ZVE32F-NEXT: .LBB86_16: # %cond.store13
+; RV32ZVE32F-NEXT: beqz a0, .LBB96_8
+; RV32ZVE32F-NEXT: .LBB96_16: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
@@ -9515,7 +14918,7 @@ define void @mscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, ptr %base, <8
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: addiw a1, a1, -1
-; RV64ZVE32F-NEXT: beqz a3, .LBB86_2
+; RV64ZVE32F-NEXT: beqz a3, .LBB96_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
@@ -9523,9 +14926,9 @@ define void @mscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, ptr %base, <8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: fsd fa0, 0(a3)
-; RV64ZVE32F-NEXT: .LBB86_2: # %else
+; RV64ZVE32F-NEXT: .LBB96_2: # %else
; RV64ZVE32F-NEXT: andi a3, a2, 2
-; RV64ZVE32F-NEXT: beqz a3, .LBB86_4
+; RV64ZVE32F-NEXT: beqz a3, .LBB96_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
@@ -9534,47 +14937,47 @@ define void @mscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, ptr %base, <8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: fsd fa1, 0(a3)
-; RV64ZVE32F-NEXT: .LBB86_4: # %else2
+; RV64ZVE32F-NEXT: .LBB96_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a3, a2, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a3, .LBB86_12
+; RV64ZVE32F-NEXT: bnez a3, .LBB96_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a3, a2, 8
-; RV64ZVE32F-NEXT: bnez a3, .LBB86_13
-; RV64ZVE32F-NEXT: .LBB86_6: # %else6
+; RV64ZVE32F-NEXT: bnez a3, .LBB96_13
+; RV64ZVE32F-NEXT: .LBB96_6: # %else6
; RV64ZVE32F-NEXT: andi a3, a2, 16
-; RV64ZVE32F-NEXT: bnez a3, .LBB86_14
-; RV64ZVE32F-NEXT: .LBB86_7: # %else8
+; RV64ZVE32F-NEXT: bnez a3, .LBB96_14
+; RV64ZVE32F-NEXT: .LBB96_7: # %else8
; RV64ZVE32F-NEXT: andi a3, a2, 32
-; RV64ZVE32F-NEXT: beqz a3, .LBB86_9
-; RV64ZVE32F-NEXT: .LBB86_8: # %cond.store9
+; RV64ZVE32F-NEXT: beqz a3, .LBB96_9
+; RV64ZVE32F-NEXT: .LBB96_8: # %cond.store9
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: and a3, a3, a1
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: fsd fa5, 0(a3)
-; RV64ZVE32F-NEXT: .LBB86_9: # %else10
+; RV64ZVE32F-NEXT: .LBB96_9: # %else10
; RV64ZVE32F-NEXT: andi a3, a2, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
-; RV64ZVE32F-NEXT: bnez a3, .LBB86_15
+; RV64ZVE32F-NEXT: bnez a3, .LBB96_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a2, a2, -128
-; RV64ZVE32F-NEXT: bnez a2, .LBB86_16
-; RV64ZVE32F-NEXT: .LBB86_11: # %else14
+; RV64ZVE32F-NEXT: bnez a2, .LBB96_16
+; RV64ZVE32F-NEXT: .LBB96_11: # %else14
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB86_12: # %cond.store3
+; RV64ZVE32F-NEXT: .LBB96_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: and a3, a3, a1
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: fsd fa2, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 8
-; RV64ZVE32F-NEXT: beqz a3, .LBB86_6
-; RV64ZVE32F-NEXT: .LBB86_13: # %cond.store5
+; RV64ZVE32F-NEXT: beqz a3, .LBB96_6
+; RV64ZVE32F-NEXT: .LBB96_13: # %cond.store5
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: and a3, a3, a1
@@ -9582,25 +14985,25 @@ define void @mscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, ptr %base, <8
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: fsd fa3, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 16
-; RV64ZVE32F-NEXT: beqz a3, .LBB86_7
-; RV64ZVE32F-NEXT: .LBB86_14: # %cond.store7
+; RV64ZVE32F-NEXT: beqz a3, .LBB96_7
+; RV64ZVE32F-NEXT: .LBB96_14: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: and a3, a3, a1
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: fsd fa4, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 32
-; RV64ZVE32F-NEXT: bnez a3, .LBB86_8
-; RV64ZVE32F-NEXT: j .LBB86_9
-; RV64ZVE32F-NEXT: .LBB86_15: # %cond.store11
+; RV64ZVE32F-NEXT: bnez a3, .LBB96_8
+; RV64ZVE32F-NEXT: j .LBB96_9
+; RV64ZVE32F-NEXT: .LBB96_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: and a3, a3, a1
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: fsd fa6, 0(a3)
; RV64ZVE32F-NEXT: andi a2, a2, -128
-; RV64ZVE32F-NEXT: beqz a2, .LBB86_11
-; RV64ZVE32F-NEXT: .LBB86_16: # %cond.store13
+; RV64ZVE32F-NEXT: beqz a2, .LBB96_11
+; RV64ZVE32F-NEXT: .LBB96_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: and a1, a2, a1
@@ -9623,13 +15026,13 @@ define void @mscatter_baseidx_v8i32_v8f64(<8 x double> %val, ptr %base, <8 x i32
; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32V-NEXT: ret
;
-; RV64-LABEL: mscatter_baseidx_v8i32_v8f64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vsext.vf2 v16, v12
-; RV64-NEXT: vsll.vi v12, v16, 3
-; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_baseidx_v8i32_v8f64:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vsext.vf2 v16, v12
+; RV64V-NEXT: vsll.vi v12, v16, 3
+; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
+; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_baseidx_v8i32_v8f64:
; RV32ZVE32F: # %bb.0:
@@ -9640,78 +15043,78 @@ define void @mscatter_baseidx_v8i32_v8f64(<8 x double> %val, ptr %base, <8 x i32
; RV32ZVE32F-NEXT: andi a2, a1, 1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
-; RV32ZVE32F-NEXT: bnez a2, .LBB87_9
+; RV32ZVE32F-NEXT: bnez a2, .LBB97_9
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, a1, 2
-; RV32ZVE32F-NEXT: bnez a0, .LBB87_10
-; RV32ZVE32F-NEXT: .LBB87_2: # %else2
+; RV32ZVE32F-NEXT: bnez a0, .LBB97_10
+; RV32ZVE32F-NEXT: .LBB97_2: # %else2
; RV32ZVE32F-NEXT: andi a0, a1, 4
-; RV32ZVE32F-NEXT: bnez a0, .LBB87_11
-; RV32ZVE32F-NEXT: .LBB87_3: # %else4
+; RV32ZVE32F-NEXT: bnez a0, .LBB97_11
+; RV32ZVE32F-NEXT: .LBB97_3: # %else4
; RV32ZVE32F-NEXT: andi a0, a1, 8
-; RV32ZVE32F-NEXT: bnez a0, .LBB87_12
-; RV32ZVE32F-NEXT: .LBB87_4: # %else6
+; RV32ZVE32F-NEXT: bnez a0, .LBB97_12
+; RV32ZVE32F-NEXT: .LBB97_4: # %else6
; RV32ZVE32F-NEXT: andi a0, a1, 16
-; RV32ZVE32F-NEXT: bnez a0, .LBB87_13
-; RV32ZVE32F-NEXT: .LBB87_5: # %else8
+; RV32ZVE32F-NEXT: bnez a0, .LBB97_13
+; RV32ZVE32F-NEXT: .LBB97_5: # %else8
; RV32ZVE32F-NEXT: andi a0, a1, 32
-; RV32ZVE32F-NEXT: bnez a0, .LBB87_14
-; RV32ZVE32F-NEXT: .LBB87_6: # %else10
+; RV32ZVE32F-NEXT: bnez a0, .LBB97_14
+; RV32ZVE32F-NEXT: .LBB97_6: # %else10
; RV32ZVE32F-NEXT: andi a0, a1, 64
-; RV32ZVE32F-NEXT: bnez a0, .LBB87_15
-; RV32ZVE32F-NEXT: .LBB87_7: # %else12
+; RV32ZVE32F-NEXT: bnez a0, .LBB97_15
+; RV32ZVE32F-NEXT: .LBB97_7: # %else12
; RV32ZVE32F-NEXT: andi a0, a1, -128
-; RV32ZVE32F-NEXT: bnez a0, .LBB87_16
-; RV32ZVE32F-NEXT: .LBB87_8: # %else14
+; RV32ZVE32F-NEXT: bnez a0, .LBB97_16
+; RV32ZVE32F-NEXT: .LBB97_8: # %else14
; RV32ZVE32F-NEXT: ret
-; RV32ZVE32F-NEXT: .LBB87_9: # %cond.store
+; RV32ZVE32F-NEXT: .LBB97_9: # %cond.store
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 2
-; RV32ZVE32F-NEXT: beqz a0, .LBB87_2
-; RV32ZVE32F-NEXT: .LBB87_10: # %cond.store1
+; RV32ZVE32F-NEXT: beqz a0, .LBB97_2
+; RV32ZVE32F-NEXT: .LBB97_10: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa1, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 4
-; RV32ZVE32F-NEXT: beqz a0, .LBB87_3
-; RV32ZVE32F-NEXT: .LBB87_11: # %cond.store3
+; RV32ZVE32F-NEXT: beqz a0, .LBB97_3
+; RV32ZVE32F-NEXT: .LBB97_11: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa2, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 8
-; RV32ZVE32F-NEXT: beqz a0, .LBB87_4
-; RV32ZVE32F-NEXT: .LBB87_12: # %cond.store5
+; RV32ZVE32F-NEXT: beqz a0, .LBB97_4
+; RV32ZVE32F-NEXT: .LBB97_12: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa3, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 16
-; RV32ZVE32F-NEXT: beqz a0, .LBB87_5
-; RV32ZVE32F-NEXT: .LBB87_13: # %cond.store7
+; RV32ZVE32F-NEXT: beqz a0, .LBB97_5
+; RV32ZVE32F-NEXT: .LBB97_13: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa4, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 32
-; RV32ZVE32F-NEXT: beqz a0, .LBB87_6
-; RV32ZVE32F-NEXT: .LBB87_14: # %cond.store9
+; RV32ZVE32F-NEXT: beqz a0, .LBB97_6
+; RV32ZVE32F-NEXT: .LBB97_14: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa5, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 64
-; RV32ZVE32F-NEXT: beqz a0, .LBB87_7
-; RV32ZVE32F-NEXT: .LBB87_15: # %cond.store11
+; RV32ZVE32F-NEXT: beqz a0, .LBB97_7
+; RV32ZVE32F-NEXT: .LBB97_15: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa6, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, -128
-; RV32ZVE32F-NEXT: beqz a0, .LBB87_8
-; RV32ZVE32F-NEXT: .LBB87_16: # %cond.store13
+; RV32ZVE32F-NEXT: beqz a0, .LBB97_8
+; RV32ZVE32F-NEXT: .LBB97_16: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
@@ -9723,16 +15126,16 @@ define void @mscatter_baseidx_v8i32_v8f64(<8 x double> %val, ptr %base, <8 x i32
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
-; RV64ZVE32F-NEXT: beqz a2, .LBB87_2
+; RV64ZVE32F-NEXT: beqz a2, .LBB97_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa0, 0(a2)
-; RV64ZVE32F-NEXT: .LBB87_2: # %else
+; RV64ZVE32F-NEXT: .LBB97_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB87_4
+; RV64ZVE32F-NEXT: beqz a2, .LBB97_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
@@ -9740,68 +15143,68 @@ define void @mscatter_baseidx_v8i32_v8f64(<8 x double> %val, ptr %base, <8 x i32
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa1, 0(a2)
-; RV64ZVE32F-NEXT: .LBB87_4: # %else2
+; RV64ZVE32F-NEXT: .LBB97_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB87_12
+; RV64ZVE32F-NEXT: bnez a2, .LBB97_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB87_13
-; RV64ZVE32F-NEXT: .LBB87_6: # %else6
+; RV64ZVE32F-NEXT: bnez a2, .LBB97_13
+; RV64ZVE32F-NEXT: .LBB97_6: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB87_14
-; RV64ZVE32F-NEXT: .LBB87_7: # %else8
+; RV64ZVE32F-NEXT: bnez a2, .LBB97_14
+; RV64ZVE32F-NEXT: .LBB97_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB87_9
-; RV64ZVE32F-NEXT: .LBB87_8: # %cond.store9
+; RV64ZVE32F-NEXT: beqz a2, .LBB97_9
+; RV64ZVE32F-NEXT: .LBB97_8: # %cond.store9
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa5, 0(a2)
-; RV64ZVE32F-NEXT: .LBB87_9: # %else10
+; RV64ZVE32F-NEXT: .LBB97_9: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB87_15
+; RV64ZVE32F-NEXT: bnez a2, .LBB97_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: bnez a1, .LBB87_16
-; RV64ZVE32F-NEXT: .LBB87_11: # %else14
+; RV64ZVE32F-NEXT: bnez a1, .LBB97_16
+; RV64ZVE32F-NEXT: .LBB97_11: # %else14
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB87_12: # %cond.store3
+; RV64ZVE32F-NEXT: .LBB97_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB87_6
-; RV64ZVE32F-NEXT: .LBB87_13: # %cond.store5
+; RV64ZVE32F-NEXT: beqz a2, .LBB97_6
+; RV64ZVE32F-NEXT: .LBB97_13: # %cond.store5
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB87_7
-; RV64ZVE32F-NEXT: .LBB87_14: # %cond.store7
+; RV64ZVE32F-NEXT: beqz a2, .LBB97_7
+; RV64ZVE32F-NEXT: .LBB97_14: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB87_8
-; RV64ZVE32F-NEXT: j .LBB87_9
-; RV64ZVE32F-NEXT: .LBB87_15: # %cond.store11
+; RV64ZVE32F-NEXT: bnez a2, .LBB97_8
+; RV64ZVE32F-NEXT: j .LBB97_9
+; RV64ZVE32F-NEXT: .LBB97_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB87_11
-; RV64ZVE32F-NEXT: .LBB87_16: # %cond.store13
+; RV64ZVE32F-NEXT: beqz a1, .LBB97_11
+; RV64ZVE32F-NEXT: .LBB97_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: slli a1, a1, 3
@@ -9822,13 +15225,13 @@ define void @mscatter_baseidx_sext_v8i32_v8f64(<8 x double> %val, ptr %base, <8
; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32V-NEXT: ret
;
-; RV64-LABEL: mscatter_baseidx_sext_v8i32_v8f64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vsext.vf2 v16, v12
-; RV64-NEXT: vsll.vi v12, v16, 3
-; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_baseidx_sext_v8i32_v8f64:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vsext.vf2 v16, v12
+; RV64V-NEXT: vsll.vi v12, v16, 3
+; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
+; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_baseidx_sext_v8i32_v8f64:
; RV32ZVE32F: # %bb.0:
@@ -9839,78 +15242,78 @@ define void @mscatter_baseidx_sext_v8i32_v8f64(<8 x double> %val, ptr %base, <8
; RV32ZVE32F-NEXT: andi a2, a1, 1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
-; RV32ZVE32F-NEXT: bnez a2, .LBB88_9
+; RV32ZVE32F-NEXT: bnez a2, .LBB98_9
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, a1, 2
-; RV32ZVE32F-NEXT: bnez a0, .LBB88_10
-; RV32ZVE32F-NEXT: .LBB88_2: # %else2
+; RV32ZVE32F-NEXT: bnez a0, .LBB98_10
+; RV32ZVE32F-NEXT: .LBB98_2: # %else2
; RV32ZVE32F-NEXT: andi a0, a1, 4
-; RV32ZVE32F-NEXT: bnez a0, .LBB88_11
-; RV32ZVE32F-NEXT: .LBB88_3: # %else4
+; RV32ZVE32F-NEXT: bnez a0, .LBB98_11
+; RV32ZVE32F-NEXT: .LBB98_3: # %else4
; RV32ZVE32F-NEXT: andi a0, a1, 8
-; RV32ZVE32F-NEXT: bnez a0, .LBB88_12
-; RV32ZVE32F-NEXT: .LBB88_4: # %else6
+; RV32ZVE32F-NEXT: bnez a0, .LBB98_12
+; RV32ZVE32F-NEXT: .LBB98_4: # %else6
; RV32ZVE32F-NEXT: andi a0, a1, 16
-; RV32ZVE32F-NEXT: bnez a0, .LBB88_13
-; RV32ZVE32F-NEXT: .LBB88_5: # %else8
+; RV32ZVE32F-NEXT: bnez a0, .LBB98_13
+; RV32ZVE32F-NEXT: .LBB98_5: # %else8
; RV32ZVE32F-NEXT: andi a0, a1, 32
-; RV32ZVE32F-NEXT: bnez a0, .LBB88_14
-; RV32ZVE32F-NEXT: .LBB88_6: # %else10
+; RV32ZVE32F-NEXT: bnez a0, .LBB98_14
+; RV32ZVE32F-NEXT: .LBB98_6: # %else10
; RV32ZVE32F-NEXT: andi a0, a1, 64
-; RV32ZVE32F-NEXT: bnez a0, .LBB88_15
-; RV32ZVE32F-NEXT: .LBB88_7: # %else12
+; RV32ZVE32F-NEXT: bnez a0, .LBB98_15
+; RV32ZVE32F-NEXT: .LBB98_7: # %else12
; RV32ZVE32F-NEXT: andi a0, a1, -128
-; RV32ZVE32F-NEXT: bnez a0, .LBB88_16
-; RV32ZVE32F-NEXT: .LBB88_8: # %else14
+; RV32ZVE32F-NEXT: bnez a0, .LBB98_16
+; RV32ZVE32F-NEXT: .LBB98_8: # %else14
; RV32ZVE32F-NEXT: ret
-; RV32ZVE32F-NEXT: .LBB88_9: # %cond.store
+; RV32ZVE32F-NEXT: .LBB98_9: # %cond.store
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 2
-; RV32ZVE32F-NEXT: beqz a0, .LBB88_2
-; RV32ZVE32F-NEXT: .LBB88_10: # %cond.store1
+; RV32ZVE32F-NEXT: beqz a0, .LBB98_2
+; RV32ZVE32F-NEXT: .LBB98_10: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa1, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 4
-; RV32ZVE32F-NEXT: beqz a0, .LBB88_3
-; RV32ZVE32F-NEXT: .LBB88_11: # %cond.store3
+; RV32ZVE32F-NEXT: beqz a0, .LBB98_3
+; RV32ZVE32F-NEXT: .LBB98_11: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa2, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 8
-; RV32ZVE32F-NEXT: beqz a0, .LBB88_4
-; RV32ZVE32F-NEXT: .LBB88_12: # %cond.store5
+; RV32ZVE32F-NEXT: beqz a0, .LBB98_4
+; RV32ZVE32F-NEXT: .LBB98_12: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa3, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 16
-; RV32ZVE32F-NEXT: beqz a0, .LBB88_5
-; RV32ZVE32F-NEXT: .LBB88_13: # %cond.store7
+; RV32ZVE32F-NEXT: beqz a0, .LBB98_5
+; RV32ZVE32F-NEXT: .LBB98_13: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa4, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 32
-; RV32ZVE32F-NEXT: beqz a0, .LBB88_6
-; RV32ZVE32F-NEXT: .LBB88_14: # %cond.store9
+; RV32ZVE32F-NEXT: beqz a0, .LBB98_6
+; RV32ZVE32F-NEXT: .LBB98_14: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa5, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 64
-; RV32ZVE32F-NEXT: beqz a0, .LBB88_7
-; RV32ZVE32F-NEXT: .LBB88_15: # %cond.store11
+; RV32ZVE32F-NEXT: beqz a0, .LBB98_7
+; RV32ZVE32F-NEXT: .LBB98_15: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa6, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, -128
-; RV32ZVE32F-NEXT: beqz a0, .LBB88_8
-; RV32ZVE32F-NEXT: .LBB88_16: # %cond.store13
+; RV32ZVE32F-NEXT: beqz a0, .LBB98_8
+; RV32ZVE32F-NEXT: .LBB98_16: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
@@ -9922,16 +15325,16 @@ define void @mscatter_baseidx_sext_v8i32_v8f64(<8 x double> %val, ptr %base, <8
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
-; RV64ZVE32F-NEXT: beqz a2, .LBB88_2
+; RV64ZVE32F-NEXT: beqz a2, .LBB98_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa0, 0(a2)
-; RV64ZVE32F-NEXT: .LBB88_2: # %else
+; RV64ZVE32F-NEXT: .LBB98_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB88_4
+; RV64ZVE32F-NEXT: beqz a2, .LBB98_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
@@ -9939,68 +15342,68 @@ define void @mscatter_baseidx_sext_v8i32_v8f64(<8 x double> %val, ptr %base, <8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa1, 0(a2)
-; RV64ZVE32F-NEXT: .LBB88_4: # %else2
+; RV64ZVE32F-NEXT: .LBB98_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB88_12
+; RV64ZVE32F-NEXT: bnez a2, .LBB98_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB88_13
-; RV64ZVE32F-NEXT: .LBB88_6: # %else6
+; RV64ZVE32F-NEXT: bnez a2, .LBB98_13
+; RV64ZVE32F-NEXT: .LBB98_6: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB88_14
-; RV64ZVE32F-NEXT: .LBB88_7: # %else8
+; RV64ZVE32F-NEXT: bnez a2, .LBB98_14
+; RV64ZVE32F-NEXT: .LBB98_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB88_9
-; RV64ZVE32F-NEXT: .LBB88_8: # %cond.store9
+; RV64ZVE32F-NEXT: beqz a2, .LBB98_9
+; RV64ZVE32F-NEXT: .LBB98_8: # %cond.store9
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa5, 0(a2)
-; RV64ZVE32F-NEXT: .LBB88_9: # %else10
+; RV64ZVE32F-NEXT: .LBB98_9: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB88_15
+; RV64ZVE32F-NEXT: bnez a2, .LBB98_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: bnez a1, .LBB88_16
-; RV64ZVE32F-NEXT: .LBB88_11: # %else14
+; RV64ZVE32F-NEXT: bnez a1, .LBB98_16
+; RV64ZVE32F-NEXT: .LBB98_11: # %else14
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB88_12: # %cond.store3
+; RV64ZVE32F-NEXT: .LBB98_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB88_6
-; RV64ZVE32F-NEXT: .LBB88_13: # %cond.store5
+; RV64ZVE32F-NEXT: beqz a2, .LBB98_6
+; RV64ZVE32F-NEXT: .LBB98_13: # %cond.store5
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB88_7
-; RV64ZVE32F-NEXT: .LBB88_14: # %cond.store7
+; RV64ZVE32F-NEXT: beqz a2, .LBB98_7
+; RV64ZVE32F-NEXT: .LBB98_14: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB88_8
-; RV64ZVE32F-NEXT: j .LBB88_9
-; RV64ZVE32F-NEXT: .LBB88_15: # %cond.store11
+; RV64ZVE32F-NEXT: bnez a2, .LBB98_8
+; RV64ZVE32F-NEXT: j .LBB98_9
+; RV64ZVE32F-NEXT: .LBB98_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB88_11
-; RV64ZVE32F-NEXT: .LBB88_16: # %cond.store13
+; RV64ZVE32F-NEXT: beqz a1, .LBB98_11
+; RV64ZVE32F-NEXT: .LBB98_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: slli a1, a1, 3
@@ -10022,13 +15425,13 @@ define void @mscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, ptr %base, <8
; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32V-NEXT: ret
;
-; RV64-LABEL: mscatter_baseidx_zext_v8i32_v8f64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vzext.vf2 v16, v12
-; RV64-NEXT: vsll.vi v12, v16, 3
-; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_baseidx_zext_v8i32_v8f64:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vzext.vf2 v16, v12
+; RV64V-NEXT: vsll.vi v12, v16, 3
+; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
+; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_baseidx_zext_v8i32_v8f64:
; RV32ZVE32F: # %bb.0:
@@ -10039,78 +15442,78 @@ define void @mscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, ptr %base, <8
; RV32ZVE32F-NEXT: andi a2, a1, 1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
-; RV32ZVE32F-NEXT: bnez a2, .LBB89_9
+; RV32ZVE32F-NEXT: bnez a2, .LBB99_9
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, a1, 2
-; RV32ZVE32F-NEXT: bnez a0, .LBB89_10
-; RV32ZVE32F-NEXT: .LBB89_2: # %else2
+; RV32ZVE32F-NEXT: bnez a0, .LBB99_10
+; RV32ZVE32F-NEXT: .LBB99_2: # %else2
; RV32ZVE32F-NEXT: andi a0, a1, 4
-; RV32ZVE32F-NEXT: bnez a0, .LBB89_11
-; RV32ZVE32F-NEXT: .LBB89_3: # %else4
+; RV32ZVE32F-NEXT: bnez a0, .LBB99_11
+; RV32ZVE32F-NEXT: .LBB99_3: # %else4
; RV32ZVE32F-NEXT: andi a0, a1, 8
-; RV32ZVE32F-NEXT: bnez a0, .LBB89_12
-; RV32ZVE32F-NEXT: .LBB89_4: # %else6
+; RV32ZVE32F-NEXT: bnez a0, .LBB99_12
+; RV32ZVE32F-NEXT: .LBB99_4: # %else6
; RV32ZVE32F-NEXT: andi a0, a1, 16
-; RV32ZVE32F-NEXT: bnez a0, .LBB89_13
-; RV32ZVE32F-NEXT: .LBB89_5: # %else8
+; RV32ZVE32F-NEXT: bnez a0, .LBB99_13
+; RV32ZVE32F-NEXT: .LBB99_5: # %else8
; RV32ZVE32F-NEXT: andi a0, a1, 32
-; RV32ZVE32F-NEXT: bnez a0, .LBB89_14
-; RV32ZVE32F-NEXT: .LBB89_6: # %else10
+; RV32ZVE32F-NEXT: bnez a0, .LBB99_14
+; RV32ZVE32F-NEXT: .LBB99_6: # %else10
; RV32ZVE32F-NEXT: andi a0, a1, 64
-; RV32ZVE32F-NEXT: bnez a0, .LBB89_15
-; RV32ZVE32F-NEXT: .LBB89_7: # %else12
+; RV32ZVE32F-NEXT: bnez a0, .LBB99_15
+; RV32ZVE32F-NEXT: .LBB99_7: # %else12
; RV32ZVE32F-NEXT: andi a0, a1, -128
-; RV32ZVE32F-NEXT: bnez a0, .LBB89_16
-; RV32ZVE32F-NEXT: .LBB89_8: # %else14
+; RV32ZVE32F-NEXT: bnez a0, .LBB99_16
+; RV32ZVE32F-NEXT: .LBB99_8: # %else14
; RV32ZVE32F-NEXT: ret
-; RV32ZVE32F-NEXT: .LBB89_9: # %cond.store
+; RV32ZVE32F-NEXT: .LBB99_9: # %cond.store
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 2
-; RV32ZVE32F-NEXT: beqz a0, .LBB89_2
-; RV32ZVE32F-NEXT: .LBB89_10: # %cond.store1
+; RV32ZVE32F-NEXT: beqz a0, .LBB99_2
+; RV32ZVE32F-NEXT: .LBB99_10: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa1, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 4
-; RV32ZVE32F-NEXT: beqz a0, .LBB89_3
-; RV32ZVE32F-NEXT: .LBB89_11: # %cond.store3
+; RV32ZVE32F-NEXT: beqz a0, .LBB99_3
+; RV32ZVE32F-NEXT: .LBB99_11: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa2, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 8
-; RV32ZVE32F-NEXT: beqz a0, .LBB89_4
-; RV32ZVE32F-NEXT: .LBB89_12: # %cond.store5
+; RV32ZVE32F-NEXT: beqz a0, .LBB99_4
+; RV32ZVE32F-NEXT: .LBB99_12: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa3, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 16
-; RV32ZVE32F-NEXT: beqz a0, .LBB89_5
-; RV32ZVE32F-NEXT: .LBB89_13: # %cond.store7
+; RV32ZVE32F-NEXT: beqz a0, .LBB99_5
+; RV32ZVE32F-NEXT: .LBB99_13: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa4, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 32
-; RV32ZVE32F-NEXT: beqz a0, .LBB89_6
-; RV32ZVE32F-NEXT: .LBB89_14: # %cond.store9
+; RV32ZVE32F-NEXT: beqz a0, .LBB99_6
+; RV32ZVE32F-NEXT: .LBB99_14: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa5, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 64
-; RV32ZVE32F-NEXT: beqz a0, .LBB89_7
-; RV32ZVE32F-NEXT: .LBB89_15: # %cond.store11
+; RV32ZVE32F-NEXT: beqz a0, .LBB99_7
+; RV32ZVE32F-NEXT: .LBB99_15: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa6, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, -128
-; RV32ZVE32F-NEXT: beqz a0, .LBB89_8
-; RV32ZVE32F-NEXT: .LBB89_16: # %cond.store13
+; RV32ZVE32F-NEXT: beqz a0, .LBB99_8
+; RV32ZVE32F-NEXT: .LBB99_16: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
@@ -10122,7 +15525,7 @@ define void @mscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, ptr %base, <8
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
-; RV64ZVE32F-NEXT: beqz a2, .LBB89_2
+; RV64ZVE32F-NEXT: beqz a2, .LBB99_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
@@ -10130,9 +15533,9 @@ define void @mscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, ptr %base, <8
; RV64ZVE32F-NEXT: srli a2, a2, 29
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa0, 0(a2)
-; RV64ZVE32F-NEXT: .LBB89_2: # %else
+; RV64ZVE32F-NEXT: .LBB99_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB89_4
+; RV64ZVE32F-NEXT: beqz a2, .LBB99_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
@@ -10141,47 +15544,47 @@ define void @mscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, ptr %base, <8
; RV64ZVE32F-NEXT: srli a2, a2, 29
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa1, 0(a2)
-; RV64ZVE32F-NEXT: .LBB89_4: # %else2
+; RV64ZVE32F-NEXT: .LBB99_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB89_12
+; RV64ZVE32F-NEXT: bnez a2, .LBB99_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB89_13
-; RV64ZVE32F-NEXT: .LBB89_6: # %else6
+; RV64ZVE32F-NEXT: bnez a2, .LBB99_13
+; RV64ZVE32F-NEXT: .LBB99_6: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB89_14
-; RV64ZVE32F-NEXT: .LBB89_7: # %else8
+; RV64ZVE32F-NEXT: bnez a2, .LBB99_14
+; RV64ZVE32F-NEXT: .LBB99_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: beqz a2, .LBB89_9
-; RV64ZVE32F-NEXT: .LBB89_8: # %cond.store9
+; RV64ZVE32F-NEXT: beqz a2, .LBB99_9
+; RV64ZVE32F-NEXT: .LBB99_8: # %cond.store9
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 32
; RV64ZVE32F-NEXT: srli a2, a2, 29
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa5, 0(a2)
-; RV64ZVE32F-NEXT: .LBB89_9: # %else10
+; RV64ZVE32F-NEXT: .LBB99_9: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB89_15
+; RV64ZVE32F-NEXT: bnez a2, .LBB99_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: bnez a1, .LBB89_16
-; RV64ZVE32F-NEXT: .LBB89_11: # %else14
+; RV64ZVE32F-NEXT: bnez a1, .LBB99_16
+; RV64ZVE32F-NEXT: .LBB99_11: # %else14
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB89_12: # %cond.store3
+; RV64ZVE32F-NEXT: .LBB99_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 32
; RV64ZVE32F-NEXT: srli a2, a2, 29
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB89_6
-; RV64ZVE32F-NEXT: .LBB89_13: # %cond.store5
+; RV64ZVE32F-NEXT: beqz a2, .LBB99_6
+; RV64ZVE32F-NEXT: .LBB99_13: # %cond.store5
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 32
@@ -10189,25 +15592,25 @@ define void @mscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, ptr %base, <8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB89_7
-; RV64ZVE32F-NEXT: .LBB89_14: # %cond.store7
+; RV64ZVE32F-NEXT: beqz a2, .LBB99_7
+; RV64ZVE32F-NEXT: .LBB99_14: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 32
; RV64ZVE32F-NEXT: srli a2, a2, 29
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
-; RV64ZVE32F-NEXT: bnez a2, .LBB89_8
-; RV64ZVE32F-NEXT: j .LBB89_9
-; RV64ZVE32F-NEXT: .LBB89_15: # %cond.store11
+; RV64ZVE32F-NEXT: bnez a2, .LBB99_8
+; RV64ZVE32F-NEXT: j .LBB99_9
+; RV64ZVE32F-NEXT: .LBB99_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 32
; RV64ZVE32F-NEXT: srli a2, a2, 29
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
; RV64ZVE32F-NEXT: andi a1, a1, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB89_11
-; RV64ZVE32F-NEXT: .LBB89_16: # %cond.store13
+; RV64ZVE32F-NEXT: beqz a1, .LBB99_11
+; RV64ZVE32F-NEXT: .LBB99_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: slli a1, a1, 32
@@ -10231,12 +15634,12 @@ define void @mscatter_baseidx_v8f64(<8 x double> %val, ptr %base, <8 x i64> %idx
; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32V-NEXT: ret
;
-; RV64-LABEL: mscatter_baseidx_v8f64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vsll.vi v12, v12, 3
-; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_baseidx_v8f64:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vsll.vi v12, v12, 3
+; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
+; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_baseidx_v8f64:
; RV32ZVE32F: # %bb.0:
@@ -10263,78 +15666,78 @@ define void @mscatter_baseidx_v8f64(<8 x double> %val, ptr %base, <8 x i64> %idx
; RV32ZVE32F-NEXT: andi a2, a1, 1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
-; RV32ZVE32F-NEXT: bnez a2, .LBB90_9
+; RV32ZVE32F-NEXT: bnez a2, .LBB100_9
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, a1, 2
-; RV32ZVE32F-NEXT: bnez a0, .LBB90_10
-; RV32ZVE32F-NEXT: .LBB90_2: # %else2
+; RV32ZVE32F-NEXT: bnez a0, .LBB100_10
+; RV32ZVE32F-NEXT: .LBB100_2: # %else2
; RV32ZVE32F-NEXT: andi a0, a1, 4
-; RV32ZVE32F-NEXT: bnez a0, .LBB90_11
-; RV32ZVE32F-NEXT: .LBB90_3: # %else4
+; RV32ZVE32F-NEXT: bnez a0, .LBB100_11
+; RV32ZVE32F-NEXT: .LBB100_3: # %else4
; RV32ZVE32F-NEXT: andi a0, a1, 8
-; RV32ZVE32F-NEXT: bnez a0, .LBB90_12
-; RV32ZVE32F-NEXT: .LBB90_4: # %else6
+; RV32ZVE32F-NEXT: bnez a0, .LBB100_12
+; RV32ZVE32F-NEXT: .LBB100_4: # %else6
; RV32ZVE32F-NEXT: andi a0, a1, 16
-; RV32ZVE32F-NEXT: bnez a0, .LBB90_13
-; RV32ZVE32F-NEXT: .LBB90_5: # %else8
+; RV32ZVE32F-NEXT: bnez a0, .LBB100_13
+; RV32ZVE32F-NEXT: .LBB100_5: # %else8
; RV32ZVE32F-NEXT: andi a0, a1, 32
-; RV32ZVE32F-NEXT: bnez a0, .LBB90_14
-; RV32ZVE32F-NEXT: .LBB90_6: # %else10
+; RV32ZVE32F-NEXT: bnez a0, .LBB100_14
+; RV32ZVE32F-NEXT: .LBB100_6: # %else10
; RV32ZVE32F-NEXT: andi a0, a1, 64
-; RV32ZVE32F-NEXT: bnez a0, .LBB90_15
-; RV32ZVE32F-NEXT: .LBB90_7: # %else12
+; RV32ZVE32F-NEXT: bnez a0, .LBB100_15
+; RV32ZVE32F-NEXT: .LBB100_7: # %else12
; RV32ZVE32F-NEXT: andi a0, a1, -128
-; RV32ZVE32F-NEXT: bnez a0, .LBB90_16
-; RV32ZVE32F-NEXT: .LBB90_8: # %else14
+; RV32ZVE32F-NEXT: bnez a0, .LBB100_16
+; RV32ZVE32F-NEXT: .LBB100_8: # %else14
; RV32ZVE32F-NEXT: ret
-; RV32ZVE32F-NEXT: .LBB90_9: # %cond.store
+; RV32ZVE32F-NEXT: .LBB100_9: # %cond.store
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 2
-; RV32ZVE32F-NEXT: beqz a0, .LBB90_2
-; RV32ZVE32F-NEXT: .LBB90_10: # %cond.store1
+; RV32ZVE32F-NEXT: beqz a0, .LBB100_2
+; RV32ZVE32F-NEXT: .LBB100_10: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa1, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 4
-; RV32ZVE32F-NEXT: beqz a0, .LBB90_3
-; RV32ZVE32F-NEXT: .LBB90_11: # %cond.store3
+; RV32ZVE32F-NEXT: beqz a0, .LBB100_3
+; RV32ZVE32F-NEXT: .LBB100_11: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa2, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 8
-; RV32ZVE32F-NEXT: beqz a0, .LBB90_4
-; RV32ZVE32F-NEXT: .LBB90_12: # %cond.store5
+; RV32ZVE32F-NEXT: beqz a0, .LBB100_4
+; RV32ZVE32F-NEXT: .LBB100_12: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa3, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 16
-; RV32ZVE32F-NEXT: beqz a0, .LBB90_5
-; RV32ZVE32F-NEXT: .LBB90_13: # %cond.store7
+; RV32ZVE32F-NEXT: beqz a0, .LBB100_5
+; RV32ZVE32F-NEXT: .LBB100_13: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa4, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 32
-; RV32ZVE32F-NEXT: beqz a0, .LBB90_6
-; RV32ZVE32F-NEXT: .LBB90_14: # %cond.store9
+; RV32ZVE32F-NEXT: beqz a0, .LBB100_6
+; RV32ZVE32F-NEXT: .LBB100_14: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa5, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 64
-; RV32ZVE32F-NEXT: beqz a0, .LBB90_7
-; RV32ZVE32F-NEXT: .LBB90_15: # %cond.store11
+; RV32ZVE32F-NEXT: beqz a0, .LBB100_7
+; RV32ZVE32F-NEXT: .LBB100_15: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa6, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, -128
-; RV32ZVE32F-NEXT: beqz a0, .LBB90_8
-; RV32ZVE32F-NEXT: .LBB90_16: # %cond.store13
+; RV32ZVE32F-NEXT: beqz a0, .LBB100_8
+; RV32ZVE32F-NEXT: .LBB100_16: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
@@ -10353,74 +15756,74 @@ define void @mscatter_baseidx_v8f64(<8 x double> %val, ptr %base, <8 x i64> %idx
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v0
; RV64ZVE32F-NEXT: andi t2, a3, 1
-; RV64ZVE32F-NEXT: bnez t2, .LBB90_9
+; RV64ZVE32F-NEXT: bnez t2, .LBB100_9
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a1, a3, 2
-; RV64ZVE32F-NEXT: bnez a1, .LBB90_10
-; RV64ZVE32F-NEXT: .LBB90_2: # %else2
+; RV64ZVE32F-NEXT: bnez a1, .LBB100_10
+; RV64ZVE32F-NEXT: .LBB100_2: # %else2
; RV64ZVE32F-NEXT: andi a1, a3, 4
-; RV64ZVE32F-NEXT: bnez a1, .LBB90_11
-; RV64ZVE32F-NEXT: .LBB90_3: # %else4
+; RV64ZVE32F-NEXT: bnez a1, .LBB100_11
+; RV64ZVE32F-NEXT: .LBB100_3: # %else4
; RV64ZVE32F-NEXT: andi a1, a3, 8
-; RV64ZVE32F-NEXT: bnez a1, .LBB90_12
-; RV64ZVE32F-NEXT: .LBB90_4: # %else6
+; RV64ZVE32F-NEXT: bnez a1, .LBB100_12
+; RV64ZVE32F-NEXT: .LBB100_4: # %else6
; RV64ZVE32F-NEXT: andi a1, a3, 16
-; RV64ZVE32F-NEXT: bnez a1, .LBB90_13
-; RV64ZVE32F-NEXT: .LBB90_5: # %else8
+; RV64ZVE32F-NEXT: bnez a1, .LBB100_13
+; RV64ZVE32F-NEXT: .LBB100_5: # %else8
; RV64ZVE32F-NEXT: andi a1, a3, 32
-; RV64ZVE32F-NEXT: bnez a1, .LBB90_14
-; RV64ZVE32F-NEXT: .LBB90_6: # %else10
+; RV64ZVE32F-NEXT: bnez a1, .LBB100_14
+; RV64ZVE32F-NEXT: .LBB100_6: # %else10
; RV64ZVE32F-NEXT: andi a1, a3, 64
-; RV64ZVE32F-NEXT: bnez a1, .LBB90_15
-; RV64ZVE32F-NEXT: .LBB90_7: # %else12
+; RV64ZVE32F-NEXT: bnez a1, .LBB100_15
+; RV64ZVE32F-NEXT: .LBB100_7: # %else12
; RV64ZVE32F-NEXT: andi a1, a3, -128
-; RV64ZVE32F-NEXT: bnez a1, .LBB90_16
-; RV64ZVE32F-NEXT: .LBB90_8: # %else14
+; RV64ZVE32F-NEXT: bnez a1, .LBB100_16
+; RV64ZVE32F-NEXT: .LBB100_8: # %else14
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB90_9: # %cond.store
+; RV64ZVE32F-NEXT: .LBB100_9: # %cond.store
; RV64ZVE32F-NEXT: ld a1, 0(a1)
; RV64ZVE32F-NEXT: slli a1, a1, 3
; RV64ZVE32F-NEXT: add a1, a0, a1
; RV64ZVE32F-NEXT: fsd fa0, 0(a1)
; RV64ZVE32F-NEXT: andi a1, a3, 2
-; RV64ZVE32F-NEXT: beqz a1, .LBB90_2
-; RV64ZVE32F-NEXT: .LBB90_10: # %cond.store1
+; RV64ZVE32F-NEXT: beqz a1, .LBB100_2
+; RV64ZVE32F-NEXT: .LBB100_10: # %cond.store1
; RV64ZVE32F-NEXT: slli t1, t1, 3
; RV64ZVE32F-NEXT: add t1, a0, t1
; RV64ZVE32F-NEXT: fsd fa1, 0(t1)
; RV64ZVE32F-NEXT: andi a1, a3, 4
-; RV64ZVE32F-NEXT: beqz a1, .LBB90_3
-; RV64ZVE32F-NEXT: .LBB90_11: # %cond.store3
+; RV64ZVE32F-NEXT: beqz a1, .LBB100_3
+; RV64ZVE32F-NEXT: .LBB100_11: # %cond.store3
; RV64ZVE32F-NEXT: slli t0, t0, 3
; RV64ZVE32F-NEXT: add t0, a0, t0
; RV64ZVE32F-NEXT: fsd fa2, 0(t0)
; RV64ZVE32F-NEXT: andi a1, a3, 8
-; RV64ZVE32F-NEXT: beqz a1, .LBB90_4
-; RV64ZVE32F-NEXT: .LBB90_12: # %cond.store5
+; RV64ZVE32F-NEXT: beqz a1, .LBB100_4
+; RV64ZVE32F-NEXT: .LBB100_12: # %cond.store5
; RV64ZVE32F-NEXT: slli a7, a7, 3
; RV64ZVE32F-NEXT: add a7, a0, a7
; RV64ZVE32F-NEXT: fsd fa3, 0(a7)
; RV64ZVE32F-NEXT: andi a1, a3, 16
-; RV64ZVE32F-NEXT: beqz a1, .LBB90_5
-; RV64ZVE32F-NEXT: .LBB90_13: # %cond.store7
+; RV64ZVE32F-NEXT: beqz a1, .LBB100_5
+; RV64ZVE32F-NEXT: .LBB100_13: # %cond.store7
; RV64ZVE32F-NEXT: slli a6, a6, 3
; RV64ZVE32F-NEXT: add a6, a0, a6
; RV64ZVE32F-NEXT: fsd fa4, 0(a6)
; RV64ZVE32F-NEXT: andi a1, a3, 32
-; RV64ZVE32F-NEXT: beqz a1, .LBB90_6
-; RV64ZVE32F-NEXT: .LBB90_14: # %cond.store9
+; RV64ZVE32F-NEXT: beqz a1, .LBB100_6
+; RV64ZVE32F-NEXT: .LBB100_14: # %cond.store9
; RV64ZVE32F-NEXT: slli a5, a5, 3
; RV64ZVE32F-NEXT: add a5, a0, a5
; RV64ZVE32F-NEXT: fsd fa5, 0(a5)
; RV64ZVE32F-NEXT: andi a1, a3, 64
-; RV64ZVE32F-NEXT: beqz a1, .LBB90_7
-; RV64ZVE32F-NEXT: .LBB90_15: # %cond.store11
+; RV64ZVE32F-NEXT: beqz a1, .LBB100_7
+; RV64ZVE32F-NEXT: .LBB100_15: # %cond.store11
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a0, a4
; RV64ZVE32F-NEXT: fsd fa6, 0(a4)
; RV64ZVE32F-NEXT: andi a1, a3, -128
-; RV64ZVE32F-NEXT: beqz a1, .LBB90_8
-; RV64ZVE32F-NEXT: .LBB90_16: # %cond.store13
+; RV64ZVE32F-NEXT: beqz a1, .LBB100_8
+; RV64ZVE32F-NEXT: .LBB100_16: # %cond.store13
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a0, a0, a2
; RV64ZVE32F-NEXT: fsd fa7, 0(a0)
@@ -10441,28 +15844,28 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, ptr %base, <16 x i8> %idxs,
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
-; RV64-LABEL: mscatter_baseidx_v16i8:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT: vsext.vf8 v16, v9
-; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, ma
-; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_baseidx_v16i8:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV64V-NEXT: vsext.vf8 v16, v9
+; RV64V-NEXT: vsetvli zero, zero, e8, m1, ta, ma
+; RV64V-NEXT: vsoxei64.v v8, (a0), v16, v0.t
+; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_v16i8:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
-; RV64ZVE32F-NEXT: beqz a2, .LBB91_2
+; RV64ZVE32F-NEXT: beqz a2, .LBB101_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vse8.v v8, (a2)
-; RV64ZVE32F-NEXT: .LBB91_2: # %else
+; RV64ZVE32F-NEXT: .LBB101_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB91_4
+; RV64ZVE32F-NEXT: beqz a2, .LBB101_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
@@ -10471,30 +15874,30 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, ptr %base, <16 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vse8.v v10, (a2)
-; RV64ZVE32F-NEXT: .LBB91_4: # %else2
+; RV64ZVE32F-NEXT: .LBB101_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v9, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB91_25
+; RV64ZVE32F-NEXT: bnez a2, .LBB101_25
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB91_26
-; RV64ZVE32F-NEXT: .LBB91_6: # %else6
+; RV64ZVE32F-NEXT: bnez a2, .LBB101_26
+; RV64ZVE32F-NEXT: .LBB101_6: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB91_8
-; RV64ZVE32F-NEXT: .LBB91_7: # %cond.store7
+; RV64ZVE32F-NEXT: beqz a2, .LBB101_8
+; RV64ZVE32F-NEXT: .LBB101_7: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 4
; RV64ZVE32F-NEXT: vse8.v v11, (a2)
-; RV64ZVE32F-NEXT: .LBB91_8: # %else8
+; RV64ZVE32F-NEXT: .LBB101_8: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB91_10
+; RV64ZVE32F-NEXT: beqz a2, .LBB101_10
; RV64ZVE32F-NEXT: # %bb.9: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
@@ -10503,21 +15906,21 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, ptr %base, <16 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 5
; RV64ZVE32F-NEXT: vse8.v v11, (a2)
-; RV64ZVE32F-NEXT: .LBB91_10: # %else10
+; RV64ZVE32F-NEXT: .LBB101_10: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB91_27
+; RV64ZVE32F-NEXT: bnez a2, .LBB101_27
; RV64ZVE32F-NEXT: # %bb.11: # %else12
; RV64ZVE32F-NEXT: andi a2, a1, 128
-; RV64ZVE32F-NEXT: bnez a2, .LBB91_28
-; RV64ZVE32F-NEXT: .LBB91_12: # %else14
+; RV64ZVE32F-NEXT: bnez a2, .LBB101_28
+; RV64ZVE32F-NEXT: .LBB101_12: # %else14
; RV64ZVE32F-NEXT: andi a2, a1, 256
-; RV64ZVE32F-NEXT: bnez a2, .LBB91_29
-; RV64ZVE32F-NEXT: .LBB91_13: # %else16
+; RV64ZVE32F-NEXT: bnez a2, .LBB101_29
+; RV64ZVE32F-NEXT: .LBB101_13: # %else16
; RV64ZVE32F-NEXT: andi a2, a1, 512
-; RV64ZVE32F-NEXT: beqz a2, .LBB91_15
-; RV64ZVE32F-NEXT: .LBB91_14: # %cond.store17
+; RV64ZVE32F-NEXT: beqz a2, .LBB101_15
+; RV64ZVE32F-NEXT: .LBB101_14: # %cond.store17
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -10525,23 +15928,23 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, ptr %base, <16 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 9
; RV64ZVE32F-NEXT: vse8.v v10, (a2)
-; RV64ZVE32F-NEXT: .LBB91_15: # %else18
+; RV64ZVE32F-NEXT: .LBB101_15: # %else18
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
; RV64ZVE32F-NEXT: andi a2, a1, 1024
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB91_30
+; RV64ZVE32F-NEXT: bnez a2, .LBB101_30
; RV64ZVE32F-NEXT: # %bb.16: # %else20
; RV64ZVE32F-NEXT: slli a2, a1, 52
-; RV64ZVE32F-NEXT: bltz a2, .LBB91_31
-; RV64ZVE32F-NEXT: .LBB91_17: # %else22
+; RV64ZVE32F-NEXT: bltz a2, .LBB101_31
+; RV64ZVE32F-NEXT: .LBB101_17: # %else22
; RV64ZVE32F-NEXT: slli a2, a1, 51
-; RV64ZVE32F-NEXT: bltz a2, .LBB91_32
-; RV64ZVE32F-NEXT: .LBB91_18: # %else24
+; RV64ZVE32F-NEXT: bltz a2, .LBB101_32
+; RV64ZVE32F-NEXT: .LBB101_18: # %else24
; RV64ZVE32F-NEXT: slli a2, a1, 50
-; RV64ZVE32F-NEXT: bgez a2, .LBB91_20
-; RV64ZVE32F-NEXT: .LBB91_19: # %cond.store25
+; RV64ZVE32F-NEXT: bgez a2, .LBB101_20
+; RV64ZVE32F-NEXT: .LBB101_19: # %cond.store25
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -10549,21 +15952,21 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, ptr %base, <16 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 13
; RV64ZVE32F-NEXT: vse8.v v9, (a2)
-; RV64ZVE32F-NEXT: .LBB91_20: # %else26
+; RV64ZVE32F-NEXT: .LBB101_20: # %else26
; RV64ZVE32F-NEXT: slli a2, a1, 49
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
-; RV64ZVE32F-NEXT: bgez a2, .LBB91_22
+; RV64ZVE32F-NEXT: bgez a2, .LBB101_22
; RV64ZVE32F-NEXT: # %bb.21: # %cond.store27
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 14
; RV64ZVE32F-NEXT: vse8.v v10, (a2)
-; RV64ZVE32F-NEXT: .LBB91_22: # %else28
+; RV64ZVE32F-NEXT: .LBB101_22: # %else28
; RV64ZVE32F-NEXT: lui a2, 1048568
; RV64ZVE32F-NEXT: and a1, a1, a2
-; RV64ZVE32F-NEXT: beqz a1, .LBB91_24
+; RV64ZVE32F-NEXT: beqz a1, .LBB101_24
; RV64ZVE32F-NEXT: # %bb.23: # %cond.store29
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
@@ -10572,17 +15975,17 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, ptr %base, <16 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 15
; RV64ZVE32F-NEXT: vse8.v v8, (a0)
-; RV64ZVE32F-NEXT: .LBB91_24: # %else30
+; RV64ZVE32F-NEXT: .LBB101_24: # %else30
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB91_25: # %cond.store3
+; RV64ZVE32F-NEXT: .LBB101_25: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB91_6
-; RV64ZVE32F-NEXT: .LBB91_26: # %cond.store5
+; RV64ZVE32F-NEXT: beqz a2, .LBB101_6
+; RV64ZVE32F-NEXT: .LBB101_26: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
@@ -10591,17 +15994,17 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, ptr %base, <16 x i8> %idxs,
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 3
; RV64ZVE32F-NEXT: vse8.v v11, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB91_7
-; RV64ZVE32F-NEXT: j .LBB91_8
-; RV64ZVE32F-NEXT: .LBB91_27: # %cond.store11
+; RV64ZVE32F-NEXT: bnez a2, .LBB101_7
+; RV64ZVE32F-NEXT: j .LBB101_8
+; RV64ZVE32F-NEXT: .LBB101_27: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 6
; RV64ZVE32F-NEXT: vse8.v v11, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 128
-; RV64ZVE32F-NEXT: beqz a2, .LBB91_12
-; RV64ZVE32F-NEXT: .LBB91_28: # %cond.store13
+; RV64ZVE32F-NEXT: beqz a2, .LBB101_12
+; RV64ZVE32F-NEXT: .LBB101_28: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -10610,25 +16013,25 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, ptr %base, <16 x i8> %idxs,
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 7
; RV64ZVE32F-NEXT: vse8.v v10, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 256
-; RV64ZVE32F-NEXT: beqz a2, .LBB91_13
-; RV64ZVE32F-NEXT: .LBB91_29: # %cond.store15
+; RV64ZVE32F-NEXT: beqz a2, .LBB101_13
+; RV64ZVE32F-NEXT: .LBB101_29: # %cond.store15
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 8
; RV64ZVE32F-NEXT: vse8.v v10, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 512
-; RV64ZVE32F-NEXT: bnez a2, .LBB91_14
-; RV64ZVE32F-NEXT: j .LBB91_15
-; RV64ZVE32F-NEXT: .LBB91_30: # %cond.store19
+; RV64ZVE32F-NEXT: bnez a2, .LBB101_14
+; RV64ZVE32F-NEXT: j .LBB101_15
+; RV64ZVE32F-NEXT: .LBB101_30: # %cond.store19
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 10
; RV64ZVE32F-NEXT: vse8.v v11, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 52
-; RV64ZVE32F-NEXT: bgez a2, .LBB91_17
-; RV64ZVE32F-NEXT: .LBB91_31: # %cond.store21
+; RV64ZVE32F-NEXT: bgez a2, .LBB101_17
+; RV64ZVE32F-NEXT: .LBB101_31: # %cond.store21
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -10637,16 +16040,16 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, ptr %base, <16 x i8> %idxs,
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 11
; RV64ZVE32F-NEXT: vse8.v v9, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 51
-; RV64ZVE32F-NEXT: bgez a2, .LBB91_18
-; RV64ZVE32F-NEXT: .LBB91_32: # %cond.store23
+; RV64ZVE32F-NEXT: bgez a2, .LBB101_18
+; RV64ZVE32F-NEXT: .LBB101_32: # %cond.store23
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 12
; RV64ZVE32F-NEXT: vse8.v v9, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 50
-; RV64ZVE32F-NEXT: bltz a2, .LBB91_19
-; RV64ZVE32F-NEXT: j .LBB91_20
+; RV64ZVE32F-NEXT: bltz a2, .LBB101_19
+; RV64ZVE32F-NEXT: j .LBB101_20
%ptrs = getelementptr inbounds i8, ptr %base, <16 x i8> %idxs
call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> %val, <16 x ptr> %ptrs, i32 1, <16 x i1> %m)
ret void
@@ -10664,37 +16067,37 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
-; RV64-LABEL: mscatter_baseidx_v32i8:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT: vsext.vf8 v16, v10
-; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, ma
-; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
-; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v8, 16
-; RV64-NEXT: vslidedown.vi v10, v10, 16
-; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64-NEXT: vslidedown.vi v0, v0, 2
-; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; RV64-NEXT: vsext.vf8 v16, v10
-; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, ma
-; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_baseidx_v32i8:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV64V-NEXT: vsext.vf8 v16, v10
+; RV64V-NEXT: vsetvli zero, zero, e8, m1, ta, ma
+; RV64V-NEXT: vsoxei64.v v8, (a0), v16, v0.t
+; RV64V-NEXT: vsetivli zero, 16, e8, m2, ta, ma
+; RV64V-NEXT: vslidedown.vi v8, v8, 16
+; RV64V-NEXT: vslidedown.vi v10, v10, 16
+; RV64V-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64V-NEXT: vslidedown.vi v0, v0, 2
+; RV64V-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV64V-NEXT: vsext.vf8 v16, v10
+; RV64V-NEXT: vsetvli zero, zero, e8, m1, ta, ma
+; RV64V-NEXT: vsoxei64.v v8, (a0), v16, v0.t
+; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_v32i8:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
-; RV64ZVE32F-NEXT: beqz a2, .LBB92_2
+; RV64ZVE32F-NEXT: beqz a2, .LBB102_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vse8.v v8, (a2)
-; RV64ZVE32F-NEXT: .LBB92_2: # %else
+; RV64ZVE32F-NEXT: .LBB102_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB92_4
+; RV64ZVE32F-NEXT: beqz a2, .LBB102_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 1
@@ -10703,30 +16106,30 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
-; RV64ZVE32F-NEXT: .LBB92_4: # %else2
+; RV64ZVE32F-NEXT: .LBB102_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v13, v10, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB92_49
+; RV64ZVE32F-NEXT: bnez a2, .LBB102_49
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB92_50
-; RV64ZVE32F-NEXT: .LBB92_6: # %else6
+; RV64ZVE32F-NEXT: bnez a2, .LBB102_50
+; RV64ZVE32F-NEXT: .LBB102_6: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB92_8
-; RV64ZVE32F-NEXT: .LBB92_7: # %cond.store7
+; RV64ZVE32F-NEXT: beqz a2, .LBB102_8
+; RV64ZVE32F-NEXT: .LBB102_7: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
-; RV64ZVE32F-NEXT: .LBB92_8: # %else8
+; RV64ZVE32F-NEXT: .LBB102_8: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB92_10
+; RV64ZVE32F-NEXT: beqz a2, .LBB102_10
; RV64ZVE32F-NEXT: # %bb.9: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v14, v13, 1
@@ -10735,21 +16138,21 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 5
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
-; RV64ZVE32F-NEXT: .LBB92_10: # %else10
+; RV64ZVE32F-NEXT: .LBB102_10: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB92_51
+; RV64ZVE32F-NEXT: bnez a2, .LBB102_51
; RV64ZVE32F-NEXT: # %bb.11: # %else12
; RV64ZVE32F-NEXT: andi a2, a1, 128
-; RV64ZVE32F-NEXT: bnez a2, .LBB92_52
-; RV64ZVE32F-NEXT: .LBB92_12: # %else14
+; RV64ZVE32F-NEXT: bnez a2, .LBB102_52
+; RV64ZVE32F-NEXT: .LBB102_12: # %else14
; RV64ZVE32F-NEXT: andi a2, a1, 256
-; RV64ZVE32F-NEXT: bnez a2, .LBB92_53
-; RV64ZVE32F-NEXT: .LBB92_13: # %else16
+; RV64ZVE32F-NEXT: bnez a2, .LBB102_53
+; RV64ZVE32F-NEXT: .LBB102_13: # %else16
; RV64ZVE32F-NEXT: andi a2, a1, 512
-; RV64ZVE32F-NEXT: beqz a2, .LBB92_15
-; RV64ZVE32F-NEXT: .LBB92_14: # %cond.store17
+; RV64ZVE32F-NEXT: beqz a2, .LBB102_15
+; RV64ZVE32F-NEXT: .LBB102_14: # %cond.store17
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
@@ -10757,22 +16160,22 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v13, v8, 9
; RV64ZVE32F-NEXT: vse8.v v13, (a2)
-; RV64ZVE32F-NEXT: .LBB92_15: # %else18
+; RV64ZVE32F-NEXT: .LBB102_15: # %else18
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 4
; RV64ZVE32F-NEXT: andi a2, a1, 1024
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB92_17
+; RV64ZVE32F-NEXT: beqz a2, .LBB102_17
; RV64ZVE32F-NEXT: # %bb.16: # %cond.store19
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 10
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
-; RV64ZVE32F-NEXT: .LBB92_17: # %else20
+; RV64ZVE32F-NEXT: .LBB102_17: # %else20
; RV64ZVE32F-NEXT: slli a2, a1, 52
-; RV64ZVE32F-NEXT: bgez a2, .LBB92_19
+; RV64ZVE32F-NEXT: bgez a2, .LBB102_19
; RV64ZVE32F-NEXT: # %bb.18: # %cond.store21
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
@@ -10781,20 +16184,20 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 11
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
-; RV64ZVE32F-NEXT: .LBB92_19: # %else22
+; RV64ZVE32F-NEXT: .LBB102_19: # %else22
; RV64ZVE32F-NEXT: slli a2, a1, 51
; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 16
-; RV64ZVE32F-NEXT: bgez a2, .LBB92_21
+; RV64ZVE32F-NEXT: bgez a2, .LBB102_21
; RV64ZVE32F-NEXT: # %bb.20: # %cond.store23
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 12
; RV64ZVE32F-NEXT: vse8.v v11, (a2)
-; RV64ZVE32F-NEXT: .LBB92_21: # %else24
+; RV64ZVE32F-NEXT: .LBB102_21: # %else24
; RV64ZVE32F-NEXT: slli a2, a1, 50
-; RV64ZVE32F-NEXT: bgez a2, .LBB92_23
+; RV64ZVE32F-NEXT: bgez a2, .LBB102_23
; RV64ZVE32F-NEXT: # %bb.22: # %cond.store25
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v13, 1
@@ -10803,21 +16206,21 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 13
; RV64ZVE32F-NEXT: vse8.v v11, (a2)
-; RV64ZVE32F-NEXT: .LBB92_23: # %else26
+; RV64ZVE32F-NEXT: .LBB102_23: # %else26
; RV64ZVE32F-NEXT: slli a2, a1, 49
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v13, 2
-; RV64ZVE32F-NEXT: bltz a2, .LBB92_54
+; RV64ZVE32F-NEXT: bltz a2, .LBB102_54
; RV64ZVE32F-NEXT: # %bb.24: # %else28
; RV64ZVE32F-NEXT: slli a2, a1, 48
-; RV64ZVE32F-NEXT: bltz a2, .LBB92_55
-; RV64ZVE32F-NEXT: .LBB92_25: # %else30
+; RV64ZVE32F-NEXT: bltz a2, .LBB102_55
+; RV64ZVE32F-NEXT: .LBB102_25: # %else30
; RV64ZVE32F-NEXT: slli a2, a1, 47
-; RV64ZVE32F-NEXT: bltz a2, .LBB92_56
-; RV64ZVE32F-NEXT: .LBB92_26: # %else32
+; RV64ZVE32F-NEXT: bltz a2, .LBB102_56
+; RV64ZVE32F-NEXT: .LBB102_26: # %else32
; RV64ZVE32F-NEXT: slli a2, a1, 46
-; RV64ZVE32F-NEXT: bgez a2, .LBB92_28
-; RV64ZVE32F-NEXT: .LBB92_27: # %cond.store33
+; RV64ZVE32F-NEXT: bgez a2, .LBB102_28
+; RV64ZVE32F-NEXT: .LBB102_27: # %cond.store33
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
@@ -10826,31 +16229,31 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 17
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
-; RV64ZVE32F-NEXT: .LBB92_28: # %else34
+; RV64ZVE32F-NEXT: .LBB102_28: # %else34
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
; RV64ZVE32F-NEXT: slli a2, a1, 45
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 2
-; RV64ZVE32F-NEXT: bltz a2, .LBB92_57
+; RV64ZVE32F-NEXT: bltz a2, .LBB102_57
; RV64ZVE32F-NEXT: # %bb.29: # %else36
; RV64ZVE32F-NEXT: slli a2, a1, 44
-; RV64ZVE32F-NEXT: bltz a2, .LBB92_58
-; RV64ZVE32F-NEXT: .LBB92_30: # %else38
+; RV64ZVE32F-NEXT: bltz a2, .LBB102_58
+; RV64ZVE32F-NEXT: .LBB102_30: # %else38
; RV64ZVE32F-NEXT: slli a2, a1, 43
-; RV64ZVE32F-NEXT: bgez a2, .LBB92_32
-; RV64ZVE32F-NEXT: .LBB92_31: # %cond.store39
+; RV64ZVE32F-NEXT: bgez a2, .LBB102_32
+; RV64ZVE32F-NEXT: .LBB102_31: # %cond.store39
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 20
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
-; RV64ZVE32F-NEXT: .LBB92_32: # %else40
+; RV64ZVE32F-NEXT: .LBB102_32: # %else40
; RV64ZVE32F-NEXT: slli a2, a1, 42
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 8
-; RV64ZVE32F-NEXT: bgez a2, .LBB92_34
+; RV64ZVE32F-NEXT: bgez a2, .LBB102_34
; RV64ZVE32F-NEXT: # %bb.33: # %cond.store41
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v11, 1
@@ -10860,21 +16263,21 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 21
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
-; RV64ZVE32F-NEXT: .LBB92_34: # %else42
+; RV64ZVE32F-NEXT: .LBB102_34: # %else42
; RV64ZVE32F-NEXT: slli a2, a1, 41
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 2
-; RV64ZVE32F-NEXT: bltz a2, .LBB92_59
+; RV64ZVE32F-NEXT: bltz a2, .LBB102_59
; RV64ZVE32F-NEXT: # %bb.35: # %else44
; RV64ZVE32F-NEXT: slli a2, a1, 40
-; RV64ZVE32F-NEXT: bltz a2, .LBB92_60
-; RV64ZVE32F-NEXT: .LBB92_36: # %else46
+; RV64ZVE32F-NEXT: bltz a2, .LBB102_60
+; RV64ZVE32F-NEXT: .LBB102_36: # %else46
; RV64ZVE32F-NEXT: slli a2, a1, 39
-; RV64ZVE32F-NEXT: bltz a2, .LBB92_61
-; RV64ZVE32F-NEXT: .LBB92_37: # %else48
+; RV64ZVE32F-NEXT: bltz a2, .LBB102_61
+; RV64ZVE32F-NEXT: .LBB102_37: # %else48
; RV64ZVE32F-NEXT: slli a2, a1, 38
-; RV64ZVE32F-NEXT: bgez a2, .LBB92_39
-; RV64ZVE32F-NEXT: .LBB92_38: # %cond.store49
+; RV64ZVE32F-NEXT: bgez a2, .LBB102_39
+; RV64ZVE32F-NEXT: .LBB102_38: # %cond.store49
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
@@ -10883,23 +16286,23 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 25
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
-; RV64ZVE32F-NEXT: .LBB92_39: # %else50
+; RV64ZVE32F-NEXT: .LBB102_39: # %else50
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
; RV64ZVE32F-NEXT: slli a2, a1, 37
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
-; RV64ZVE32F-NEXT: bltz a2, .LBB92_62
+; RV64ZVE32F-NEXT: bltz a2, .LBB102_62
; RV64ZVE32F-NEXT: # %bb.40: # %else52
; RV64ZVE32F-NEXT: slli a2, a1, 36
-; RV64ZVE32F-NEXT: bltz a2, .LBB92_63
-; RV64ZVE32F-NEXT: .LBB92_41: # %else54
+; RV64ZVE32F-NEXT: bltz a2, .LBB102_63
+; RV64ZVE32F-NEXT: .LBB102_41: # %else54
; RV64ZVE32F-NEXT: slli a2, a1, 35
-; RV64ZVE32F-NEXT: bltz a2, .LBB92_64
-; RV64ZVE32F-NEXT: .LBB92_42: # %else56
+; RV64ZVE32F-NEXT: bltz a2, .LBB102_64
+; RV64ZVE32F-NEXT: .LBB102_42: # %else56
; RV64ZVE32F-NEXT: slli a2, a1, 34
-; RV64ZVE32F-NEXT: bgez a2, .LBB92_44
-; RV64ZVE32F-NEXT: .LBB92_43: # %cond.store57
+; RV64ZVE32F-NEXT: bgez a2, .LBB102_44
+; RV64ZVE32F-NEXT: .LBB102_43: # %cond.store57
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -10908,11 +16311,11 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 29
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
-; RV64ZVE32F-NEXT: .LBB92_44: # %else58
+; RV64ZVE32F-NEXT: .LBB102_44: # %else58
; RV64ZVE32F-NEXT: slli a2, a1, 33
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
-; RV64ZVE32F-NEXT: bgez a2, .LBB92_46
+; RV64ZVE32F-NEXT: bgez a2, .LBB102_46
; RV64ZVE32F-NEXT: # %bb.45: # %cond.store59
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -10920,10 +16323,10 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 30
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
-; RV64ZVE32F-NEXT: .LBB92_46: # %else60
+; RV64ZVE32F-NEXT: .LBB102_46: # %else60
; RV64ZVE32F-NEXT: lui a2, 524288
; RV64ZVE32F-NEXT: and a1, a1, a2
-; RV64ZVE32F-NEXT: beqz a1, .LBB92_48
+; RV64ZVE32F-NEXT: beqz a1, .LBB102_48
; RV64ZVE32F-NEXT: # %bb.47: # %cond.store61
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
@@ -10933,17 +16336,17 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 31
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v8, (a0)
-; RV64ZVE32F-NEXT: .LBB92_48: # %else62
+; RV64ZVE32F-NEXT: .LBB102_48: # %else62
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB92_49: # %cond.store3
+; RV64ZVE32F-NEXT: .LBB102_49: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 2
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB92_6
-; RV64ZVE32F-NEXT: .LBB92_50: # %cond.store5
+; RV64ZVE32F-NEXT: beqz a2, .LBB102_6
+; RV64ZVE32F-NEXT: .LBB102_50: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
@@ -10952,17 +16355,17 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 3
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB92_7
-; RV64ZVE32F-NEXT: j .LBB92_8
-; RV64ZVE32F-NEXT: .LBB92_51: # %cond.store11
+; RV64ZVE32F-NEXT: bnez a2, .LBB102_7
+; RV64ZVE32F-NEXT: j .LBB102_8
+; RV64ZVE32F-NEXT: .LBB102_51: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 6
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 128
-; RV64ZVE32F-NEXT: beqz a2, .LBB92_12
-; RV64ZVE32F-NEXT: .LBB92_52: # %cond.store13
+; RV64ZVE32F-NEXT: beqz a2, .LBB102_12
+; RV64ZVE32F-NEXT: .LBB102_52: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
@@ -10971,25 +16374,25 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vslidedown.vi v13, v8, 7
; RV64ZVE32F-NEXT: vse8.v v13, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 256
-; RV64ZVE32F-NEXT: beqz a2, .LBB92_13
-; RV64ZVE32F-NEXT: .LBB92_53: # %cond.store15
+; RV64ZVE32F-NEXT: beqz a2, .LBB102_13
+; RV64ZVE32F-NEXT: .LBB102_53: # %cond.store15
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vslidedown.vi v13, v8, 8
; RV64ZVE32F-NEXT: vse8.v v13, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 512
-; RV64ZVE32F-NEXT: bnez a2, .LBB92_14
-; RV64ZVE32F-NEXT: j .LBB92_15
-; RV64ZVE32F-NEXT: .LBB92_54: # %cond.store27
+; RV64ZVE32F-NEXT: bnez a2, .LBB102_14
+; RV64ZVE32F-NEXT: j .LBB102_15
+; RV64ZVE32F-NEXT: .LBB102_54: # %cond.store27
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 14
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 48
-; RV64ZVE32F-NEXT: bgez a2, .LBB92_25
-; RV64ZVE32F-NEXT: .LBB92_55: # %cond.store29
+; RV64ZVE32F-NEXT: bgez a2, .LBB102_25
+; RV64ZVE32F-NEXT: .LBB102_55: # %cond.store29
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
@@ -10998,8 +16401,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 15
; RV64ZVE32F-NEXT: vse8.v v11, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 47
-; RV64ZVE32F-NEXT: bgez a2, .LBB92_26
-; RV64ZVE32F-NEXT: .LBB92_56: # %cond.store31
+; RV64ZVE32F-NEXT: bgez a2, .LBB102_26
+; RV64ZVE32F-NEXT: .LBB102_56: # %cond.store31
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -11007,9 +16410,9 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 46
-; RV64ZVE32F-NEXT: bltz a2, .LBB92_27
-; RV64ZVE32F-NEXT: j .LBB92_28
-; RV64ZVE32F-NEXT: .LBB92_57: # %cond.store35
+; RV64ZVE32F-NEXT: bltz a2, .LBB102_27
+; RV64ZVE32F-NEXT: j .LBB102_28
+; RV64ZVE32F-NEXT: .LBB102_57: # %cond.store35
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
@@ -11017,8 +16420,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 44
-; RV64ZVE32F-NEXT: bgez a2, .LBB92_30
-; RV64ZVE32F-NEXT: .LBB92_58: # %cond.store37
+; RV64ZVE32F-NEXT: bgez a2, .LBB102_30
+; RV64ZVE32F-NEXT: .LBB102_58: # %cond.store37
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
@@ -11028,9 +16431,9 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 43
-; RV64ZVE32F-NEXT: bltz a2, .LBB92_31
-; RV64ZVE32F-NEXT: j .LBB92_32
-; RV64ZVE32F-NEXT: .LBB92_59: # %cond.store43
+; RV64ZVE32F-NEXT: bltz a2, .LBB102_31
+; RV64ZVE32F-NEXT: j .LBB102_32
+; RV64ZVE32F-NEXT: .LBB102_59: # %cond.store43
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
@@ -11038,8 +16441,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 40
-; RV64ZVE32F-NEXT: bgez a2, .LBB92_36
-; RV64ZVE32F-NEXT: .LBB92_60: # %cond.store45
+; RV64ZVE32F-NEXT: bgez a2, .LBB102_36
+; RV64ZVE32F-NEXT: .LBB102_60: # %cond.store45
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
@@ -11049,8 +16452,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 39
-; RV64ZVE32F-NEXT: bgez a2, .LBB92_37
-; RV64ZVE32F-NEXT: .LBB92_61: # %cond.store47
+; RV64ZVE32F-NEXT: bgez a2, .LBB102_37
+; RV64ZVE32F-NEXT: .LBB102_61: # %cond.store47
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -11058,9 +16461,9 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 38
-; RV64ZVE32F-NEXT: bltz a2, .LBB92_38
-; RV64ZVE32F-NEXT: j .LBB92_39
-; RV64ZVE32F-NEXT: .LBB92_62: # %cond.store51
+; RV64ZVE32F-NEXT: bltz a2, .LBB102_38
+; RV64ZVE32F-NEXT: j .LBB102_39
+; RV64ZVE32F-NEXT: .LBB102_62: # %cond.store51
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
@@ -11068,8 +16471,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 36
-; RV64ZVE32F-NEXT: bgez a2, .LBB92_41
-; RV64ZVE32F-NEXT: .LBB92_63: # %cond.store53
+; RV64ZVE32F-NEXT: bgez a2, .LBB102_41
+; RV64ZVE32F-NEXT: .LBB102_63: # %cond.store53
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -11079,8 +16482,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 35
-; RV64ZVE32F-NEXT: bgez a2, .LBB92_42
-; RV64ZVE32F-NEXT: .LBB92_64: # %cond.store55
+; RV64ZVE32F-NEXT: bgez a2, .LBB102_42
+; RV64ZVE32F-NEXT: .LBB102_64: # %cond.store55
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -11088,8 +16491,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 34
-; RV64ZVE32F-NEXT: bltz a2, .LBB92_43
-; RV64ZVE32F-NEXT: j .LBB92_44
+; RV64ZVE32F-NEXT: bltz a2, .LBB102_43
+; RV64ZVE32F-NEXT: j .LBB102_44
%ptrs = getelementptr inbounds i8, ptr %base, <32 x i8> %idxs
call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> %val, <32 x ptr> %ptrs, i32 1, <32 x i1> %m)
ret void
@@ -11140,13 +16543,13 @@ define void @mscatter_shuffle_rotate(<8 x i16> %val, ptr %base) {
; RV32-NEXT: vse16.v v9, (a0)
; RV32-NEXT: ret
;
-; RV64-LABEL: mscatter_shuffle_rotate:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v9, v8, 4
-; RV64-NEXT: vslideup.vi v9, v8, 4
-; RV64-NEXT: vse16.v v9, (a0)
-; RV64-NEXT: ret
+; RV64V-LABEL: mscatter_shuffle_rotate:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64V-NEXT: vslidedown.vi v9, v8, 4
+; RV64V-NEXT: vslideup.vi v9, v8, 4
+; RV64V-NEXT: vse16.v v9, (a0)
+; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_shuffle_rotate:
; RV64ZVE32F: # %bb.0:
@@ -11178,3 +16581,5 @@ define void @mscatter_shuffle_rotate(<8 x i16> %val, ptr %base) {
call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> splat (i1 true))
ret void
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV64: {{.*}}
From 86fa3e3beaaf4d6c3eaad784e036dd308fe6ddf6 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Tue, 5 Nov 2024 15:24:25 +0800
Subject: [PATCH 2/2] [RISCV] Lower mgather/mscatter for zvfhmin/zvfbfmin
In preparation for allowing zvfhmin and zvfbfmin in isLegalElementTypeForRVV, this lowers masked gathers and scatters.
We need to mark f16 and bf16 as legal in isLegalMaskedGatherScatter otherwise ScalarizeMaskedMemIntrin will just scalarize them, but we can move this back into isLegalElementTypeForRVV afterwards.
The scalarized codegen required #114938, #114927 and #114915 to not crash.
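For illustration, here is a hypothetical masked gather over f16 elements (a minimal sketch, not taken from the updated test files; the function and value names are made up) of the kind this change keeps as a single indexed vector load under +zvfhmin instead of letting ScalarizeMaskedMemIntrin expand it into a branchy scalar sequence:

declare <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x half>)

define <4 x half> @gather_f16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x half> %passthru) {
  ; With zvfhmin enabled this should now select to a masked vluxei gather,
  ; mirroring the bf16 cases in the updated tests below.
  %v = call <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> %m, <4 x half> %passthru)
  ret <4 x half> %v
}

The bf16 case is analogous under +zvfbfmin.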
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 8 +-
.../Target/RISCV/RISCVTargetTransformInfo.h | 7 +-
.../RISCV/rvv/fixed-vectors-masked-gather.ll | 3928 +---------------
.../RISCV/rvv/fixed-vectors-masked-scatter.ll | 4068 +----------------
4 files changed, 386 insertions(+), 7625 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 3b6dd0c11bbf90..613cb01667d044 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1339,9 +1339,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::VECTOR_SHUFFLE, ISD::VECTOR_COMPRESS},
VT, Custom);
- // FIXME: mload, mstore, mgather, mscatter, vp_gather/scatter can be
+ // FIXME: mload, mstore, vp_gather/scatter can be
// hoisted to here.
- setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
+ setOperationAction({ISD::LOAD, ISD::STORE, ISD::MGATHER, ISD::MSCATTER},
+ VT, Custom);
setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
ISD::EXPERIMENTAL_VP_STRIDED_STORE},
@@ -1408,8 +1409,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::BUILD_VECTOR, ISD::SCALAR_TO_VECTOR}, VT,
Custom);
- setOperationAction(
- {ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, VT, Custom);
+ setOperationAction({ISD::MLOAD, ISD::MSTORE}, VT, Custom);
setOperationAction({ISD::VP_GATHER, ISD::VP_SCATTER}, VT, Custom);
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 29a6c68a6c585a..7d1e9007adc0df 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -270,7 +270,12 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
if (!ST->enableUnalignedVectorMem() && Alignment < ElemType.getStoreSize())
return false;
- return TLI->isLegalElementTypeForRVV(ElemType);
+ // TODO: Move bf16/f16 support into isLegalElementTypeForRVV
+ return TLI->isLegalElementTypeForRVV(ElemType) ||
+ (DataTypeVT.getVectorElementType() == MVT::bf16 &&
+ ST->hasVInstructionsBF16Minimal()) ||
+ (DataTypeVT.getVectorElementType() == MVT::f16 &&
+ ST->hasVInstructionsF16Minimal());
}
bool isLegalMaskedGather(Type *DataType, Align Alignment) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
index 2d7e1bf314f7ac..5a7b512e4ea5f5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -6888,43 +6888,22 @@ declare <1 x bfloat> @llvm.masked.gather.v1bf16.v1p0(<1 x ptr>, i32, <1 x i1>, <
define <1 x bfloat> @mgather_v1bf16(<1 x ptr> %ptrs, <1 x i1> %m, <1 x bfloat> %passthru) {
; RV32V-LABEL: mgather_v1bf16:
; RV32V: # %bb.0:
-; RV32V-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
-; RV32V-NEXT: vfirst.m a0, v0
-; RV32V-NEXT: bnez a0, .LBB58_2
-; RV32V-NEXT: # %bb.1: # %cond.load
-; RV32V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; RV32V-NEXT: vmv.x.s a0, v8
-; RV32V-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; RV32V-NEXT: vle16.v v9, (a0)
-; RV32V-NEXT: .LBB58_2: # %else
+; RV32V-NEXT: vsetivli zero, 1, e16, mf4, ta, mu
+; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vmv1r.v v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v1bf16:
; RV64V: # %bb.0:
-; RV64V-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
-; RV64V-NEXT: vfirst.m a0, v0
-; RV64V-NEXT: bnez a0, .LBB58_2
-; RV64V-NEXT: # %bb.1: # %cond.load
-; RV64V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
-; RV64V-NEXT: vmv.x.s a0, v8
-; RV64V-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; RV64V-NEXT: vle16.v v9, (a0)
-; RV64V-NEXT: .LBB58_2: # %else
+; RV64V-NEXT: vsetivli zero, 1, e16, mf4, ta, mu
+; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vmv1r.v v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v1bf16:
; RV32ZVE32F: # %bb.0:
-; RV32ZVE32F-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
-; RV32ZVE32F-NEXT: vfirst.m a0, v0
-; RV32ZVE32F-NEXT: bnez a0, .LBB58_2
-; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
-; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV32ZVE32F-NEXT: vmv.x.s a0, v8
-; RV32ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV32ZVE32F-NEXT: vle16.v v9, (a0)
-; RV32ZVE32F-NEXT: .LBB58_2: # %else
+; RV32ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu
+; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vmv1r.v v8, v9
; RV32ZVE32F-NEXT: ret
;
@@ -6947,94 +6926,22 @@ declare <2 x bfloat> @llvm.masked.gather.v2bf16.v2p0(<2 x ptr>, i32, <2 x i1>, <
define <2 x bfloat> @mgather_v2bf16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x bfloat> %passthru) {
; RV32V-LABEL: mgather_v2bf16:
; RV32V: # %bb.0:
-; RV32V-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV32V-NEXT: vmv.x.s a0, v0
-; RV32V-NEXT: andi a1, a0, 1
-; RV32V-NEXT: bnez a1, .LBB59_3
-; RV32V-NEXT: # %bb.1: # %else
-; RV32V-NEXT: andi a0, a0, 2
-; RV32V-NEXT: bnez a0, .LBB59_4
-; RV32V-NEXT: .LBB59_2: # %else2
-; RV32V-NEXT: vmv1r.v v8, v9
-; RV32V-NEXT: ret
-; RV32V-NEXT: .LBB59_3: # %cond.load
-; RV32V-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; RV32V-NEXT: vmv.x.s a1, v8
-; RV32V-NEXT: lh a1, 0(a1)
-; RV32V-NEXT: vsetvli zero, zero, e16, m2, tu, ma
-; RV32V-NEXT: vmv.s.x v9, a1
-; RV32V-NEXT: andi a0, a0, 2
-; RV32V-NEXT: beqz a0, .LBB59_2
-; RV32V-NEXT: .LBB59_4: # %cond.load1
-; RV32V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; RV32V-NEXT: vslidedown.vi v8, v8, 1
-; RV32V-NEXT: vmv.x.s a0, v8
-; RV32V-NEXT: lh a0, 0(a0)
-; RV32V-NEXT: vmv.s.x v8, a0
-; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; RV32V-NEXT: vslideup.vi v9, v8, 1
+; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
+; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vmv1r.v v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v2bf16:
; RV64V: # %bb.0:
-; RV64V-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64V-NEXT: vmv.x.s a0, v0
-; RV64V-NEXT: andi a1, a0, 1
-; RV64V-NEXT: bnez a1, .LBB59_3
-; RV64V-NEXT: # %bb.1: # %else
-; RV64V-NEXT: andi a0, a0, 2
-; RV64V-NEXT: bnez a0, .LBB59_4
-; RV64V-NEXT: .LBB59_2: # %else2
-; RV64V-NEXT: vmv1r.v v8, v9
-; RV64V-NEXT: ret
-; RV64V-NEXT: .LBB59_3: # %cond.load
-; RV64V-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; RV64V-NEXT: vmv.x.s a1, v8
-; RV64V-NEXT: lh a1, 0(a1)
-; RV64V-NEXT: vsetvli zero, zero, e16, m2, tu, ma
-; RV64V-NEXT: vmv.s.x v9, a1
-; RV64V-NEXT: andi a0, a0, 2
-; RV64V-NEXT: beqz a0, .LBB59_2
-; RV64V-NEXT: .LBB59_4: # %cond.load1
-; RV64V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64V-NEXT: vslidedown.vi v8, v8, 1
-; RV64V-NEXT: vmv.x.s a0, v8
-; RV64V-NEXT: lh a0, 0(a0)
-; RV64V-NEXT: vmv.s.x v8, a0
-; RV64V-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; RV64V-NEXT: vslideup.vi v9, v8, 1
+; RV64V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
+; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vmv1r.v v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v2bf16:
; RV32ZVE32F: # %bb.0:
-; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV32ZVE32F-NEXT: vmv.x.s a0, v0
-; RV32ZVE32F-NEXT: andi a1, a0, 1
-; RV32ZVE32F-NEXT: bnez a1, .LBB59_3
-; RV32ZVE32F-NEXT: # %bb.1: # %else
-; RV32ZVE32F-NEXT: andi a0, a0, 2
-; RV32ZVE32F-NEXT: bnez a0, .LBB59_4
-; RV32ZVE32F-NEXT: .LBB59_2: # %else2
-; RV32ZVE32F-NEXT: vmv1r.v v8, v9
-; RV32ZVE32F-NEXT: ret
-; RV32ZVE32F-NEXT: .LBB59_3: # %cond.load
-; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; RV32ZVE32F-NEXT: vmv.x.s a1, v8
-; RV32ZVE32F-NEXT: lh a1, 0(a1)
-; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma
-; RV32ZVE32F-NEXT: vmv.s.x v9, a1
-; RV32ZVE32F-NEXT: andi a0, a0, 2
-; RV32ZVE32F-NEXT: beqz a0, .LBB59_2
-; RV32ZVE32F-NEXT: .LBB59_4: # %cond.load1
-; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV32ZVE32F-NEXT: vmv.x.s a0, v8
-; RV32ZVE32F-NEXT: lh a0, 0(a0)
-; RV32ZVE32F-NEXT: vmv.s.x v8, a0
-; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
-; RV32ZVE32F-NEXT: vslideup.vi v9, v8, 1
+; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu
+; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vmv1r.v v8, v9
; RV32ZVE32F-NEXT: ret
;
@@ -7071,115 +6978,15 @@ declare <4 x bfloat> @llvm.masked.gather.v4bf16.v4p0(<4 x ptr>, i32, <4 x i1>, <
define <4 x bfloat> @mgather_v4bf16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x bfloat> %passthru) {
; RV32-LABEL: mgather_v4bf16:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV32-NEXT: vmv.x.s a0, v0
-; RV32-NEXT: andi a1, a0, 1
-; RV32-NEXT: bnez a1, .LBB60_5
-; RV32-NEXT: # %bb.1: # %else
-; RV32-NEXT: andi a1, a0, 2
-; RV32-NEXT: bnez a1, .LBB60_6
-; RV32-NEXT: .LBB60_2: # %else2
-; RV32-NEXT: andi a1, a0, 4
-; RV32-NEXT: bnez a1, .LBB60_7
-; RV32-NEXT: .LBB60_3: # %else5
-; RV32-NEXT: andi a0, a0, 8
-; RV32-NEXT: bnez a0, .LBB60_8
-; RV32-NEXT: .LBB60_4: # %else8
-; RV32-NEXT: vmv1r.v v8, v9
-; RV32-NEXT: ret
-; RV32-NEXT: .LBB60_5: # %cond.load
-; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; RV32-NEXT: vmv.x.s a1, v8
-; RV32-NEXT: lh a1, 0(a1)
-; RV32-NEXT: vsetvli zero, zero, e16, m2, tu, ma
-; RV32-NEXT: vmv.s.x v9, a1
-; RV32-NEXT: andi a1, a0, 2
-; RV32-NEXT: beqz a1, .LBB60_2
-; RV32-NEXT: .LBB60_6: # %cond.load1
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v10, v8, 1
-; RV32-NEXT: vmv.x.s a1, v10
-; RV32-NEXT: lh a1, 0(a1)
-; RV32-NEXT: vmv.s.x v10, a1
-; RV32-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
-; RV32-NEXT: vslideup.vi v9, v10, 1
-; RV32-NEXT: andi a1, a0, 4
-; RV32-NEXT: beqz a1, .LBB60_3
-; RV32-NEXT: .LBB60_7: # %cond.load4
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v10, v8, 2
-; RV32-NEXT: vmv.x.s a1, v10
-; RV32-NEXT: lh a1, 0(a1)
-; RV32-NEXT: vmv.s.x v10, a1
-; RV32-NEXT: vsetivli zero, 3, e16, mf2, tu, ma
-; RV32-NEXT: vslideup.vi v9, v10, 2
-; RV32-NEXT: andi a0, a0, 8
-; RV32-NEXT: beqz a0, .LBB60_4
-; RV32-NEXT: .LBB60_8: # %cond.load7
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 3
-; RV32-NEXT: vmv.x.s a0, v8
-; RV32-NEXT: lh a0, 0(a0)
-; RV32-NEXT: vmv.s.x v8, a0
-; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT: vslideup.vi v9, v8, 3
+; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
+; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_v4bf16:
; RV64V: # %bb.0:
-; RV64V-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64V-NEXT: vmv.x.s a0, v0
-; RV64V-NEXT: andi a1, a0, 1
-; RV64V-NEXT: bnez a1, .LBB60_5
-; RV64V-NEXT: # %bb.1: # %else
-; RV64V-NEXT: andi a1, a0, 2
-; RV64V-NEXT: bnez a1, .LBB60_6
-; RV64V-NEXT: .LBB60_2: # %else2
-; RV64V-NEXT: andi a1, a0, 4
-; RV64V-NEXT: bnez a1, .LBB60_7
-; RV64V-NEXT: .LBB60_3: # %else5
-; RV64V-NEXT: andi a0, a0, 8
-; RV64V-NEXT: bnez a0, .LBB60_8
-; RV64V-NEXT: .LBB60_4: # %else8
-; RV64V-NEXT: vmv1r.v v8, v10
-; RV64V-NEXT: ret
-; RV64V-NEXT: .LBB60_5: # %cond.load
-; RV64V-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; RV64V-NEXT: vmv.x.s a1, v8
-; RV64V-NEXT: lh a1, 0(a1)
-; RV64V-NEXT: vsetvli zero, zero, e16, m2, tu, ma
-; RV64V-NEXT: vmv.s.x v10, a1
-; RV64V-NEXT: andi a1, a0, 2
-; RV64V-NEXT: beqz a1, .LBB60_2
-; RV64V-NEXT: .LBB60_6: # %cond.load1
-; RV64V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64V-NEXT: vslidedown.vi v11, v8, 1
-; RV64V-NEXT: vmv.x.s a1, v11
-; RV64V-NEXT: lh a1, 0(a1)
-; RV64V-NEXT: vmv.s.x v11, a1
-; RV64V-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
-; RV64V-NEXT: vslideup.vi v10, v11, 1
-; RV64V-NEXT: andi a1, a0, 4
-; RV64V-NEXT: beqz a1, .LBB60_3
-; RV64V-NEXT: .LBB60_7: # %cond.load4
-; RV64V-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64V-NEXT: vslidedown.vi v12, v8, 2
-; RV64V-NEXT: vmv.x.s a1, v12
-; RV64V-NEXT: lh a1, 0(a1)
-; RV64V-NEXT: vmv.s.x v11, a1
-; RV64V-NEXT: vsetivli zero, 3, e16, mf2, tu, ma
-; RV64V-NEXT: vslideup.vi v10, v11, 2
-; RV64V-NEXT: andi a0, a0, 8
-; RV64V-NEXT: beqz a0, .LBB60_4
-; RV64V-NEXT: .LBB60_8: # %cond.load7
-; RV64V-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64V-NEXT: vslidedown.vi v8, v8, 3
-; RV64V-NEXT: vmv.x.s a0, v8
-; RV64V-NEXT: lh a0, 0(a0)
-; RV64V-NEXT: vmv.s.x v8, a0
-; RV64V-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV64V-NEXT: vslideup.vi v10, v8, 3
+; RV64V-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
+; RV64V-NEXT: vluxei64.v v10, (zero), v8, v0.t
; RV64V-NEXT: vmv1r.v v8, v10
; RV64V-NEXT: ret
;
@@ -7238,45 +7045,16 @@ define <4 x bfloat> @mgather_v4bf16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x bfloat> %
define <4 x bfloat> @mgather_truemask_v4bf16(<4 x ptr> %ptrs, <4 x bfloat> %passthru) {
; RV32-LABEL: mgather_truemask_v4bf16:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vmv.x.s a0, v8
-; RV32-NEXT: vslidedown.vi v9, v8, 1
-; RV32-NEXT: vmv.x.s a1, v9
-; RV32-NEXT: vslidedown.vi v9, v8, 2
-; RV32-NEXT: vmv.x.s a2, v9
-; RV32-NEXT: vslidedown.vi v8, v8, 3
-; RV32-NEXT: vmv.x.s a3, v8
-; RV32-NEXT: lh a0, 0(a0)
-; RV32-NEXT: lh a1, 0(a1)
-; RV32-NEXT: lh a2, 0(a2)
-; RV32-NEXT: lh a3, 0(a3)
; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT: vmv.v.x v8, a0
-; RV32-NEXT: vslide1down.vx v8, v8, a1
-; RV32-NEXT: vslide1down.vx v8, v8, a2
-; RV32-NEXT: vslide1down.vx v8, v8, a3
+; RV32-NEXT: vluxei32.v v9, (zero), v8
+; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_truemask_v4bf16:
; RV64V: # %bb.0:
-; RV64V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64V-NEXT: vmv.x.s a0, v8
-; RV64V-NEXT: vslidedown.vi v10, v8, 1
-; RV64V-NEXT: vmv.x.s a1, v10
-; RV64V-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64V-NEXT: vslidedown.vi v10, v8, 2
-; RV64V-NEXT: vmv.x.s a2, v10
-; RV64V-NEXT: vslidedown.vi v8, v8, 3
-; RV64V-NEXT: vmv.x.s a3, v8
-; RV64V-NEXT: lh a0, 0(a0)
-; RV64V-NEXT: lh a1, 0(a1)
-; RV64V-NEXT: lh a2, 0(a2)
-; RV64V-NEXT: lh a3, 0(a3)
; RV64V-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV64V-NEXT: vmv.v.x v8, a0
-; RV64V-NEXT: vslide1down.vx v8, v8, a1
-; RV64V-NEXT: vslide1down.vx v8, v8, a2
-; RV64V-NEXT: vslide1down.vx v8, v8, a3
+; RV64V-NEXT: vluxei64.v v10, (zero), v8
+; RV64V-NEXT: vmv1r.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_truemask_v4bf16:
@@ -7322,236 +7100,17 @@ declare <8 x bfloat> @llvm.masked.gather.v8bf16.v8p0(<8 x ptr>, i32, <8 x i1>, <
define <8 x bfloat> @mgather_v8bf16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x bfloat> %passthru) {
; RV32-LABEL: mgather_v8bf16:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV32-NEXT: vmv.x.s a0, v0
-; RV32-NEXT: andi a1, a0, 1
-; RV32-NEXT: bnez a1, .LBB63_9
-; RV32-NEXT: # %bb.1: # %else
-; RV32-NEXT: andi a1, a0, 2
-; RV32-NEXT: bnez a1, .LBB63_10
-; RV32-NEXT: .LBB63_2: # %else2
-; RV32-NEXT: andi a1, a0, 4
-; RV32-NEXT: bnez a1, .LBB63_11
-; RV32-NEXT: .LBB63_3: # %else5
-; RV32-NEXT: andi a1, a0, 8
-; RV32-NEXT: bnez a1, .LBB63_12
-; RV32-NEXT: .LBB63_4: # %else8
-; RV32-NEXT: andi a1, a0, 16
-; RV32-NEXT: bnez a1, .LBB63_13
-; RV32-NEXT: .LBB63_5: # %else11
-; RV32-NEXT: andi a1, a0, 32
-; RV32-NEXT: bnez a1, .LBB63_14
-; RV32-NEXT: .LBB63_6: # %else14
-; RV32-NEXT: andi a1, a0, 64
-; RV32-NEXT: bnez a1, .LBB63_15
-; RV32-NEXT: .LBB63_7: # %else17
-; RV32-NEXT: andi a0, a0, -128
-; RV32-NEXT: bnez a0, .LBB63_16
-; RV32-NEXT: .LBB63_8: # %else20
-; RV32-NEXT: vmv1r.v v8, v10
-; RV32-NEXT: ret
-; RV32-NEXT: .LBB63_9: # %cond.load
-; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; RV32-NEXT: vmv.x.s a1, v8
-; RV32-NEXT: lh a1, 0(a1)
-; RV32-NEXT: vsetvli zero, zero, e16, m2, tu, ma
-; RV32-NEXT: vmv.s.x v10, a1
-; RV32-NEXT: andi a1, a0, 2
-; RV32-NEXT: beqz a1, .LBB63_2
-; RV32-NEXT: .LBB63_10: # %cond.load1
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v11, v8, 1
-; RV32-NEXT: vmv.x.s a1, v11
-; RV32-NEXT: lh a1, 0(a1)
-; RV32-NEXT: vmv.s.x v11, a1
-; RV32-NEXT: vsetivli zero, 2, e16, m1, tu, ma
-; RV32-NEXT: vslideup.vi v10, v11, 1
-; RV32-NEXT: andi a1, a0, 4
-; RV32-NEXT: beqz a1, .LBB63_3
-; RV32-NEXT: .LBB63_11: # %cond.load4
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v11, v8, 2
-; RV32-NEXT: vmv.x.s a1, v11
-; RV32-NEXT: lh a1, 0(a1)
-; RV32-NEXT: vmv.s.x v11, a1
-; RV32-NEXT: vsetivli zero, 3, e16, m1, tu, ma
-; RV32-NEXT: vslideup.vi v10, v11, 2
-; RV32-NEXT: andi a1, a0, 8
-; RV32-NEXT: beqz a1, .LBB63_4
-; RV32-NEXT: .LBB63_12: # %cond.load7
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v11, v8, 3
-; RV32-NEXT: vmv.x.s a1, v11
-; RV32-NEXT: lh a1, 0(a1)
-; RV32-NEXT: vmv.s.x v11, a1
-; RV32-NEXT: vsetivli zero, 4, e16, m1, tu, ma
-; RV32-NEXT: vslideup.vi v10, v11, 3
-; RV32-NEXT: andi a1, a0, 16
-; RV32-NEXT: beqz a1, .LBB63_5
-; RV32-NEXT: .LBB63_13: # %cond.load10
-; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v12, v8, 4
-; RV32-NEXT: vmv.x.s a1, v12
-; RV32-NEXT: lh a1, 0(a1)
-; RV32-NEXT: vmv.s.x v11, a1
-; RV32-NEXT: vsetivli zero, 5, e16, m1, tu, ma
-; RV32-NEXT: vslideup.vi v10, v11, 4
-; RV32-NEXT: andi a1, a0, 32
-; RV32-NEXT: beqz a1, .LBB63_6
-; RV32-NEXT: .LBB63_14: # %cond.load13
-; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v12, v8, 5
-; RV32-NEXT: vmv.x.s a1, v12
-; RV32-NEXT: lh a1, 0(a1)
-; RV32-NEXT: vmv.s.x v11, a1
-; RV32-NEXT: vsetivli zero, 6, e16, m1, tu, ma
-; RV32-NEXT: vslideup.vi v10, v11, 5
-; RV32-NEXT: andi a1, a0, 64
-; RV32-NEXT: beqz a1, .LBB63_7
-; RV32-NEXT: .LBB63_15: # %cond.load16
-; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v12, v8, 6
-; RV32-NEXT: vmv.x.s a1, v12
-; RV32-NEXT: lh a1, 0(a1)
-; RV32-NEXT: vmv.s.x v11, a1
-; RV32-NEXT: vsetivli zero, 7, e16, m1, tu, ma
-; RV32-NEXT: vslideup.vi v10, v11, 6
-; RV32-NEXT: andi a0, a0, -128
-; RV32-NEXT: beqz a0, .LBB63_8
-; RV32-NEXT: .LBB63_16: # %cond.load19
-; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 7
-; RV32-NEXT: vmv.x.s a0, v8
-; RV32-NEXT: lh a0, 0(a0)
-; RV32-NEXT: vmv.s.x v8, a0
-; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT: vslideup.vi v10, v8, 7
-; RV32-NEXT: vmv1r.v v8, v10
+; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu
+; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t
+; RV32-NEXT: vmv.v.v v8, v10
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_v8bf16:
; RV64V: # %bb.0:
-; RV64V-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64V-NEXT: vmv.x.s a0, v0
-; RV64V-NEXT: andi a1, a0, 1
-; RV64V-NEXT: bnez a1, .LBB63_11
-; RV64V-NEXT: # %bb.1: # %else
-; RV64V-NEXT: andi a1, a0, 2
-; RV64V-NEXT: bnez a1, .LBB63_12
-; RV64V-NEXT: .LBB63_2: # %else2
-; RV64V-NEXT: andi a1, a0, 4
-; RV64V-NEXT: bnez a1, .LBB63_13
-; RV64V-NEXT: .LBB63_3: # %else5
-; RV64V-NEXT: andi a1, a0, 8
-; RV64V-NEXT: beqz a1, .LBB63_5
-; RV64V-NEXT: .LBB63_4: # %cond.load7
-; RV64V-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64V-NEXT: vslidedown.vi v14, v8, 3
-; RV64V-NEXT: vmv.x.s a1, v14
-; RV64V-NEXT: lh a1, 0(a1)
-; RV64V-NEXT: vmv.s.x v13, a1
-; RV64V-NEXT: vsetivli zero, 4, e16, m1, tu, ma
-; RV64V-NEXT: vslideup.vi v12, v13, 3
-; RV64V-NEXT: .LBB63_5: # %else8
-; RV64V-NEXT: addi sp, sp, -320
-; RV64V-NEXT: .cfi_def_cfa_offset 320
-; RV64V-NEXT: sd ra, 312(sp) # 8-byte Folded Spill
-; RV64V-NEXT: sd s0, 304(sp) # 8-byte Folded Spill
-; RV64V-NEXT: .cfi_offset ra, -8
-; RV64V-NEXT: .cfi_offset s0, -16
-; RV64V-NEXT: addi s0, sp, 320
-; RV64V-NEXT: .cfi_def_cfa s0, 0
-; RV64V-NEXT: andi sp, sp, -64
-; RV64V-NEXT: andi a1, a0, 16
-; RV64V-NEXT: bnez a1, .LBB63_14
-; RV64V-NEXT: # %bb.6: # %else11
-; RV64V-NEXT: andi a1, a0, 32
-; RV64V-NEXT: bnez a1, .LBB63_15
-; RV64V-NEXT: .LBB63_7: # %else14
-; RV64V-NEXT: andi a1, a0, 64
-; RV64V-NEXT: bnez a1, .LBB63_16
-; RV64V-NEXT: .LBB63_8: # %else17
-; RV64V-NEXT: andi a0, a0, -128
-; RV64V-NEXT: beqz a0, .LBB63_10
-; RV64V-NEXT: .LBB63_9: # %cond.load19
-; RV64V-NEXT: mv a0, sp
-; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64V-NEXT: vse64.v v8, (a0)
-; RV64V-NEXT: ld a0, 56(sp)
-; RV64V-NEXT: lh a0, 0(a0)
-; RV64V-NEXT: vmv.s.x v8, a0
-; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV64V-NEXT: vslideup.vi v12, v8, 7
-; RV64V-NEXT: .LBB63_10: # %else20
-; RV64V-NEXT: vmv1r.v v8, v12
-; RV64V-NEXT: addi sp, s0, -320
-; RV64V-NEXT: ld ra, 312(sp) # 8-byte Folded Reload
-; RV64V-NEXT: ld s0, 304(sp) # 8-byte Folded Reload
-; RV64V-NEXT: addi sp, sp, 320
+; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, mu
+; RV64V-NEXT: vluxei64.v v12, (zero), v8, v0.t
+; RV64V-NEXT: vmv.v.v v8, v12
; RV64V-NEXT: ret
-; RV64V-NEXT: .LBB63_11: # %cond.load
-; RV64V-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; RV64V-NEXT: vmv.x.s a1, v8
-; RV64V-NEXT: lh a1, 0(a1)
-; RV64V-NEXT: vsetvli zero, zero, e16, m2, tu, ma
-; RV64V-NEXT: vmv.s.x v12, a1
-; RV64V-NEXT: andi a1, a0, 2
-; RV64V-NEXT: beqz a1, .LBB63_2
-; RV64V-NEXT: .LBB63_12: # %cond.load1
-; RV64V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64V-NEXT: vslidedown.vi v13, v8, 1
-; RV64V-NEXT: vmv.x.s a1, v13
-; RV64V-NEXT: lh a1, 0(a1)
-; RV64V-NEXT: vmv.s.x v13, a1
-; RV64V-NEXT: vsetivli zero, 2, e16, m1, tu, ma
-; RV64V-NEXT: vslideup.vi v12, v13, 1
-; RV64V-NEXT: andi a1, a0, 4
-; RV64V-NEXT: beqz a1, .LBB63_3
-; RV64V-NEXT: .LBB63_13: # %cond.load4
-; RV64V-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64V-NEXT: vslidedown.vi v14, v8, 2
-; RV64V-NEXT: vmv.x.s a1, v14
-; RV64V-NEXT: lh a1, 0(a1)
-; RV64V-NEXT: vmv.s.x v13, a1
-; RV64V-NEXT: vsetivli zero, 3, e16, m1, tu, ma
-; RV64V-NEXT: vslideup.vi v12, v13, 2
-; RV64V-NEXT: andi a1, a0, 8
-; RV64V-NEXT: bnez a1, .LBB63_4
-; RV64V-NEXT: j .LBB63_5
-; RV64V-NEXT: .LBB63_14: # %cond.load10
-; RV64V-NEXT: addi a1, sp, 192
-; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64V-NEXT: vse64.v v8, (a1)
-; RV64V-NEXT: ld a1, 224(sp)
-; RV64V-NEXT: lh a1, 0(a1)
-; RV64V-NEXT: vmv.s.x v13, a1
-; RV64V-NEXT: vsetivli zero, 5, e16, m1, tu, ma
-; RV64V-NEXT: vslideup.vi v12, v13, 4
-; RV64V-NEXT: andi a1, a0, 32
-; RV64V-NEXT: beqz a1, .LBB63_7
-; RV64V-NEXT: .LBB63_15: # %cond.load13
-; RV64V-NEXT: addi a1, sp, 128
-; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64V-NEXT: vse64.v v8, (a1)
-; RV64V-NEXT: ld a1, 168(sp)
-; RV64V-NEXT: lh a1, 0(a1)
-; RV64V-NEXT: vmv.s.x v13, a1
-; RV64V-NEXT: vsetivli zero, 6, e16, m1, tu, ma
-; RV64V-NEXT: vslideup.vi v12, v13, 5
-; RV64V-NEXT: andi a1, a0, 64
-; RV64V-NEXT: beqz a1, .LBB63_8
-; RV64V-NEXT: .LBB63_16: # %cond.load16
-; RV64V-NEXT: addi a1, sp, 64
-; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64V-NEXT: vse64.v v8, (a1)
-; RV64V-NEXT: ld a1, 112(sp)
-; RV64V-NEXT: lh a1, 0(a1)
-; RV64V-NEXT: vmv.s.x v13, a1
-; RV64V-NEXT: vsetivli zero, 7, e16, m1, tu, ma
-; RV64V-NEXT: vslideup.vi v12, v13, 6
-; RV64V-NEXT: andi a0, a0, -128
-; RV64V-NEXT: bnez a0, .LBB63_9
-; RV64V-NEXT: j .LBB63_10
;
; RV64ZVE32F-LABEL: mgather_v8bf16:
; RV64ZVE32F: # %bb.0:
@@ -7655,112 +7214,9 @@ define <8 x bfloat> @mgather_baseidx_v8i8_v8bf16(ptr %base, <8 x i8> %idxs, <8 x
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsext.vf4 v10, v8
; RV32-NEXT: vadd.vv v10, v10, v10
-; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV32-NEXT: vmv.x.s a1, v0
-; RV32-NEXT: andi a2, a1, 1
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vadd.vx v10, v10, a0
-; RV32-NEXT: bnez a2, .LBB64_9
-; RV32-NEXT: # %bb.1: # %else
-; RV32-NEXT: andi a0, a1, 2
-; RV32-NEXT: bnez a0, .LBB64_10
-; RV32-NEXT: .LBB64_2: # %else2
-; RV32-NEXT: andi a0, a1, 4
-; RV32-NEXT: bnez a0, .LBB64_11
-; RV32-NEXT: .LBB64_3: # %else5
-; RV32-NEXT: andi a0, a1, 8
-; RV32-NEXT: bnez a0, .LBB64_12
-; RV32-NEXT: .LBB64_4: # %else8
-; RV32-NEXT: andi a0, a1, 16
-; RV32-NEXT: bnez a0, .LBB64_13
-; RV32-NEXT: .LBB64_5: # %else11
-; RV32-NEXT: andi a0, a1, 32
-; RV32-NEXT: bnez a0, .LBB64_14
-; RV32-NEXT: .LBB64_6: # %else14
-; RV32-NEXT: andi a0, a1, 64
-; RV32-NEXT: bnez a0, .LBB64_15
-; RV32-NEXT: .LBB64_7: # %else17
-; RV32-NEXT: andi a0, a1, -128
-; RV32-NEXT: bnez a0, .LBB64_16
-; RV32-NEXT: .LBB64_8: # %else20
-; RV32-NEXT: vmv1r.v v8, v9
-; RV32-NEXT: ret
-; RV32-NEXT: .LBB64_9: # %cond.load
-; RV32-NEXT: vmv.x.s a0, v10
-; RV32-NEXT: lh a0, 0(a0)
-; RV32-NEXT: vsetvli zero, zero, e16, m1, tu, ma
-; RV32-NEXT: vmv.s.x v9, a0
-; RV32-NEXT: andi a0, a1, 2
-; RV32-NEXT: beqz a0, .LBB64_2
-; RV32-NEXT: .LBB64_10: # %cond.load1
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v10, 1
-; RV32-NEXT: vmv.x.s a0, v8
-; RV32-NEXT: lh a0, 0(a0)
-; RV32-NEXT: vmv.s.x v8, a0
-; RV32-NEXT: vsetivli zero, 2, e16, m1, tu, ma
-; RV32-NEXT: vslideup.vi v9, v8, 1
-; RV32-NEXT: andi a0, a1, 4
-; RV32-NEXT: beqz a0, .LBB64_3
-; RV32-NEXT: .LBB64_11: # %cond.load4
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v10, 2
-; RV32-NEXT: vmv.x.s a0, v8
-; RV32-NEXT: lh a0, 0(a0)
-; RV32-NEXT: vmv.s.x v8, a0
-; RV32-NEXT: vsetivli zero, 3, e16, m1, tu, ma
-; RV32-NEXT: vslideup.vi v9, v8, 2
-; RV32-NEXT: andi a0, a1, 8
-; RV32-NEXT: beqz a0, .LBB64_4
-; RV32-NEXT: .LBB64_12: # %cond.load7
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v10, 3
-; RV32-NEXT: vmv.x.s a0, v8
-; RV32-NEXT: lh a0, 0(a0)
-; RV32-NEXT: vmv.s.x v8, a0
-; RV32-NEXT: vsetivli zero, 4, e16, m1, tu, ma
-; RV32-NEXT: vslideup.vi v9, v8, 3
-; RV32-NEXT: andi a0, a1, 16
-; RV32-NEXT: beqz a0, .LBB64_5
-; RV32-NEXT: .LBB64_13: # %cond.load10
-; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v12, v10, 4
-; RV32-NEXT: vmv.x.s a0, v12
-; RV32-NEXT: lh a0, 0(a0)
-; RV32-NEXT: vmv.s.x v8, a0
-; RV32-NEXT: vsetivli zero, 5, e16, m1, tu, ma
-; RV32-NEXT: vslideup.vi v9, v8, 4
-; RV32-NEXT: andi a0, a1, 32
-; RV32-NEXT: beqz a0, .LBB64_6
-; RV32-NEXT: .LBB64_14: # %cond.load13
-; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v12, v10, 5
-; RV32-NEXT: vmv.x.s a0, v12
-; RV32-NEXT: lh a0, 0(a0)
-; RV32-NEXT: vmv.s.x v8, a0
-; RV32-NEXT: vsetivli zero, 6, e16, m1, tu, ma
-; RV32-NEXT: vslideup.vi v9, v8, 5
-; RV32-NEXT: andi a0, a1, 64
-; RV32-NEXT: beqz a0, .LBB64_7
-; RV32-NEXT: .LBB64_15: # %cond.load16
-; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v12, v10, 6
-; RV32-NEXT: vmv.x.s a0, v12
-; RV32-NEXT: lh a0, 0(a0)
-; RV32-NEXT: vmv.s.x v8, a0
-; RV32-NEXT: vsetivli zero, 7, e16, m1, tu, ma
-; RV32-NEXT: vslideup.vi v9, v8, 6
-; RV32-NEXT: andi a0, a1, -128
-; RV32-NEXT: beqz a0, .LBB64_8
-; RV32-NEXT: .LBB64_16: # %cond.load19
-; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v10, v10, 7
-; RV32-NEXT: vmv.x.s a0, v10
-; RV32-NEXT: lh a0, 0(a0)
-; RV32-NEXT: vmv.s.x v8, a0
-; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT: vslideup.vi v9, v8, 7
-; RV32-NEXT: vmv1r.v v8, v9
+; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t
+; RV32-NEXT: vmv.v.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_v8i8_v8bf16:
@@ -7768,128 +7224,10 @@ define <8 x bfloat> @mgather_baseidx_v8i8_v8bf16(ptr %base, <8 x i8> %idxs, <8 x
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf8 v12, v8
; RV64V-NEXT: vadd.vv v12, v12, v12
-; RV64V-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV64V-NEXT: vmv.x.s a1, v0
-; RV64V-NEXT: andi a2, a1, 1
-; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; RV64V-NEXT: vadd.vx v12, v12, a0
-; RV64V-NEXT: bnez a2, .LBB64_11
-; RV64V-NEXT: # %bb.1: # %else
-; RV64V-NEXT: andi a0, a1, 2
-; RV64V-NEXT: bnez a0, .LBB64_12
-; RV64V-NEXT: .LBB64_2: # %else2
-; RV64V-NEXT: andi a0, a1, 4
-; RV64V-NEXT: bnez a0, .LBB64_13
-; RV64V-NEXT: .LBB64_3: # %else5
-; RV64V-NEXT: andi a0, a1, 8
-; RV64V-NEXT: beqz a0, .LBB64_5
-; RV64V-NEXT: .LBB64_4: # %cond.load7
-; RV64V-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64V-NEXT: vslidedown.vi v10, v12, 3
-; RV64V-NEXT: vmv.x.s a0, v10
-; RV64V-NEXT: lh a0, 0(a0)
-; RV64V-NEXT: vmv.s.x v8, a0
-; RV64V-NEXT: vsetivli zero, 4, e16, m1, tu, ma
-; RV64V-NEXT: vslideup.vi v9, v8, 3
-; RV64V-NEXT: .LBB64_5: # %else8
-; RV64V-NEXT: addi sp, sp, -320
-; RV64V-NEXT: .cfi_def_cfa_offset 320
-; RV64V-NEXT: sd ra, 312(sp) # 8-byte Folded Spill
-; RV64V-NEXT: sd s0, 304(sp) # 8-byte Folded Spill
-; RV64V-NEXT: .cfi_offset ra, -8
-; RV64V-NEXT: .cfi_offset s0, -16
-; RV64V-NEXT: addi s0, sp, 320
-; RV64V-NEXT: .cfi_def_cfa s0, 0
-; RV64V-NEXT: andi sp, sp, -64
-; RV64V-NEXT: andi a0, a1, 16
-; RV64V-NEXT: bnez a0, .LBB64_14
-; RV64V-NEXT: # %bb.6: # %else11
-; RV64V-NEXT: andi a0, a1, 32
-; RV64V-NEXT: bnez a0, .LBB64_15
-; RV64V-NEXT: .LBB64_7: # %else14
-; RV64V-NEXT: andi a0, a1, 64
-; RV64V-NEXT: bnez a0, .LBB64_16
-; RV64V-NEXT: .LBB64_8: # %else17
-; RV64V-NEXT: andi a0, a1, -128
-; RV64V-NEXT: beqz a0, .LBB64_10
-; RV64V-NEXT: .LBB64_9: # %cond.load19
-; RV64V-NEXT: mv a0, sp
-; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64V-NEXT: vse64.v v12, (a0)
-; RV64V-NEXT: ld a0, 56(sp)
-; RV64V-NEXT: lh a0, 0(a0)
-; RV64V-NEXT: vmv.s.x v8, a0
-; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV64V-NEXT: vslideup.vi v9, v8, 7
-; RV64V-NEXT: .LBB64_10: # %else20
-; RV64V-NEXT: vmv1r.v v8, v9
-; RV64V-NEXT: addi sp, s0, -320
-; RV64V-NEXT: ld ra, 312(sp) # 8-byte Folded Reload
-; RV64V-NEXT: ld s0, 304(sp) # 8-byte Folded Reload
-; RV64V-NEXT: addi sp, sp, 320
+; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t
+; RV64V-NEXT: vmv.v.v v8, v9
; RV64V-NEXT: ret
-; RV64V-NEXT: .LBB64_11: # %cond.load
-; RV64V-NEXT: vmv.x.s a0, v12
-; RV64V-NEXT: lh a0, 0(a0)
-; RV64V-NEXT: vsetvli zero, zero, e16, m1, tu, ma
-; RV64V-NEXT: vmv.s.x v9, a0
-; RV64V-NEXT: andi a0, a1, 2
-; RV64V-NEXT: beqz a0, .LBB64_2
-; RV64V-NEXT: .LBB64_12: # %cond.load1
-; RV64V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64V-NEXT: vslidedown.vi v8, v12, 1
-; RV64V-NEXT: vmv.x.s a0, v8
-; RV64V-NEXT: lh a0, 0(a0)
-; RV64V-NEXT: vmv.s.x v8, a0
-; RV64V-NEXT: vsetivli zero, 2, e16, m1, tu, ma
-; RV64V-NEXT: vslideup.vi v9, v8, 1
-; RV64V-NEXT: andi a0, a1, 4
-; RV64V-NEXT: beqz a0, .LBB64_3
-; RV64V-NEXT: .LBB64_13: # %cond.load4
-; RV64V-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64V-NEXT: vslidedown.vi v10, v12, 2
-; RV64V-NEXT: vmv.x.s a0, v10
-; RV64V-NEXT: lh a0, 0(a0)
-; RV64V-NEXT: vmv.s.x v8, a0
-; RV64V-NEXT: vsetivli zero, 3, e16, m1, tu, ma
-; RV64V-NEXT: vslideup.vi v9, v8, 2
-; RV64V-NEXT: andi a0, a1, 8
-; RV64V-NEXT: bnez a0, .LBB64_4
-; RV64V-NEXT: j .LBB64_5
-; RV64V-NEXT: .LBB64_14: # %cond.load10
-; RV64V-NEXT: addi a0, sp, 192
-; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64V-NEXT: vse64.v v12, (a0)
-; RV64V-NEXT: ld a0, 224(sp)
-; RV64V-NEXT: lh a0, 0(a0)
-; RV64V-NEXT: vmv.s.x v8, a0
-; RV64V-NEXT: vsetivli zero, 5, e16, m1, tu, ma
-; RV64V-NEXT: vslideup.vi v9, v8, 4
-; RV64V-NEXT: andi a0, a1, 32
-; RV64V-NEXT: beqz a0, .LBB64_7
-; RV64V-NEXT: .LBB64_15: # %cond.load13
-; RV64V-NEXT: addi a0, sp, 128
-; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64V-NEXT: vse64.v v12, (a0)
-; RV64V-NEXT: ld a0, 168(sp)
-; RV64V-NEXT: lh a0, 0(a0)
-; RV64V-NEXT: vmv.s.x v8, a0
-; RV64V-NEXT: vsetivli zero, 6, e16, m1, tu, ma
-; RV64V-NEXT: vslideup.vi v9, v8, 5
-; RV64V-NEXT: andi a0, a1, 64
-; RV64V-NEXT: beqz a0, .LBB64_8
-; RV64V-NEXT: .LBB64_16: # %cond.load16
-; RV64V-NEXT: addi a0, sp, 64
-; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64V-NEXT: vse64.v v12, (a0)
-; RV64V-NEXT: ld a0, 112(sp)
-; RV64V-NEXT: lh a0, 0(a0)
-; RV64V-NEXT: vmv.s.x v8, a0
-; RV64V-NEXT: vsetivli zero, 7, e16, m1, tu, ma
-; RV64V-NEXT: vslideup.vi v9, v8, 6
-; RV64V-NEXT: andi a0, a1, -128
-; RV64V-NEXT: bnez a0, .LBB64_9
-; RV64V-NEXT: j .LBB64_10
;
; RV64ZVE32F-LABEL: mgather_baseidx_v8i8_v8bf16:
; RV64ZVE32F: # %bb.0:
@@ -8028,112 +7366,9 @@ define <8 x bfloat> @mgather_baseidx_sext_v8i8_v8bf16(ptr %base, <8 x i8> %idxs,
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsext.vf4 v10, v8
; RV32-NEXT: vadd.vv v10, v10, v10
-; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV32-NEXT: vmv.x.s a1, v0
-; RV32-NEXT: andi a2, a1, 1
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vadd.vx v10, v10, a0
-; RV32-NEXT: bnez a2, .LBB65_9
-; RV32-NEXT: # %bb.1: # %else
-; RV32-NEXT: andi a0, a1, 2
-; RV32-NEXT: bnez a0, .LBB65_10
-; RV32-NEXT: .LBB65_2: # %else2
-; RV32-NEXT: andi a0, a1, 4
-; RV32-NEXT: bnez a0, .LBB65_11
-; RV32-NEXT: .LBB65_3: # %else5
-; RV32-NEXT: andi a0, a1, 8
-; RV32-NEXT: bnez a0, .LBB65_12
-; RV32-NEXT: .LBB65_4: # %else8
-; RV32-NEXT: andi a0, a1, 16
-; RV32-NEXT: bnez a0, .LBB65_13
-; RV32-NEXT: .LBB65_5: # %else11
-; RV32-NEXT: andi a0, a1, 32
-; RV32-NEXT: bnez a0, .LBB65_14
-; RV32-NEXT: .LBB65_6: # %else14
-; RV32-NEXT: andi a0, a1, 64
-; RV32-NEXT: bnez a0, .LBB65_15
-; RV32-NEXT: .LBB65_7: # %else17
-; RV32-NEXT: andi a0, a1, -128
-; RV32-NEXT: bnez a0, .LBB65_16
-; RV32-NEXT: .LBB65_8: # %else20
-; RV32-NEXT: vmv1r.v v8, v9
-; RV32-NEXT: ret
-; RV32-NEXT: .LBB65_9: # %cond.load
-; RV32-NEXT: vmv.x.s a0, v10
-; RV32-NEXT: lh a0, 0(a0)
-; RV32-NEXT: vsetvli zero, zero, e16, m1, tu, ma
-; RV32-NEXT: vmv.s.x v9, a0
-; RV32-NEXT: andi a0, a1, 2
-; RV32-NEXT: beqz a0, .LBB65_2
-; RV32-NEXT: .LBB65_10: # %cond.load1
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v10, 1
-; RV32-NEXT: vmv.x.s a0, v8
-; RV32-NEXT: lh a0, 0(a0)
-; RV32-NEXT: vmv.s.x v8, a0
-; RV32-NEXT: vsetivli zero, 2, e16, m1, tu, ma
-; RV32-NEXT: vslideup.vi v9, v8, 1
-; RV32-NEXT: andi a0, a1, 4
-; RV32-NEXT: beqz a0, .LBB65_3
-; RV32-NEXT: .LBB65_11: # %cond.load4
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v10, 2
-; RV32-NEXT: vmv.x.s a0, v8
-; RV32-NEXT: lh a0, 0(a0)
-; RV32-NEXT: vmv.s.x v8, a0
-; RV32-NEXT: vsetivli zero, 3, e16, m1, tu, ma
-; RV32-NEXT: vslideup.vi v9, v8, 2
-; RV32-NEXT: andi a0, a1, 8
-; RV32-NEXT: beqz a0, .LBB65_4
-; RV32-NEXT: .LBB65_12: # %cond.load7
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v10, 3
-; RV32-NEXT: vmv.x.s a0, v8
-; RV32-NEXT: lh a0, 0(a0)
-; RV32-NEXT: vmv.s.x v8, a0
-; RV32-NEXT: vsetivli zero, 4, e16, m1, tu, ma
-; RV32-NEXT: vslideup.vi v9, v8, 3
-; RV32-NEXT: andi a0, a1, 16
-; RV32-NEXT: beqz a0, .LBB65_5
-; RV32-NEXT: .LBB65_13: # %cond.load10
-; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v12, v10, 4
-; RV32-NEXT: vmv.x.s a0, v12
-; RV32-NEXT: lh a0, 0(a0)
-; RV32-NEXT: vmv.s.x v8, a0
-; RV32-NEXT: vsetivli zero, 5, e16, m1, tu, ma
-; RV32-NEXT: vslideup.vi v9, v8, 4
-; RV32-NEXT: andi a0, a1, 32
-; RV32-NEXT: beqz a0, .LBB65_6
-; RV32-NEXT: .LBB65_14: # %cond.load13
-; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v12, v10, 5
-; RV32-NEXT: vmv.x.s a0, v12
-; RV32-NEXT: lh a0, 0(a0)
-; RV32-NEXT: vmv.s.x v8, a0
-; RV32-NEXT: vsetivli zero, 6, e16, m1, tu, ma
-; RV32-NEXT: vslideup.vi v9, v8, 5
-; RV32-NEXT: andi a0, a1, 64
-; RV32-NEXT: beqz a0, .LBB65_7
-; RV32-NEXT: .LBB65_15: # %cond.load16
-; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v12, v10, 6
-; RV32-NEXT: vmv.x.s a0, v12
-; RV32-NEXT: lh a0, 0(a0)
-; RV32-NEXT: vmv.s.x v8, a0
-; RV32-NEXT: vsetivli zero, 7, e16, m1, tu, ma
-; RV32-NEXT: vslideup.vi v9, v8, 6
-; RV32-NEXT: andi a0, a1, -128
-; RV32-NEXT: beqz a0, .LBB65_8
-; RV32-NEXT: .LBB65_16: # %cond.load19
-; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v10, v10, 7
-; RV32-NEXT: vmv.x.s a0, v10
-; RV32-NEXT: lh a0, 0(a0)
-; RV32-NEXT: vmv.s.x v8, a0
-; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT: vslideup.vi v9, v8, 7
-; RV32-NEXT: vmv1r.v v8, v9
+; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t
+; RV32-NEXT: vmv.v.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_sext_v8i8_v8bf16:
@@ -8141,128 +7376,10 @@ define <8 x bfloat> @mgather_baseidx_sext_v8i8_v8bf16(ptr %base, <8 x i8> %idxs,
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf8 v12, v8
; RV64V-NEXT: vadd.vv v12, v12, v12
-; RV64V-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV64V-NEXT: vmv.x.s a1, v0
-; RV64V-NEXT: andi a2, a1, 1
-; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; RV64V-NEXT: vadd.vx v12, v12, a0
-; RV64V-NEXT: bnez a2, .LBB65_11
-; RV64V-NEXT: # %bb.1: # %else
-; RV64V-NEXT: andi a0, a1, 2
-; RV64V-NEXT: bnez a0, .LBB65_12
-; RV64V-NEXT: .LBB65_2: # %else2
-; RV64V-NEXT: andi a0, a1, 4
-; RV64V-NEXT: bnez a0, .LBB65_13
-; RV64V-NEXT: .LBB65_3: # %else5
-; RV64V-NEXT: andi a0, a1, 8
-; RV64V-NEXT: beqz a0, .LBB65_5
-; RV64V-NEXT: .LBB65_4: # %cond.load7
-; RV64V-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64V-NEXT: vslidedown.vi v10, v12, 3
-; RV64V-NEXT: vmv.x.s a0, v10
-; RV64V-NEXT: lh a0, 0(a0)
-; RV64V-NEXT: vmv.s.x v8, a0
-; RV64V-NEXT: vsetivli zero, 4, e16, m1, tu, ma
-; RV64V-NEXT: vslideup.vi v9, v8, 3
-; RV64V-NEXT: .LBB65_5: # %else8
-; RV64V-NEXT: addi sp, sp, -320
-; RV64V-NEXT: .cfi_def_cfa_offset 320
-; RV64V-NEXT: sd ra, 312(sp) # 8-byte Folded Spill
-; RV64V-NEXT: sd s0, 304(sp) # 8-byte Folded Spill
-; RV64V-NEXT: .cfi_offset ra, -8
-; RV64V-NEXT: .cfi_offset s0, -16
-; RV64V-NEXT: addi s0, sp, 320
-; RV64V-NEXT: .cfi_def_cfa s0, 0
-; RV64V-NEXT: andi sp, sp, -64
-; RV64V-NEXT: andi a0, a1, 16
-; RV64V-NEXT: bnez a0, .LBB65_14
-; RV64V-NEXT: # %bb.6: # %else11
-; RV64V-NEXT: andi a0, a1, 32
-; RV64V-NEXT: bnez a0, .LBB65_15
-; RV64V-NEXT: .LBB65_7: # %else14
-; RV64V-NEXT: andi a0, a1, 64
-; RV64V-NEXT: bnez a0, .LBB65_16
-; RV64V-NEXT: .LBB65_8: # %else17
-; RV64V-NEXT: andi a0, a1, -128
-; RV64V-NEXT: beqz a0, .LBB65_10
-; RV64V-NEXT: .LBB65_9: # %cond.load19
-; RV64V-NEXT: mv a0, sp
-; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64V-NEXT: vse64.v v12, (a0)
-; RV64V-NEXT: ld a0, 56(sp)
-; RV64V-NEXT: lh a0, 0(a0)
-; RV64V-NEXT: vmv.s.x v8, a0
-; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV64V-NEXT: vslideup.vi v9, v8, 7
-; RV64V-NEXT: .LBB65_10: # %else20
-; RV64V-NEXT: vmv1r.v v8, v9
-; RV64V-NEXT: addi sp, s0, -320
-; RV64V-NEXT: ld ra, 312(sp) # 8-byte Folded Reload
-; RV64V-NEXT: ld s0, 304(sp) # 8-byte Folded Reload
-; RV64V-NEXT: addi sp, sp, 320
+; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t
+; RV64V-NEXT: vmv.v.v v8, v9
; RV64V-NEXT: ret
-; RV64V-NEXT: .LBB65_11: # %cond.load
-; RV64V-NEXT: vmv.x.s a0, v12
-; RV64V-NEXT: lh a0, 0(a0)
-; RV64V-NEXT: vsetvli zero, zero, e16, m1, tu, ma
-; RV64V-NEXT: vmv.s.x v9, a0
-; RV64V-NEXT: andi a0, a1, 2
-; RV64V-NEXT: beqz a0, .LBB65_2
-; RV64V-NEXT: .LBB65_12: # %cond.load1
-; RV64V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64V-NEXT: vslidedown.vi v8, v12, 1
-; RV64V-NEXT: vmv.x.s a0, v8
-; RV64V-NEXT: lh a0, 0(a0)
-; RV64V-NEXT: vmv.s.x v8, a0
-; RV64V-NEXT: vsetivli zero, 2, e16, m1, tu, ma
-; RV64V-NEXT: vslideup.vi v9, v8, 1
-; RV64V-NEXT: andi a0, a1, 4
-; RV64V-NEXT: beqz a0, .LBB65_3
-; RV64V-NEXT: .LBB65_13: # %cond.load4
-; RV64V-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64V-NEXT: vslidedown.vi v10, v12, 2
-; RV64V-NEXT: vmv.x.s a0, v10
-; RV64V-NEXT: lh a0, 0(a0)
-; RV64V-NEXT: vmv.s.x v8, a0
-; RV64V-NEXT: vsetivli zero, 3, e16, m1, tu, ma
-; RV64V-NEXT: vslideup.vi v9, v8, 2
-; RV64V-NEXT: andi a0, a1, 8
-; RV64V-NEXT: bnez a0, .LBB65_4
-; RV64V-NEXT: j .LBB65_5
-; RV64V-NEXT: .LBB65_14: # %cond.load10
-; RV64V-NEXT: addi a0, sp, 192
-; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64V-NEXT: vse64.v v12, (a0)
-; RV64V-NEXT: ld a0, 224(sp)
-; RV64V-NEXT: lh a0, 0(a0)
-; RV64V-NEXT: vmv.s.x v8, a0
-; RV64V-NEXT: vsetivli zero, 5, e16, m1, tu, ma
-; RV64V-NEXT: vslideup.vi v9, v8, 4
-; RV64V-NEXT: andi a0, a1, 32
-; RV64V-NEXT: beqz a0, .LBB65_7
-; RV64V-NEXT: .LBB65_15: # %cond.load13
-; RV64V-NEXT: addi a0, sp, 128
-; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64V-NEXT: vse64.v v12, (a0)
-; RV64V-NEXT: ld a0, 168(sp)
-; RV64V-NEXT: lh a0, 0(a0)
-; RV64V-NEXT: vmv.s.x v8, a0
-; RV64V-NEXT: vsetivli zero, 6, e16, m1, tu, ma
-; RV64V-NEXT: vslideup.vi v9, v8, 5
-; RV64V-NEXT: andi a0, a1, 64
-; RV64V-NEXT: beqz a0, .LBB65_8
-; RV64V-NEXT: .LBB65_16: # %cond.load16
-; RV64V-NEXT: addi a0, sp, 64
-; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64V-NEXT: vse64.v v12, (a0)
-; RV64V-NEXT: ld a0, 112(sp)
-; RV64V-NEXT: lh a0, 0(a0)
-; RV64V-NEXT: vmv.s.x v8, a0
-; RV64V-NEXT: vsetivli zero, 7, e16, m1, tu, ma
-; RV64V-NEXT: vslideup.vi v9, v8, 6
-; RV64V-NEXT: andi a0, a1, -128
-; RV64V-NEXT: bnez a0, .LBB65_9
-; RV64V-NEXT: j .LBB65_10
;
; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i8_v8bf16:
; RV64ZVE32F: # %bb.0:
@@ -8399,244 +7516,21 @@ define <8 x bfloat> @mgather_baseidx_sext_v8i8_v8bf16(ptr %base, <8 x i8> %idxs,
define <8 x bfloat> @mgather_baseidx_zext_v8i8_v8bf16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x bfloat> %passthru) {
; RV32-LABEL: mgather_baseidx_zext_v8i8_v8bf16:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT: vzext.vf4 v10, v8
-; RV32-NEXT: vadd.vv v10, v10, v10
-; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV32-NEXT: vmv.x.s a1, v0
-; RV32-NEXT: andi a2, a1, 1
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vadd.vx v10, v10, a0
-; RV32-NEXT: bnez a2, .LBB66_9
-; RV32-NEXT: # %bb.1: # %else
-; RV32-NEXT: andi a0, a1, 2
-; RV32-NEXT: bnez a0, .LBB66_10
-; RV32-NEXT: .LBB66_2: # %else2
-; RV32-NEXT: andi a0, a1, 4
-; RV32-NEXT: bnez a0, .LBB66_11
-; RV32-NEXT: .LBB66_3: # %else5
-; RV32-NEXT: andi a0, a1, 8
-; RV32-NEXT: bnez a0, .LBB66_12
-; RV32-NEXT: .LBB66_4: # %else8
-; RV32-NEXT: andi a0, a1, 16
-; RV32-NEXT: bnez a0, .LBB66_13
-; RV32-NEXT: .LBB66_5: # %else11
-; RV32-NEXT: andi a0, a1, 32
-; RV32-NEXT: bnez a0, .LBB66_14
-; RV32-NEXT: .LBB66_6: # %else14
-; RV32-NEXT: andi a0, a1, 64
-; RV32-NEXT: bnez a0, .LBB66_15
-; RV32-NEXT: .LBB66_7: # %else17
-; RV32-NEXT: andi a0, a1, -128
-; RV32-NEXT: bnez a0, .LBB66_16
-; RV32-NEXT: .LBB66_8: # %else20
-; RV32-NEXT: vmv1r.v v8, v9
-; RV32-NEXT: ret
-; RV32-NEXT: .LBB66_9: # %cond.load
-; RV32-NEXT: vmv.x.s a0, v10
-; RV32-NEXT: lh a0, 0(a0)
-; RV32-NEXT: vsetvli zero, zero, e16, m1, tu, ma
-; RV32-NEXT: vmv.s.x v9, a0
-; RV32-NEXT: andi a0, a1, 2
-; RV32-NEXT: beqz a0, .LBB66_2
-; RV32-NEXT: .LBB66_10: # %cond.load1
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v10, 1
-; RV32-NEXT: vmv.x.s a0, v8
-; RV32-NEXT: lh a0, 0(a0)
-; RV32-NEXT: vmv.s.x v8, a0
-; RV32-NEXT: vsetivli zero, 2, e16, m1, tu, ma
-; RV32-NEXT: vslideup.vi v9, v8, 1
-; RV32-NEXT: andi a0, a1, 4
-; RV32-NEXT: beqz a0, .LBB66_3
-; RV32-NEXT: .LBB66_11: # %cond.load4
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v10, 2
-; RV32-NEXT: vmv.x.s a0, v8
-; RV32-NEXT: lh a0, 0(a0)
-; RV32-NEXT: vmv.s.x v8, a0
-; RV32-NEXT: vsetivli zero, 3, e16, m1, tu, ma
-; RV32-NEXT: vslideup.vi v9, v8, 2
-; RV32-NEXT: andi a0, a1, 8
-; RV32-NEXT: beqz a0, .LBB66_4
-; RV32-NEXT: .LBB66_12: # %cond.load7
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v10, 3
-; RV32-NEXT: vmv.x.s a0, v8
-; RV32-NEXT: lh a0, 0(a0)
-; RV32-NEXT: vmv.s.x v8, a0
-; RV32-NEXT: vsetivli zero, 4, e16, m1, tu, ma
-; RV32-NEXT: vslideup.vi v9, v8, 3
-; RV32-NEXT: andi a0, a1, 16
-; RV32-NEXT: beqz a0, .LBB66_5
-; RV32-NEXT: .LBB66_13: # %cond.load10
-; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v12, v10, 4
-; RV32-NEXT: vmv.x.s a0, v12
-; RV32-NEXT: lh a0, 0(a0)
-; RV32-NEXT: vmv.s.x v8, a0
-; RV32-NEXT: vsetivli zero, 5, e16, m1, tu, ma
-; RV32-NEXT: vslideup.vi v9, v8, 4
-; RV32-NEXT: andi a0, a1, 32
-; RV32-NEXT: beqz a0, .LBB66_6
-; RV32-NEXT: .LBB66_14: # %cond.load13
-; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v12, v10, 5
-; RV32-NEXT: vmv.x.s a0, v12
-; RV32-NEXT: lh a0, 0(a0)
-; RV32-NEXT: vmv.s.x v8, a0
-; RV32-NEXT: vsetivli zero, 6, e16, m1, tu, ma
-; RV32-NEXT: vslideup.vi v9, v8, 5
-; RV32-NEXT: andi a0, a1, 64
-; RV32-NEXT: beqz a0, .LBB66_7
-; RV32-NEXT: .LBB66_15: # %cond.load16
-; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v12, v10, 6
-; RV32-NEXT: vmv.x.s a0, v12
-; RV32-NEXT: lh a0, 0(a0)
-; RV32-NEXT: vmv.s.x v8, a0
-; RV32-NEXT: vsetivli zero, 7, e16, m1, tu, ma
-; RV32-NEXT: vslideup.vi v9, v8, 6
-; RV32-NEXT: andi a0, a1, -128
-; RV32-NEXT: beqz a0, .LBB66_8
-; RV32-NEXT: .LBB66_16: # %cond.load19
-; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v10, v10, 7
-; RV32-NEXT: vmv.x.s a0, v10
-; RV32-NEXT: lh a0, 0(a0)
-; RV32-NEXT: vmv.s.x v8, a0
-; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT: vslideup.vi v9, v8, 7
-; RV32-NEXT: vmv1r.v v8, v9
+; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT: vwaddu.vv v10, v8, v8
+; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; RV32-NEXT: vluxei16.v v9, (a0), v10, v0.t
+; RV32-NEXT: vmv.v.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8bf16:
; RV64V: # %bb.0:
-; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64V-NEXT: vzext.vf8 v12, v8
-; RV64V-NEXT: vadd.vv v12, v12, v12
-; RV64V-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV64V-NEXT: vmv.x.s a1, v0
-; RV64V-NEXT: andi a2, a1, 1
-; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; RV64V-NEXT: vadd.vx v12, v12, a0
-; RV64V-NEXT: bnez a2, .LBB66_11
-; RV64V-NEXT: # %bb.1: # %else
-; RV64V-NEXT: andi a0, a1, 2
-; RV64V-NEXT: bnez a0, .LBB66_12
-; RV64V-NEXT: .LBB66_2: # %else2
-; RV64V-NEXT: andi a0, a1, 4
-; RV64V-NEXT: bnez a0, .LBB66_13
-; RV64V-NEXT: .LBB66_3: # %else5
-; RV64V-NEXT: andi a0, a1, 8
-; RV64V-NEXT: beqz a0, .LBB66_5
-; RV64V-NEXT: .LBB66_4: # %cond.load7
-; RV64V-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64V-NEXT: vslidedown.vi v10, v12, 3
-; RV64V-NEXT: vmv.x.s a0, v10
-; RV64V-NEXT: lh a0, 0(a0)
-; RV64V-NEXT: vmv.s.x v8, a0
-; RV64V-NEXT: vsetivli zero, 4, e16, m1, tu, ma
-; RV64V-NEXT: vslideup.vi v9, v8, 3
-; RV64V-NEXT: .LBB66_5: # %else8
-; RV64V-NEXT: addi sp, sp, -320
-; RV64V-NEXT: .cfi_def_cfa_offset 320
-; RV64V-NEXT: sd ra, 312(sp) # 8-byte Folded Spill
-; RV64V-NEXT: sd s0, 304(sp) # 8-byte Folded Spill
-; RV64V-NEXT: .cfi_offset ra, -8
-; RV64V-NEXT: .cfi_offset s0, -16
-; RV64V-NEXT: addi s0, sp, 320
-; RV64V-NEXT: .cfi_def_cfa s0, 0
-; RV64V-NEXT: andi sp, sp, -64
-; RV64V-NEXT: andi a0, a1, 16
-; RV64V-NEXT: bnez a0, .LBB66_14
-; RV64V-NEXT: # %bb.6: # %else11
-; RV64V-NEXT: andi a0, a1, 32
-; RV64V-NEXT: bnez a0, .LBB66_15
-; RV64V-NEXT: .LBB66_7: # %else14
-; RV64V-NEXT: andi a0, a1, 64
-; RV64V-NEXT: bnez a0, .LBB66_16
-; RV64V-NEXT: .LBB66_8: # %else17
-; RV64V-NEXT: andi a0, a1, -128
-; RV64V-NEXT: beqz a0, .LBB66_10
-; RV64V-NEXT: .LBB66_9: # %cond.load19
-; RV64V-NEXT: mv a0, sp
-; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64V-NEXT: vse64.v v12, (a0)
-; RV64V-NEXT: ld a0, 56(sp)
-; RV64V-NEXT: lh a0, 0(a0)
-; RV64V-NEXT: vmv.s.x v8, a0
-; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV64V-NEXT: vslideup.vi v9, v8, 7
-; RV64V-NEXT: .LBB66_10: # %else20
-; RV64V-NEXT: vmv1r.v v8, v9
-; RV64V-NEXT: addi sp, s0, -320
-; RV64V-NEXT: ld ra, 312(sp) # 8-byte Folded Reload
-; RV64V-NEXT: ld s0, 304(sp) # 8-byte Folded Reload
-; RV64V-NEXT: addi sp, sp, 320
+; RV64V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64V-NEXT: vwaddu.vv v10, v8, v8
+; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; RV64V-NEXT: vluxei16.v v9, (a0), v10, v0.t
+; RV64V-NEXT: vmv.v.v v8, v9
; RV64V-NEXT: ret
-; RV64V-NEXT: .LBB66_11: # %cond.load
-; RV64V-NEXT: vmv.x.s a0, v12
-; RV64V-NEXT: lh a0, 0(a0)
-; RV64V-NEXT: vsetvli zero, zero, e16, m1, tu, ma
-; RV64V-NEXT: vmv.s.x v9, a0
-; RV64V-NEXT: andi a0, a1, 2
-; RV64V-NEXT: beqz a0, .LBB66_2
-; RV64V-NEXT: .LBB66_12: # %cond.load1
-; RV64V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64V-NEXT: vslidedown.vi v8, v12, 1
-; RV64V-NEXT: vmv.x.s a0, v8
-; RV64V-NEXT: lh a0, 0(a0)
-; RV64V-NEXT: vmv.s.x v8, a0
-; RV64V-NEXT: vsetivli zero, 2, e16, m1, tu, ma
-; RV64V-NEXT: vslideup.vi v9, v8, 1
-; RV64V-NEXT: andi a0, a1, 4
-; RV64V-NEXT: beqz a0, .LBB66_3
-; RV64V-NEXT: .LBB66_13: # %cond.load4
-; RV64V-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64V-NEXT: vslidedown.vi v10, v12, 2
-; RV64V-NEXT: vmv.x.s a0, v10
-; RV64V-NEXT: lh a0, 0(a0)
-; RV64V-NEXT: vmv.s.x v8, a0
-; RV64V-NEXT: vsetivli zero, 3, e16, m1, tu, ma
-; RV64V-NEXT: vslideup.vi v9, v8, 2
-; RV64V-NEXT: andi a0, a1, 8
-; RV64V-NEXT: bnez a0, .LBB66_4
-; RV64V-NEXT: j .LBB66_5
-; RV64V-NEXT: .LBB66_14: # %cond.load10
-; RV64V-NEXT: addi a0, sp, 192
-; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64V-NEXT: vse64.v v12, (a0)
-; RV64V-NEXT: ld a0, 224(sp)
-; RV64V-NEXT: lh a0, 0(a0)
-; RV64V-NEXT: vmv.s.x v8, a0
-; RV64V-NEXT: vsetivli zero, 5, e16, m1, tu, ma
-; RV64V-NEXT: vslideup.vi v9, v8, 4
-; RV64V-NEXT: andi a0, a1, 32
-; RV64V-NEXT: beqz a0, .LBB66_7
-; RV64V-NEXT: .LBB66_15: # %cond.load13
-; RV64V-NEXT: addi a0, sp, 128
-; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64V-NEXT: vse64.v v12, (a0)
-; RV64V-NEXT: ld a0, 168(sp)
-; RV64V-NEXT: lh a0, 0(a0)
-; RV64V-NEXT: vmv.s.x v8, a0
-; RV64V-NEXT: vsetivli zero, 6, e16, m1, tu, ma
-; RV64V-NEXT: vslideup.vi v9, v8, 5
-; RV64V-NEXT: andi a0, a1, 64
-; RV64V-NEXT: beqz a0, .LBB66_8
-; RV64V-NEXT: .LBB66_16: # %cond.load16
-; RV64V-NEXT: addi a0, sp, 64
-; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64V-NEXT: vse64.v v12, (a0)
-; RV64V-NEXT: ld a0, 112(sp)
-; RV64V-NEXT: lh a0, 0(a0)
-; RV64V-NEXT: vmv.s.x v8, a0
-; RV64V-NEXT: vsetivli zero, 7, e16, m1, tu, ma
-; RV64V-NEXT: vslideup.vi v9, v8, 6
-; RV64V-NEXT: andi a0, a1, -128
-; RV64V-NEXT: bnez a0, .LBB66_9
-; RV64V-NEXT: j .LBB66_10
;
; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i8_v8bf16:
; RV64ZVE32F: # %bb.0:
@@ -8781,114 +7675,10 @@ define <8 x bfloat> @mgather_baseidx_zext_v8i8_v8bf16(ptr %base, <8 x i8> %idxs,
define <8 x bfloat> @mgather_baseidx_v8bf16(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x bfloat> %passthru) {
; RV32-LABEL: mgather_baseidx_v8bf16:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; RV32-NEXT: vwadd.vv v10, v8, v8
-; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV32-NEXT: vmv.x.s a1, v0
-; RV32-NEXT: andi a2, a1, 1
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vadd.vx v10, v10, a0
-; RV32-NEXT: bnez a2, .LBB67_9
-; RV32-NEXT: # %bb.1: # %else
-; RV32-NEXT: andi a0, a1, 2
-; RV32-NEXT: bnez a0, .LBB67_10
-; RV32-NEXT: .LBB67_2: # %else2
-; RV32-NEXT: andi a0, a1, 4
-; RV32-NEXT: bnez a0, .LBB67_11
-; RV32-NEXT: .LBB67_3: # %else5
-; RV32-NEXT: andi a0, a1, 8
-; RV32-NEXT: bnez a0, .LBB67_12
-; RV32-NEXT: .LBB67_4: # %else8
-; RV32-NEXT: andi a0, a1, 16
-; RV32-NEXT: bnez a0, .LBB67_13
-; RV32-NEXT: .LBB67_5: # %else11
-; RV32-NEXT: andi a0, a1, 32
-; RV32-NEXT: bnez a0, .LBB67_14
-; RV32-NEXT: .LBB67_6: # %else14
-; RV32-NEXT: andi a0, a1, 64
-; RV32-NEXT: bnez a0, .LBB67_15
-; RV32-NEXT: .LBB67_7: # %else17
-; RV32-NEXT: andi a0, a1, -128
-; RV32-NEXT: bnez a0, .LBB67_16
-; RV32-NEXT: .LBB67_8: # %else20
-; RV32-NEXT: vmv1r.v v8, v9
-; RV32-NEXT: ret
-; RV32-NEXT: .LBB67_9: # %cond.load
-; RV32-NEXT: vmv.x.s a0, v10
-; RV32-NEXT: lh a0, 0(a0)
-; RV32-NEXT: vsetvli zero, zero, e16, m1, tu, ma
-; RV32-NEXT: vmv.s.x v9, a0
-; RV32-NEXT: andi a0, a1, 2
-; RV32-NEXT: beqz a0, .LBB67_2
-; RV32-NEXT: .LBB67_10: # %cond.load1
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v10, 1
-; RV32-NEXT: vmv.x.s a0, v8
-; RV32-NEXT: lh a0, 0(a0)
-; RV32-NEXT: vmv.s.x v8, a0
-; RV32-NEXT: vsetivli zero, 2, e16, m1, tu, ma
-; RV32-NEXT: vslideup.vi v9, v8, 1
-; RV32-NEXT: andi a0, a1, 4
-; RV32-NEXT: beqz a0, .LBB67_3
-; RV32-NEXT: .LBB67_11: # %cond.load4
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v10, 2
-; RV32-NEXT: vmv.x.s a0, v8
-; RV32-NEXT: lh a0, 0(a0)
-; RV32-NEXT: vmv.s.x v8, a0
-; RV32-NEXT: vsetivli zero, 3, e16, m1, tu, ma
-; RV32-NEXT: vslideup.vi v9, v8, 2
-; RV32-NEXT: andi a0, a1, 8
-; RV32-NEXT: beqz a0, .LBB67_4
-; RV32-NEXT: .LBB67_12: # %cond.load7
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v10, 3
-; RV32-NEXT: vmv.x.s a0, v8
-; RV32-NEXT: lh a0, 0(a0)
-; RV32-NEXT: vmv.s.x v8, a0
-; RV32-NEXT: vsetivli zero, 4, e16, m1, tu, ma
-; RV32-NEXT: vslideup.vi v9, v8, 3
-; RV32-NEXT: andi a0, a1, 16
-; RV32-NEXT: beqz a0, .LBB67_5
-; RV32-NEXT: .LBB67_13: # %cond.load10
-; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v12, v10, 4
-; RV32-NEXT: vmv.x.s a0, v12
-; RV32-NEXT: lh a0, 0(a0)
-; RV32-NEXT: vmv.s.x v8, a0
-; RV32-NEXT: vsetivli zero, 5, e16, m1, tu, ma
-; RV32-NEXT: vslideup.vi v9, v8, 4
-; RV32-NEXT: andi a0, a1, 32
-; RV32-NEXT: beqz a0, .LBB67_6
-; RV32-NEXT: .LBB67_14: # %cond.load13
-; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v12, v10, 5
-; RV32-NEXT: vmv.x.s a0, v12
-; RV32-NEXT: lh a0, 0(a0)
-; RV32-NEXT: vmv.s.x v8, a0
-; RV32-NEXT: vsetivli zero, 6, e16, m1, tu, ma
-; RV32-NEXT: vslideup.vi v9, v8, 5
-; RV32-NEXT: andi a0, a1, 64
-; RV32-NEXT: beqz a0, .LBB67_7
-; RV32-NEXT: .LBB67_15: # %cond.load16
-; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v12, v10, 6
-; RV32-NEXT: vmv.x.s a0, v12
-; RV32-NEXT: lh a0, 0(a0)
-; RV32-NEXT: vmv.s.x v8, a0
-; RV32-NEXT: vsetivli zero, 7, e16, m1, tu, ma
-; RV32-NEXT: vslideup.vi v9, v8, 6
-; RV32-NEXT: andi a0, a1, -128
-; RV32-NEXT: beqz a0, .LBB67_8
-; RV32-NEXT: .LBB67_16: # %cond.load19
-; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v10, v10, 7
-; RV32-NEXT: vmv.x.s a0, v10
-; RV32-NEXT: lh a0, 0(a0)
-; RV32-NEXT: vmv.s.x v8, a0
-; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT: vslideup.vi v9, v8, 7
-; RV32-NEXT: vmv1r.v v8, v9
+; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t
+; RV32-NEXT: vmv.v.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_v8bf16:
@@ -8896,128 +7686,10 @@ define <8 x bfloat> @mgather_baseidx_v8bf16(ptr %base, <8 x i16> %idxs, <8 x i1>
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf4 v12, v8
; RV64V-NEXT: vadd.vv v12, v12, v12
-; RV64V-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV64V-NEXT: vmv.x.s a1, v0
-; RV64V-NEXT: andi a2, a1, 1
-; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; RV64V-NEXT: vadd.vx v12, v12, a0
-; RV64V-NEXT: bnez a2, .LBB67_11
-; RV64V-NEXT: # %bb.1: # %else
-; RV64V-NEXT: andi a0, a1, 2
-; RV64V-NEXT: bnez a0, .LBB67_12
-; RV64V-NEXT: .LBB67_2: # %else2
-; RV64V-NEXT: andi a0, a1, 4
-; RV64V-NEXT: bnez a0, .LBB67_13
-; RV64V-NEXT: .LBB67_3: # %else5
-; RV64V-NEXT: andi a0, a1, 8
-; RV64V-NEXT: beqz a0, .LBB67_5
-; RV64V-NEXT: .LBB67_4: # %cond.load7
-; RV64V-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64V-NEXT: vslidedown.vi v10, v12, 3
-; RV64V-NEXT: vmv.x.s a0, v10
-; RV64V-NEXT: lh a0, 0(a0)
-; RV64V-NEXT: vmv.s.x v8, a0
-; RV64V-NEXT: vsetivli zero, 4, e16, m1, tu, ma
-; RV64V-NEXT: vslideup.vi v9, v8, 3
-; RV64V-NEXT: .LBB67_5: # %else8
-; RV64V-NEXT: addi sp, sp, -320
-; RV64V-NEXT: .cfi_def_cfa_offset 320
-; RV64V-NEXT: sd ra, 312(sp) # 8-byte Folded Spill
-; RV64V-NEXT: sd s0, 304(sp) # 8-byte Folded Spill
-; RV64V-NEXT: .cfi_offset ra, -8
-; RV64V-NEXT: .cfi_offset s0, -16
-; RV64V-NEXT: addi s0, sp, 320
-; RV64V-NEXT: .cfi_def_cfa s0, 0
-; RV64V-NEXT: andi sp, sp, -64
-; RV64V-NEXT: andi a0, a1, 16
-; RV64V-NEXT: bnez a0, .LBB67_14
-; RV64V-NEXT: # %bb.6: # %else11
-; RV64V-NEXT: andi a0, a1, 32
-; RV64V-NEXT: bnez a0, .LBB67_15
-; RV64V-NEXT: .LBB67_7: # %else14
-; RV64V-NEXT: andi a0, a1, 64
-; RV64V-NEXT: bnez a0, .LBB67_16
-; RV64V-NEXT: .LBB67_8: # %else17
-; RV64V-NEXT: andi a0, a1, -128
-; RV64V-NEXT: beqz a0, .LBB67_10
-; RV64V-NEXT: .LBB67_9: # %cond.load19
-; RV64V-NEXT: mv a0, sp
-; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64V-NEXT: vse64.v v12, (a0)
-; RV64V-NEXT: ld a0, 56(sp)
-; RV64V-NEXT: lh a0, 0(a0)
-; RV64V-NEXT: vmv.s.x v8, a0
-; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV64V-NEXT: vslideup.vi v9, v8, 7
-; RV64V-NEXT: .LBB67_10: # %else20
-; RV64V-NEXT: vmv1r.v v8, v9
-; RV64V-NEXT: addi sp, s0, -320
-; RV64V-NEXT: ld ra, 312(sp) # 8-byte Folded Reload
-; RV64V-NEXT: ld s0, 304(sp) # 8-byte Folded Reload
-; RV64V-NEXT: addi sp, sp, 320
+; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t
+; RV64V-NEXT: vmv.v.v v8, v9
; RV64V-NEXT: ret
-; RV64V-NEXT: .LBB67_11: # %cond.load
-; RV64V-NEXT: vmv.x.s a0, v12
-; RV64V-NEXT: lh a0, 0(a0)
-; RV64V-NEXT: vsetvli zero, zero, e16, m1, tu, ma
-; RV64V-NEXT: vmv.s.x v9, a0
-; RV64V-NEXT: andi a0, a1, 2
-; RV64V-NEXT: beqz a0, .LBB67_2
-; RV64V-NEXT: .LBB67_12: # %cond.load1
-; RV64V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64V-NEXT: vslidedown.vi v8, v12, 1
-; RV64V-NEXT: vmv.x.s a0, v8
-; RV64V-NEXT: lh a0, 0(a0)
-; RV64V-NEXT: vmv.s.x v8, a0
-; RV64V-NEXT: vsetivli zero, 2, e16, m1, tu, ma
-; RV64V-NEXT: vslideup.vi v9, v8, 1
-; RV64V-NEXT: andi a0, a1, 4
-; RV64V-NEXT: beqz a0, .LBB67_3
-; RV64V-NEXT: .LBB67_13: # %cond.load4
-; RV64V-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64V-NEXT: vslidedown.vi v10, v12, 2
-; RV64V-NEXT: vmv.x.s a0, v10
-; RV64V-NEXT: lh a0, 0(a0)
-; RV64V-NEXT: vmv.s.x v8, a0
-; RV64V-NEXT: vsetivli zero, 3, e16, m1, tu, ma
-; RV64V-NEXT: vslideup.vi v9, v8, 2
-; RV64V-NEXT: andi a0, a1, 8
-; RV64V-NEXT: bnez a0, .LBB67_4
-; RV64V-NEXT: j .LBB67_5
-; RV64V-NEXT: .LBB67_14: # %cond.load10
-; RV64V-NEXT: addi a0, sp, 192
-; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64V-NEXT: vse64.v v12, (a0)
-; RV64V-NEXT: ld a0, 224(sp)
-; RV64V-NEXT: lh a0, 0(a0)
-; RV64V-NEXT: vmv.s.x v8, a0
-; RV64V-NEXT: vsetivli zero, 5, e16, m1, tu, ma
-; RV64V-NEXT: vslideup.vi v9, v8, 4
-; RV64V-NEXT: andi a0, a1, 32
-; RV64V-NEXT: beqz a0, .LBB67_7
-; RV64V-NEXT: .LBB67_15: # %cond.load13
-; RV64V-NEXT: addi a0, sp, 128
-; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64V-NEXT: vse64.v v12, (a0)
-; RV64V-NEXT: ld a0, 168(sp)
-; RV64V-NEXT: lh a0, 0(a0)
-; RV64V-NEXT: vmv.s.x v8, a0
-; RV64V-NEXT: vsetivli zero, 6, e16, m1, tu, ma
-; RV64V-NEXT: vslideup.vi v9, v8, 5
-; RV64V-NEXT: andi a0, a1, 64
-; RV64V-NEXT: beqz a0, .LBB67_8
-; RV64V-NEXT: .LBB67_16: # %cond.load16
-; RV64V-NEXT: addi a0, sp, 64
-; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64V-NEXT: vse64.v v12, (a0)
-; RV64V-NEXT: ld a0, 112(sp)
-; RV64V-NEXT: lh a0, 0(a0)
-; RV64V-NEXT: vmv.s.x v8, a0
-; RV64V-NEXT: vsetivli zero, 7, e16, m1, tu, ma
-; RV64V-NEXT: vslideup.vi v9, v8, 6
-; RV64V-NEXT: andi a0, a1, -128
-; RV64V-NEXT: bnez a0, .LBB67_9
-; RV64V-NEXT: j .LBB67_10
;
; RV64ZVE32F-LABEL: mgather_baseidx_v8bf16:
; RV64ZVE32F: # %bb.0:
@@ -9145,54 +7817,26 @@ define <8 x bfloat> @mgather_baseidx_v8bf16(ptr %base, <8 x i16> %idxs, <8 x i1>
declare <1 x half> @llvm.masked.gather.v1f16.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x half>)
define <1 x half> @mgather_v1f16(<1 x ptr> %ptrs, <1 x i1> %m, <1 x half> %passthru) {
-; RV32V-ZVFH-LABEL: mgather_v1f16:
-; RV32V-ZVFH: # %bb.0:
-; RV32V-ZVFH-NEXT: vsetivli zero, 1, e16, mf4, ta, mu
-; RV32V-ZVFH-NEXT: vluxei32.v v9, (zero), v8, v0.t
-; RV32V-ZVFH-NEXT: vmv1r.v v8, v9
-; RV32V-ZVFH-NEXT: ret
-;
-; RV64V-ZVFH-LABEL: mgather_v1f16:
-; RV64V-ZVFH: # %bb.0:
-; RV64V-ZVFH-NEXT: vsetivli zero, 1, e16, mf4, ta, mu
-; RV64V-ZVFH-NEXT: vluxei64.v v9, (zero), v8, v0.t
-; RV64V-ZVFH-NEXT: vmv1r.v v8, v9
-; RV64V-ZVFH-NEXT: ret
-;
-; RV32V-ZVFHMIN-LABEL: mgather_v1f16:
-; RV32V-ZVFHMIN: # %bb.0:
-; RV32V-ZVFHMIN-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
-; RV32V-ZVFHMIN-NEXT: vfirst.m a0, v0
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB68_2
-; RV32V-ZVFHMIN-NEXT: # %bb.1: # %cond.load
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; RV32V-ZVFHMIN-NEXT: vle16.v v9, (a0)
-; RV32V-ZVFHMIN-NEXT: .LBB68_2: # %else
-; RV32V-ZVFHMIN-NEXT: vmv1r.v v8, v9
-; RV32V-ZVFHMIN-NEXT: ret
-;
-; RV64V-ZVFHMIN-LABEL: mgather_v1f16:
-; RV64V-ZVFHMIN: # %bb.0:
-; RV64V-ZVFHMIN-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
-; RV64V-ZVFHMIN-NEXT: vfirst.m a0, v0
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB68_2
-; RV64V-ZVFHMIN-NEXT: # %bb.1: # %cond.load
-; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; RV64V-ZVFHMIN-NEXT: vle16.v v9, (a0)
-; RV64V-ZVFHMIN-NEXT: .LBB68_2: # %else
-; RV64V-ZVFHMIN-NEXT: vmv1r.v v8, v9
-; RV64V-ZVFHMIN-NEXT: ret
-;
-; RV32ZVE32F-ZVFH-LABEL: mgather_v1f16:
-; RV32ZVE32F-ZVFH: # %bb.0:
-; RV32ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, mu
-; RV32ZVE32F-ZVFH-NEXT: vluxei32.v v9, (zero), v8, v0.t
-; RV32ZVE32F-ZVFH-NEXT: vmv1r.v v8, v9
-; RV32ZVE32F-ZVFH-NEXT: ret
+; RV32V-LABEL: mgather_v1f16:
+; RV32V: # %bb.0:
+; RV32V-NEXT: vsetivli zero, 1, e16, mf4, ta, mu
+; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
+; RV32V-NEXT: vmv1r.v v8, v9
+; RV32V-NEXT: ret
+;
+; RV64V-LABEL: mgather_v1f16:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 1, e16, mf4, ta, mu
+; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
+; RV64V-NEXT: vmv1r.v v8, v9
+; RV64V-NEXT: ret
+;
+; RV32ZVE32F-LABEL: mgather_v1f16:
+; RV32ZVE32F: # %bb.0:
+; RV32ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu
+; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
+; RV32ZVE32F-NEXT: vmv1r.v v8, v9
+; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v1f16:
; RV64ZVE32F: # %bb.0:
@@ -9204,20 +7848,6 @@ define <1 x half> @mgather_v1f16(<1 x ptr> %ptrs, <1 x i1> %m, <1 x half> %passt
; RV64ZVE32F-NEXT: vle16.v v8, (a0)
; RV64ZVE32F-NEXT: .LBB68_2: # %else
; RV64ZVE32F-NEXT: ret
-;
-; RV32ZVE32F-ZVFHMIN-LABEL: mgather_v1f16:
-; RV32ZVE32F-ZVFHMIN: # %bb.0:
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vfirst.m a0, v0
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB68_2
-; RV32ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %cond.load
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vle16.v v9, (a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB68_2: # %else
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: ret
%v = call <1 x half> @llvm.masked.gather.v1f16.v1p0(<1 x ptr> %ptrs, i32 2, <1 x i1> %m, <1 x half> %passthru)
ret <1 x half> %v
}
@@ -9225,88 +7855,26 @@ define <1 x half> @mgather_v1f16(<1 x ptr> %ptrs, <1 x i1> %m, <1 x half> %passt
declare <2 x half> @llvm.masked.gather.v2f16.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x half>)
define <2 x half> @mgather_v2f16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x half> %passthru) {
-; RV32V-ZVFH-LABEL: mgather_v2f16:
-; RV32V-ZVFH: # %bb.0:
-; RV32V-ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
-; RV32V-ZVFH-NEXT: vluxei32.v v9, (zero), v8, v0.t
-; RV32V-ZVFH-NEXT: vmv1r.v v8, v9
-; RV32V-ZVFH-NEXT: ret
-;
-; RV64V-ZVFH-LABEL: mgather_v2f16:
-; RV64V-ZVFH: # %bb.0:
-; RV64V-ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
-; RV64V-ZVFH-NEXT: vluxei64.v v9, (zero), v8, v0.t
-; RV64V-ZVFH-NEXT: vmv1r.v v8, v9
-; RV64V-ZVFH-NEXT: ret
-;
-; RV32V-ZVFHMIN-LABEL: mgather_v2f16:
-; RV32V-ZVFHMIN: # %bb.0:
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v0
-; RV32V-ZVFHMIN-NEXT: andi a1, a0, 1
-; RV32V-ZVFHMIN-NEXT: bnez a1, .LBB69_3
-; RV32V-ZVFHMIN-NEXT: # %bb.1: # %else
-; RV32V-ZVFHMIN-NEXT: andi a0, a0, 2
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB69_4
-; RV32V-ZVFHMIN-NEXT: .LBB69_2: # %else2
-; RV32V-ZVFHMIN-NEXT: vmv1r.v v8, v9
-; RV32V-ZVFHMIN-NEXT: ret
-; RV32V-ZVFHMIN-NEXT: .LBB69_3: # %cond.load
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v8
-; RV32V-ZVFHMIN-NEXT: lh a1, 0(a1)
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, tu, ma
-; RV32V-ZVFHMIN-NEXT: vmv.s.x v9, a1
-; RV32V-ZVFHMIN-NEXT: andi a0, a0, 2
-; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB69_2
-; RV32V-ZVFHMIN-NEXT: .LBB69_4: # %cond.load1
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32V-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 1
-; RV32V-ZVFHMIN-NEXT: vmv1r.v v8, v9
-; RV32V-ZVFHMIN-NEXT: ret
-;
-; RV64V-ZVFHMIN-LABEL: mgather_v2f16:
-; RV64V-ZVFHMIN: # %bb.0:
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v0
-; RV64V-ZVFHMIN-NEXT: andi a1, a0, 1
-; RV64V-ZVFHMIN-NEXT: bnez a1, .LBB69_3
-; RV64V-ZVFHMIN-NEXT: # %bb.1: # %else
-; RV64V-ZVFHMIN-NEXT: andi a0, a0, 2
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB69_4
-; RV64V-ZVFHMIN-NEXT: .LBB69_2: # %else2
-; RV64V-ZVFHMIN-NEXT: vmv1r.v v8, v9
-; RV64V-ZVFHMIN-NEXT: ret
-; RV64V-ZVFHMIN-NEXT: .LBB69_3: # %cond.load
-; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v8
-; RV64V-ZVFHMIN-NEXT: lh a1, 0(a1)
-; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, tu, ma
-; RV64V-ZVFHMIN-NEXT: vmv.s.x v9, a1
-; RV64V-ZVFHMIN-NEXT: andi a0, a0, 2
-; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB69_2
-; RV64V-ZVFHMIN-NEXT: .LBB69_4: # %cond.load1
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV64V-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 1
-; RV64V-ZVFHMIN-NEXT: vmv1r.v v8, v9
-; RV64V-ZVFHMIN-NEXT: ret
-;
-; RV32ZVE32F-ZVFH-LABEL: mgather_v2f16:
-; RV32ZVE32F-ZVFH: # %bb.0:
-; RV32ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, mf2, ta, mu
-; RV32ZVE32F-ZVFH-NEXT: vluxei32.v v9, (zero), v8, v0.t
-; RV32ZVE32F-ZVFH-NEXT: vmv1r.v v8, v9
-; RV32ZVE32F-ZVFH-NEXT: ret
+; RV32V-LABEL: mgather_v2f16:
+; RV32V: # %bb.0:
+; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
+; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
+; RV32V-NEXT: vmv1r.v v8, v9
+; RV32V-NEXT: ret
+;
+; RV64V-LABEL: mgather_v2f16:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
+; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
+; RV64V-NEXT: vmv1r.v v8, v9
+; RV64V-NEXT: ret
+;
+; RV32ZVE32F-LABEL: mgather_v2f16:
+; RV32ZVE32F: # %bb.0:
+; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu
+; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
+; RV32ZVE32F-NEXT: vmv1r.v v8, v9
+; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-ZVFH-LABEL: mgather_v2f16:
; RV64ZVE32F-ZVFH: # %bb.0:
@@ -9333,37 +7901,6 @@ define <2 x half> @mgather_v2f16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x half> %passt
; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-ZVFH-NEXT: ret
;
-; RV32ZVE32F-ZVFHMIN-LABEL: mgather_v2f16:
-; RV32ZVE32F-ZVFHMIN: # %bb.0:
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v0
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 1
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB69_3
-; RV32ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a0, 2
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB69_4
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB69_2: # %else2
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: ret
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB69_3: # %cond.load
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v8
-; RV32ZVE32F-ZVFHMIN-NEXT: lh a1, 0(a1)
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, tu, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a1
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a0, 2
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB69_2
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB69_4: # %cond.load1
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 1
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: ret
-;
; RV64ZVE32F-ZVFHMIN-LABEL: mgather_v2f16:
; RV64ZVE32F-ZVFHMIN: # %bb.0:
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -9395,140 +7932,19 @@ define <2 x half> @mgather_v2f16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x half> %passt
declare <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x half>)
define <4 x half> @mgather_v4f16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x half> %passthru) {
-; RV32V-ZVFH-LABEL: mgather_v4f16:
-; RV32V-ZVFH: # %bb.0:
-; RV32V-ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
-; RV32V-ZVFH-NEXT: vluxei32.v v9, (zero), v8, v0.t
-; RV32V-ZVFH-NEXT: vmv1r.v v8, v9
-; RV32V-ZVFH-NEXT: ret
-;
-; RV64V-ZVFH-LABEL: mgather_v4f16:
-; RV64V-ZVFH: # %bb.0:
-; RV64V-ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
-; RV64V-ZVFH-NEXT: vluxei64.v v10, (zero), v8, v0.t
-; RV64V-ZVFH-NEXT: vmv1r.v v8, v10
-; RV64V-ZVFH-NEXT: ret
-;
-; RV32V-ZVFHMIN-LABEL: mgather_v4f16:
-; RV32V-ZVFHMIN: # %bb.0:
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v0
-; RV32V-ZVFHMIN-NEXT: andi a1, a0, 1
-; RV32V-ZVFHMIN-NEXT: bnez a1, .LBB70_5
-; RV32V-ZVFHMIN-NEXT: # %bb.1: # %else
-; RV32V-ZVFHMIN-NEXT: andi a1, a0, 2
-; RV32V-ZVFHMIN-NEXT: bnez a1, .LBB70_6
-; RV32V-ZVFHMIN-NEXT: .LBB70_2: # %else2
-; RV32V-ZVFHMIN-NEXT: andi a1, a0, 4
-; RV32V-ZVFHMIN-NEXT: bnez a1, .LBB70_7
-; RV32V-ZVFHMIN-NEXT: .LBB70_3: # %else5
-; RV32V-ZVFHMIN-NEXT: andi a0, a0, 8
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB70_8
-; RV32V-ZVFHMIN-NEXT: .LBB70_4: # %else8
-; RV32V-ZVFHMIN-NEXT: vmv1r.v v8, v9
-; RV32V-ZVFHMIN-NEXT: ret
-; RV32V-ZVFHMIN-NEXT: .LBB70_5: # %cond.load
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v8
-; RV32V-ZVFHMIN-NEXT: lh a1, 0(a1)
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, tu, ma
-; RV32V-ZVFHMIN-NEXT: vmv.s.x v9, a1
-; RV32V-ZVFHMIN-NEXT: andi a1, a0, 2
-; RV32V-ZVFHMIN-NEXT: beqz a1, .LBB70_2
-; RV32V-ZVFHMIN-NEXT: .LBB70_6: # %cond.load1
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 1
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v10
-; RV32V-ZVFHMIN-NEXT: lh a1, 0(a1)
-; RV32V-ZVFHMIN-NEXT: vmv.s.x v10, a1
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
-; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v10, 1
-; RV32V-ZVFHMIN-NEXT: andi a1, a0, 4
-; RV32V-ZVFHMIN-NEXT: beqz a1, .LBB70_3
-; RV32V-ZVFHMIN-NEXT: .LBB70_7: # %cond.load4
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 2
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v10
-; RV32V-ZVFHMIN-NEXT: lh a1, 0(a1)
-; RV32V-ZVFHMIN-NEXT: vmv.s.x v10, a1
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 3, e16, mf2, tu, ma
-; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v10, 2
-; RV32V-ZVFHMIN-NEXT: andi a0, a0, 8
-; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB70_4
-; RV32V-ZVFHMIN-NEXT: .LBB70_8: # %cond.load7
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 3
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32V-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 3
-; RV32V-ZVFHMIN-NEXT: vmv1r.v v8, v9
-; RV32V-ZVFHMIN-NEXT: ret
-;
-; RV64V-ZVFHMIN-LABEL: mgather_v4f16:
-; RV64V-ZVFHMIN: # %bb.0:
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v0
-; RV64V-ZVFHMIN-NEXT: andi a1, a0, 1
-; RV64V-ZVFHMIN-NEXT: bnez a1, .LBB70_5
-; RV64V-ZVFHMIN-NEXT: # %bb.1: # %else
-; RV64V-ZVFHMIN-NEXT: andi a1, a0, 2
-; RV64V-ZVFHMIN-NEXT: bnez a1, .LBB70_6
-; RV64V-ZVFHMIN-NEXT: .LBB70_2: # %else2
-; RV64V-ZVFHMIN-NEXT: andi a1, a0, 4
-; RV64V-ZVFHMIN-NEXT: bnez a1, .LBB70_7
-; RV64V-ZVFHMIN-NEXT: .LBB70_3: # %else5
-; RV64V-ZVFHMIN-NEXT: andi a0, a0, 8
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB70_8
-; RV64V-ZVFHMIN-NEXT: .LBB70_4: # %else8
-; RV64V-ZVFHMIN-NEXT: vmv1r.v v8, v10
-; RV64V-ZVFHMIN-NEXT: ret
-; RV64V-ZVFHMIN-NEXT: .LBB70_5: # %cond.load
-; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v8
-; RV64V-ZVFHMIN-NEXT: lh a1, 0(a1)
-; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, tu, ma
-; RV64V-ZVFHMIN-NEXT: vmv.s.x v10, a1
-; RV64V-ZVFHMIN-NEXT: andi a1, a0, 2
-; RV64V-ZVFHMIN-NEXT: beqz a1, .LBB70_2
-; RV64V-ZVFHMIN-NEXT: .LBB70_6: # %cond.load1
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v11, v8, 1
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v11
-; RV64V-ZVFHMIN-NEXT: lh a1, 0(a1)
-; RV64V-ZVFHMIN-NEXT: vmv.s.x v11, a1
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
-; RV64V-ZVFHMIN-NEXT: vslideup.vi v10, v11, 1
-; RV64V-ZVFHMIN-NEXT: andi a1, a0, 4
-; RV64V-ZVFHMIN-NEXT: beqz a1, .LBB70_3
-; RV64V-ZVFHMIN-NEXT: .LBB70_7: # %cond.load4
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v12, v8, 2
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v12
-; RV64V-ZVFHMIN-NEXT: lh a1, 0(a1)
-; RV64V-ZVFHMIN-NEXT: vmv.s.x v11, a1
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 3, e16, mf2, tu, ma
-; RV64V-ZVFHMIN-NEXT: vslideup.vi v10, v11, 2
-; RV64V-ZVFHMIN-NEXT: andi a0, a0, 8
-; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB70_4
-; RV64V-ZVFHMIN-NEXT: .LBB70_8: # %cond.load7
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 3
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV64V-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslideup.vi v10, v8, 3
-; RV64V-ZVFHMIN-NEXT: vmv1r.v v8, v10
-; RV64V-ZVFHMIN-NEXT: ret
-;
-; RV32ZVE32F-ZVFH-LABEL: mgather_v4f16:
-; RV32ZVE32F-ZVFH: # %bb.0:
-; RV32ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
-; RV32ZVE32F-ZVFH-NEXT: vluxei32.v v9, (zero), v8, v0.t
-; RV32ZVE32F-ZVFH-NEXT: vmv1r.v v8, v9
-; RV32ZVE32F-ZVFH-NEXT: ret
+; RV32-LABEL: mgather_v4f16:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
+; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
+; RV32-NEXT: vmv1r.v v8, v9
+; RV32-NEXT: ret
+;
+; RV64V-LABEL: mgather_v4f16:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
+; RV64V-NEXT: vluxei64.v v10, (zero), v8, v0.t
+; RV64V-NEXT: vmv1r.v v8, v10
+; RV64V-NEXT: ret
;
; RV64ZVE32F-ZVFH-LABEL: mgather_v4f16:
; RV64ZVE32F-ZVFH: # %bb.0:
@@ -9579,63 +7995,6 @@ define <4 x half> @mgather_v4f16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x half> %passt
; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v8, v9, 3
; RV64ZVE32F-ZVFH-NEXT: ret
;
-; RV32ZVE32F-ZVFHMIN-LABEL: mgather_v4f16:
-; RV32ZVE32F-ZVFHMIN: # %bb.0:
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v0
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 1
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB70_5
-; RV32ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 2
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB70_6
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB70_2: # %else2
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 4
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB70_7
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB70_3: # %else5
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a0, 8
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB70_8
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB70_4: # %else8
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: ret
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB70_5: # %cond.load
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v8
-; RV32ZVE32F-ZVFHMIN-NEXT: lh a1, 0(a1)
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, tu, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a1
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 2
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB70_2
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB70_6: # %cond.load1
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 1
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v10
-; RV32ZVE32F-ZVFHMIN-NEXT: lh a1, 0(a1)
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v10, a1
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v10, 1
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 4
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB70_3
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB70_7: # %cond.load4
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 2
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v10
-; RV32ZVE32F-ZVFHMIN-NEXT: lh a1, 0(a1)
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v10, a1
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 3, e16, mf2, tu, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v10, 2
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a0, 8
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB70_4
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB70_8: # %cond.load7
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 3
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 3
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: ret
-;
; RV64ZVE32F-ZVFHMIN-LABEL: mgather_v4f16:
; RV64ZVE32F-ZVFHMIN: # %bb.0:
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -9689,69 +8048,19 @@ define <4 x half> @mgather_v4f16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x half> %passt
}
define <4 x half> @mgather_truemask_v4f16(<4 x ptr> %ptrs, <4 x half> %passthru) {
-; RV32V-ZVFH-LABEL: mgather_truemask_v4f16:
-; RV32V-ZVFH: # %bb.0:
-; RV32V-ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV32V-ZVFH-NEXT: vluxei32.v v9, (zero), v8
-; RV32V-ZVFH-NEXT: vmv1r.v v8, v9
-; RV32V-ZVFH-NEXT: ret
-;
-; RV64V-ZVFH-LABEL: mgather_truemask_v4f16:
-; RV64V-ZVFH: # %bb.0:
-; RV64V-ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV64V-ZVFH-NEXT: vluxei64.v v10, (zero), v8
-; RV64V-ZVFH-NEXT: vmv1r.v v8, v10
-; RV64V-ZVFH-NEXT: ret
-;
-; RV32V-ZVFHMIN-LABEL: mgather_truemask_v4f16:
-; RV32V-ZVFHMIN: # %bb.0:
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 1
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v9
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 2
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a2, v9
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 3
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a3, v8
-; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32V-ZVFHMIN-NEXT: lh a1, 0(a1)
-; RV32V-ZVFHMIN-NEXT: lh a2, 0(a2)
-; RV32V-ZVFHMIN-NEXT: lh a3, 0(a3)
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vmv.v.x v8, a0
-; RV32V-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a1
-; RV32V-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a2
-; RV32V-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a3
-; RV32V-ZVFHMIN-NEXT: ret
-;
-; RV64V-ZVFHMIN-LABEL: mgather_truemask_v4f16:
-; RV64V-ZVFHMIN: # %bb.0:
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 1
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v10
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 2
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a2, v10
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 3
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a3, v8
-; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV64V-ZVFHMIN-NEXT: lh a1, 0(a1)
-; RV64V-ZVFHMIN-NEXT: lh a2, 0(a2)
-; RV64V-ZVFHMIN-NEXT: lh a3, 0(a3)
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV64V-ZVFHMIN-NEXT: vmv.v.x v8, a0
-; RV64V-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a1
-; RV64V-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a2
-; RV64V-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a3
-; RV64V-ZVFHMIN-NEXT: ret
-;
-; RV32ZVE32F-ZVFH-LABEL: mgather_truemask_v4f16:
-; RV32ZVE32F-ZVFH: # %bb.0:
-; RV32ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV32ZVE32F-ZVFH-NEXT: vluxei32.v v9, (zero), v8
-; RV32ZVE32F-ZVFH-NEXT: vmv1r.v v8, v9
-; RV32ZVE32F-ZVFH-NEXT: ret
+; RV32-LABEL: mgather_truemask_v4f16:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV32-NEXT: vluxei32.v v9, (zero), v8
+; RV32-NEXT: vmv1r.v v8, v9
+; RV32-NEXT: ret
+;
+; RV64V-LABEL: mgather_truemask_v4f16:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV64V-NEXT: vluxei64.v v10, (zero), v8
+; RV64V-NEXT: vmv1r.v v8, v10
+; RV64V-NEXT: ret
;
; RV64ZVE32F-ZVFH-LABEL: mgather_truemask_v4f16:
; RV64ZVE32F-ZVFH: # %bb.0:
@@ -9770,27 +8079,6 @@ define <4 x half> @mgather_truemask_v4f16(<4 x ptr> %ptrs, <4 x half> %passthru)
; RV64ZVE32F-ZVFH-NEXT: vfslide1down.vf v8, v8, fa2
; RV64ZVE32F-ZVFH-NEXT: ret
;
-; RV32ZVE32F-ZVFHMIN-LABEL: mgather_truemask_v4f16:
-; RV32ZVE32F-ZVFHMIN: # %bb.0:
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 1
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 2
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 3
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v8
-; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: lh a1, 0(a1)
-; RV32ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2)
-; RV32ZVE32F-ZVFHMIN-NEXT: lh a3, 0(a3)
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.v.x v8, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a1
-; RV32ZVE32F-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a2
-; RV32ZVE32F-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a3
-; RV32ZVE32F-ZVFHMIN-NEXT: ret
-;
; RV64ZVE32F-ZVFHMIN-LABEL: mgather_truemask_v4f16:
; RV64ZVE32F-ZVFHMIN: # %bb.0:
; RV64ZVE32F-ZVFHMIN-NEXT: ld a1, 0(a0)
@@ -9832,259 +8120,19 @@ define <4 x half> @mgather_falsemask_v4f16(<4 x ptr> %ptrs, <4 x half> %passthru
declare <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x half>)
define <8 x half> @mgather_v8f16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x half> %passthru) {
-; RV32V-ZVFH-LABEL: mgather_v8f16:
-; RV32V-ZVFH: # %bb.0:
-; RV32V-ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, mu
-; RV32V-ZVFH-NEXT: vluxei32.v v10, (zero), v8, v0.t
-; RV32V-ZVFH-NEXT: vmv.v.v v8, v10
-; RV32V-ZVFH-NEXT: ret
-;
-; RV64V-ZVFH-LABEL: mgather_v8f16:
-; RV64V-ZVFH: # %bb.0:
-; RV64V-ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, mu
-; RV64V-ZVFH-NEXT: vluxei64.v v12, (zero), v8, v0.t
-; RV64V-ZVFH-NEXT: vmv.v.v v8, v12
-; RV64V-ZVFH-NEXT: ret
-;
-; RV32V-ZVFHMIN-LABEL: mgather_v8f16:
-; RV32V-ZVFHMIN: # %bb.0:
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v0
-; RV32V-ZVFHMIN-NEXT: andi a1, a0, 1
-; RV32V-ZVFHMIN-NEXT: bnez a1, .LBB73_9
-; RV32V-ZVFHMIN-NEXT: # %bb.1: # %else
-; RV32V-ZVFHMIN-NEXT: andi a1, a0, 2
-; RV32V-ZVFHMIN-NEXT: bnez a1, .LBB73_10
-; RV32V-ZVFHMIN-NEXT: .LBB73_2: # %else2
-; RV32V-ZVFHMIN-NEXT: andi a1, a0, 4
-; RV32V-ZVFHMIN-NEXT: bnez a1, .LBB73_11
-; RV32V-ZVFHMIN-NEXT: .LBB73_3: # %else5
-; RV32V-ZVFHMIN-NEXT: andi a1, a0, 8
-; RV32V-ZVFHMIN-NEXT: bnez a1, .LBB73_12
-; RV32V-ZVFHMIN-NEXT: .LBB73_4: # %else8
-; RV32V-ZVFHMIN-NEXT: andi a1, a0, 16
-; RV32V-ZVFHMIN-NEXT: bnez a1, .LBB73_13
-; RV32V-ZVFHMIN-NEXT: .LBB73_5: # %else11
-; RV32V-ZVFHMIN-NEXT: andi a1, a0, 32
-; RV32V-ZVFHMIN-NEXT: bnez a1, .LBB73_14
-; RV32V-ZVFHMIN-NEXT: .LBB73_6: # %else14
-; RV32V-ZVFHMIN-NEXT: andi a1, a0, 64
-; RV32V-ZVFHMIN-NEXT: bnez a1, .LBB73_15
-; RV32V-ZVFHMIN-NEXT: .LBB73_7: # %else17
-; RV32V-ZVFHMIN-NEXT: andi a0, a0, -128
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB73_16
-; RV32V-ZVFHMIN-NEXT: .LBB73_8: # %else20
-; RV32V-ZVFHMIN-NEXT: vmv1r.v v8, v10
-; RV32V-ZVFHMIN-NEXT: ret
-; RV32V-ZVFHMIN-NEXT: .LBB73_9: # %cond.load
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v8
-; RV32V-ZVFHMIN-NEXT: lh a1, 0(a1)
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, tu, ma
-; RV32V-ZVFHMIN-NEXT: vmv.s.x v10, a1
-; RV32V-ZVFHMIN-NEXT: andi a1, a0, 2
-; RV32V-ZVFHMIN-NEXT: beqz a1, .LBB73_2
-; RV32V-ZVFHMIN-NEXT: .LBB73_10: # %cond.load1
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v11, v8, 1
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v11
-; RV32V-ZVFHMIN-NEXT: lh a1, 0(a1)
-; RV32V-ZVFHMIN-NEXT: vmv.s.x v11, a1
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 2, e16, m1, tu, ma
-; RV32V-ZVFHMIN-NEXT: vslideup.vi v10, v11, 1
-; RV32V-ZVFHMIN-NEXT: andi a1, a0, 4
-; RV32V-ZVFHMIN-NEXT: beqz a1, .LBB73_3
-; RV32V-ZVFHMIN-NEXT: .LBB73_11: # %cond.load4
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v11, v8, 2
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v11
-; RV32V-ZVFHMIN-NEXT: lh a1, 0(a1)
-; RV32V-ZVFHMIN-NEXT: vmv.s.x v11, a1
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 3, e16, m1, tu, ma
-; RV32V-ZVFHMIN-NEXT: vslideup.vi v10, v11, 2
-; RV32V-ZVFHMIN-NEXT: andi a1, a0, 8
-; RV32V-ZVFHMIN-NEXT: beqz a1, .LBB73_4
-; RV32V-ZVFHMIN-NEXT: .LBB73_12: # %cond.load7
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v11, v8, 3
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v11
-; RV32V-ZVFHMIN-NEXT: lh a1, 0(a1)
-; RV32V-ZVFHMIN-NEXT: vmv.s.x v11, a1
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma
-; RV32V-ZVFHMIN-NEXT: vslideup.vi v10, v11, 3
-; RV32V-ZVFHMIN-NEXT: andi a1, a0, 16
-; RV32V-ZVFHMIN-NEXT: beqz a1, .LBB73_5
-; RV32V-ZVFHMIN-NEXT: .LBB73_13: # %cond.load10
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v12, v8, 4
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v12
-; RV32V-ZVFHMIN-NEXT: lh a1, 0(a1)
-; RV32V-ZVFHMIN-NEXT: vmv.s.x v11, a1
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 5, e16, m1, tu, ma
-; RV32V-ZVFHMIN-NEXT: vslideup.vi v10, v11, 4
-; RV32V-ZVFHMIN-NEXT: andi a1, a0, 32
-; RV32V-ZVFHMIN-NEXT: beqz a1, .LBB73_6
-; RV32V-ZVFHMIN-NEXT: .LBB73_14: # %cond.load13
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v12, v8, 5
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v12
-; RV32V-ZVFHMIN-NEXT: lh a1, 0(a1)
-; RV32V-ZVFHMIN-NEXT: vmv.s.x v11, a1
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, tu, ma
-; RV32V-ZVFHMIN-NEXT: vslideup.vi v10, v11, 5
-; RV32V-ZVFHMIN-NEXT: andi a1, a0, 64
-; RV32V-ZVFHMIN-NEXT: beqz a1, .LBB73_7
-; RV32V-ZVFHMIN-NEXT: .LBB73_15: # %cond.load16
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v12, v8, 6
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v12
-; RV32V-ZVFHMIN-NEXT: lh a1, 0(a1)
-; RV32V-ZVFHMIN-NEXT: vmv.s.x v11, a1
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 7, e16, m1, tu, ma
-; RV32V-ZVFHMIN-NEXT: vslideup.vi v10, v11, 6
-; RV32V-ZVFHMIN-NEXT: andi a0, a0, -128
-; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB73_8
-; RV32V-ZVFHMIN-NEXT: .LBB73_16: # %cond.load19
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32V-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslideup.vi v10, v8, 7
-; RV32V-ZVFHMIN-NEXT: vmv1r.v v8, v10
-; RV32V-ZVFHMIN-NEXT: ret
-;
-; RV64V-ZVFHMIN-LABEL: mgather_v8f16:
-; RV64V-ZVFHMIN: # %bb.0:
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v0
-; RV64V-ZVFHMIN-NEXT: andi a1, a0, 1
-; RV64V-ZVFHMIN-NEXT: bnez a1, .LBB73_11
-; RV64V-ZVFHMIN-NEXT: # %bb.1: # %else
-; RV64V-ZVFHMIN-NEXT: andi a1, a0, 2
-; RV64V-ZVFHMIN-NEXT: bnez a1, .LBB73_12
-; RV64V-ZVFHMIN-NEXT: .LBB73_2: # %else2
-; RV64V-ZVFHMIN-NEXT: andi a1, a0, 4
-; RV64V-ZVFHMIN-NEXT: bnez a1, .LBB73_13
-; RV64V-ZVFHMIN-NEXT: .LBB73_3: # %else5
-; RV64V-ZVFHMIN-NEXT: andi a1, a0, 8
-; RV64V-ZVFHMIN-NEXT: beqz a1, .LBB73_5
-; RV64V-ZVFHMIN-NEXT: .LBB73_4: # %cond.load7
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v14, v8, 3
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v14
-; RV64V-ZVFHMIN-NEXT: lh a1, 0(a1)
-; RV64V-ZVFHMIN-NEXT: vmv.s.x v13, a1
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma
-; RV64V-ZVFHMIN-NEXT: vslideup.vi v12, v13, 3
-; RV64V-ZVFHMIN-NEXT: .LBB73_5: # %else8
-; RV64V-ZVFHMIN-NEXT: addi sp, sp, -320
-; RV64V-ZVFHMIN-NEXT: .cfi_def_cfa_offset 320
-; RV64V-ZVFHMIN-NEXT: sd ra, 312(sp) # 8-byte Folded Spill
-; RV64V-ZVFHMIN-NEXT: sd s0, 304(sp) # 8-byte Folded Spill
-; RV64V-ZVFHMIN-NEXT: .cfi_offset ra, -8
-; RV64V-ZVFHMIN-NEXT: .cfi_offset s0, -16
-; RV64V-ZVFHMIN-NEXT: addi s0, sp, 320
-; RV64V-ZVFHMIN-NEXT: .cfi_def_cfa s0, 0
-; RV64V-ZVFHMIN-NEXT: andi sp, sp, -64
-; RV64V-ZVFHMIN-NEXT: andi a1, a0, 16
-; RV64V-ZVFHMIN-NEXT: bnez a1, .LBB73_14
-; RV64V-ZVFHMIN-NEXT: # %bb.6: # %else11
-; RV64V-ZVFHMIN-NEXT: andi a1, a0, 32
-; RV64V-ZVFHMIN-NEXT: bnez a1, .LBB73_15
-; RV64V-ZVFHMIN-NEXT: .LBB73_7: # %else14
-; RV64V-ZVFHMIN-NEXT: andi a1, a0, 64
-; RV64V-ZVFHMIN-NEXT: bnez a1, .LBB73_16
-; RV64V-ZVFHMIN-NEXT: .LBB73_8: # %else17
-; RV64V-ZVFHMIN-NEXT: andi a0, a0, -128
-; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB73_10
-; RV64V-ZVFHMIN-NEXT: .LBB73_9: # %cond.load19
-; RV64V-ZVFHMIN-NEXT: mv a0, sp
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64V-ZVFHMIN-NEXT: vse64.v v8, (a0)
-; RV64V-ZVFHMIN-NEXT: ld a0, 56(sp)
-; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV64V-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslideup.vi v12, v8, 7
-; RV64V-ZVFHMIN-NEXT: .LBB73_10: # %else20
-; RV64V-ZVFHMIN-NEXT: vmv1r.v v8, v12
-; RV64V-ZVFHMIN-NEXT: addi sp, s0, -320
-; RV64V-ZVFHMIN-NEXT: ld ra, 312(sp) # 8-byte Folded Reload
-; RV64V-ZVFHMIN-NEXT: ld s0, 304(sp) # 8-byte Folded Reload
-; RV64V-ZVFHMIN-NEXT: addi sp, sp, 320
-; RV64V-ZVFHMIN-NEXT: ret
-; RV64V-ZVFHMIN-NEXT: .LBB73_11: # %cond.load
-; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v8
-; RV64V-ZVFHMIN-NEXT: lh a1, 0(a1)
-; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, tu, ma
-; RV64V-ZVFHMIN-NEXT: vmv.s.x v12, a1
-; RV64V-ZVFHMIN-NEXT: andi a1, a0, 2
-; RV64V-ZVFHMIN-NEXT: beqz a1, .LBB73_2
-; RV64V-ZVFHMIN-NEXT: .LBB73_12: # %cond.load1
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v13, v8, 1
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v13
-; RV64V-ZVFHMIN-NEXT: lh a1, 0(a1)
-; RV64V-ZVFHMIN-NEXT: vmv.s.x v13, a1
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 2, e16, m1, tu, ma
-; RV64V-ZVFHMIN-NEXT: vslideup.vi v12, v13, 1
-; RV64V-ZVFHMIN-NEXT: andi a1, a0, 4
-; RV64V-ZVFHMIN-NEXT: beqz a1, .LBB73_3
-; RV64V-ZVFHMIN-NEXT: .LBB73_13: # %cond.load4
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v14, v8, 2
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v14
-; RV64V-ZVFHMIN-NEXT: lh a1, 0(a1)
-; RV64V-ZVFHMIN-NEXT: vmv.s.x v13, a1
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 3, e16, m1, tu, ma
-; RV64V-ZVFHMIN-NEXT: vslideup.vi v12, v13, 2
-; RV64V-ZVFHMIN-NEXT: andi a1, a0, 8
-; RV64V-ZVFHMIN-NEXT: bnez a1, .LBB73_4
-; RV64V-ZVFHMIN-NEXT: j .LBB73_5
-; RV64V-ZVFHMIN-NEXT: .LBB73_14: # %cond.load10
-; RV64V-ZVFHMIN-NEXT: addi a1, sp, 192
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64V-ZVFHMIN-NEXT: vse64.v v8, (a1)
-; RV64V-ZVFHMIN-NEXT: ld a1, 224(sp)
-; RV64V-ZVFHMIN-NEXT: lh a1, 0(a1)
-; RV64V-ZVFHMIN-NEXT: vmv.s.x v13, a1
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 5, e16, m1, tu, ma
-; RV64V-ZVFHMIN-NEXT: vslideup.vi v12, v13, 4
-; RV64V-ZVFHMIN-NEXT: andi a1, a0, 32
-; RV64V-ZVFHMIN-NEXT: beqz a1, .LBB73_7
-; RV64V-ZVFHMIN-NEXT: .LBB73_15: # %cond.load13
-; RV64V-ZVFHMIN-NEXT: addi a1, sp, 128
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64V-ZVFHMIN-NEXT: vse64.v v8, (a1)
-; RV64V-ZVFHMIN-NEXT: ld a1, 168(sp)
-; RV64V-ZVFHMIN-NEXT: lh a1, 0(a1)
-; RV64V-ZVFHMIN-NEXT: vmv.s.x v13, a1
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, tu, ma
-; RV64V-ZVFHMIN-NEXT: vslideup.vi v12, v13, 5
-; RV64V-ZVFHMIN-NEXT: andi a1, a0, 64
-; RV64V-ZVFHMIN-NEXT: beqz a1, .LBB73_8
-; RV64V-ZVFHMIN-NEXT: .LBB73_16: # %cond.load16
-; RV64V-ZVFHMIN-NEXT: addi a1, sp, 64
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64V-ZVFHMIN-NEXT: vse64.v v8, (a1)
-; RV64V-ZVFHMIN-NEXT: ld a1, 112(sp)
-; RV64V-ZVFHMIN-NEXT: lh a1, 0(a1)
-; RV64V-ZVFHMIN-NEXT: vmv.s.x v13, a1
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 7, e16, m1, tu, ma
-; RV64V-ZVFHMIN-NEXT: vslideup.vi v12, v13, 6
-; RV64V-ZVFHMIN-NEXT: andi a0, a0, -128
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB73_9
-; RV64V-ZVFHMIN-NEXT: j .LBB73_10
-;
-; RV32ZVE32F-ZVFH-LABEL: mgather_v8f16:
-; RV32ZVE32F-ZVFH: # %bb.0:
-; RV32ZVE32F-ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, mu
-; RV32ZVE32F-ZVFH-NEXT: vluxei32.v v10, (zero), v8, v0.t
-; RV32ZVE32F-ZVFH-NEXT: vmv.v.v v8, v10
-; RV32ZVE32F-ZVFH-NEXT: ret
+; RV32-LABEL: mgather_v8f16:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu
+; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t
+; RV32-NEXT: vmv.v.v v8, v10
+; RV32-NEXT: ret
+;
+; RV64V-LABEL: mgather_v8f16:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, mu
+; RV64V-NEXT: vluxei64.v v12, (zero), v8, v0.t
+; RV64V-NEXT: vmv.v.v v8, v12
+; RV64V-NEXT: ret
;
; RV64ZVE32F-ZVFH-LABEL: mgather_v8f16:
; RV64ZVE32F-ZVFH: # %bb.0:
@@ -10179,115 +8227,6 @@ define <8 x half> @mgather_v8f16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x half> %passt
; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v8, v9, 7
; RV64ZVE32F-ZVFH-NEXT: ret
;
-; RV32ZVE32F-ZVFHMIN-LABEL: mgather_v8f16:
-; RV32ZVE32F-ZVFHMIN: # %bb.0:
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v0
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 1
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB73_9
-; RV32ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 2
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB73_10
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB73_2: # %else2
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 4
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB73_11
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB73_3: # %else5
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 8
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB73_12
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB73_4: # %else8
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 16
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB73_13
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB73_5: # %else11
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 32
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB73_14
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB73_6: # %else14
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 64
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB73_15
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB73_7: # %else17
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a0, -128
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB73_16
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB73_8: # %else20
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v10
-; RV32ZVE32F-ZVFHMIN-NEXT: ret
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB73_9: # %cond.load
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v8
-; RV32ZVE32F-ZVFHMIN-NEXT: lh a1, 0(a1)
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, tu, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v10, a1
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 2
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB73_2
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB73_10: # %cond.load1
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v11, v8, 1
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v11
-; RV32ZVE32F-ZVFHMIN-NEXT: lh a1, 0(a1)
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v11, a1
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, m1, tu, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v10, v11, 1
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 4
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB73_3
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB73_11: # %cond.load4
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v11, v8, 2
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v11
-; RV32ZVE32F-ZVFHMIN-NEXT: lh a1, 0(a1)
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v11, a1
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 3, e16, m1, tu, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v10, v11, 2
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 8
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB73_4
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB73_12: # %cond.load7
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v11, v8, 3
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v11
-; RV32ZVE32F-ZVFHMIN-NEXT: lh a1, 0(a1)
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v11, a1
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v10, v11, 3
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 16
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB73_5
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB73_13: # %cond.load10
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v12, v8, 4
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v12
-; RV32ZVE32F-ZVFHMIN-NEXT: lh a1, 0(a1)
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v11, a1
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 5, e16, m1, tu, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v10, v11, 4
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 32
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB73_6
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB73_14: # %cond.load13
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v12, v8, 5
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v12
-; RV32ZVE32F-ZVFHMIN-NEXT: lh a1, 0(a1)
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v11, a1
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, tu, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v10, v11, 5
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 64
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB73_7
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB73_15: # %cond.load16
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v12, v8, 6
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v12
-; RV32ZVE32F-ZVFHMIN-NEXT: lh a1, 0(a1)
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v11, a1
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 7, e16, m1, tu, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v10, v11, 6
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a0, -128
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB73_8
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB73_16: # %cond.load19
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v10, v8, 7
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v10
-; RV32ZVE32F-ZVFHMIN-NEXT: ret
-;
; RV64ZVE32F-ZVFHMIN-LABEL: mgather_v8f16:
; RV64ZVE32F-ZVFHMIN: # %bb.0:
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -10385,276 +8324,25 @@ define <8 x half> @mgather_v8f16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x half> %passt
}
define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x half> %passthru) {
-; RV32V-ZVFH-LABEL: mgather_baseidx_v8i8_v8f16:
-; RV32V-ZVFH: # %bb.0:
-; RV32V-ZVFH-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32V-ZVFH-NEXT: vsext.vf4 v10, v8
-; RV32V-ZVFH-NEXT: vadd.vv v10, v10, v10
-; RV32V-ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV32V-ZVFH-NEXT: vluxei32.v v9, (a0), v10, v0.t
-; RV32V-ZVFH-NEXT: vmv.v.v v8, v9
-; RV32V-ZVFH-NEXT: ret
-;
-; RV64V-ZVFH-LABEL: mgather_baseidx_v8i8_v8f16:
-; RV64V-ZVFH: # %bb.0:
-; RV64V-ZVFH-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64V-ZVFH-NEXT: vsext.vf8 v12, v8
-; RV64V-ZVFH-NEXT: vadd.vv v12, v12, v12
-; RV64V-ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV64V-ZVFH-NEXT: vluxei64.v v9, (a0), v12, v0.t
-; RV64V-ZVFH-NEXT: vmv.v.v v8, v9
-; RV64V-ZVFH-NEXT: ret
-;
-; RV32V-ZVFHMIN-LABEL: mgather_baseidx_v8i8_v8f16:
-; RV32V-ZVFHMIN: # %bb.0:
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vsext.vf4 v10, v8
-; RV32V-ZVFHMIN-NEXT: vadd.vv v10, v10, v10
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v0
-; RV32V-ZVFHMIN-NEXT: andi a2, a1, 1
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vadd.vx v10, v10, a0
-; RV32V-ZVFHMIN-NEXT: bnez a2, .LBB74_9
-; RV32V-ZVFHMIN-NEXT: # %bb.1: # %else
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 2
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB74_10
-; RV32V-ZVFHMIN-NEXT: .LBB74_2: # %else2
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 4
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB74_11
-; RV32V-ZVFHMIN-NEXT: .LBB74_3: # %else5
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 8
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB74_12
-; RV32V-ZVFHMIN-NEXT: .LBB74_4: # %else8
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 16
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB74_13
-; RV32V-ZVFHMIN-NEXT: .LBB74_5: # %else11
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 32
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB74_14
-; RV32V-ZVFHMIN-NEXT: .LBB74_6: # %else14
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 64
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB74_15
-; RV32V-ZVFHMIN-NEXT: .LBB74_7: # %else17
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, -128
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB74_16
-; RV32V-ZVFHMIN-NEXT: .LBB74_8: # %else20
-; RV32V-ZVFHMIN-NEXT: vmv1r.v v8, v9
-; RV32V-ZVFHMIN-NEXT: ret
-; RV32V-ZVFHMIN-NEXT: .LBB74_9: # %cond.load
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v10
-; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, tu, ma
-; RV32V-ZVFHMIN-NEXT: vmv.s.x v9, a0
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 2
-; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB74_2
-; RV32V-ZVFHMIN-NEXT: .LBB74_10: # %cond.load1
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 1
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32V-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 2, e16, m1, tu, ma
-; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 1
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 4
-; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB74_3
-; RV32V-ZVFHMIN-NEXT: .LBB74_11: # %cond.load4
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 2
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32V-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 3, e16, m1, tu, ma
-; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 2
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 8
-; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB74_4
-; RV32V-ZVFHMIN-NEXT: .LBB74_12: # %cond.load7
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 3
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32V-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma
-; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 3
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 16
-; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB74_5
-; RV32V-ZVFHMIN-NEXT: .LBB74_13: # %cond.load10
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 4
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v12
-; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32V-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 5, e16, m1, tu, ma
-; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 4
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 32
-; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB74_6
-; RV32V-ZVFHMIN-NEXT: .LBB74_14: # %cond.load13
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 5
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v12
-; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32V-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, tu, ma
-; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 5
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 64
-; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB74_7
-; RV32V-ZVFHMIN-NEXT: .LBB74_15: # %cond.load16
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 6
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v12
-; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32V-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 7, e16, m1, tu, ma
-; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 6
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, -128
-; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB74_8
-; RV32V-ZVFHMIN-NEXT: .LBB74_16: # %cond.load19
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v10, v10, 7
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v10
-; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32V-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 7
-; RV32V-ZVFHMIN-NEXT: vmv1r.v v8, v9
-; RV32V-ZVFHMIN-NEXT: ret
-;
-; RV64V-ZVFHMIN-LABEL: mgather_baseidx_v8i8_v8f16:
-; RV64V-ZVFHMIN: # %bb.0:
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64V-ZVFHMIN-NEXT: vsext.vf8 v12, v8
-; RV64V-ZVFHMIN-NEXT: vadd.vv v12, v12, v12
-; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v0
-; RV64V-ZVFHMIN-NEXT: andi a2, a1, 1
-; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; RV64V-ZVFHMIN-NEXT: vadd.vx v12, v12, a0
-; RV64V-ZVFHMIN-NEXT: bnez a2, .LBB74_11
-; RV64V-ZVFHMIN-NEXT: # %bb.1: # %else
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 2
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB74_12
-; RV64V-ZVFHMIN-NEXT: .LBB74_2: # %else2
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 4
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB74_13
-; RV64V-ZVFHMIN-NEXT: .LBB74_3: # %else5
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 8
-; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB74_5
-; RV64V-ZVFHMIN-NEXT: .LBB74_4: # %cond.load7
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v10, v12, 3
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v10
-; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV64V-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma
-; RV64V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 3
-; RV64V-ZVFHMIN-NEXT: .LBB74_5: # %else8
-; RV64V-ZVFHMIN-NEXT: addi sp, sp, -320
-; RV64V-ZVFHMIN-NEXT: .cfi_def_cfa_offset 320
-; RV64V-ZVFHMIN-NEXT: sd ra, 312(sp) # 8-byte Folded Spill
-; RV64V-ZVFHMIN-NEXT: sd s0, 304(sp) # 8-byte Folded Spill
-; RV64V-ZVFHMIN-NEXT: .cfi_offset ra, -8
-; RV64V-ZVFHMIN-NEXT: .cfi_offset s0, -16
-; RV64V-ZVFHMIN-NEXT: addi s0, sp, 320
-; RV64V-ZVFHMIN-NEXT: .cfi_def_cfa s0, 0
-; RV64V-ZVFHMIN-NEXT: andi sp, sp, -64
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 16
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB74_14
-; RV64V-ZVFHMIN-NEXT: # %bb.6: # %else11
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 32
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB74_15
-; RV64V-ZVFHMIN-NEXT: .LBB74_7: # %else14
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 64
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB74_16
-; RV64V-ZVFHMIN-NEXT: .LBB74_8: # %else17
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, -128
-; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB74_10
-; RV64V-ZVFHMIN-NEXT: .LBB74_9: # %cond.load19
-; RV64V-ZVFHMIN-NEXT: mv a0, sp
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
-; RV64V-ZVFHMIN-NEXT: ld a0, 56(sp)
-; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV64V-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 7
-; RV64V-ZVFHMIN-NEXT: .LBB74_10: # %else20
-; RV64V-ZVFHMIN-NEXT: vmv1r.v v8, v9
-; RV64V-ZVFHMIN-NEXT: addi sp, s0, -320
-; RV64V-ZVFHMIN-NEXT: ld ra, 312(sp) # 8-byte Folded Reload
-; RV64V-ZVFHMIN-NEXT: ld s0, 304(sp) # 8-byte Folded Reload
-; RV64V-ZVFHMIN-NEXT: addi sp, sp, 320
-; RV64V-ZVFHMIN-NEXT: ret
-; RV64V-ZVFHMIN-NEXT: .LBB74_11: # %cond.load
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v12
-; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, tu, ma
-; RV64V-ZVFHMIN-NEXT: vmv.s.x v9, a0
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 2
-; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB74_2
-; RV64V-ZVFHMIN-NEXT: .LBB74_12: # %cond.load1
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v8, v12, 1
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV64V-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 2, e16, m1, tu, ma
-; RV64V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 1
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 4
-; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB74_3
-; RV64V-ZVFHMIN-NEXT: .LBB74_13: # %cond.load4
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v10, v12, 2
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v10
-; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV64V-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 3, e16, m1, tu, ma
-; RV64V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 2
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 8
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB74_4
-; RV64V-ZVFHMIN-NEXT: j .LBB74_5
-; RV64V-ZVFHMIN-NEXT: .LBB74_14: # %cond.load10
-; RV64V-ZVFHMIN-NEXT: addi a0, sp, 192
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
-; RV64V-ZVFHMIN-NEXT: ld a0, 224(sp)
-; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV64V-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 5, e16, m1, tu, ma
-; RV64V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 4
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 32
-; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB74_7
-; RV64V-ZVFHMIN-NEXT: .LBB74_15: # %cond.load13
-; RV64V-ZVFHMIN-NEXT: addi a0, sp, 128
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
-; RV64V-ZVFHMIN-NEXT: ld a0, 168(sp)
-; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV64V-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, tu, ma
-; RV64V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 5
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 64
-; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB74_8
-; RV64V-ZVFHMIN-NEXT: .LBB74_16: # %cond.load16
-; RV64V-ZVFHMIN-NEXT: addi a0, sp, 64
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
-; RV64V-ZVFHMIN-NEXT: ld a0, 112(sp)
-; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV64V-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 7, e16, m1, tu, ma
-; RV64V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 6
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, -128
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB74_9
-; RV64V-ZVFHMIN-NEXT: j .LBB74_10
-;
-; RV32ZVE32F-ZVFH-LABEL: mgather_baseidx_v8i8_v8f16:
-; RV32ZVE32F-ZVFH: # %bb.0:
-; RV32ZVE32F-ZVFH-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32ZVE32F-ZVFH-NEXT: vsext.vf4 v10, v8
-; RV32ZVE32F-ZVFH-NEXT: vadd.vv v10, v10, v10
-; RV32ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV32ZVE32F-ZVFH-NEXT: vluxei32.v v9, (a0), v10, v0.t
-; RV32ZVE32F-ZVFH-NEXT: vmv.v.v v8, v9
-; RV32ZVE32F-ZVFH-NEXT: ret
+; RV32-LABEL: mgather_baseidx_v8i8_v8f16:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: vsext.vf4 v10, v8
+; RV32-NEXT: vadd.vv v10, v10, v10
+; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t
+; RV32-NEXT: vmv.v.v v8, v9
+; RV32-NEXT: ret
+;
+; RV64V-LABEL: mgather_baseidx_v8i8_v8f16:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vsext.vf8 v12, v8
+; RV64V-NEXT: vadd.vv v12, v12, v12
+; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t
+; RV64V-NEXT: vmv.v.v v8, v9
+; RV64V-NEXT: ret
;
; RV64ZVE32F-ZVFH-LABEL: mgather_baseidx_v8i8_v8f16:
; RV64ZVE32F-ZVFH: # %bb.0:
@@ -10783,119 +8471,6 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1
; RV64ZVE32F-ZVFH-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-ZVFH-NEXT: ret
;
-; RV32ZVE32F-ZVFHMIN-LABEL: mgather_baseidx_v8i8_v8f16:
-; RV32ZVE32F-ZVFHMIN: # %bb.0:
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vsext.vf4 v10, v8
-; RV32ZVE32F-ZVFHMIN-NEXT: vadd.vv v10, v10, v10
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v0
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 1
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vadd.vx v10, v10, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB74_9
-; RV32ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 2
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB74_10
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB74_2: # %else2
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 4
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB74_11
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB74_3: # %else5
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 8
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB74_12
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB74_4: # %else8
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 16
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB74_13
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB74_5: # %else11
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 32
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB74_14
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB74_6: # %else14
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 64
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB74_15
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB74_7: # %else17
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, -128
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB74_16
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB74_8: # %else20
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: ret
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB74_9: # %cond.load
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, tu, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 2
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB74_2
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB74_10: # %cond.load1
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 1
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, m1, tu, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 1
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 4
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB74_3
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB74_11: # %cond.load4
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 2
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 3, e16, m1, tu, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 2
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 8
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB74_4
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB74_12: # %cond.load7
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 3
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 3
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 16
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB74_5
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB74_13: # %cond.load10
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 4
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v12
-; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 5, e16, m1, tu, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 4
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 32
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB74_6
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB74_14: # %cond.load13
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 5
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v12
-; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, tu, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 5
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 64
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB74_7
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB74_15: # %cond.load16
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 6
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v12
-; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 7, e16, m1, tu, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 6
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, -128
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB74_8
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB74_16: # %cond.load19
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v10, 7
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 7
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: ret
-;
; RV64ZVE32F-ZVFHMIN-LABEL: mgather_baseidx_v8i8_v8f16:
; RV64ZVE32F-ZVFHMIN: # %bb.0:
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -11028,276 +8603,25 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1
}
define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x half> %passthru) {
-; RV32V-ZVFH-LABEL: mgather_baseidx_sext_v8i8_v8f16:
-; RV32V-ZVFH: # %bb.0:
-; RV32V-ZVFH-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32V-ZVFH-NEXT: vsext.vf4 v10, v8
-; RV32V-ZVFH-NEXT: vadd.vv v10, v10, v10
-; RV32V-ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV32V-ZVFH-NEXT: vluxei32.v v9, (a0), v10, v0.t
-; RV32V-ZVFH-NEXT: vmv.v.v v8, v9
-; RV32V-ZVFH-NEXT: ret
-;
-; RV64V-ZVFH-LABEL: mgather_baseidx_sext_v8i8_v8f16:
-; RV64V-ZVFH: # %bb.0:
-; RV64V-ZVFH-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64V-ZVFH-NEXT: vsext.vf8 v12, v8
-; RV64V-ZVFH-NEXT: vadd.vv v12, v12, v12
-; RV64V-ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV64V-ZVFH-NEXT: vluxei64.v v9, (a0), v12, v0.t
-; RV64V-ZVFH-NEXT: vmv.v.v v8, v9
-; RV64V-ZVFH-NEXT: ret
-;
-; RV32V-ZVFHMIN-LABEL: mgather_baseidx_sext_v8i8_v8f16:
-; RV32V-ZVFHMIN: # %bb.0:
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vsext.vf4 v10, v8
-; RV32V-ZVFHMIN-NEXT: vadd.vv v10, v10, v10
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v0
-; RV32V-ZVFHMIN-NEXT: andi a2, a1, 1
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vadd.vx v10, v10, a0
-; RV32V-ZVFHMIN-NEXT: bnez a2, .LBB75_9
-; RV32V-ZVFHMIN-NEXT: # %bb.1: # %else
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 2
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB75_10
-; RV32V-ZVFHMIN-NEXT: .LBB75_2: # %else2
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 4
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB75_11
-; RV32V-ZVFHMIN-NEXT: .LBB75_3: # %else5
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 8
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB75_12
-; RV32V-ZVFHMIN-NEXT: .LBB75_4: # %else8
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 16
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB75_13
-; RV32V-ZVFHMIN-NEXT: .LBB75_5: # %else11
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 32
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB75_14
-; RV32V-ZVFHMIN-NEXT: .LBB75_6: # %else14
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 64
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB75_15
-; RV32V-ZVFHMIN-NEXT: .LBB75_7: # %else17
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, -128
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB75_16
-; RV32V-ZVFHMIN-NEXT: .LBB75_8: # %else20
-; RV32V-ZVFHMIN-NEXT: vmv1r.v v8, v9
-; RV32V-ZVFHMIN-NEXT: ret
-; RV32V-ZVFHMIN-NEXT: .LBB75_9: # %cond.load
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v10
-; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, tu, ma
-; RV32V-ZVFHMIN-NEXT: vmv.s.x v9, a0
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 2
-; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB75_2
-; RV32V-ZVFHMIN-NEXT: .LBB75_10: # %cond.load1
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 1
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32V-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 2, e16, m1, tu, ma
-; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 1
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 4
-; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB75_3
-; RV32V-ZVFHMIN-NEXT: .LBB75_11: # %cond.load4
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 2
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32V-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 3, e16, m1, tu, ma
-; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 2
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 8
-; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB75_4
-; RV32V-ZVFHMIN-NEXT: .LBB75_12: # %cond.load7
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 3
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32V-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma
-; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 3
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 16
-; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB75_5
-; RV32V-ZVFHMIN-NEXT: .LBB75_13: # %cond.load10
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 4
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v12
-; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32V-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 5, e16, m1, tu, ma
-; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 4
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 32
-; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB75_6
-; RV32V-ZVFHMIN-NEXT: .LBB75_14: # %cond.load13
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 5
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v12
-; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32V-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, tu, ma
-; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 5
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 64
-; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB75_7
-; RV32V-ZVFHMIN-NEXT: .LBB75_15: # %cond.load16
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 6
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v12
-; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32V-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 7, e16, m1, tu, ma
-; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 6
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, -128
-; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB75_8
-; RV32V-ZVFHMIN-NEXT: .LBB75_16: # %cond.load19
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v10, v10, 7
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v10
-; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32V-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 7
-; RV32V-ZVFHMIN-NEXT: vmv1r.v v8, v9
-; RV32V-ZVFHMIN-NEXT: ret
-;
-; RV64V-ZVFHMIN-LABEL: mgather_baseidx_sext_v8i8_v8f16:
-; RV64V-ZVFHMIN: # %bb.0:
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64V-ZVFHMIN-NEXT: vsext.vf8 v12, v8
-; RV64V-ZVFHMIN-NEXT: vadd.vv v12, v12, v12
-; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v0
-; RV64V-ZVFHMIN-NEXT: andi a2, a1, 1
-; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; RV64V-ZVFHMIN-NEXT: vadd.vx v12, v12, a0
-; RV64V-ZVFHMIN-NEXT: bnez a2, .LBB75_11
-; RV64V-ZVFHMIN-NEXT: # %bb.1: # %else
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 2
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB75_12
-; RV64V-ZVFHMIN-NEXT: .LBB75_2: # %else2
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 4
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB75_13
-; RV64V-ZVFHMIN-NEXT: .LBB75_3: # %else5
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 8
-; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB75_5
-; RV64V-ZVFHMIN-NEXT: .LBB75_4: # %cond.load7
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v10, v12, 3
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v10
-; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV64V-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma
-; RV64V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 3
-; RV64V-ZVFHMIN-NEXT: .LBB75_5: # %else8
-; RV64V-ZVFHMIN-NEXT: addi sp, sp, -320
-; RV64V-ZVFHMIN-NEXT: .cfi_def_cfa_offset 320
-; RV64V-ZVFHMIN-NEXT: sd ra, 312(sp) # 8-byte Folded Spill
-; RV64V-ZVFHMIN-NEXT: sd s0, 304(sp) # 8-byte Folded Spill
-; RV64V-ZVFHMIN-NEXT: .cfi_offset ra, -8
-; RV64V-ZVFHMIN-NEXT: .cfi_offset s0, -16
-; RV64V-ZVFHMIN-NEXT: addi s0, sp, 320
-; RV64V-ZVFHMIN-NEXT: .cfi_def_cfa s0, 0
-; RV64V-ZVFHMIN-NEXT: andi sp, sp, -64
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 16
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB75_14
-; RV64V-ZVFHMIN-NEXT: # %bb.6: # %else11
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 32
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB75_15
-; RV64V-ZVFHMIN-NEXT: .LBB75_7: # %else14
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 64
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB75_16
-; RV64V-ZVFHMIN-NEXT: .LBB75_8: # %else17
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, -128
-; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB75_10
-; RV64V-ZVFHMIN-NEXT: .LBB75_9: # %cond.load19
-; RV64V-ZVFHMIN-NEXT: mv a0, sp
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
-; RV64V-ZVFHMIN-NEXT: ld a0, 56(sp)
-; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV64V-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 7
-; RV64V-ZVFHMIN-NEXT: .LBB75_10: # %else20
-; RV64V-ZVFHMIN-NEXT: vmv1r.v v8, v9
-; RV64V-ZVFHMIN-NEXT: addi sp, s0, -320
-; RV64V-ZVFHMIN-NEXT: ld ra, 312(sp) # 8-byte Folded Reload
-; RV64V-ZVFHMIN-NEXT: ld s0, 304(sp) # 8-byte Folded Reload
-; RV64V-ZVFHMIN-NEXT: addi sp, sp, 320
-; RV64V-ZVFHMIN-NEXT: ret
-; RV64V-ZVFHMIN-NEXT: .LBB75_11: # %cond.load
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v12
-; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, tu, ma
-; RV64V-ZVFHMIN-NEXT: vmv.s.x v9, a0
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 2
-; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB75_2
-; RV64V-ZVFHMIN-NEXT: .LBB75_12: # %cond.load1
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v8, v12, 1
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV64V-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 2, e16, m1, tu, ma
-; RV64V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 1
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 4
-; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB75_3
-; RV64V-ZVFHMIN-NEXT: .LBB75_13: # %cond.load4
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v10, v12, 2
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v10
-; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV64V-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 3, e16, m1, tu, ma
-; RV64V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 2
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 8
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB75_4
-; RV64V-ZVFHMIN-NEXT: j .LBB75_5
-; RV64V-ZVFHMIN-NEXT: .LBB75_14: # %cond.load10
-; RV64V-ZVFHMIN-NEXT: addi a0, sp, 192
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
-; RV64V-ZVFHMIN-NEXT: ld a0, 224(sp)
-; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV64V-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 5, e16, m1, tu, ma
-; RV64V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 4
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 32
-; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB75_7
-; RV64V-ZVFHMIN-NEXT: .LBB75_15: # %cond.load13
-; RV64V-ZVFHMIN-NEXT: addi a0, sp, 128
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
-; RV64V-ZVFHMIN-NEXT: ld a0, 168(sp)
-; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV64V-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, tu, ma
-; RV64V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 5
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 64
-; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB75_8
-; RV64V-ZVFHMIN-NEXT: .LBB75_16: # %cond.load16
-; RV64V-ZVFHMIN-NEXT: addi a0, sp, 64
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
-; RV64V-ZVFHMIN-NEXT: ld a0, 112(sp)
-; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV64V-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 7, e16, m1, tu, ma
-; RV64V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 6
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, -128
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB75_9
-; RV64V-ZVFHMIN-NEXT: j .LBB75_10
-;
-; RV32ZVE32F-ZVFH-LABEL: mgather_baseidx_sext_v8i8_v8f16:
-; RV32ZVE32F-ZVFH: # %bb.0:
-; RV32ZVE32F-ZVFH-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32ZVE32F-ZVFH-NEXT: vsext.vf4 v10, v8
-; RV32ZVE32F-ZVFH-NEXT: vadd.vv v10, v10, v10
-; RV32ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV32ZVE32F-ZVFH-NEXT: vluxei32.v v9, (a0), v10, v0.t
-; RV32ZVE32F-ZVFH-NEXT: vmv.v.v v8, v9
-; RV32ZVE32F-ZVFH-NEXT: ret
+; RV32-LABEL: mgather_baseidx_sext_v8i8_v8f16:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: vsext.vf4 v10, v8
+; RV32-NEXT: vadd.vv v10, v10, v10
+; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t
+; RV32-NEXT: vmv.v.v v8, v9
+; RV32-NEXT: ret
+;
+; RV64V-LABEL: mgather_baseidx_sext_v8i8_v8f16:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vsext.vf8 v12, v8
+; RV64V-NEXT: vadd.vv v12, v12, v12
+; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t
+; RV64V-NEXT: vmv.v.v v8, v9
+; RV64V-NEXT: ret
;
; RV64ZVE32F-ZVFH-LABEL: mgather_baseidx_sext_v8i8_v8f16:
; RV64ZVE32F-ZVFH: # %bb.0:
@@ -11426,119 +8750,6 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-ZVFH-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-ZVFH-NEXT: ret
;
-; RV32ZVE32F-ZVFHMIN-LABEL: mgather_baseidx_sext_v8i8_v8f16:
-; RV32ZVE32F-ZVFHMIN: # %bb.0:
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vsext.vf4 v10, v8
-; RV32ZVE32F-ZVFHMIN-NEXT: vadd.vv v10, v10, v10
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v0
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 1
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vadd.vx v10, v10, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB75_9
-; RV32ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 2
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB75_10
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB75_2: # %else2
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 4
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB75_11
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB75_3: # %else5
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 8
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB75_12
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB75_4: # %else8
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 16
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB75_13
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB75_5: # %else11
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 32
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB75_14
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB75_6: # %else14
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 64
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB75_15
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB75_7: # %else17
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, -128
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB75_16
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB75_8: # %else20
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: ret
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB75_9: # %cond.load
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, tu, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 2
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB75_2
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB75_10: # %cond.load1
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 1
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, m1, tu, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 1
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 4
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB75_3
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB75_11: # %cond.load4
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 2
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 3, e16, m1, tu, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 2
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 8
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB75_4
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB75_12: # %cond.load7
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 3
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 3
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 16
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB75_5
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB75_13: # %cond.load10
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 4
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v12
-; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 5, e16, m1, tu, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 4
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 32
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB75_6
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB75_14: # %cond.load13
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 5
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v12
-; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, tu, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 5
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 64
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB75_7
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB75_15: # %cond.load16
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 6
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v12
-; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 7, e16, m1, tu, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 6
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, -128
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB75_8
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB75_16: # %cond.load19
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v10, 7
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 7
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: ret
-;
; RV64ZVE32F-ZVFHMIN-LABEL: mgather_baseidx_sext_v8i8_v8f16:
; RV64ZVE32F-ZVFHMIN: # %bb.0:
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -11672,273 +8883,23 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
}
define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x half> %passthru) {
-; RV32V-ZVFH-LABEL: mgather_baseidx_zext_v8i8_v8f16:
-; RV32V-ZVFH: # %bb.0:
-; RV32V-ZVFH-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV32V-ZVFH-NEXT: vwaddu.vv v10, v8, v8
-; RV32V-ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV32V-ZVFH-NEXT: vluxei16.v v9, (a0), v10, v0.t
-; RV32V-ZVFH-NEXT: vmv.v.v v8, v9
-; RV32V-ZVFH-NEXT: ret
-;
-; RV64V-ZVFH-LABEL: mgather_baseidx_zext_v8i8_v8f16:
-; RV64V-ZVFH: # %bb.0:
-; RV64V-ZVFH-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV64V-ZVFH-NEXT: vwaddu.vv v10, v8, v8
-; RV64V-ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV64V-ZVFH-NEXT: vluxei16.v v9, (a0), v10, v0.t
-; RV64V-ZVFH-NEXT: vmv.v.v v8, v9
-; RV64V-ZVFH-NEXT: ret
-;
-; RV32V-ZVFHMIN-LABEL: mgather_baseidx_zext_v8i8_v8f16:
-; RV32V-ZVFHMIN: # %bb.0:
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vzext.vf4 v10, v8
-; RV32V-ZVFHMIN-NEXT: vadd.vv v10, v10, v10
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v0
-; RV32V-ZVFHMIN-NEXT: andi a2, a1, 1
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vadd.vx v10, v10, a0
-; RV32V-ZVFHMIN-NEXT: bnez a2, .LBB76_9
-; RV32V-ZVFHMIN-NEXT: # %bb.1: # %else
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 2
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB76_10
-; RV32V-ZVFHMIN-NEXT: .LBB76_2: # %else2
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 4
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB76_11
-; RV32V-ZVFHMIN-NEXT: .LBB76_3: # %else5
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 8
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB76_12
-; RV32V-ZVFHMIN-NEXT: .LBB76_4: # %else8
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 16
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB76_13
-; RV32V-ZVFHMIN-NEXT: .LBB76_5: # %else11
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 32
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB76_14
-; RV32V-ZVFHMIN-NEXT: .LBB76_6: # %else14
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 64
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB76_15
-; RV32V-ZVFHMIN-NEXT: .LBB76_7: # %else17
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, -128
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB76_16
-; RV32V-ZVFHMIN-NEXT: .LBB76_8: # %else20
-; RV32V-ZVFHMIN-NEXT: vmv1r.v v8, v9
-; RV32V-ZVFHMIN-NEXT: ret
-; RV32V-ZVFHMIN-NEXT: .LBB76_9: # %cond.load
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v10
-; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, tu, ma
-; RV32V-ZVFHMIN-NEXT: vmv.s.x v9, a0
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 2
-; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB76_2
-; RV32V-ZVFHMIN-NEXT: .LBB76_10: # %cond.load1
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 1
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32V-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 2, e16, m1, tu, ma
-; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 1
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 4
-; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB76_3
-; RV32V-ZVFHMIN-NEXT: .LBB76_11: # %cond.load4
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 2
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32V-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 3, e16, m1, tu, ma
-; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 2
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 8
-; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB76_4
-; RV32V-ZVFHMIN-NEXT: .LBB76_12: # %cond.load7
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 3
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32V-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma
-; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 3
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 16
-; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB76_5
-; RV32V-ZVFHMIN-NEXT: .LBB76_13: # %cond.load10
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 4
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v12
-; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32V-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 5, e16, m1, tu, ma
-; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 4
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 32
-; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB76_6
-; RV32V-ZVFHMIN-NEXT: .LBB76_14: # %cond.load13
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 5
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v12
-; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32V-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, tu, ma
-; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 5
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 64
-; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB76_7
-; RV32V-ZVFHMIN-NEXT: .LBB76_15: # %cond.load16
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 6
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v12
-; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32V-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 7, e16, m1, tu, ma
-; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 6
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, -128
-; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB76_8
-; RV32V-ZVFHMIN-NEXT: .LBB76_16: # %cond.load19
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v10, v10, 7
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v10
-; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32V-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 7
-; RV32V-ZVFHMIN-NEXT: vmv1r.v v8, v9
-; RV32V-ZVFHMIN-NEXT: ret
-;
-; RV64V-ZVFHMIN-LABEL: mgather_baseidx_zext_v8i8_v8f16:
-; RV64V-ZVFHMIN: # %bb.0:
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64V-ZVFHMIN-NEXT: vzext.vf8 v12, v8
-; RV64V-ZVFHMIN-NEXT: vadd.vv v12, v12, v12
-; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v0
-; RV64V-ZVFHMIN-NEXT: andi a2, a1, 1
-; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; RV64V-ZVFHMIN-NEXT: vadd.vx v12, v12, a0
-; RV64V-ZVFHMIN-NEXT: bnez a2, .LBB76_11
-; RV64V-ZVFHMIN-NEXT: # %bb.1: # %else
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 2
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB76_12
-; RV64V-ZVFHMIN-NEXT: .LBB76_2: # %else2
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 4
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB76_13
-; RV64V-ZVFHMIN-NEXT: .LBB76_3: # %else5
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 8
-; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB76_5
-; RV64V-ZVFHMIN-NEXT: .LBB76_4: # %cond.load7
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v10, v12, 3
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v10
-; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV64V-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma
-; RV64V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 3
-; RV64V-ZVFHMIN-NEXT: .LBB76_5: # %else8
-; RV64V-ZVFHMIN-NEXT: addi sp, sp, -320
-; RV64V-ZVFHMIN-NEXT: .cfi_def_cfa_offset 320
-; RV64V-ZVFHMIN-NEXT: sd ra, 312(sp) # 8-byte Folded Spill
-; RV64V-ZVFHMIN-NEXT: sd s0, 304(sp) # 8-byte Folded Spill
-; RV64V-ZVFHMIN-NEXT: .cfi_offset ra, -8
-; RV64V-ZVFHMIN-NEXT: .cfi_offset s0, -16
-; RV64V-ZVFHMIN-NEXT: addi s0, sp, 320
-; RV64V-ZVFHMIN-NEXT: .cfi_def_cfa s0, 0
-; RV64V-ZVFHMIN-NEXT: andi sp, sp, -64
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 16
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB76_14
-; RV64V-ZVFHMIN-NEXT: # %bb.6: # %else11
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 32
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB76_15
-; RV64V-ZVFHMIN-NEXT: .LBB76_7: # %else14
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 64
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB76_16
-; RV64V-ZVFHMIN-NEXT: .LBB76_8: # %else17
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, -128
-; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB76_10
-; RV64V-ZVFHMIN-NEXT: .LBB76_9: # %cond.load19
-; RV64V-ZVFHMIN-NEXT: mv a0, sp
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
-; RV64V-ZVFHMIN-NEXT: ld a0, 56(sp)
-; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV64V-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 7
-; RV64V-ZVFHMIN-NEXT: .LBB76_10: # %else20
-; RV64V-ZVFHMIN-NEXT: vmv1r.v v8, v9
-; RV64V-ZVFHMIN-NEXT: addi sp, s0, -320
-; RV64V-ZVFHMIN-NEXT: ld ra, 312(sp) # 8-byte Folded Reload
-; RV64V-ZVFHMIN-NEXT: ld s0, 304(sp) # 8-byte Folded Reload
-; RV64V-ZVFHMIN-NEXT: addi sp, sp, 320
-; RV64V-ZVFHMIN-NEXT: ret
-; RV64V-ZVFHMIN-NEXT: .LBB76_11: # %cond.load
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v12
-; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, tu, ma
-; RV64V-ZVFHMIN-NEXT: vmv.s.x v9, a0
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 2
-; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB76_2
-; RV64V-ZVFHMIN-NEXT: .LBB76_12: # %cond.load1
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v8, v12, 1
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV64V-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 2, e16, m1, tu, ma
-; RV64V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 1
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 4
-; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB76_3
-; RV64V-ZVFHMIN-NEXT: .LBB76_13: # %cond.load4
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v10, v12, 2
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v10
-; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV64V-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 3, e16, m1, tu, ma
-; RV64V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 2
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 8
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB76_4
-; RV64V-ZVFHMIN-NEXT: j .LBB76_5
-; RV64V-ZVFHMIN-NEXT: .LBB76_14: # %cond.load10
-; RV64V-ZVFHMIN-NEXT: addi a0, sp, 192
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
-; RV64V-ZVFHMIN-NEXT: ld a0, 224(sp)
-; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV64V-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 5, e16, m1, tu, ma
-; RV64V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 4
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 32
-; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB76_7
-; RV64V-ZVFHMIN-NEXT: .LBB76_15: # %cond.load13
-; RV64V-ZVFHMIN-NEXT: addi a0, sp, 128
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
-; RV64V-ZVFHMIN-NEXT: ld a0, 168(sp)
-; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV64V-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, tu, ma
-; RV64V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 5
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 64
-; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB76_8
-; RV64V-ZVFHMIN-NEXT: .LBB76_16: # %cond.load16
-; RV64V-ZVFHMIN-NEXT: addi a0, sp, 64
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
-; RV64V-ZVFHMIN-NEXT: ld a0, 112(sp)
-; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV64V-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 7, e16, m1, tu, ma
-; RV64V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 6
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, -128
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB76_9
-; RV64V-ZVFHMIN-NEXT: j .LBB76_10
-;
-; RV32ZVE32F-ZVFH-LABEL: mgather_baseidx_zext_v8i8_v8f16:
-; RV32ZVE32F-ZVFH: # %bb.0:
-; RV32ZVE32F-ZVFH-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV32ZVE32F-ZVFH-NEXT: vwaddu.vv v10, v8, v8
-; RV32ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV32ZVE32F-ZVFH-NEXT: vluxei16.v v9, (a0), v10, v0.t
-; RV32ZVE32F-ZVFH-NEXT: vmv.v.v v8, v9
-; RV32ZVE32F-ZVFH-NEXT: ret
+; RV32-LABEL: mgather_baseidx_zext_v8i8_v8f16:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT: vwaddu.vv v10, v8, v8
+; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; RV32-NEXT: vluxei16.v v9, (a0), v10, v0.t
+; RV32-NEXT: vmv.v.v v8, v9
+; RV32-NEXT: ret
+;
+; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8f16:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64V-NEXT: vwaddu.vv v10, v8, v8
+; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; RV64V-NEXT: vluxei16.v v9, (a0), v10, v0.t
+; RV64V-NEXT: vmv.v.v v8, v9
+; RV64V-NEXT: ret
;
; RV64ZVE32F-ZVFH-LABEL: mgather_baseidx_zext_v8i8_v8f16:
; RV64ZVE32F-ZVFH: # %bb.0:
@@ -12075,119 +9036,6 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
; RV64ZVE32F-ZVFH-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-ZVFH-NEXT: ret
;
-; RV32ZVE32F-ZVFHMIN-LABEL: mgather_baseidx_zext_v8i8_v8f16:
-; RV32ZVE32F-ZVFHMIN: # %bb.0:
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vzext.vf4 v10, v8
-; RV32ZVE32F-ZVFHMIN-NEXT: vadd.vv v10, v10, v10
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v0
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 1
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vadd.vx v10, v10, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB76_9
-; RV32ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 2
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB76_10
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB76_2: # %else2
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 4
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB76_11
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB76_3: # %else5
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 8
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB76_12
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB76_4: # %else8
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 16
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB76_13
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB76_5: # %else11
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 32
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB76_14
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB76_6: # %else14
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 64
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB76_15
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB76_7: # %else17
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, -128
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB76_16
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB76_8: # %else20
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: ret
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB76_9: # %cond.load
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, tu, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 2
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB76_2
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB76_10: # %cond.load1
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 1
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, m1, tu, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 1
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 4
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB76_3
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB76_11: # %cond.load4
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 2
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 3, e16, m1, tu, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 2
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 8
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB76_4
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB76_12: # %cond.load7
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 3
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 3
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 16
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB76_5
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB76_13: # %cond.load10
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 4
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v12
-; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 5, e16, m1, tu, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 4
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 32
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB76_6
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB76_14: # %cond.load13
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 5
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v12
-; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, tu, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 5
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 64
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB76_7
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB76_15: # %cond.load16
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 6
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v12
-; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 7, e16, m1, tu, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 6
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, -128
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB76_8
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB76_16: # %cond.load19
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v10, 7
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 7
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: ret
-;
; RV64ZVE32F-ZVFHMIN-LABEL: mgather_baseidx_zext_v8i8_v8f16:
; RV64ZVE32F-ZVFHMIN: # %bb.0:
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -12329,271 +9177,23 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
}
define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x half> %passthru) {
-; RV32V-ZVFH-LABEL: mgather_baseidx_v8f16:
-; RV32V-ZVFH: # %bb.0:
-; RV32V-ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, mu
-; RV32V-ZVFH-NEXT: vwadd.vv v10, v8, v8
-; RV32V-ZVFH-NEXT: vluxei32.v v9, (a0), v10, v0.t
-; RV32V-ZVFH-NEXT: vmv.v.v v8, v9
-; RV32V-ZVFH-NEXT: ret
-;
-; RV64V-ZVFH-LABEL: mgather_baseidx_v8f16:
-; RV64V-ZVFH: # %bb.0:
-; RV64V-ZVFH-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64V-ZVFH-NEXT: vsext.vf4 v12, v8
-; RV64V-ZVFH-NEXT: vadd.vv v12, v12, v12
-; RV64V-ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV64V-ZVFH-NEXT: vluxei64.v v9, (a0), v12, v0.t
-; RV64V-ZVFH-NEXT: vmv.v.v v8, v9
-; RV64V-ZVFH-NEXT: ret
-;
-; RV32V-ZVFHMIN-LABEL: mgather_baseidx_v8f16:
-; RV32V-ZVFHMIN: # %bb.0:
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vwadd.vv v10, v8, v8
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v0
-; RV32V-ZVFHMIN-NEXT: andi a2, a1, 1
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vadd.vx v10, v10, a0
-; RV32V-ZVFHMIN-NEXT: bnez a2, .LBB77_9
-; RV32V-ZVFHMIN-NEXT: # %bb.1: # %else
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 2
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB77_10
-; RV32V-ZVFHMIN-NEXT: .LBB77_2: # %else2
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 4
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB77_11
-; RV32V-ZVFHMIN-NEXT: .LBB77_3: # %else5
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 8
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB77_12
-; RV32V-ZVFHMIN-NEXT: .LBB77_4: # %else8
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 16
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB77_13
-; RV32V-ZVFHMIN-NEXT: .LBB77_5: # %else11
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 32
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB77_14
-; RV32V-ZVFHMIN-NEXT: .LBB77_6: # %else14
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 64
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB77_15
-; RV32V-ZVFHMIN-NEXT: .LBB77_7: # %else17
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, -128
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB77_16
-; RV32V-ZVFHMIN-NEXT: .LBB77_8: # %else20
-; RV32V-ZVFHMIN-NEXT: vmv1r.v v8, v9
-; RV32V-ZVFHMIN-NEXT: ret
-; RV32V-ZVFHMIN-NEXT: .LBB77_9: # %cond.load
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v10
-; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, tu, ma
-; RV32V-ZVFHMIN-NEXT: vmv.s.x v9, a0
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 2
-; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB77_2
-; RV32V-ZVFHMIN-NEXT: .LBB77_10: # %cond.load1
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 1
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32V-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 2, e16, m1, tu, ma
-; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 1
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 4
-; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB77_3
-; RV32V-ZVFHMIN-NEXT: .LBB77_11: # %cond.load4
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 2
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32V-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 3, e16, m1, tu, ma
-; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 2
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 8
-; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB77_4
-; RV32V-ZVFHMIN-NEXT: .LBB77_12: # %cond.load7
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 3
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32V-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma
-; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 3
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 16
-; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB77_5
-; RV32V-ZVFHMIN-NEXT: .LBB77_13: # %cond.load10
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 4
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v12
-; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32V-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 5, e16, m1, tu, ma
-; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 4
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 32
-; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB77_6
-; RV32V-ZVFHMIN-NEXT: .LBB77_14: # %cond.load13
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 5
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v12
-; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32V-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, tu, ma
-; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 5
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 64
-; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB77_7
-; RV32V-ZVFHMIN-NEXT: .LBB77_15: # %cond.load16
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 6
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v12
-; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32V-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 7, e16, m1, tu, ma
-; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 6
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, -128
-; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB77_8
-; RV32V-ZVFHMIN-NEXT: .LBB77_16: # %cond.load19
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v10, v10, 7
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v10
-; RV32V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32V-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 7
-; RV32V-ZVFHMIN-NEXT: vmv1r.v v8, v9
-; RV32V-ZVFHMIN-NEXT: ret
-;
-; RV64V-ZVFHMIN-LABEL: mgather_baseidx_v8f16:
-; RV64V-ZVFHMIN: # %bb.0:
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64V-ZVFHMIN-NEXT: vsext.vf4 v12, v8
-; RV64V-ZVFHMIN-NEXT: vadd.vv v12, v12, v12
-; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v0
-; RV64V-ZVFHMIN-NEXT: andi a2, a1, 1
-; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; RV64V-ZVFHMIN-NEXT: vadd.vx v12, v12, a0
-; RV64V-ZVFHMIN-NEXT: bnez a2, .LBB77_11
-; RV64V-ZVFHMIN-NEXT: # %bb.1: # %else
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 2
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB77_12
-; RV64V-ZVFHMIN-NEXT: .LBB77_2: # %else2
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 4
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB77_13
-; RV64V-ZVFHMIN-NEXT: .LBB77_3: # %else5
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 8
-; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB77_5
-; RV64V-ZVFHMIN-NEXT: .LBB77_4: # %cond.load7
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v10, v12, 3
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v10
-; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV64V-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma
-; RV64V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 3
-; RV64V-ZVFHMIN-NEXT: .LBB77_5: # %else8
-; RV64V-ZVFHMIN-NEXT: addi sp, sp, -320
-; RV64V-ZVFHMIN-NEXT: .cfi_def_cfa_offset 320
-; RV64V-ZVFHMIN-NEXT: sd ra, 312(sp) # 8-byte Folded Spill
-; RV64V-ZVFHMIN-NEXT: sd s0, 304(sp) # 8-byte Folded Spill
-; RV64V-ZVFHMIN-NEXT: .cfi_offset ra, -8
-; RV64V-ZVFHMIN-NEXT: .cfi_offset s0, -16
-; RV64V-ZVFHMIN-NEXT: addi s0, sp, 320
-; RV64V-ZVFHMIN-NEXT: .cfi_def_cfa s0, 0
-; RV64V-ZVFHMIN-NEXT: andi sp, sp, -64
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 16
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB77_14
-; RV64V-ZVFHMIN-NEXT: # %bb.6: # %else11
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 32
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB77_15
-; RV64V-ZVFHMIN-NEXT: .LBB77_7: # %else14
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 64
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB77_16
-; RV64V-ZVFHMIN-NEXT: .LBB77_8: # %else17
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, -128
-; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB77_10
-; RV64V-ZVFHMIN-NEXT: .LBB77_9: # %cond.load19
-; RV64V-ZVFHMIN-NEXT: mv a0, sp
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
-; RV64V-ZVFHMIN-NEXT: ld a0, 56(sp)
-; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV64V-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 7
-; RV64V-ZVFHMIN-NEXT: .LBB77_10: # %else20
-; RV64V-ZVFHMIN-NEXT: vmv1r.v v8, v9
-; RV64V-ZVFHMIN-NEXT: addi sp, s0, -320
-; RV64V-ZVFHMIN-NEXT: ld ra, 312(sp) # 8-byte Folded Reload
-; RV64V-ZVFHMIN-NEXT: ld s0, 304(sp) # 8-byte Folded Reload
-; RV64V-ZVFHMIN-NEXT: addi sp, sp, 320
-; RV64V-ZVFHMIN-NEXT: ret
-; RV64V-ZVFHMIN-NEXT: .LBB77_11: # %cond.load
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v12
-; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, tu, ma
-; RV64V-ZVFHMIN-NEXT: vmv.s.x v9, a0
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 2
-; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB77_2
-; RV64V-ZVFHMIN-NEXT: .LBB77_12: # %cond.load1
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v8, v12, 1
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV64V-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 2, e16, m1, tu, ma
-; RV64V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 1
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 4
-; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB77_3
-; RV64V-ZVFHMIN-NEXT: .LBB77_13: # %cond.load4
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v10, v12, 2
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v10
-; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV64V-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 3, e16, m1, tu, ma
-; RV64V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 2
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 8
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB77_4
-; RV64V-ZVFHMIN-NEXT: j .LBB77_5
-; RV64V-ZVFHMIN-NEXT: .LBB77_14: # %cond.load10
-; RV64V-ZVFHMIN-NEXT: addi a0, sp, 192
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
-; RV64V-ZVFHMIN-NEXT: ld a0, 224(sp)
-; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV64V-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 5, e16, m1, tu, ma
-; RV64V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 4
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 32
-; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB77_7
-; RV64V-ZVFHMIN-NEXT: .LBB77_15: # %cond.load13
-; RV64V-ZVFHMIN-NEXT: addi a0, sp, 128
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
-; RV64V-ZVFHMIN-NEXT: ld a0, 168(sp)
-; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV64V-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, tu, ma
-; RV64V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 5
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 64
-; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB77_8
-; RV64V-ZVFHMIN-NEXT: .LBB77_16: # %cond.load16
-; RV64V-ZVFHMIN-NEXT: addi a0, sp, 64
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
-; RV64V-ZVFHMIN-NEXT: ld a0, 112(sp)
-; RV64V-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV64V-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 7, e16, m1, tu, ma
-; RV64V-ZVFHMIN-NEXT: vslideup.vi v9, v8, 6
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, -128
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB77_9
-; RV64V-ZVFHMIN-NEXT: j .LBB77_10
-;
-; RV32ZVE32F-ZVFH-LABEL: mgather_baseidx_v8f16:
-; RV32ZVE32F-ZVFH: # %bb.0:
-; RV32ZVE32F-ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, mu
-; RV32ZVE32F-ZVFH-NEXT: vwadd.vv v10, v8, v8
-; RV32ZVE32F-ZVFH-NEXT: vluxei32.v v9, (a0), v10, v0.t
-; RV32ZVE32F-ZVFH-NEXT: vmv.v.v v8, v9
-; RV32ZVE32F-ZVFH-NEXT: ret
+; RV32-LABEL: mgather_baseidx_v8f16:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu
+; RV32-NEXT: vwadd.vv v10, v8, v8
+; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t
+; RV32-NEXT: vmv.v.v v8, v9
+; RV32-NEXT: ret
+;
+; RV64V-LABEL: mgather_baseidx_v8f16:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vsext.vf4 v12, v8
+; RV64V-NEXT: vadd.vv v12, v12, v12
+; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t
+; RV64V-NEXT: vmv.v.v v8, v9
+; RV64V-NEXT: ret
;
; RV64ZVE32F-ZVFH-LABEL: mgather_baseidx_v8f16:
; RV64ZVE32F-ZVFH: # %bb.0:
@@ -12714,118 +9314,6 @@ define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m
; RV64ZVE32F-ZVFH-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-ZVFH-NEXT: ret
;
-; RV32ZVE32F-ZVFHMIN-LABEL: mgather_baseidx_v8f16:
-; RV32ZVE32F-ZVFHMIN: # %bb.0:
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vwadd.vv v10, v8, v8
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v0
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 1
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vadd.vx v10, v10, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB77_9
-; RV32ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 2
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB77_10
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB77_2: # %else2
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 4
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB77_11
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB77_3: # %else5
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 8
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB77_12
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB77_4: # %else8
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 16
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB77_13
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB77_5: # %else11
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 32
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB77_14
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB77_6: # %else14
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 64
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB77_15
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB77_7: # %else17
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, -128
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB77_16
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB77_8: # %else20
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: ret
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB77_9: # %cond.load
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, tu, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v9, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 2
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB77_2
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB77_10: # %cond.load1
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 1
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, m1, tu, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 1
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 4
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB77_3
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB77_11: # %cond.load4
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 2
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 3, e16, m1, tu, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 2
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 8
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB77_4
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB77_12: # %cond.load7
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 3
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, tu, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 3
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 16
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB77_5
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB77_13: # %cond.load10
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 4
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v12
-; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 5, e16, m1, tu, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 4
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 32
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB77_6
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB77_14: # %cond.load13
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 5
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v12
-; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, tu, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 5
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 64
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB77_7
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB77_15: # %cond.load16
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 6
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v12
-; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 7, e16, m1, tu, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 6
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, -128
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB77_8
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB77_16: # %cond.load19
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v10, 7
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.s.x v8, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 7
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv1r.v v8, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: ret
-;
; RV64ZVE32F-ZVFHMIN-LABEL: mgather_baseidx_v8f16:
; RV64ZVE32F-ZVFHMIN: # %bb.0:
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -19065,4 +15553,10 @@ define <4 x i32> @masked_gather_widen_sew_negative_stride(ptr %base) {
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV32V-ZVFH: {{.*}}
+; RV32V-ZVFHMIN: {{.*}}
+; RV32ZVE32F-ZVFH: {{.*}}
+; RV32ZVE32F-ZVFHMIN: {{.*}}
; RV64: {{.*}}
+; RV64V-ZVFH: {{.*}}
+; RV64V-ZVFHMIN: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
index 3b642f1678e469..fcddec226ceab3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
@@ -5888,47 +5888,20 @@ declare void @llvm.masked.scatter.v1bf16.v1p0(<1 x bfloat>, <1 x ptr>, i32, <1 x
define void @mscatter_v1bf16(<1 x bfloat> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
; RV32V-LABEL: mscatter_v1bf16:
; RV32V: # %bb.0:
-; RV32V-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
-; RV32V-NEXT: vfirst.m a0, v0
-; RV32V-NEXT: bnez a0, .LBB52_2
-; RV32V-NEXT: # %bb.1: # %cond.store
-; RV32V-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; RV32V-NEXT: vmv.x.s a0, v8
-; RV32V-NEXT: fmv.h.x fa5, a0
-; RV32V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; RV32V-NEXT: vmv.x.s a0, v9
-; RV32V-NEXT: fsh fa5, 0(a0)
-; RV32V-NEXT: .LBB52_2: # %else
+; RV32V-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT: ret
;
; RV64V-LABEL: mscatter_v1bf16:
; RV64V: # %bb.0:
-; RV64V-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
-; RV64V-NEXT: vfirst.m a0, v0
-; RV64V-NEXT: bnez a0, .LBB52_2
-; RV64V-NEXT: # %bb.1: # %cond.store
-; RV64V-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; RV64V-NEXT: vmv.x.s a0, v8
-; RV64V-NEXT: fmv.h.x fa5, a0
-; RV64V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
-; RV64V-NEXT: vmv.x.s a0, v9
-; RV64V-NEXT: fsh fa5, 0(a0)
-; RV64V-NEXT: .LBB52_2: # %else
+; RV64V-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_v1bf16:
; RV32ZVE32F: # %bb.0:
-; RV32ZVE32F-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
-; RV32ZVE32F-NEXT: vfirst.m a0, v0
-; RV32ZVE32F-NEXT: bnez a0, .LBB52_2
-; RV32ZVE32F-NEXT: # %bb.1: # %cond.store
-; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV32ZVE32F-NEXT: vmv.x.s a0, v8
-; RV32ZVE32F-NEXT: fmv.h.x fa5, a0
-; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV32ZVE32F-NEXT: vmv.x.s a0, v9
-; RV32ZVE32F-NEXT: fsh fa5, 0(a0)
-; RV32ZVE32F-NEXT: .LBB52_2: # %else
+; RV32ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_v1bf16:
@@ -5952,95 +5925,20 @@ declare void @llvm.masked.scatter.v2bf16.v2p0(<2 x bfloat>, <2 x ptr>, i32, <2 x
define void @mscatter_v2bf16(<2 x bfloat> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV32V-LABEL: mscatter_v2bf16:
; RV32V: # %bb.0:
-; RV32V-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV32V-NEXT: vmv.x.s a0, v0
-; RV32V-NEXT: andi a1, a0, 1
-; RV32V-NEXT: bnez a1, .LBB53_3
-; RV32V-NEXT: # %bb.1: # %else
-; RV32V-NEXT: andi a0, a0, 2
-; RV32V-NEXT: bnez a0, .LBB53_4
-; RV32V-NEXT: .LBB53_2: # %else2
-; RV32V-NEXT: ret
-; RV32V-NEXT: .LBB53_3: # %cond.store
-; RV32V-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV32V-NEXT: vmv.x.s a1, v8
-; RV32V-NEXT: fmv.h.x fa5, a1
-; RV32V-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; RV32V-NEXT: vmv.x.s a1, v9
-; RV32V-NEXT: fsh fa5, 0(a1)
-; RV32V-NEXT: andi a0, a0, 2
-; RV32V-NEXT: beqz a0, .LBB53_2
-; RV32V-NEXT: .LBB53_4: # %cond.store1
-; RV32V-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; RV32V-NEXT: vslidedown.vi v8, v8, 1
-; RV32V-NEXT: vmv.x.s a0, v8
-; RV32V-NEXT: fmv.h.x fa5, a0
-; RV32V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; RV32V-NEXT: vslidedown.vi v8, v9, 1
-; RV32V-NEXT: vmv.x.s a0, v8
-; RV32V-NEXT: fsh fa5, 0(a0)
+; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT: ret
;
; RV64V-LABEL: mscatter_v2bf16:
; RV64V: # %bb.0:
-; RV64V-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64V-NEXT: vmv.x.s a0, v0
-; RV64V-NEXT: andi a1, a0, 1
-; RV64V-NEXT: bnez a1, .LBB53_3
-; RV64V-NEXT: # %bb.1: # %else
-; RV64V-NEXT: andi a0, a0, 2
-; RV64V-NEXT: bnez a0, .LBB53_4
-; RV64V-NEXT: .LBB53_2: # %else2
-; RV64V-NEXT: ret
-; RV64V-NEXT: .LBB53_3: # %cond.store
-; RV64V-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV64V-NEXT: vmv.x.s a1, v8
-; RV64V-NEXT: fmv.h.x fa5, a1
-; RV64V-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; RV64V-NEXT: vmv.x.s a1, v9
-; RV64V-NEXT: fsh fa5, 0(a1)
-; RV64V-NEXT: andi a0, a0, 2
-; RV64V-NEXT: beqz a0, .LBB53_2
-; RV64V-NEXT: .LBB53_4: # %cond.store1
-; RV64V-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; RV64V-NEXT: vslidedown.vi v8, v8, 1
-; RV64V-NEXT: vmv.x.s a0, v8
-; RV64V-NEXT: fmv.h.x fa5, a0
-; RV64V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
-; RV64V-NEXT: vslidedown.vi v8, v9, 1
-; RV64V-NEXT: vmv.x.s a0, v8
-; RV64V-NEXT: fsh fa5, 0(a0)
+; RV64V-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_v2bf16:
; RV32ZVE32F: # %bb.0:
-; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV32ZVE32F-NEXT: vmv.x.s a0, v0
-; RV32ZVE32F-NEXT: andi a1, a0, 1
-; RV32ZVE32F-NEXT: bnez a1, .LBB53_3
-; RV32ZVE32F-NEXT: # %bb.1: # %else
-; RV32ZVE32F-NEXT: andi a0, a0, 2
-; RV32ZVE32F-NEXT: bnez a0, .LBB53_4
-; RV32ZVE32F-NEXT: .LBB53_2: # %else2
-; RV32ZVE32F-NEXT: ret
-; RV32ZVE32F-NEXT: .LBB53_3: # %cond.store
-; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV32ZVE32F-NEXT: vmv.x.s a1, v8
-; RV32ZVE32F-NEXT: fmv.h.x fa5, a1
-; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; RV32ZVE32F-NEXT: vmv.x.s a1, v9
-; RV32ZVE32F-NEXT: fsh fa5, 0(a1)
-; RV32ZVE32F-NEXT: andi a0, a0, 2
-; RV32ZVE32F-NEXT: beqz a0, .LBB53_2
-; RV32ZVE32F-NEXT: .LBB53_4: # %cond.store1
-; RV32ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV32ZVE32F-NEXT: vmv.x.s a0, v8
-; RV32ZVE32F-NEXT: fmv.h.x fa5, a0
-; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV32ZVE32F-NEXT: vslidedown.vi v8, v9, 1
-; RV32ZVE32F-NEXT: vmv.x.s a0, v8
-; RV32ZVE32F-NEXT: fsh fa5, 0(a0)
+; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
+; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_v2bf16:
@@ -6077,120 +5975,14 @@ declare void @llvm.masked.scatter.v4bf16.v4p0(<4 x bfloat>, <4 x ptr>, i32, <4 x
define void @mscatter_v4bf16(<4 x bfloat> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV32-LABEL: mscatter_v4bf16:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV32-NEXT: vmv.x.s a0, v0
-; RV32-NEXT: andi a1, a0, 1
-; RV32-NEXT: bnez a1, .LBB54_5
-; RV32-NEXT: # %bb.1: # %else
-; RV32-NEXT: andi a1, a0, 2
-; RV32-NEXT: bnez a1, .LBB54_6
-; RV32-NEXT: .LBB54_2: # %else2
-; RV32-NEXT: andi a1, a0, 4
-; RV32-NEXT: bnez a1, .LBB54_7
-; RV32-NEXT: .LBB54_3: # %else4
-; RV32-NEXT: andi a0, a0, 8
-; RV32-NEXT: bnez a0, .LBB54_8
-; RV32-NEXT: .LBB54_4: # %else6
-; RV32-NEXT: ret
-; RV32-NEXT: .LBB54_5: # %cond.store
-; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV32-NEXT: vmv.x.s a1, v8
-; RV32-NEXT: fmv.h.x fa5, a1
-; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; RV32-NEXT: vmv.x.s a1, v9
-; RV32-NEXT: fsh fa5, 0(a1)
-; RV32-NEXT: andi a1, a0, 2
-; RV32-NEXT: beqz a1, .LBB54_2
-; RV32-NEXT: .LBB54_6: # %cond.store1
-; RV32-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV32-NEXT: vslidedown.vi v10, v8, 1
-; RV32-NEXT: vmv.x.s a1, v10
-; RV32-NEXT: fmv.h.x fa5, a1
-; RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v10, v9, 1
-; RV32-NEXT: vmv.x.s a1, v10
-; RV32-NEXT: fsh fa5, 0(a1)
-; RV32-NEXT: andi a1, a0, 4
-; RV32-NEXT: beqz a1, .LBB54_3
-; RV32-NEXT: .LBB54_7: # %cond.store3
-; RV32-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV32-NEXT: vslidedown.vi v10, v8, 2
-; RV32-NEXT: vmv.x.s a1, v10
-; RV32-NEXT: fmv.h.x fa5, a1
-; RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v10, v9, 2
-; RV32-NEXT: vmv.x.s a1, v10
-; RV32-NEXT: fsh fa5, 0(a1)
-; RV32-NEXT: andi a0, a0, 8
-; RV32-NEXT: beqz a0, .LBB54_4
-; RV32-NEXT: .LBB54_8: # %cond.store5
-; RV32-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 3
-; RV32-NEXT: vmv.x.s a0, v8
-; RV32-NEXT: fmv.h.x fa5, a0
-; RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v9, 3
-; RV32-NEXT: vmv.x.s a0, v8
-; RV32-NEXT: fsh fa5, 0(a0)
+; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_v4bf16:
; RV64V: # %bb.0:
-; RV64V-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64V-NEXT: vmv.x.s a0, v0
-; RV64V-NEXT: andi a1, a0, 1
-; RV64V-NEXT: bnez a1, .LBB54_5
-; RV64V-NEXT: # %bb.1: # %else
-; RV64V-NEXT: andi a1, a0, 2
-; RV64V-NEXT: bnez a1, .LBB54_6
-; RV64V-NEXT: .LBB54_2: # %else2
-; RV64V-NEXT: andi a1, a0, 4
-; RV64V-NEXT: bnez a1, .LBB54_7
-; RV64V-NEXT: .LBB54_3: # %else4
-; RV64V-NEXT: andi a0, a0, 8
-; RV64V-NEXT: bnez a0, .LBB54_8
-; RV64V-NEXT: .LBB54_4: # %else6
-; RV64V-NEXT: ret
-; RV64V-NEXT: .LBB54_5: # %cond.store
-; RV64V-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV64V-NEXT: vmv.x.s a1, v8
-; RV64V-NEXT: fmv.h.x fa5, a1
-; RV64V-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; RV64V-NEXT: vmv.x.s a1, v10
-; RV64V-NEXT: fsh fa5, 0(a1)
-; RV64V-NEXT: andi a1, a0, 2
-; RV64V-NEXT: beqz a1, .LBB54_2
-; RV64V-NEXT: .LBB54_6: # %cond.store1
-; RV64V-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV64V-NEXT: vslidedown.vi v9, v8, 1
-; RV64V-NEXT: vmv.x.s a1, v9
-; RV64V-NEXT: fmv.h.x fa5, a1
-; RV64V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64V-NEXT: vslidedown.vi v9, v10, 1
-; RV64V-NEXT: vmv.x.s a1, v9
-; RV64V-NEXT: fsh fa5, 0(a1)
-; RV64V-NEXT: andi a1, a0, 4
-; RV64V-NEXT: beqz a1, .LBB54_3
-; RV64V-NEXT: .LBB54_7: # %cond.store3
-; RV64V-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV64V-NEXT: vslidedown.vi v9, v8, 2
-; RV64V-NEXT: vmv.x.s a1, v9
-; RV64V-NEXT: fmv.h.x fa5, a1
-; RV64V-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV64V-NEXT: vslidedown.vi v12, v10, 2
-; RV64V-NEXT: vmv.x.s a1, v12
-; RV64V-NEXT: fsh fa5, 0(a1)
-; RV64V-NEXT: andi a0, a0, 8
-; RV64V-NEXT: beqz a0, .LBB54_4
-; RV64V-NEXT: .LBB54_8: # %cond.store5
-; RV64V-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV64V-NEXT: vslidedown.vi v8, v8, 3
-; RV64V-NEXT: vmv.x.s a0, v8
-; RV64V-NEXT: fmv.h.x fa5, a0
-; RV64V-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV64V-NEXT: vslidedown.vi v8, v10, 3
-; RV64V-NEXT: vmv.x.s a0, v8
-; RV64V-NEXT: fsh fa5, 0(a0)
+; RV64V-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV64V-NEXT: vsoxei64.v v8, (zero), v10, v0.t
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_v4bf16:
@@ -6251,70 +6043,14 @@ define void @mscatter_v4bf16(<4 x bfloat> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
define void @mscatter_truemask_v4bf16(<4 x bfloat> %val, <4 x ptr> %ptrs) {
; RV32-LABEL: mscatter_truemask_v4bf16:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vmv.x.s a0, v8
-; RV32-NEXT: fmv.h.x fa5, a0
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vmv.x.s a0, v9
-; RV32-NEXT: fsh fa5, 0(a0)
-; RV32-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV32-NEXT: vslidedown.vi v10, v8, 1
-; RV32-NEXT: vmv.x.s a0, v10
-; RV32-NEXT: fmv.h.x fa5, a0
-; RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v10, v9, 1
-; RV32-NEXT: vmv.x.s a0, v10
-; RV32-NEXT: fsh fa5, 0(a0)
-; RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV32-NEXT: vslidedown.vi v10, v8, 2
-; RV32-NEXT: vmv.x.s a0, v10
-; RV32-NEXT: fmv.h.x fa5, a0
-; RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v10, v9, 2
-; RV32-NEXT: vmv.x.s a0, v10
-; RV32-NEXT: fsh fa5, 0(a0)
-; RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 3
-; RV32-NEXT: vmv.x.s a0, v8
-; RV32-NEXT: fmv.h.x fa5, a0
-; RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v9, 3
-; RV32-NEXT: vmv.x.s a0, v8
-; RV32-NEXT: fsh fa5, 0(a0)
+; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV32-NEXT: vsoxei32.v v8, (zero), v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_truemask_v4bf16:
; RV64V: # %bb.0:
-; RV64V-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64V-NEXT: vmv.x.s a0, v8
-; RV64V-NEXT: fmv.h.x fa5, a0
-; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; RV64V-NEXT: vmv.x.s a0, v10
-; RV64V-NEXT: fsh fa5, 0(a0)
-; RV64V-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV64V-NEXT: vslidedown.vi v9, v8, 1
-; RV64V-NEXT: vmv.x.s a0, v9
-; RV64V-NEXT: fmv.h.x fa5, a0
-; RV64V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64V-NEXT: vslidedown.vi v9, v10, 1
-; RV64V-NEXT: vmv.x.s a0, v9
-; RV64V-NEXT: fsh fa5, 0(a0)
-; RV64V-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV64V-NEXT: vslidedown.vi v9, v8, 2
-; RV64V-NEXT: vmv.x.s a0, v9
-; RV64V-NEXT: fmv.h.x fa5, a0
-; RV64V-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV64V-NEXT: vslidedown.vi v12, v10, 2
-; RV64V-NEXT: vmv.x.s a0, v12
-; RV64V-NEXT: fsh fa5, 0(a0)
-; RV64V-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64V-NEXT: vslidedown.vi v8, v8, 3
-; RV64V-NEXT: vmv.x.s a0, v8
-; RV64V-NEXT: fmv.h.x fa5, a0
-; RV64V-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV64V-NEXT: vslidedown.vi v8, v10, 3
-; RV64V-NEXT: vmv.x.s a0, v8
-; RV64V-NEXT: fsh fa5, 0(a0)
+; RV64V-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV64V-NEXT: vsoxei64.v v8, (zero), v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_truemask_v4bf16:
@@ -6357,245 +6093,15 @@ declare void @llvm.masked.scatter.v8bf16.v8p0(<8 x bfloat>, <8 x ptr>, i32, <8 x
define void @mscatter_v8bf16(<8 x bfloat> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV32-LABEL: mscatter_v8bf16:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV32-NEXT: vmv.x.s a0, v0
-; RV32-NEXT: andi a1, a0, 1
-; RV32-NEXT: bnez a1, .LBB57_9
-; RV32-NEXT: # %bb.1: # %else
-; RV32-NEXT: andi a1, a0, 2
-; RV32-NEXT: bnez a1, .LBB57_10
-; RV32-NEXT: .LBB57_2: # %else2
-; RV32-NEXT: andi a1, a0, 4
-; RV32-NEXT: bnez a1, .LBB57_11
-; RV32-NEXT: .LBB57_3: # %else4
-; RV32-NEXT: andi a1, a0, 8
-; RV32-NEXT: bnez a1, .LBB57_12
-; RV32-NEXT: .LBB57_4: # %else6
-; RV32-NEXT: andi a1, a0, 16
-; RV32-NEXT: bnez a1, .LBB57_13
-; RV32-NEXT: .LBB57_5: # %else8
-; RV32-NEXT: andi a1, a0, 32
-; RV32-NEXT: bnez a1, .LBB57_14
-; RV32-NEXT: .LBB57_6: # %else10
-; RV32-NEXT: andi a1, a0, 64
-; RV32-NEXT: bnez a1, .LBB57_15
-; RV32-NEXT: .LBB57_7: # %else12
-; RV32-NEXT: andi a0, a0, -128
-; RV32-NEXT: bnez a0, .LBB57_16
-; RV32-NEXT: .LBB57_8: # %else14
-; RV32-NEXT: ret
-; RV32-NEXT: .LBB57_9: # %cond.store
-; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV32-NEXT: vmv.x.s a1, v8
-; RV32-NEXT: fmv.h.x fa5, a1
-; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; RV32-NEXT: vmv.x.s a1, v10
-; RV32-NEXT: fsh fa5, 0(a1)
-; RV32-NEXT: andi a1, a0, 2
-; RV32-NEXT: beqz a1, .LBB57_2
-; RV32-NEXT: .LBB57_10: # %cond.store1
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 1
-; RV32-NEXT: vmv.x.s a1, v9
-; RV32-NEXT: fmv.h.x fa5, a1
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v10, 1
-; RV32-NEXT: vmv.x.s a1, v9
-; RV32-NEXT: fsh fa5, 0(a1)
-; RV32-NEXT: andi a1, a0, 4
-; RV32-NEXT: beqz a1, .LBB57_3
-; RV32-NEXT: .LBB57_11: # %cond.store3
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 2
-; RV32-NEXT: vmv.x.s a1, v9
-; RV32-NEXT: fmv.h.x fa5, a1
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v10, 2
-; RV32-NEXT: vmv.x.s a1, v9
-; RV32-NEXT: fsh fa5, 0(a1)
-; RV32-NEXT: andi a1, a0, 8
-; RV32-NEXT: beqz a1, .LBB57_4
-; RV32-NEXT: .LBB57_12: # %cond.store5
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 3
-; RV32-NEXT: vmv.x.s a1, v9
-; RV32-NEXT: fmv.h.x fa5, a1
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v10, 3
-; RV32-NEXT: vmv.x.s a1, v9
-; RV32-NEXT: fsh fa5, 0(a1)
-; RV32-NEXT: andi a1, a0, 16
-; RV32-NEXT: beqz a1, .LBB57_5
-; RV32-NEXT: .LBB57_13: # %cond.store7
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 4
-; RV32-NEXT: vmv.x.s a1, v9
-; RV32-NEXT: fmv.h.x fa5, a1
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v12, v10, 4
-; RV32-NEXT: vmv.x.s a1, v12
-; RV32-NEXT: fsh fa5, 0(a1)
-; RV32-NEXT: andi a1, a0, 32
-; RV32-NEXT: beqz a1, .LBB57_6
-; RV32-NEXT: .LBB57_14: # %cond.store9
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 5
-; RV32-NEXT: vmv.x.s a1, v9
-; RV32-NEXT: fmv.h.x fa5, a1
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v12, v10, 5
-; RV32-NEXT: vmv.x.s a1, v12
-; RV32-NEXT: fsh fa5, 0(a1)
-; RV32-NEXT: andi a1, a0, 64
-; RV32-NEXT: beqz a1, .LBB57_7
-; RV32-NEXT: .LBB57_15: # %cond.store11
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 6
-; RV32-NEXT: vmv.x.s a1, v9
-; RV32-NEXT: fmv.h.x fa5, a1
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v12, v10, 6
-; RV32-NEXT: vmv.x.s a1, v12
-; RV32-NEXT: fsh fa5, 0(a1)
-; RV32-NEXT: andi a0, a0, -128
-; RV32-NEXT: beqz a0, .LBB57_8
-; RV32-NEXT: .LBB57_16: # %cond.store13
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 7
-; RV32-NEXT: vmv.x.s a0, v8
-; RV32-NEXT: fmv.h.x fa5, a0
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v10, 7
-; RV32-NEXT: vmv.x.s a0, v8
-; RV32-NEXT: fsh fa5, 0(a0)
+; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_v8bf16:
; RV64V: # %bb.0:
-; RV64V-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64V-NEXT: vmv.x.s a0, v0
-; RV64V-NEXT: andi a1, a0, 1
-; RV64V-NEXT: bnez a1, .LBB57_11
-; RV64V-NEXT: # %bb.1: # %else
-; RV64V-NEXT: andi a1, a0, 2
-; RV64V-NEXT: bnez a1, .LBB57_12
-; RV64V-NEXT: .LBB57_2: # %else2
-; RV64V-NEXT: andi a1, a0, 4
-; RV64V-NEXT: bnez a1, .LBB57_13
-; RV64V-NEXT: .LBB57_3: # %else4
-; RV64V-NEXT: andi a1, a0, 8
-; RV64V-NEXT: beqz a1, .LBB57_5
-; RV64V-NEXT: .LBB57_4: # %cond.store5
-; RV64V-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64V-NEXT: vslidedown.vi v9, v8, 3
-; RV64V-NEXT: vmv.x.s a1, v9
-; RV64V-NEXT: fmv.h.x fa5, a1
-; RV64V-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64V-NEXT: vslidedown.vi v10, v12, 3
-; RV64V-NEXT: vmv.x.s a1, v10
-; RV64V-NEXT: fsh fa5, 0(a1)
-; RV64V-NEXT: .LBB57_5: # %else6
-; RV64V-NEXT: addi sp, sp, -320
-; RV64V-NEXT: .cfi_def_cfa_offset 320
-; RV64V-NEXT: sd ra, 312(sp) # 8-byte Folded Spill
-; RV64V-NEXT: sd s0, 304(sp) # 8-byte Folded Spill
-; RV64V-NEXT: .cfi_offset ra, -8
-; RV64V-NEXT: .cfi_offset s0, -16
-; RV64V-NEXT: addi s0, sp, 320
-; RV64V-NEXT: .cfi_def_cfa s0, 0
-; RV64V-NEXT: andi sp, sp, -64
-; RV64V-NEXT: andi a1, a0, 16
-; RV64V-NEXT: bnez a1, .LBB57_14
-; RV64V-NEXT: # %bb.6: # %else8
-; RV64V-NEXT: andi a1, a0, 32
-; RV64V-NEXT: bnez a1, .LBB57_15
-; RV64V-NEXT: .LBB57_7: # %else10
-; RV64V-NEXT: andi a1, a0, 64
-; RV64V-NEXT: bnez a1, .LBB57_16
-; RV64V-NEXT: .LBB57_8: # %else12
-; RV64V-NEXT: andi a0, a0, -128
-; RV64V-NEXT: beqz a0, .LBB57_10
-; RV64V-NEXT: .LBB57_9: # %cond.store13
-; RV64V-NEXT: mv a0, sp
; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64V-NEXT: vse64.v v12, (a0)
-; RV64V-NEXT: ld a0, 56(sp)
-; RV64V-NEXT: vslidedown.vi v8, v8, 7
-; RV64V-NEXT: vmv.x.s a1, v8
-; RV64V-NEXT: fmv.h.x fa5, a1
-; RV64V-NEXT: fsh fa5, 0(a0)
-; RV64V-NEXT: .LBB57_10: # %else14
-; RV64V-NEXT: addi sp, s0, -320
-; RV64V-NEXT: ld ra, 312(sp) # 8-byte Folded Reload
-; RV64V-NEXT: ld s0, 304(sp) # 8-byte Folded Reload
-; RV64V-NEXT: addi sp, sp, 320
+; RV64V-NEXT: vsoxei64.v v8, (zero), v12, v0.t
; RV64V-NEXT: ret
-; RV64V-NEXT: .LBB57_11: # %cond.store
-; RV64V-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV64V-NEXT: vmv.x.s a1, v8
-; RV64V-NEXT: fmv.h.x fa5, a1
-; RV64V-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; RV64V-NEXT: vmv.x.s a1, v12
-; RV64V-NEXT: fsh fa5, 0(a1)
-; RV64V-NEXT: andi a1, a0, 2
-; RV64V-NEXT: beqz a1, .LBB57_2
-; RV64V-NEXT: .LBB57_12: # %cond.store1
-; RV64V-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64V-NEXT: vslidedown.vi v9, v8, 1
-; RV64V-NEXT: vmv.x.s a1, v9
-; RV64V-NEXT: fmv.h.x fa5, a1
-; RV64V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64V-NEXT: vslidedown.vi v9, v12, 1
-; RV64V-NEXT: vmv.x.s a1, v9
-; RV64V-NEXT: fsh fa5, 0(a1)
-; RV64V-NEXT: andi a1, a0, 4
-; RV64V-NEXT: beqz a1, .LBB57_3
-; RV64V-NEXT: .LBB57_13: # %cond.store3
-; RV64V-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64V-NEXT: vslidedown.vi v9, v8, 2
-; RV64V-NEXT: vmv.x.s a1, v9
-; RV64V-NEXT: fmv.h.x fa5, a1
-; RV64V-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64V-NEXT: vslidedown.vi v10, v12, 2
-; RV64V-NEXT: vmv.x.s a1, v10
-; RV64V-NEXT: fsh fa5, 0(a1)
-; RV64V-NEXT: andi a1, a0, 8
-; RV64V-NEXT: bnez a1, .LBB57_4
-; RV64V-NEXT: j .LBB57_5
-; RV64V-NEXT: .LBB57_14: # %cond.store7
-; RV64V-NEXT: addi a1, sp, 192
-; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64V-NEXT: vse64.v v12, (a1)
-; RV64V-NEXT: ld a1, 224(sp)
-; RV64V-NEXT: vslidedown.vi v9, v8, 4
-; RV64V-NEXT: vmv.x.s a2, v9
-; RV64V-NEXT: fmv.h.x fa5, a2
-; RV64V-NEXT: fsh fa5, 0(a1)
-; RV64V-NEXT: andi a1, a0, 32
-; RV64V-NEXT: beqz a1, .LBB57_7
-; RV64V-NEXT: .LBB57_15: # %cond.store9
-; RV64V-NEXT: addi a1, sp, 128
-; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64V-NEXT: vse64.v v12, (a1)
-; RV64V-NEXT: ld a1, 168(sp)
-; RV64V-NEXT: vslidedown.vi v9, v8, 5
-; RV64V-NEXT: vmv.x.s a2, v9
-; RV64V-NEXT: fmv.h.x fa5, a2
-; RV64V-NEXT: fsh fa5, 0(a1)
-; RV64V-NEXT: andi a1, a0, 64
-; RV64V-NEXT: beqz a1, .LBB57_8
-; RV64V-NEXT: .LBB57_16: # %cond.store11
-; RV64V-NEXT: addi a1, sp, 64
-; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64V-NEXT: vse64.v v12, (a1)
-; RV64V-NEXT: ld a1, 112(sp)
-; RV64V-NEXT: vslidedown.vi v9, v8, 6
-; RV64V-NEXT: vmv.x.s a2, v9
-; RV64V-NEXT: fmv.h.x fa5, a2
-; RV64V-NEXT: fsh fa5, 0(a1)
-; RV64V-NEXT: andi a0, a0, -128
-; RV64V-NEXT: bnez a0, .LBB57_9
-; RV64V-NEXT: j .LBB57_10
;
; RV64ZVE32F-LABEL: mscatter_v8bf16:
; RV64ZVE32F: # %bb.0:
@@ -6706,119 +6212,8 @@ define void @mscatter_baseidx_v8i8_v8bf16(<8 x bfloat> %val, ptr %base, <8 x i8>
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsext.vf4 v10, v9
; RV32-NEXT: vadd.vv v10, v10, v10
-; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV32-NEXT: vmv.x.s a1, v0
-; RV32-NEXT: andi a2, a1, 1
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vadd.vx v10, v10, a0
-; RV32-NEXT: bnez a2, .LBB58_9
-; RV32-NEXT: # %bb.1: # %else
-; RV32-NEXT: andi a0, a1, 2
-; RV32-NEXT: bnez a0, .LBB58_10
-; RV32-NEXT: .LBB58_2: # %else2
-; RV32-NEXT: andi a0, a1, 4
-; RV32-NEXT: bnez a0, .LBB58_11
-; RV32-NEXT: .LBB58_3: # %else4
-; RV32-NEXT: andi a0, a1, 8
-; RV32-NEXT: bnez a0, .LBB58_12
-; RV32-NEXT: .LBB58_4: # %else6
-; RV32-NEXT: andi a0, a1, 16
-; RV32-NEXT: bnez a0, .LBB58_13
-; RV32-NEXT: .LBB58_5: # %else8
-; RV32-NEXT: andi a0, a1, 32
-; RV32-NEXT: bnez a0, .LBB58_14
-; RV32-NEXT: .LBB58_6: # %else10
-; RV32-NEXT: andi a0, a1, 64
-; RV32-NEXT: bnez a0, .LBB58_15
-; RV32-NEXT: .LBB58_7: # %else12
-; RV32-NEXT: andi a0, a1, -128
-; RV32-NEXT: bnez a0, .LBB58_16
-; RV32-NEXT: .LBB58_8: # %else14
-; RV32-NEXT: ret
-; RV32-NEXT: .LBB58_9: # %cond.store
; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV32-NEXT: vmv.x.s a0, v8
-; RV32-NEXT: fmv.h.x fa5, a0
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vmv.x.s a0, v10
-; RV32-NEXT: fsh fa5, 0(a0)
-; RV32-NEXT: andi a0, a1, 2
-; RV32-NEXT: beqz a0, .LBB58_2
-; RV32-NEXT: .LBB58_10: # %cond.store1
-; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 1
-; RV32-NEXT: vmv.x.s a0, v9
-; RV32-NEXT: fmv.h.x fa5, a0
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v10, 1
-; RV32-NEXT: vmv.x.s a0, v9
-; RV32-NEXT: fsh fa5, 0(a0)
-; RV32-NEXT: andi a0, a1, 4
-; RV32-NEXT: beqz a0, .LBB58_3
-; RV32-NEXT: .LBB58_11: # %cond.store3
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 2
-; RV32-NEXT: vmv.x.s a0, v9
-; RV32-NEXT: fmv.h.x fa5, a0
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v10, 2
-; RV32-NEXT: vmv.x.s a0, v9
-; RV32-NEXT: fsh fa5, 0(a0)
-; RV32-NEXT: andi a0, a1, 8
-; RV32-NEXT: beqz a0, .LBB58_4
-; RV32-NEXT: .LBB58_12: # %cond.store5
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 3
-; RV32-NEXT: vmv.x.s a0, v9
-; RV32-NEXT: fmv.h.x fa5, a0
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v10, 3
-; RV32-NEXT: vmv.x.s a0, v9
-; RV32-NEXT: fsh fa5, 0(a0)
-; RV32-NEXT: andi a0, a1, 16
-; RV32-NEXT: beqz a0, .LBB58_5
-; RV32-NEXT: .LBB58_13: # %cond.store7
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 4
-; RV32-NEXT: vmv.x.s a0, v9
-; RV32-NEXT: fmv.h.x fa5, a0
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v12, v10, 4
-; RV32-NEXT: vmv.x.s a0, v12
-; RV32-NEXT: fsh fa5, 0(a0)
-; RV32-NEXT: andi a0, a1, 32
-; RV32-NEXT: beqz a0, .LBB58_6
-; RV32-NEXT: .LBB58_14: # %cond.store9
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 5
-; RV32-NEXT: vmv.x.s a0, v9
-; RV32-NEXT: fmv.h.x fa5, a0
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v12, v10, 5
-; RV32-NEXT: vmv.x.s a0, v12
-; RV32-NEXT: fsh fa5, 0(a0)
-; RV32-NEXT: andi a0, a1, 64
-; RV32-NEXT: beqz a0, .LBB58_7
-; RV32-NEXT: .LBB58_15: # %cond.store11
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 6
-; RV32-NEXT: vmv.x.s a0, v9
-; RV32-NEXT: fmv.h.x fa5, a0
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v12, v10, 6
-; RV32-NEXT: vmv.x.s a0, v12
-; RV32-NEXT: fsh fa5, 0(a0)
-; RV32-NEXT: andi a0, a1, -128
-; RV32-NEXT: beqz a0, .LBB58_8
-; RV32-NEXT: .LBB58_16: # %cond.store13
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 7
-; RV32-NEXT: vmv.x.s a0, v8
-; RV32-NEXT: fmv.h.x fa5, a0
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v10, 7
-; RV32-NEXT: vmv.x.s a0, v8
-; RV32-NEXT: fsh fa5, 0(a0)
+; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_baseidx_v8i8_v8bf16:
@@ -6826,132 +6221,9 @@ define void @mscatter_baseidx_v8i8_v8bf16(<8 x bfloat> %val, ptr %base, <8 x i8>
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf8 v12, v9
; RV64V-NEXT: vadd.vv v12, v12, v12
-; RV64V-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV64V-NEXT: vmv.x.s a1, v0
-; RV64V-NEXT: andi a2, a1, 1
-; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; RV64V-NEXT: vadd.vx v12, v12, a0
-; RV64V-NEXT: bnez a2, .LBB58_11
-; RV64V-NEXT: # %bb.1: # %else
-; RV64V-NEXT: andi a0, a1, 2
-; RV64V-NEXT: bnez a0, .LBB58_12
-; RV64V-NEXT: .LBB58_2: # %else2
-; RV64V-NEXT: andi a0, a1, 4
-; RV64V-NEXT: bnez a0, .LBB58_13
-; RV64V-NEXT: .LBB58_3: # %else4
-; RV64V-NEXT: andi a0, a1, 8
-; RV64V-NEXT: beqz a0, .LBB58_5
-; RV64V-NEXT: .LBB58_4: # %cond.store5
-; RV64V-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64V-NEXT: vslidedown.vi v9, v8, 3
-; RV64V-NEXT: vmv.x.s a0, v9
-; RV64V-NEXT: fmv.h.x fa5, a0
-; RV64V-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64V-NEXT: vslidedown.vi v10, v12, 3
-; RV64V-NEXT: vmv.x.s a0, v10
-; RV64V-NEXT: fsh fa5, 0(a0)
-; RV64V-NEXT: .LBB58_5: # %else6
-; RV64V-NEXT: addi sp, sp, -320
-; RV64V-NEXT: .cfi_def_cfa_offset 320
-; RV64V-NEXT: sd ra, 312(sp) # 8-byte Folded Spill
-; RV64V-NEXT: sd s0, 304(sp) # 8-byte Folded Spill
-; RV64V-NEXT: .cfi_offset ra, -8
-; RV64V-NEXT: .cfi_offset s0, -16
-; RV64V-NEXT: addi s0, sp, 320
-; RV64V-NEXT: .cfi_def_cfa s0, 0
-; RV64V-NEXT: andi sp, sp, -64
-; RV64V-NEXT: andi a0, a1, 16
-; RV64V-NEXT: bnez a0, .LBB58_14
-; RV64V-NEXT: # %bb.6: # %else8
-; RV64V-NEXT: andi a0, a1, 32
-; RV64V-NEXT: bnez a0, .LBB58_15
-; RV64V-NEXT: .LBB58_7: # %else10
-; RV64V-NEXT: andi a0, a1, 64
-; RV64V-NEXT: bnez a0, .LBB58_16
-; RV64V-NEXT: .LBB58_8: # %else12
-; RV64V-NEXT: andi a0, a1, -128
-; RV64V-NEXT: beqz a0, .LBB58_10
-; RV64V-NEXT: .LBB58_9: # %cond.store13
-; RV64V-NEXT: mv a0, sp
-; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64V-NEXT: vse64.v v12, (a0)
-; RV64V-NEXT: ld a0, 56(sp)
-; RV64V-NEXT: vslidedown.vi v8, v8, 7
-; RV64V-NEXT: vmv.x.s a1, v8
-; RV64V-NEXT: fmv.h.x fa5, a1
-; RV64V-NEXT: fsh fa5, 0(a0)
-; RV64V-NEXT: .LBB58_10: # %else14
-; RV64V-NEXT: addi sp, s0, -320
-; RV64V-NEXT: ld ra, 312(sp) # 8-byte Folded Reload
-; RV64V-NEXT: ld s0, 304(sp) # 8-byte Folded Reload
-; RV64V-NEXT: addi sp, sp, 320
-; RV64V-NEXT: ret
-; RV64V-NEXT: .LBB58_11: # %cond.store
; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV64V-NEXT: vmv.x.s a0, v8
-; RV64V-NEXT: fmv.h.x fa5, a0
-; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; RV64V-NEXT: vmv.x.s a0, v12
-; RV64V-NEXT: fsh fa5, 0(a0)
-; RV64V-NEXT: andi a0, a1, 2
-; RV64V-NEXT: beqz a0, .LBB58_2
-; RV64V-NEXT: .LBB58_12: # %cond.store1
-; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV64V-NEXT: vslidedown.vi v9, v8, 1
-; RV64V-NEXT: vmv.x.s a0, v9
-; RV64V-NEXT: fmv.h.x fa5, a0
-; RV64V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64V-NEXT: vslidedown.vi v9, v12, 1
-; RV64V-NEXT: vmv.x.s a0, v9
-; RV64V-NEXT: fsh fa5, 0(a0)
-; RV64V-NEXT: andi a0, a1, 4
-; RV64V-NEXT: beqz a0, .LBB58_3
-; RV64V-NEXT: .LBB58_13: # %cond.store3
-; RV64V-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64V-NEXT: vslidedown.vi v9, v8, 2
-; RV64V-NEXT: vmv.x.s a0, v9
-; RV64V-NEXT: fmv.h.x fa5, a0
-; RV64V-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64V-NEXT: vslidedown.vi v10, v12, 2
-; RV64V-NEXT: vmv.x.s a0, v10
-; RV64V-NEXT: fsh fa5, 0(a0)
-; RV64V-NEXT: andi a0, a1, 8
-; RV64V-NEXT: bnez a0, .LBB58_4
-; RV64V-NEXT: j .LBB58_5
-; RV64V-NEXT: .LBB58_14: # %cond.store7
-; RV64V-NEXT: addi a0, sp, 192
-; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64V-NEXT: vse64.v v12, (a0)
-; RV64V-NEXT: ld a0, 224(sp)
-; RV64V-NEXT: vslidedown.vi v9, v8, 4
-; RV64V-NEXT: vmv.x.s a2, v9
-; RV64V-NEXT: fmv.h.x fa5, a2
-; RV64V-NEXT: fsh fa5, 0(a0)
-; RV64V-NEXT: andi a0, a1, 32
-; RV64V-NEXT: beqz a0, .LBB58_7
-; RV64V-NEXT: .LBB58_15: # %cond.store9
-; RV64V-NEXT: addi a0, sp, 128
-; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64V-NEXT: vse64.v v12, (a0)
-; RV64V-NEXT: ld a0, 168(sp)
-; RV64V-NEXT: vslidedown.vi v9, v8, 5
-; RV64V-NEXT: vmv.x.s a2, v9
-; RV64V-NEXT: fmv.h.x fa5, a2
-; RV64V-NEXT: fsh fa5, 0(a0)
-; RV64V-NEXT: andi a0, a1, 64
-; RV64V-NEXT: beqz a0, .LBB58_8
-; RV64V-NEXT: .LBB58_16: # %cond.store11
-; RV64V-NEXT: addi a0, sp, 64
-; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64V-NEXT: vse64.v v12, (a0)
-; RV64V-NEXT: ld a0, 112(sp)
-; RV64V-NEXT: vslidedown.vi v9, v8, 6
-; RV64V-NEXT: vmv.x.s a2, v9
-; RV64V-NEXT: fmv.h.x fa5, a2
-; RV64V-NEXT: fsh fa5, 0(a0)
-; RV64V-NEXT: andi a0, a1, -128
-; RV64V-NEXT: bnez a0, .LBB58_9
-; RV64V-NEXT: j .LBB58_10
+; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
+; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_v8i8_v8bf16:
; RV64ZVE32F: # %bb.0:
@@ -7089,119 +6361,8 @@ define void @mscatter_baseidx_sext_v8i8_v8bf16(<8 x bfloat> %val, ptr %base, <8
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsext.vf4 v10, v9
; RV32-NEXT: vadd.vv v10, v10, v10
-; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV32-NEXT: vmv.x.s a1, v0
-; RV32-NEXT: andi a2, a1, 1
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vadd.vx v10, v10, a0
-; RV32-NEXT: bnez a2, .LBB59_9
-; RV32-NEXT: # %bb.1: # %else
-; RV32-NEXT: andi a0, a1, 2
-; RV32-NEXT: bnez a0, .LBB59_10
-; RV32-NEXT: .LBB59_2: # %else2
-; RV32-NEXT: andi a0, a1, 4
-; RV32-NEXT: bnez a0, .LBB59_11
-; RV32-NEXT: .LBB59_3: # %else4
-; RV32-NEXT: andi a0, a1, 8
-; RV32-NEXT: bnez a0, .LBB59_12
-; RV32-NEXT: .LBB59_4: # %else6
-; RV32-NEXT: andi a0, a1, 16
-; RV32-NEXT: bnez a0, .LBB59_13
-; RV32-NEXT: .LBB59_5: # %else8
-; RV32-NEXT: andi a0, a1, 32
-; RV32-NEXT: bnez a0, .LBB59_14
-; RV32-NEXT: .LBB59_6: # %else10
-; RV32-NEXT: andi a0, a1, 64
-; RV32-NEXT: bnez a0, .LBB59_15
-; RV32-NEXT: .LBB59_7: # %else12
-; RV32-NEXT: andi a0, a1, -128
-; RV32-NEXT: bnez a0, .LBB59_16
-; RV32-NEXT: .LBB59_8: # %else14
-; RV32-NEXT: ret
-; RV32-NEXT: .LBB59_9: # %cond.store
-; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV32-NEXT: vmv.x.s a0, v8
-; RV32-NEXT: fmv.h.x fa5, a0
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vmv.x.s a0, v10
-; RV32-NEXT: fsh fa5, 0(a0)
-; RV32-NEXT: andi a0, a1, 2
-; RV32-NEXT: beqz a0, .LBB59_2
-; RV32-NEXT: .LBB59_10: # %cond.store1
; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 1
-; RV32-NEXT: vmv.x.s a0, v9
-; RV32-NEXT: fmv.h.x fa5, a0
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v10, 1
-; RV32-NEXT: vmv.x.s a0, v9
-; RV32-NEXT: fsh fa5, 0(a0)
-; RV32-NEXT: andi a0, a1, 4
-; RV32-NEXT: beqz a0, .LBB59_3
-; RV32-NEXT: .LBB59_11: # %cond.store3
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 2
-; RV32-NEXT: vmv.x.s a0, v9
-; RV32-NEXT: fmv.h.x fa5, a0
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v10, 2
-; RV32-NEXT: vmv.x.s a0, v9
-; RV32-NEXT: fsh fa5, 0(a0)
-; RV32-NEXT: andi a0, a1, 8
-; RV32-NEXT: beqz a0, .LBB59_4
-; RV32-NEXT: .LBB59_12: # %cond.store5
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 3
-; RV32-NEXT: vmv.x.s a0, v9
-; RV32-NEXT: fmv.h.x fa5, a0
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v10, 3
-; RV32-NEXT: vmv.x.s a0, v9
-; RV32-NEXT: fsh fa5, 0(a0)
-; RV32-NEXT: andi a0, a1, 16
-; RV32-NEXT: beqz a0, .LBB59_5
-; RV32-NEXT: .LBB59_13: # %cond.store7
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 4
-; RV32-NEXT: vmv.x.s a0, v9
-; RV32-NEXT: fmv.h.x fa5, a0
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v12, v10, 4
-; RV32-NEXT: vmv.x.s a0, v12
-; RV32-NEXT: fsh fa5, 0(a0)
-; RV32-NEXT: andi a0, a1, 32
-; RV32-NEXT: beqz a0, .LBB59_6
-; RV32-NEXT: .LBB59_14: # %cond.store9
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 5
-; RV32-NEXT: vmv.x.s a0, v9
-; RV32-NEXT: fmv.h.x fa5, a0
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v12, v10, 5
-; RV32-NEXT: vmv.x.s a0, v12
-; RV32-NEXT: fsh fa5, 0(a0)
-; RV32-NEXT: andi a0, a1, 64
-; RV32-NEXT: beqz a0, .LBB59_7
-; RV32-NEXT: .LBB59_15: # %cond.store11
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 6
-; RV32-NEXT: vmv.x.s a0, v9
-; RV32-NEXT: fmv.h.x fa5, a0
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v12, v10, 6
-; RV32-NEXT: vmv.x.s a0, v12
-; RV32-NEXT: fsh fa5, 0(a0)
-; RV32-NEXT: andi a0, a1, -128
-; RV32-NEXT: beqz a0, .LBB59_8
-; RV32-NEXT: .LBB59_16: # %cond.store13
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 7
-; RV32-NEXT: vmv.x.s a0, v8
-; RV32-NEXT: fmv.h.x fa5, a0
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v10, 7
-; RV32-NEXT: vmv.x.s a0, v8
-; RV32-NEXT: fsh fa5, 0(a0)
+; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_baseidx_sext_v8i8_v8bf16:
@@ -7209,132 +6370,9 @@ define void @mscatter_baseidx_sext_v8i8_v8bf16(<8 x bfloat> %val, ptr %base, <8
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf8 v12, v9
; RV64V-NEXT: vadd.vv v12, v12, v12
-; RV64V-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV64V-NEXT: vmv.x.s a1, v0
-; RV64V-NEXT: andi a2, a1, 1
-; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; RV64V-NEXT: vadd.vx v12, v12, a0
-; RV64V-NEXT: bnez a2, .LBB59_11
-; RV64V-NEXT: # %bb.1: # %else
-; RV64V-NEXT: andi a0, a1, 2
-; RV64V-NEXT: bnez a0, .LBB59_12
-; RV64V-NEXT: .LBB59_2: # %else2
-; RV64V-NEXT: andi a0, a1, 4
-; RV64V-NEXT: bnez a0, .LBB59_13
-; RV64V-NEXT: .LBB59_3: # %else4
-; RV64V-NEXT: andi a0, a1, 8
-; RV64V-NEXT: beqz a0, .LBB59_5
-; RV64V-NEXT: .LBB59_4: # %cond.store5
-; RV64V-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64V-NEXT: vslidedown.vi v9, v8, 3
-; RV64V-NEXT: vmv.x.s a0, v9
-; RV64V-NEXT: fmv.h.x fa5, a0
-; RV64V-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64V-NEXT: vslidedown.vi v10, v12, 3
-; RV64V-NEXT: vmv.x.s a0, v10
-; RV64V-NEXT: fsh fa5, 0(a0)
-; RV64V-NEXT: .LBB59_5: # %else6
-; RV64V-NEXT: addi sp, sp, -320
-; RV64V-NEXT: .cfi_def_cfa_offset 320
-; RV64V-NEXT: sd ra, 312(sp) # 8-byte Folded Spill
-; RV64V-NEXT: sd s0, 304(sp) # 8-byte Folded Spill
-; RV64V-NEXT: .cfi_offset ra, -8
-; RV64V-NEXT: .cfi_offset s0, -16
-; RV64V-NEXT: addi s0, sp, 320
-; RV64V-NEXT: .cfi_def_cfa s0, 0
-; RV64V-NEXT: andi sp, sp, -64
-; RV64V-NEXT: andi a0, a1, 16
-; RV64V-NEXT: bnez a0, .LBB59_14
-; RV64V-NEXT: # %bb.6: # %else8
-; RV64V-NEXT: andi a0, a1, 32
-; RV64V-NEXT: bnez a0, .LBB59_15
-; RV64V-NEXT: .LBB59_7: # %else10
-; RV64V-NEXT: andi a0, a1, 64
-; RV64V-NEXT: bnez a0, .LBB59_16
-; RV64V-NEXT: .LBB59_8: # %else12
-; RV64V-NEXT: andi a0, a1, -128
-; RV64V-NEXT: beqz a0, .LBB59_10
-; RV64V-NEXT: .LBB59_9: # %cond.store13
-; RV64V-NEXT: mv a0, sp
-; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64V-NEXT: vse64.v v12, (a0)
-; RV64V-NEXT: ld a0, 56(sp)
-; RV64V-NEXT: vslidedown.vi v8, v8, 7
-; RV64V-NEXT: vmv.x.s a1, v8
-; RV64V-NEXT: fmv.h.x fa5, a1
-; RV64V-NEXT: fsh fa5, 0(a0)
-; RV64V-NEXT: .LBB59_10: # %else14
-; RV64V-NEXT: addi sp, s0, -320
-; RV64V-NEXT: ld ra, 312(sp) # 8-byte Folded Reload
-; RV64V-NEXT: ld s0, 304(sp) # 8-byte Folded Reload
-; RV64V-NEXT: addi sp, sp, 320
-; RV64V-NEXT: ret
-; RV64V-NEXT: .LBB59_11: # %cond.store
; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV64V-NEXT: vmv.x.s a0, v8
-; RV64V-NEXT: fmv.h.x fa5, a0
-; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; RV64V-NEXT: vmv.x.s a0, v12
-; RV64V-NEXT: fsh fa5, 0(a0)
-; RV64V-NEXT: andi a0, a1, 2
-; RV64V-NEXT: beqz a0, .LBB59_2
-; RV64V-NEXT: .LBB59_12: # %cond.store1
-; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV64V-NEXT: vslidedown.vi v9, v8, 1
-; RV64V-NEXT: vmv.x.s a0, v9
-; RV64V-NEXT: fmv.h.x fa5, a0
-; RV64V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64V-NEXT: vslidedown.vi v9, v12, 1
-; RV64V-NEXT: vmv.x.s a0, v9
-; RV64V-NEXT: fsh fa5, 0(a0)
-; RV64V-NEXT: andi a0, a1, 4
-; RV64V-NEXT: beqz a0, .LBB59_3
-; RV64V-NEXT: .LBB59_13: # %cond.store3
-; RV64V-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64V-NEXT: vslidedown.vi v9, v8, 2
-; RV64V-NEXT: vmv.x.s a0, v9
-; RV64V-NEXT: fmv.h.x fa5, a0
-; RV64V-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64V-NEXT: vslidedown.vi v10, v12, 2
-; RV64V-NEXT: vmv.x.s a0, v10
-; RV64V-NEXT: fsh fa5, 0(a0)
-; RV64V-NEXT: andi a0, a1, 8
-; RV64V-NEXT: bnez a0, .LBB59_4
-; RV64V-NEXT: j .LBB59_5
-; RV64V-NEXT: .LBB59_14: # %cond.store7
-; RV64V-NEXT: addi a0, sp, 192
-; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64V-NEXT: vse64.v v12, (a0)
-; RV64V-NEXT: ld a0, 224(sp)
-; RV64V-NEXT: vslidedown.vi v9, v8, 4
-; RV64V-NEXT: vmv.x.s a2, v9
-; RV64V-NEXT: fmv.h.x fa5, a2
-; RV64V-NEXT: fsh fa5, 0(a0)
-; RV64V-NEXT: andi a0, a1, 32
-; RV64V-NEXT: beqz a0, .LBB59_7
-; RV64V-NEXT: .LBB59_15: # %cond.store9
-; RV64V-NEXT: addi a0, sp, 128
-; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64V-NEXT: vse64.v v12, (a0)
-; RV64V-NEXT: ld a0, 168(sp)
-; RV64V-NEXT: vslidedown.vi v9, v8, 5
-; RV64V-NEXT: vmv.x.s a2, v9
-; RV64V-NEXT: fmv.h.x fa5, a2
-; RV64V-NEXT: fsh fa5, 0(a0)
-; RV64V-NEXT: andi a0, a1, 64
-; RV64V-NEXT: beqz a0, .LBB59_8
-; RV64V-NEXT: .LBB59_16: # %cond.store11
-; RV64V-NEXT: addi a0, sp, 64
-; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64V-NEXT: vse64.v v12, (a0)
-; RV64V-NEXT: ld a0, 112(sp)
-; RV64V-NEXT: vslidedown.vi v9, v8, 6
-; RV64V-NEXT: vmv.x.s a2, v9
-; RV64V-NEXT: fmv.h.x fa5, a2
-; RV64V-NEXT: fsh fa5, 0(a0)
-; RV64V-NEXT: andi a0, a1, -128
-; RV64V-NEXT: bnez a0, .LBB59_9
-; RV64V-NEXT: j .LBB59_10
+; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
+; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8bf16:
; RV64ZVE32F: # %bb.0:
@@ -7470,255 +6508,19 @@ define void @mscatter_baseidx_sext_v8i8_v8bf16(<8 x bfloat> %val, ptr %base, <8
define void @mscatter_baseidx_zext_v8i8_v8bf16(<8 x bfloat> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_v8i8_v8bf16:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT: vzext.vf4 v10, v9
-; RV32-NEXT: vadd.vv v10, v10, v10
-; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV32-NEXT: vmv.x.s a1, v0
-; RV32-NEXT: andi a2, a1, 1
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vadd.vx v10, v10, a0
-; RV32-NEXT: bnez a2, .LBB60_9
-; RV32-NEXT: # %bb.1: # %else
-; RV32-NEXT: andi a0, a1, 2
-; RV32-NEXT: bnez a0, .LBB60_10
-; RV32-NEXT: .LBB60_2: # %else2
-; RV32-NEXT: andi a0, a1, 4
-; RV32-NEXT: bnez a0, .LBB60_11
-; RV32-NEXT: .LBB60_3: # %else4
-; RV32-NEXT: andi a0, a1, 8
-; RV32-NEXT: bnez a0, .LBB60_12
-; RV32-NEXT: .LBB60_4: # %else6
-; RV32-NEXT: andi a0, a1, 16
-; RV32-NEXT: bnez a0, .LBB60_13
-; RV32-NEXT: .LBB60_5: # %else8
-; RV32-NEXT: andi a0, a1, 32
-; RV32-NEXT: bnez a0, .LBB60_14
-; RV32-NEXT: .LBB60_6: # %else10
-; RV32-NEXT: andi a0, a1, 64
-; RV32-NEXT: bnez a0, .LBB60_15
-; RV32-NEXT: .LBB60_7: # %else12
-; RV32-NEXT: andi a0, a1, -128
-; RV32-NEXT: bnez a0, .LBB60_16
-; RV32-NEXT: .LBB60_8: # %else14
-; RV32-NEXT: ret
-; RV32-NEXT: .LBB60_9: # %cond.store
-; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV32-NEXT: vmv.x.s a0, v8
-; RV32-NEXT: fmv.h.x fa5, a0
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vmv.x.s a0, v10
-; RV32-NEXT: fsh fa5, 0(a0)
-; RV32-NEXT: andi a0, a1, 2
-; RV32-NEXT: beqz a0, .LBB60_2
-; RV32-NEXT: .LBB60_10: # %cond.store1
+; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT: vwaddu.vv v10, v9, v9
; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 1
-; RV32-NEXT: vmv.x.s a0, v9
-; RV32-NEXT: fmv.h.x fa5, a0
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v10, 1
-; RV32-NEXT: vmv.x.s a0, v9
-; RV32-NEXT: fsh fa5, 0(a0)
-; RV32-NEXT: andi a0, a1, 4
-; RV32-NEXT: beqz a0, .LBB60_3
-; RV32-NEXT: .LBB60_11: # %cond.store3
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 2
-; RV32-NEXT: vmv.x.s a0, v9
-; RV32-NEXT: fmv.h.x fa5, a0
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v10, 2
-; RV32-NEXT: vmv.x.s a0, v9
-; RV32-NEXT: fsh fa5, 0(a0)
-; RV32-NEXT: andi a0, a1, 8
-; RV32-NEXT: beqz a0, .LBB60_4
-; RV32-NEXT: .LBB60_12: # %cond.store5
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 3
-; RV32-NEXT: vmv.x.s a0, v9
-; RV32-NEXT: fmv.h.x fa5, a0
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v10, 3
-; RV32-NEXT: vmv.x.s a0, v9
-; RV32-NEXT: fsh fa5, 0(a0)
-; RV32-NEXT: andi a0, a1, 16
-; RV32-NEXT: beqz a0, .LBB60_5
-; RV32-NEXT: .LBB60_13: # %cond.store7
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 4
-; RV32-NEXT: vmv.x.s a0, v9
-; RV32-NEXT: fmv.h.x fa5, a0
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v12, v10, 4
-; RV32-NEXT: vmv.x.s a0, v12
-; RV32-NEXT: fsh fa5, 0(a0)
-; RV32-NEXT: andi a0, a1, 32
-; RV32-NEXT: beqz a0, .LBB60_6
-; RV32-NEXT: .LBB60_14: # %cond.store9
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 5
-; RV32-NEXT: vmv.x.s a0, v9
-; RV32-NEXT: fmv.h.x fa5, a0
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v12, v10, 5
-; RV32-NEXT: vmv.x.s a0, v12
-; RV32-NEXT: fsh fa5, 0(a0)
-; RV32-NEXT: andi a0, a1, 64
-; RV32-NEXT: beqz a0, .LBB60_7
-; RV32-NEXT: .LBB60_15: # %cond.store11
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 6
-; RV32-NEXT: vmv.x.s a0, v9
-; RV32-NEXT: fmv.h.x fa5, a0
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v12, v10, 6
-; RV32-NEXT: vmv.x.s a0, v12
-; RV32-NEXT: fsh fa5, 0(a0)
-; RV32-NEXT: andi a0, a1, -128
-; RV32-NEXT: beqz a0, .LBB60_8
-; RV32-NEXT: .LBB60_16: # %cond.store13
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 7
-; RV32-NEXT: vmv.x.s a0, v8
-; RV32-NEXT: fmv.h.x fa5, a0
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v10, 7
-; RV32-NEXT: vmv.x.s a0, v8
-; RV32-NEXT: fsh fa5, 0(a0)
+; RV32-NEXT: vsoxei16.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_baseidx_zext_v8i8_v8bf16:
; RV64V: # %bb.0:
-; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64V-NEXT: vzext.vf8 v12, v9
-; RV64V-NEXT: vadd.vv v12, v12, v12
-; RV64V-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV64V-NEXT: vmv.x.s a1, v0
-; RV64V-NEXT: andi a2, a1, 1
-; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; RV64V-NEXT: vadd.vx v12, v12, a0
-; RV64V-NEXT: bnez a2, .LBB60_11
-; RV64V-NEXT: # %bb.1: # %else
-; RV64V-NEXT: andi a0, a1, 2
-; RV64V-NEXT: bnez a0, .LBB60_12
-; RV64V-NEXT: .LBB60_2: # %else2
-; RV64V-NEXT: andi a0, a1, 4
-; RV64V-NEXT: bnez a0, .LBB60_13
-; RV64V-NEXT: .LBB60_3: # %else4
-; RV64V-NEXT: andi a0, a1, 8
-; RV64V-NEXT: beqz a0, .LBB60_5
-; RV64V-NEXT: .LBB60_4: # %cond.store5
-; RV64V-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64V-NEXT: vslidedown.vi v9, v8, 3
-; RV64V-NEXT: vmv.x.s a0, v9
-; RV64V-NEXT: fmv.h.x fa5, a0
-; RV64V-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64V-NEXT: vslidedown.vi v10, v12, 3
-; RV64V-NEXT: vmv.x.s a0, v10
-; RV64V-NEXT: fsh fa5, 0(a0)
-; RV64V-NEXT: .LBB60_5: # %else6
-; RV64V-NEXT: addi sp, sp, -320
-; RV64V-NEXT: .cfi_def_cfa_offset 320
-; RV64V-NEXT: sd ra, 312(sp) # 8-byte Folded Spill
-; RV64V-NEXT: sd s0, 304(sp) # 8-byte Folded Spill
-; RV64V-NEXT: .cfi_offset ra, -8
-; RV64V-NEXT: .cfi_offset s0, -16
-; RV64V-NEXT: addi s0, sp, 320
-; RV64V-NEXT: .cfi_def_cfa s0, 0
-; RV64V-NEXT: andi sp, sp, -64
-; RV64V-NEXT: andi a0, a1, 16
-; RV64V-NEXT: bnez a0, .LBB60_14
-; RV64V-NEXT: # %bb.6: # %else8
-; RV64V-NEXT: andi a0, a1, 32
-; RV64V-NEXT: bnez a0, .LBB60_15
-; RV64V-NEXT: .LBB60_7: # %else10
-; RV64V-NEXT: andi a0, a1, 64
-; RV64V-NEXT: bnez a0, .LBB60_16
-; RV64V-NEXT: .LBB60_8: # %else12
-; RV64V-NEXT: andi a0, a1, -128
-; RV64V-NEXT: beqz a0, .LBB60_10
-; RV64V-NEXT: .LBB60_9: # %cond.store13
-; RV64V-NEXT: mv a0, sp
-; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64V-NEXT: vse64.v v12, (a0)
-; RV64V-NEXT: ld a0, 56(sp)
-; RV64V-NEXT: vslidedown.vi v8, v8, 7
-; RV64V-NEXT: vmv.x.s a1, v8
-; RV64V-NEXT: fmv.h.x fa5, a1
-; RV64V-NEXT: fsh fa5, 0(a0)
-; RV64V-NEXT: .LBB60_10: # %else14
-; RV64V-NEXT: addi sp, s0, -320
-; RV64V-NEXT: ld ra, 312(sp) # 8-byte Folded Reload
-; RV64V-NEXT: ld s0, 304(sp) # 8-byte Folded Reload
-; RV64V-NEXT: addi sp, sp, 320
-; RV64V-NEXT: ret
-; RV64V-NEXT: .LBB60_11: # %cond.store
-; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV64V-NEXT: vmv.x.s a0, v8
-; RV64V-NEXT: fmv.h.x fa5, a0
-; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; RV64V-NEXT: vmv.x.s a0, v12
-; RV64V-NEXT: fsh fa5, 0(a0)
-; RV64V-NEXT: andi a0, a1, 2
-; RV64V-NEXT: beqz a0, .LBB60_2
-; RV64V-NEXT: .LBB60_12: # %cond.store1
+; RV64V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64V-NEXT: vwaddu.vv v10, v9, v9
; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV64V-NEXT: vslidedown.vi v9, v8, 1
-; RV64V-NEXT: vmv.x.s a0, v9
-; RV64V-NEXT: fmv.h.x fa5, a0
-; RV64V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64V-NEXT: vslidedown.vi v9, v12, 1
-; RV64V-NEXT: vmv.x.s a0, v9
-; RV64V-NEXT: fsh fa5, 0(a0)
-; RV64V-NEXT: andi a0, a1, 4
-; RV64V-NEXT: beqz a0, .LBB60_3
-; RV64V-NEXT: .LBB60_13: # %cond.store3
-; RV64V-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64V-NEXT: vslidedown.vi v9, v8, 2
-; RV64V-NEXT: vmv.x.s a0, v9
-; RV64V-NEXT: fmv.h.x fa5, a0
-; RV64V-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64V-NEXT: vslidedown.vi v10, v12, 2
-; RV64V-NEXT: vmv.x.s a0, v10
-; RV64V-NEXT: fsh fa5, 0(a0)
-; RV64V-NEXT: andi a0, a1, 8
-; RV64V-NEXT: bnez a0, .LBB60_4
-; RV64V-NEXT: j .LBB60_5
-; RV64V-NEXT: .LBB60_14: # %cond.store7
-; RV64V-NEXT: addi a0, sp, 192
-; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64V-NEXT: vse64.v v12, (a0)
-; RV64V-NEXT: ld a0, 224(sp)
-; RV64V-NEXT: vslidedown.vi v9, v8, 4
-; RV64V-NEXT: vmv.x.s a2, v9
-; RV64V-NEXT: fmv.h.x fa5, a2
-; RV64V-NEXT: fsh fa5, 0(a0)
-; RV64V-NEXT: andi a0, a1, 32
-; RV64V-NEXT: beqz a0, .LBB60_7
-; RV64V-NEXT: .LBB60_15: # %cond.store9
-; RV64V-NEXT: addi a0, sp, 128
-; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64V-NEXT: vse64.v v12, (a0)
-; RV64V-NEXT: ld a0, 168(sp)
-; RV64V-NEXT: vslidedown.vi v9, v8, 5
-; RV64V-NEXT: vmv.x.s a2, v9
-; RV64V-NEXT: fmv.h.x fa5, a2
-; RV64V-NEXT: fsh fa5, 0(a0)
-; RV64V-NEXT: andi a0, a1, 64
-; RV64V-NEXT: beqz a0, .LBB60_8
-; RV64V-NEXT: .LBB60_16: # %cond.store11
-; RV64V-NEXT: addi a0, sp, 64
-; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64V-NEXT: vse64.v v12, (a0)
-; RV64V-NEXT: ld a0, 112(sp)
-; RV64V-NEXT: vslidedown.vi v9, v8, 6
-; RV64V-NEXT: vmv.x.s a2, v9
-; RV64V-NEXT: fmv.h.x fa5, a2
-; RV64V-NEXT: fsh fa5, 0(a0)
-; RV64V-NEXT: andi a0, a1, -128
-; RV64V-NEXT: bnez a0, .LBB60_9
-; RV64V-NEXT: j .LBB60_10
+; RV64V-NEXT: vsoxei16.v v8, (a0), v10, v0.t
+; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8bf16:
; RV64ZVE32F: # %bb.0:
@@ -7864,119 +6666,7 @@ define void @mscatter_baseidx_v8bf16(<8 x bfloat> %val, ptr %base, <8 x i16> %id
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vwadd.vv v10, v9, v9
-; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV32-NEXT: vmv.x.s a1, v0
-; RV32-NEXT: andi a2, a1, 1
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vadd.vx v10, v10, a0
-; RV32-NEXT: bnez a2, .LBB61_9
-; RV32-NEXT: # %bb.1: # %else
-; RV32-NEXT: andi a0, a1, 2
-; RV32-NEXT: bnez a0, .LBB61_10
-; RV32-NEXT: .LBB61_2: # %else2
-; RV32-NEXT: andi a0, a1, 4
-; RV32-NEXT: bnez a0, .LBB61_11
-; RV32-NEXT: .LBB61_3: # %else4
-; RV32-NEXT: andi a0, a1, 8
-; RV32-NEXT: bnez a0, .LBB61_12
-; RV32-NEXT: .LBB61_4: # %else6
-; RV32-NEXT: andi a0, a1, 16
-; RV32-NEXT: bnez a0, .LBB61_13
-; RV32-NEXT: .LBB61_5: # %else8
-; RV32-NEXT: andi a0, a1, 32
-; RV32-NEXT: bnez a0, .LBB61_14
-; RV32-NEXT: .LBB61_6: # %else10
-; RV32-NEXT: andi a0, a1, 64
-; RV32-NEXT: bnez a0, .LBB61_15
-; RV32-NEXT: .LBB61_7: # %else12
-; RV32-NEXT: andi a0, a1, -128
-; RV32-NEXT: bnez a0, .LBB61_16
-; RV32-NEXT: .LBB61_8: # %else14
-; RV32-NEXT: ret
-; RV32-NEXT: .LBB61_9: # %cond.store
-; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV32-NEXT: vmv.x.s a0, v8
-; RV32-NEXT: fmv.h.x fa5, a0
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vmv.x.s a0, v10
-; RV32-NEXT: fsh fa5, 0(a0)
-; RV32-NEXT: andi a0, a1, 2
-; RV32-NEXT: beqz a0, .LBB61_2
-; RV32-NEXT: .LBB61_10: # %cond.store1
-; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 1
-; RV32-NEXT: vmv.x.s a0, v9
-; RV32-NEXT: fmv.h.x fa5, a0
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v10, 1
-; RV32-NEXT: vmv.x.s a0, v9
-; RV32-NEXT: fsh fa5, 0(a0)
-; RV32-NEXT: andi a0, a1, 4
-; RV32-NEXT: beqz a0, .LBB61_3
-; RV32-NEXT: .LBB61_11: # %cond.store3
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 2
-; RV32-NEXT: vmv.x.s a0, v9
-; RV32-NEXT: fmv.h.x fa5, a0
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v10, 2
-; RV32-NEXT: vmv.x.s a0, v9
-; RV32-NEXT: fsh fa5, 0(a0)
-; RV32-NEXT: andi a0, a1, 8
-; RV32-NEXT: beqz a0, .LBB61_4
-; RV32-NEXT: .LBB61_12: # %cond.store5
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 3
-; RV32-NEXT: vmv.x.s a0, v9
-; RV32-NEXT: fmv.h.x fa5, a0
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v10, 3
-; RV32-NEXT: vmv.x.s a0, v9
-; RV32-NEXT: fsh fa5, 0(a0)
-; RV32-NEXT: andi a0, a1, 16
-; RV32-NEXT: beqz a0, .LBB61_5
-; RV32-NEXT: .LBB61_13: # %cond.store7
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 4
-; RV32-NEXT: vmv.x.s a0, v9
-; RV32-NEXT: fmv.h.x fa5, a0
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v12, v10, 4
-; RV32-NEXT: vmv.x.s a0, v12
-; RV32-NEXT: fsh fa5, 0(a0)
-; RV32-NEXT: andi a0, a1, 32
-; RV32-NEXT: beqz a0, .LBB61_6
-; RV32-NEXT: .LBB61_14: # %cond.store9
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 5
-; RV32-NEXT: vmv.x.s a0, v9
-; RV32-NEXT: fmv.h.x fa5, a0
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v12, v10, 5
-; RV32-NEXT: vmv.x.s a0, v12
-; RV32-NEXT: fsh fa5, 0(a0)
-; RV32-NEXT: andi a0, a1, 64
-; RV32-NEXT: beqz a0, .LBB61_7
-; RV32-NEXT: .LBB61_15: # %cond.store11
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 6
-; RV32-NEXT: vmv.x.s a0, v9
-; RV32-NEXT: fmv.h.x fa5, a0
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v12, v10, 6
-; RV32-NEXT: vmv.x.s a0, v12
-; RV32-NEXT: fsh fa5, 0(a0)
-; RV32-NEXT: andi a0, a1, -128
-; RV32-NEXT: beqz a0, .LBB61_8
-; RV32-NEXT: .LBB61_16: # %cond.store13
-; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 7
-; RV32-NEXT: vmv.x.s a0, v8
-; RV32-NEXT: fmv.h.x fa5, a0
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v10, 7
-; RV32-NEXT: vmv.x.s a0, v8
-; RV32-NEXT: fsh fa5, 0(a0)
+; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_baseidx_v8bf16:
@@ -7984,132 +6674,9 @@ define void @mscatter_baseidx_v8bf16(<8 x bfloat> %val, ptr %base, <8 x i16> %id
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf4 v12, v9
; RV64V-NEXT: vadd.vv v12, v12, v12
-; RV64V-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV64V-NEXT: vmv.x.s a1, v0
-; RV64V-NEXT: andi a2, a1, 1
-; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; RV64V-NEXT: vadd.vx v12, v12, a0
-; RV64V-NEXT: bnez a2, .LBB61_11
-; RV64V-NEXT: # %bb.1: # %else
-; RV64V-NEXT: andi a0, a1, 2
-; RV64V-NEXT: bnez a0, .LBB61_12
-; RV64V-NEXT: .LBB61_2: # %else2
-; RV64V-NEXT: andi a0, a1, 4
-; RV64V-NEXT: bnez a0, .LBB61_13
-; RV64V-NEXT: .LBB61_3: # %else4
-; RV64V-NEXT: andi a0, a1, 8
-; RV64V-NEXT: beqz a0, .LBB61_5
-; RV64V-NEXT: .LBB61_4: # %cond.store5
-; RV64V-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64V-NEXT: vslidedown.vi v9, v8, 3
-; RV64V-NEXT: vmv.x.s a0, v9
-; RV64V-NEXT: fmv.h.x fa5, a0
-; RV64V-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64V-NEXT: vslidedown.vi v10, v12, 3
-; RV64V-NEXT: vmv.x.s a0, v10
-; RV64V-NEXT: fsh fa5, 0(a0)
-; RV64V-NEXT: .LBB61_5: # %else6
-; RV64V-NEXT: addi sp, sp, -320
-; RV64V-NEXT: .cfi_def_cfa_offset 320
-; RV64V-NEXT: sd ra, 312(sp) # 8-byte Folded Spill
-; RV64V-NEXT: sd s0, 304(sp) # 8-byte Folded Spill
-; RV64V-NEXT: .cfi_offset ra, -8
-; RV64V-NEXT: .cfi_offset s0, -16
-; RV64V-NEXT: addi s0, sp, 320
-; RV64V-NEXT: .cfi_def_cfa s0, 0
-; RV64V-NEXT: andi sp, sp, -64
-; RV64V-NEXT: andi a0, a1, 16
-; RV64V-NEXT: bnez a0, .LBB61_14
-; RV64V-NEXT: # %bb.6: # %else8
-; RV64V-NEXT: andi a0, a1, 32
-; RV64V-NEXT: bnez a0, .LBB61_15
-; RV64V-NEXT: .LBB61_7: # %else10
-; RV64V-NEXT: andi a0, a1, 64
-; RV64V-NEXT: bnez a0, .LBB61_16
-; RV64V-NEXT: .LBB61_8: # %else12
-; RV64V-NEXT: andi a0, a1, -128
-; RV64V-NEXT: beqz a0, .LBB61_10
-; RV64V-NEXT: .LBB61_9: # %cond.store13
-; RV64V-NEXT: mv a0, sp
-; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64V-NEXT: vse64.v v12, (a0)
-; RV64V-NEXT: ld a0, 56(sp)
-; RV64V-NEXT: vslidedown.vi v8, v8, 7
-; RV64V-NEXT: vmv.x.s a1, v8
-; RV64V-NEXT: fmv.h.x fa5, a1
-; RV64V-NEXT: fsh fa5, 0(a0)
-; RV64V-NEXT: .LBB61_10: # %else14
-; RV64V-NEXT: addi sp, s0, -320
-; RV64V-NEXT: ld ra, 312(sp) # 8-byte Folded Reload
-; RV64V-NEXT: ld s0, 304(sp) # 8-byte Folded Reload
-; RV64V-NEXT: addi sp, sp, 320
-; RV64V-NEXT: ret
-; RV64V-NEXT: .LBB61_11: # %cond.store
; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV64V-NEXT: vmv.x.s a0, v8
-; RV64V-NEXT: fmv.h.x fa5, a0
-; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; RV64V-NEXT: vmv.x.s a0, v12
-; RV64V-NEXT: fsh fa5, 0(a0)
-; RV64V-NEXT: andi a0, a1, 2
-; RV64V-NEXT: beqz a0, .LBB61_2
-; RV64V-NEXT: .LBB61_12: # %cond.store1
-; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV64V-NEXT: vslidedown.vi v9, v8, 1
-; RV64V-NEXT: vmv.x.s a0, v9
-; RV64V-NEXT: fmv.h.x fa5, a0
-; RV64V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64V-NEXT: vslidedown.vi v9, v12, 1
-; RV64V-NEXT: vmv.x.s a0, v9
-; RV64V-NEXT: fsh fa5, 0(a0)
-; RV64V-NEXT: andi a0, a1, 4
-; RV64V-NEXT: beqz a0, .LBB61_3
-; RV64V-NEXT: .LBB61_13: # %cond.store3
-; RV64V-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64V-NEXT: vslidedown.vi v9, v8, 2
-; RV64V-NEXT: vmv.x.s a0, v9
-; RV64V-NEXT: fmv.h.x fa5, a0
-; RV64V-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64V-NEXT: vslidedown.vi v10, v12, 2
-; RV64V-NEXT: vmv.x.s a0, v10
-; RV64V-NEXT: fsh fa5, 0(a0)
-; RV64V-NEXT: andi a0, a1, 8
-; RV64V-NEXT: bnez a0, .LBB61_4
-; RV64V-NEXT: j .LBB61_5
-; RV64V-NEXT: .LBB61_14: # %cond.store7
-; RV64V-NEXT: addi a0, sp, 192
-; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64V-NEXT: vse64.v v12, (a0)
-; RV64V-NEXT: ld a0, 224(sp)
-; RV64V-NEXT: vslidedown.vi v9, v8, 4
-; RV64V-NEXT: vmv.x.s a2, v9
-; RV64V-NEXT: fmv.h.x fa5, a2
-; RV64V-NEXT: fsh fa5, 0(a0)
-; RV64V-NEXT: andi a0, a1, 32
-; RV64V-NEXT: beqz a0, .LBB61_7
-; RV64V-NEXT: .LBB61_15: # %cond.store9
-; RV64V-NEXT: addi a0, sp, 128
-; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64V-NEXT: vse64.v v12, (a0)
-; RV64V-NEXT: ld a0, 168(sp)
-; RV64V-NEXT: vslidedown.vi v9, v8, 5
-; RV64V-NEXT: vmv.x.s a2, v9
-; RV64V-NEXT: fmv.h.x fa5, a2
-; RV64V-NEXT: fsh fa5, 0(a0)
-; RV64V-NEXT: andi a0, a1, 64
-; RV64V-NEXT: beqz a0, .LBB61_8
-; RV64V-NEXT: .LBB61_16: # %cond.store11
-; RV64V-NEXT: addi a0, sp, 64
-; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64V-NEXT: vse64.v v12, (a0)
-; RV64V-NEXT: ld a0, 112(sp)
-; RV64V-NEXT: vslidedown.vi v9, v8, 6
-; RV64V-NEXT: vmv.x.s a2, v9
-; RV64V-NEXT: fmv.h.x fa5, a2
-; RV64V-NEXT: fsh fa5, 0(a0)
-; RV64V-NEXT: andi a0, a1, -128
-; RV64V-NEXT: bnez a0, .LBB61_9
-; RV64V-NEXT: j .LBB61_10
+; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
+; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_v8bf16:
; RV64ZVE32F: # %bb.0:
@@ -8243,53 +6810,23 @@ define void @mscatter_baseidx_v8bf16(<8 x bfloat> %val, ptr %base, <8 x i16> %id
declare void @llvm.masked.scatter.v1f16.v1p0(<1 x half>, <1 x ptr>, i32, <1 x i1>)
define void @mscatter_v1f16(<1 x half> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
-; RV32V-ZVFH-LABEL: mscatter_v1f16:
-; RV32V-ZVFH: # %bb.0:
-; RV32V-ZVFH-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; RV32V-ZVFH-NEXT: vsoxei32.v v8, (zero), v9, v0.t
-; RV32V-ZVFH-NEXT: ret
-;
-; RV64V-ZVFH-LABEL: mscatter_v1f16:
-; RV64V-ZVFH: # %bb.0:
-; RV64V-ZVFH-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; RV64V-ZVFH-NEXT: vsoxei64.v v8, (zero), v9, v0.t
-; RV64V-ZVFH-NEXT: ret
-;
-; RV32V-ZVFHMIN-LABEL: mscatter_v1f16:
-; RV32V-ZVFHMIN: # %bb.0:
-; RV32V-ZVFHMIN-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
-; RV32V-ZVFHMIN-NEXT: vfirst.m a0, v0
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB62_2
-; RV32V-ZVFHMIN-NEXT: # %bb.1: # %cond.store
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32V-ZVFHMIN-NEXT: .LBB62_2: # %else
-; RV32V-ZVFHMIN-NEXT: ret
+; RV32V-LABEL: mscatter_v1f16:
+; RV32V: # %bb.0:
+; RV32V-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
+; RV32V-NEXT: ret
;
-; RV64V-ZVFHMIN-LABEL: mscatter_v1f16:
-; RV64V-ZVFHMIN: # %bb.0:
-; RV64V-ZVFHMIN-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
-; RV64V-ZVFHMIN-NEXT: vfirst.m a0, v0
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB62_2
-; RV64V-ZVFHMIN-NEXT: # %bb.1: # %cond.store
-; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV64V-ZVFHMIN-NEXT: .LBB62_2: # %else
-; RV64V-ZVFHMIN-NEXT: ret
+; RV64V-LABEL: mscatter_v1f16:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t
+; RV64V-NEXT: ret
;
-; RV32ZVE32F-ZVFH-LABEL: mscatter_v1f16:
-; RV32ZVE32F-ZVFH: # %bb.0:
-; RV32ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV32ZVE32F-ZVFH-NEXT: vsoxei32.v v8, (zero), v9, v0.t
-; RV32ZVE32F-ZVFH-NEXT: ret
+; RV32ZVE32F-LABEL: mscatter_v1f16:
+; RV32ZVE32F: # %bb.0:
+; RV32ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t
+; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-ZVFH-LABEL: mscatter_v1f16:
; RV64ZVE32F-ZVFH: # %bb.0:
@@ -8302,21 +6839,6 @@ define void @mscatter_v1f16(<1 x half> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
; RV64ZVE32F-ZVFH-NEXT: .LBB62_2: # %else
; RV64ZVE32F-ZVFH-NEXT: ret
;
-; RV32ZVE32F-ZVFHMIN-LABEL: mscatter_v1f16:
-; RV32ZVE32F-ZVFHMIN: # %bb.0:
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vfirst.m a0, v0
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB62_2
-; RV32ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %cond.store
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB62_2: # %else
-; RV32ZVE32F-ZVFHMIN-NEXT: ret
-;
; RV64ZVE32F-ZVFHMIN-LABEL: mscatter_v1f16:
; RV64ZVE32F-ZVFHMIN: # %bb.0:
; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
@@ -8336,85 +6858,23 @@ define void @mscatter_v1f16(<1 x half> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
declare void @llvm.masked.scatter.v2f16.v2p0(<2 x half>, <2 x ptr>, i32, <2 x i1>)
define void @mscatter_v2f16(<2 x half> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
-; RV32V-ZVFH-LABEL: mscatter_v2f16:
-; RV32V-ZVFH: # %bb.0:
-; RV32V-ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; RV32V-ZVFH-NEXT: vsoxei32.v v8, (zero), v9, v0.t
-; RV32V-ZVFH-NEXT: ret
-;
-; RV64V-ZVFH-LABEL: mscatter_v2f16:
-; RV64V-ZVFH: # %bb.0:
-; RV64V-ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; RV64V-ZVFH-NEXT: vsoxei64.v v8, (zero), v9, v0.t
-; RV64V-ZVFH-NEXT: ret
-;
-; RV32V-ZVFHMIN-LABEL: mscatter_v2f16:
-; RV32V-ZVFHMIN: # %bb.0:
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v0
-; RV32V-ZVFHMIN-NEXT: andi a1, a0, 1
-; RV32V-ZVFHMIN-NEXT: bnez a1, .LBB63_3
-; RV32V-ZVFHMIN-NEXT: # %bb.1: # %else
-; RV32V-ZVFHMIN-NEXT: andi a0, a0, 2
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB63_4
-; RV32V-ZVFHMIN-NEXT: .LBB63_2: # %else2
-; RV32V-ZVFHMIN-NEXT: ret
-; RV32V-ZVFHMIN-NEXT: .LBB63_3: # %cond.store
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v8
-; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a1
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v9
-; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a1)
-; RV32V-ZVFHMIN-NEXT: andi a0, a0, 2
-; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB63_2
-; RV32V-ZVFHMIN-NEXT: .LBB63_4: # %cond.store1
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v9, 1
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32V-ZVFHMIN-NEXT: ret
+; RV32V-LABEL: mscatter_v2f16:
+; RV32V: # %bb.0:
+; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
+; RV32V-NEXT: ret
;
-; RV64V-ZVFHMIN-LABEL: mscatter_v2f16:
-; RV64V-ZVFHMIN: # %bb.0:
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v0
-; RV64V-ZVFHMIN-NEXT: andi a1, a0, 1
-; RV64V-ZVFHMIN-NEXT: bnez a1, .LBB63_3
-; RV64V-ZVFHMIN-NEXT: # %bb.1: # %else
-; RV64V-ZVFHMIN-NEXT: andi a0, a0, 2
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB63_4
-; RV64V-ZVFHMIN-NEXT: .LBB63_2: # %else2
-; RV64V-ZVFHMIN-NEXT: ret
-; RV64V-ZVFHMIN-NEXT: .LBB63_3: # %cond.store
-; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v8
-; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a1
-; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v9
-; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a1)
-; RV64V-ZVFHMIN-NEXT: andi a0, a0, 2
-; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB63_2
-; RV64V-ZVFHMIN-NEXT: .LBB63_4: # %cond.store1
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e64, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v8, v9, 1
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV64V-ZVFHMIN-NEXT: ret
+; RV64V-LABEL: mscatter_v2f16:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t
+; RV64V-NEXT: ret
;
-; RV32ZVE32F-ZVFH-LABEL: mscatter_v2f16:
-; RV32ZVE32F-ZVFH: # %bb.0:
-; RV32ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
-; RV32ZVE32F-ZVFH-NEXT: vsoxei32.v v8, (zero), v9, v0.t
-; RV32ZVE32F-ZVFH-NEXT: ret
+; RV32ZVE32F-LABEL: mscatter_v2f16:
+; RV32ZVE32F: # %bb.0:
+; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
+; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t
+; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-ZVFH-LABEL: mscatter_v2f16:
; RV64ZVE32F-ZVFH: # %bb.0:
@@ -8438,37 +6898,6 @@ define void @mscatter_v2f16(<2 x half> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a1)
; RV64ZVE32F-ZVFH-NEXT: ret
;
-; RV32ZVE32F-ZVFHMIN-LABEL: mscatter_v2f16:
-; RV32ZVE32F-ZVFHMIN: # %bb.0:
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v0
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 1
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB63_3
-; RV32ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a0, 2
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB63_4
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB63_2: # %else2
-; RV32ZVE32F-ZVFHMIN-NEXT: ret
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB63_3: # %cond.store
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v8
-; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a1
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a1)
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a0, 2
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB63_2
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB63_4: # %cond.store1
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v9, 1
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: ret
-;
; RV64ZVE32F-ZVFHMIN-LABEL: mscatter_v2f16:
; RV64ZVE32F-ZVFHMIN: # %bb.0:
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -8501,141 +6930,17 @@ define void @mscatter_v2f16(<2 x half> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
declare void @llvm.masked.scatter.v4f16.v4p0(<4 x half>, <4 x ptr>, i32, <4 x i1>)
define void @mscatter_v4f16(<4 x half> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
-; RV32V-ZVFH-LABEL: mscatter_v4f16:
-; RV32V-ZVFH: # %bb.0:
-; RV32V-ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV32V-ZVFH-NEXT: vsoxei32.v v8, (zero), v9, v0.t
-; RV32V-ZVFH-NEXT: ret
-;
-; RV64V-ZVFH-LABEL: mscatter_v4f16:
-; RV64V-ZVFH: # %bb.0:
-; RV64V-ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV64V-ZVFH-NEXT: vsoxei64.v v8, (zero), v10, v0.t
-; RV64V-ZVFH-NEXT: ret
-;
-; RV32V-ZVFHMIN-LABEL: mscatter_v4f16:
-; RV32V-ZVFHMIN: # %bb.0:
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v0
-; RV32V-ZVFHMIN-NEXT: andi a1, a0, 1
-; RV32V-ZVFHMIN-NEXT: bnez a1, .LBB64_5
-; RV32V-ZVFHMIN-NEXT: # %bb.1: # %else
-; RV32V-ZVFHMIN-NEXT: andi a1, a0, 2
-; RV32V-ZVFHMIN-NEXT: bnez a1, .LBB64_6
-; RV32V-ZVFHMIN-NEXT: .LBB64_2: # %else2
-; RV32V-ZVFHMIN-NEXT: andi a1, a0, 4
-; RV32V-ZVFHMIN-NEXT: bnez a1, .LBB64_7
-; RV32V-ZVFHMIN-NEXT: .LBB64_3: # %else4
-; RV32V-ZVFHMIN-NEXT: andi a0, a0, 8
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB64_8
-; RV32V-ZVFHMIN-NEXT: .LBB64_4: # %else6
-; RV32V-ZVFHMIN-NEXT: ret
-; RV32V-ZVFHMIN-NEXT: .LBB64_5: # %cond.store
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v8
-; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a1
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v9
-; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a1)
-; RV32V-ZVFHMIN-NEXT: andi a1, a0, 2
-; RV32V-ZVFHMIN-NEXT: beqz a1, .LBB64_2
-; RV32V-ZVFHMIN-NEXT: .LBB64_6: # %cond.store1
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 1
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v10
-; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a1
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v10, v9, 1
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v10
-; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a1)
-; RV32V-ZVFHMIN-NEXT: andi a1, a0, 4
-; RV32V-ZVFHMIN-NEXT: beqz a1, .LBB64_3
-; RV32V-ZVFHMIN-NEXT: .LBB64_7: # %cond.store3
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 2
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v10
-; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a1
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v10, v9, 2
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v10
-; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a1)
-; RV32V-ZVFHMIN-NEXT: andi a0, a0, 8
-; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB64_4
-; RV32V-ZVFHMIN-NEXT: .LBB64_8: # %cond.store5
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 3
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v9, 3
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32V-ZVFHMIN-NEXT: ret
-;
-; RV64V-ZVFHMIN-LABEL: mscatter_v4f16:
-; RV64V-ZVFHMIN: # %bb.0:
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v0
-; RV64V-ZVFHMIN-NEXT: andi a1, a0, 1
-; RV64V-ZVFHMIN-NEXT: bnez a1, .LBB64_5
-; RV64V-ZVFHMIN-NEXT: # %bb.1: # %else
-; RV64V-ZVFHMIN-NEXT: andi a1, a0, 2
-; RV64V-ZVFHMIN-NEXT: bnez a1, .LBB64_6
-; RV64V-ZVFHMIN-NEXT: .LBB64_2: # %else2
-; RV64V-ZVFHMIN-NEXT: andi a1, a0, 4
-; RV64V-ZVFHMIN-NEXT: bnez a1, .LBB64_7
-; RV64V-ZVFHMIN-NEXT: .LBB64_3: # %else4
-; RV64V-ZVFHMIN-NEXT: andi a0, a0, 8
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB64_8
-; RV64V-ZVFHMIN-NEXT: .LBB64_4: # %else6
-; RV64V-ZVFHMIN-NEXT: ret
-; RV64V-ZVFHMIN-NEXT: .LBB64_5: # %cond.store
-; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v8
-; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a1
-; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v10
-; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a1)
-; RV64V-ZVFHMIN-NEXT: andi a1, a0, 2
-; RV64V-ZVFHMIN-NEXT: beqz a1, .LBB64_2
-; RV64V-ZVFHMIN-NEXT: .LBB64_6: # %cond.store1
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 1
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v9
-; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a1
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 1
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v9
-; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a1)
-; RV64V-ZVFHMIN-NEXT: andi a1, a0, 4
-; RV64V-ZVFHMIN-NEXT: beqz a1, .LBB64_3
-; RV64V-ZVFHMIN-NEXT: .LBB64_7: # %cond.store3
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 2
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v9
-; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a1
-; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 2
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v12
-; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a1)
-; RV64V-ZVFHMIN-NEXT: andi a0, a0, 8
-; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB64_4
-; RV64V-ZVFHMIN-NEXT: .LBB64_8: # %cond.store5
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 3
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 3
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV64V-ZVFHMIN-NEXT: ret
+; RV32-LABEL: mscatter_v4f16:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
+; RV32-NEXT: ret
;
-; RV32ZVE32F-ZVFH-LABEL: mscatter_v4f16:
-; RV32ZVE32F-ZVFH: # %bb.0:
-; RV32ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV32ZVE32F-ZVFH-NEXT: vsoxei32.v v8, (zero), v9, v0.t
-; RV32ZVE32F-ZVFH-NEXT: ret
+; RV64V-LABEL: mscatter_v4f16:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV64V-NEXT: vsoxei64.v v8, (zero), v10, v0.t
+; RV64V-NEXT: ret
;
; RV64ZVE32F-ZVFH-LABEL: mscatter_v4f16:
; RV64ZVE32F-ZVFH: # %bb.0:
@@ -8681,65 +6986,6 @@ define void @mscatter_v4f16(<4 x half> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a1)
; RV64ZVE32F-ZVFH-NEXT: ret
;
-; RV32ZVE32F-ZVFHMIN-LABEL: mscatter_v4f16:
-; RV32ZVE32F-ZVFHMIN: # %bb.0:
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v0
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 1
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB64_5
-; RV32ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 2
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB64_6
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB64_2: # %else2
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 4
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB64_7
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB64_3: # %else4
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a0, 8
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB64_8
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB64_4: # %else6
-; RV32ZVE32F-ZVFHMIN-NEXT: ret
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB64_5: # %cond.store
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v8
-; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a1
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a1)
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 2
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB64_2
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB64_6: # %cond.store1
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 1
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v10
-; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a1
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v9, 1
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v10
-; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a1)
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 4
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB64_3
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB64_7: # %cond.store3
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 2
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v10
-; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a1
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v9, 2
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v10
-; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a1)
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a0, 8
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB64_4
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB64_8: # %cond.store5
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 3
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v9, 3
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: ret
-;
; RV64ZVE32F-ZVFHMIN-LABEL: mscatter_v4f16:
; RV64ZVE32F-ZVFHMIN: # %bb.0:
; RV64ZVE32F-ZVFHMIN-NEXT: ld a4, 8(a0)
@@ -8796,91 +7042,17 @@ define void @mscatter_v4f16(<4 x half> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
}
define void @mscatter_truemask_v4f16(<4 x half> %val, <4 x ptr> %ptrs) {
-; RV32V-ZVFH-LABEL: mscatter_truemask_v4f16:
-; RV32V-ZVFH: # %bb.0:
-; RV32V-ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV32V-ZVFH-NEXT: vsoxei32.v v8, (zero), v9
-; RV32V-ZVFH-NEXT: ret
-;
-; RV64V-ZVFH-LABEL: mscatter_truemask_v4f16:
-; RV64V-ZVFH: # %bb.0:
-; RV64V-ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV64V-ZVFH-NEXT: vsoxei64.v v8, (zero), v10
-; RV64V-ZVFH-NEXT: ret
-;
-; RV32V-ZVFHMIN-LABEL: mscatter_truemask_v4f16:
-; RV32V-ZVFHMIN: # %bb.0:
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 1
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v10
-; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v10, v9, 1
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v10
-; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 2
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v10
-; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v10, v9, 2
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v10
-; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 3
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v9, 3
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32V-ZVFHMIN-NEXT: ret
-;
-; RV64V-ZVFHMIN-LABEL: mscatter_truemask_v4f16:
-; RV64V-ZVFHMIN: # %bb.0:
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v10
-; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 1
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 1
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 2
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 2
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v12
-; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 3
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 3
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV64V-ZVFHMIN-NEXT: ret
+; RV32-LABEL: mscatter_truemask_v4f16:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV32-NEXT: vsoxei32.v v8, (zero), v9
+; RV32-NEXT: ret
;
-; RV32ZVE32F-ZVFH-LABEL: mscatter_truemask_v4f16:
-; RV32ZVE32F-ZVFH: # %bb.0:
-; RV32ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV32ZVE32F-ZVFH-NEXT: vsoxei32.v v8, (zero), v9
-; RV32ZVE32F-ZVFH-NEXT: ret
+; RV64V-LABEL: mscatter_truemask_v4f16:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV64V-NEXT: vsoxei64.v v8, (zero), v10
+; RV64V-NEXT: ret
;
; RV64ZVE32F-ZVFH-LABEL: mscatter_truemask_v4f16:
; RV64ZVE32F-ZVFH: # %bb.0:
@@ -8898,40 +7070,6 @@ define void @mscatter_truemask_v4f16(<4 x half> %val, <4 x ptr> %ptrs) {
; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a0)
; RV64ZVE32F-ZVFH-NEXT: ret
;
-; RV32ZVE32F-ZVFHMIN-LABEL: mscatter_truemask_v4f16:
-; RV32ZVE32F-ZVFHMIN: # %bb.0:
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 1
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v9, 1
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 2
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v9, 2
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 3
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v9, 3
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: ret
-;
; RV64ZVE32F-ZVFHMIN-LABEL: mscatter_truemask_v4f16:
; RV64ZVE32F-ZVFHMIN: # %bb.0:
; RV64ZVE32F-ZVFHMIN-NEXT: ld a1, 0(a0)
@@ -8970,265 +7108,17 @@ define void @mscatter_falsemask_v4f16(<4 x half> %val, <4 x ptr> %ptrs) {
declare void @llvm.masked.scatter.v8f16.v8p0(<8 x half>, <8 x ptr>, i32, <8 x i1>)
define void @mscatter_v8f16(<8 x half> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
-; RV32V-ZVFH-LABEL: mscatter_v8f16:
-; RV32V-ZVFH: # %bb.0:
-; RV32V-ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32V-ZVFH-NEXT: vsoxei32.v v8, (zero), v10, v0.t
-; RV32V-ZVFH-NEXT: ret
-;
-; RV64V-ZVFH-LABEL: mscatter_v8f16:
-; RV64V-ZVFH: # %bb.0:
-; RV64V-ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64V-ZVFH-NEXT: vsoxei64.v v8, (zero), v12, v0.t
-; RV64V-ZVFH-NEXT: ret
-;
-; RV32V-ZVFHMIN-LABEL: mscatter_v8f16:
-; RV32V-ZVFHMIN: # %bb.0:
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v0
-; RV32V-ZVFHMIN-NEXT: andi a1, a0, 1
-; RV32V-ZVFHMIN-NEXT: bnez a1, .LBB67_9
-; RV32V-ZVFHMIN-NEXT: # %bb.1: # %else
-; RV32V-ZVFHMIN-NEXT: andi a1, a0, 2
-; RV32V-ZVFHMIN-NEXT: bnez a1, .LBB67_10
-; RV32V-ZVFHMIN-NEXT: .LBB67_2: # %else2
-; RV32V-ZVFHMIN-NEXT: andi a1, a0, 4
-; RV32V-ZVFHMIN-NEXT: bnez a1, .LBB67_11
-; RV32V-ZVFHMIN-NEXT: .LBB67_3: # %else4
-; RV32V-ZVFHMIN-NEXT: andi a1, a0, 8
-; RV32V-ZVFHMIN-NEXT: bnez a1, .LBB67_12
-; RV32V-ZVFHMIN-NEXT: .LBB67_4: # %else6
-; RV32V-ZVFHMIN-NEXT: andi a1, a0, 16
-; RV32V-ZVFHMIN-NEXT: bnez a1, .LBB67_13
-; RV32V-ZVFHMIN-NEXT: .LBB67_5: # %else8
-; RV32V-ZVFHMIN-NEXT: andi a1, a0, 32
-; RV32V-ZVFHMIN-NEXT: bnez a1, .LBB67_14
-; RV32V-ZVFHMIN-NEXT: .LBB67_6: # %else10
-; RV32V-ZVFHMIN-NEXT: andi a1, a0, 64
-; RV32V-ZVFHMIN-NEXT: bnez a1, .LBB67_15
-; RV32V-ZVFHMIN-NEXT: .LBB67_7: # %else12
-; RV32V-ZVFHMIN-NEXT: andi a0, a0, -128
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB67_16
-; RV32V-ZVFHMIN-NEXT: .LBB67_8: # %else14
-; RV32V-ZVFHMIN-NEXT: ret
-; RV32V-ZVFHMIN-NEXT: .LBB67_9: # %cond.store
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v8
-; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a1
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v10
-; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a1)
-; RV32V-ZVFHMIN-NEXT: andi a1, a0, 2
-; RV32V-ZVFHMIN-NEXT: beqz a1, .LBB67_2
-; RV32V-ZVFHMIN-NEXT: .LBB67_10: # %cond.store1
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 1
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v9
-; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a1
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 1
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v9
-; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a1)
-; RV32V-ZVFHMIN-NEXT: andi a1, a0, 4
-; RV32V-ZVFHMIN-NEXT: beqz a1, .LBB67_3
-; RV32V-ZVFHMIN-NEXT: .LBB67_11: # %cond.store3
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 2
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v9
-; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a1
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 2
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v9
-; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a1)
-; RV32V-ZVFHMIN-NEXT: andi a1, a0, 8
-; RV32V-ZVFHMIN-NEXT: beqz a1, .LBB67_4
-; RV32V-ZVFHMIN-NEXT: .LBB67_12: # %cond.store5
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 3
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v9
-; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a1
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 3
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v9
-; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a1)
-; RV32V-ZVFHMIN-NEXT: andi a1, a0, 16
-; RV32V-ZVFHMIN-NEXT: beqz a1, .LBB67_5
-; RV32V-ZVFHMIN-NEXT: .LBB67_13: # %cond.store7
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 4
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v9
-; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a1
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 4
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v12
-; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a1)
-; RV32V-ZVFHMIN-NEXT: andi a1, a0, 32
-; RV32V-ZVFHMIN-NEXT: beqz a1, .LBB67_6
-; RV32V-ZVFHMIN-NEXT: .LBB67_14: # %cond.store9
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 5
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v9
-; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a1
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 5
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v12
-; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a1)
-; RV32V-ZVFHMIN-NEXT: andi a1, a0, 64
-; RV32V-ZVFHMIN-NEXT: beqz a1, .LBB67_7
-; RV32V-ZVFHMIN-NEXT: .LBB67_15: # %cond.store11
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 6
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v9
-; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a1
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 6
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v12
-; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a1)
-; RV32V-ZVFHMIN-NEXT: andi a0, a0, -128
-; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB67_8
-; RV32V-ZVFHMIN-NEXT: .LBB67_16: # %cond.store13
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 7
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32V-ZVFHMIN-NEXT: ret
-;
-; RV64V-ZVFHMIN-LABEL: mscatter_v8f16:
-; RV64V-ZVFHMIN: # %bb.0:
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v0
-; RV64V-ZVFHMIN-NEXT: andi a1, a0, 1
-; RV64V-ZVFHMIN-NEXT: bnez a1, .LBB67_11
-; RV64V-ZVFHMIN-NEXT: # %bb.1: # %else
-; RV64V-ZVFHMIN-NEXT: andi a1, a0, 2
-; RV64V-ZVFHMIN-NEXT: bnez a1, .LBB67_12
-; RV64V-ZVFHMIN-NEXT: .LBB67_2: # %else2
-; RV64V-ZVFHMIN-NEXT: andi a1, a0, 4
-; RV64V-ZVFHMIN-NEXT: bnez a1, .LBB67_13
-; RV64V-ZVFHMIN-NEXT: .LBB67_3: # %else4
-; RV64V-ZVFHMIN-NEXT: andi a1, a0, 8
-; RV64V-ZVFHMIN-NEXT: beqz a1, .LBB67_5
-; RV64V-ZVFHMIN-NEXT: .LBB67_4: # %cond.store5
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 3
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v9
-; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a1
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v10, v12, 3
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v10
-; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a1)
-; RV64V-ZVFHMIN-NEXT: .LBB67_5: # %else6
-; RV64V-ZVFHMIN-NEXT: addi sp, sp, -320
-; RV64V-ZVFHMIN-NEXT: .cfi_def_cfa_offset 320
-; RV64V-ZVFHMIN-NEXT: sd ra, 312(sp) # 8-byte Folded Spill
-; RV64V-ZVFHMIN-NEXT: sd s0, 304(sp) # 8-byte Folded Spill
-; RV64V-ZVFHMIN-NEXT: .cfi_offset ra, -8
-; RV64V-ZVFHMIN-NEXT: .cfi_offset s0, -16
-; RV64V-ZVFHMIN-NEXT: addi s0, sp, 320
-; RV64V-ZVFHMIN-NEXT: .cfi_def_cfa s0, 0
-; RV64V-ZVFHMIN-NEXT: andi sp, sp, -64
-; RV64V-ZVFHMIN-NEXT: andi a1, a0, 16
-; RV64V-ZVFHMIN-NEXT: bnez a1, .LBB67_14
-; RV64V-ZVFHMIN-NEXT: # %bb.6: # %else8
-; RV64V-ZVFHMIN-NEXT: andi a1, a0, 32
-; RV64V-ZVFHMIN-NEXT: bnez a1, .LBB67_15
-; RV64V-ZVFHMIN-NEXT: .LBB67_7: # %else10
-; RV64V-ZVFHMIN-NEXT: andi a1, a0, 64
-; RV64V-ZVFHMIN-NEXT: bnez a1, .LBB67_16
-; RV64V-ZVFHMIN-NEXT: .LBB67_8: # %else12
-; RV64V-ZVFHMIN-NEXT: andi a0, a0, -128
-; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB67_10
-; RV64V-ZVFHMIN-NEXT: .LBB67_9: # %cond.store13
-; RV64V-ZVFHMIN-NEXT: mv a0, sp
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
-; RV64V-ZVFHMIN-NEXT: ld a0, 56(sp)
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v8
-; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a1
-; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV64V-ZVFHMIN-NEXT: .LBB67_10: # %else14
-; RV64V-ZVFHMIN-NEXT: addi sp, s0, -320
-; RV64V-ZVFHMIN-NEXT: ld ra, 312(sp) # 8-byte Folded Reload
-; RV64V-ZVFHMIN-NEXT: ld s0, 304(sp) # 8-byte Folded Reload
-; RV64V-ZVFHMIN-NEXT: addi sp, sp, 320
-; RV64V-ZVFHMIN-NEXT: ret
-; RV64V-ZVFHMIN-NEXT: .LBB67_11: # %cond.store
-; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v8
-; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a1
-; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v12
-; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a1)
-; RV64V-ZVFHMIN-NEXT: andi a1, a0, 2
-; RV64V-ZVFHMIN-NEXT: beqz a1, .LBB67_2
-; RV64V-ZVFHMIN-NEXT: .LBB67_12: # %cond.store1
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 1
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v9
-; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a1
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v12, 1
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v9
-; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a1)
-; RV64V-ZVFHMIN-NEXT: andi a1, a0, 4
-; RV64V-ZVFHMIN-NEXT: beqz a1, .LBB67_3
-; RV64V-ZVFHMIN-NEXT: .LBB67_13: # %cond.store3
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 2
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v9
-; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a1
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v10, v12, 2
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v10
-; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a1)
-; RV64V-ZVFHMIN-NEXT: andi a1, a0, 8
-; RV64V-ZVFHMIN-NEXT: bnez a1, .LBB67_4
-; RV64V-ZVFHMIN-NEXT: j .LBB67_5
-; RV64V-ZVFHMIN-NEXT: .LBB67_14: # %cond.store7
-; RV64V-ZVFHMIN-NEXT: addi a1, sp, 192
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a1)
-; RV64V-ZVFHMIN-NEXT: ld a1, 224(sp)
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 4
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a2, v9
-; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a2
-; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a1)
-; RV64V-ZVFHMIN-NEXT: andi a1, a0, 32
-; RV64V-ZVFHMIN-NEXT: beqz a1, .LBB67_7
-; RV64V-ZVFHMIN-NEXT: .LBB67_15: # %cond.store9
-; RV64V-ZVFHMIN-NEXT: addi a1, sp, 128
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a1)
-; RV64V-ZVFHMIN-NEXT: ld a1, 168(sp)
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 5
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a2, v9
-; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a2
-; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a1)
-; RV64V-ZVFHMIN-NEXT: andi a1, a0, 64
-; RV64V-ZVFHMIN-NEXT: beqz a1, .LBB67_8
-; RV64V-ZVFHMIN-NEXT: .LBB67_16: # %cond.store11
-; RV64V-ZVFHMIN-NEXT: addi a1, sp, 64
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a1)
-; RV64V-ZVFHMIN-NEXT: ld a1, 112(sp)
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 6
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a2, v9
-; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a2
-; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a1)
-; RV64V-ZVFHMIN-NEXT: andi a0, a0, -128
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB67_9
-; RV64V-ZVFHMIN-NEXT: j .LBB67_10
+; RV32-LABEL: mscatter_v8f16:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
+; RV32-NEXT: ret
;
-; RV32ZVE32F-ZVFH-LABEL: mscatter_v8f16:
-; RV32ZVE32F-ZVFH: # %bb.0:
-; RV32ZVE32F-ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32ZVE32F-ZVFH-NEXT: vsoxei32.v v8, (zero), v10, v0.t
-; RV32ZVE32F-ZVFH-NEXT: ret
+; RV64V-LABEL: mscatter_v8f16:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64V-NEXT: vsoxei64.v v8, (zero), v12, v0.t
+; RV64V-NEXT: ret
;
; RV64ZVE32F-ZVFH-LABEL: mscatter_v8f16:
; RV64ZVE32F-ZVFH: # %bb.0:
@@ -9314,121 +7204,6 @@ define void @mscatter_v8f16(<8 x half> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a1)
; RV64ZVE32F-ZVFH-NEXT: ret
;
-; RV32ZVE32F-ZVFHMIN-LABEL: mscatter_v8f16:
-; RV32ZVE32F-ZVFHMIN: # %bb.0:
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v0
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 1
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB67_9
-; RV32ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 2
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB67_10
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB67_2: # %else2
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 4
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB67_11
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB67_3: # %else4
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 8
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB67_12
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB67_4: # %else6
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 16
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB67_13
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB67_5: # %else8
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 32
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB67_14
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB67_6: # %else10
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 64
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB67_15
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB67_7: # %else12
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a0, -128
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB67_16
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB67_8: # %else14
-; RV32ZVE32F-ZVFHMIN-NEXT: ret
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB67_9: # %cond.store
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v8
-; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a1
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v10
-; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a1)
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 2
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB67_2
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB67_10: # %cond.store1
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 1
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a1
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 1
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a1)
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 4
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB67_3
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB67_11: # %cond.store3
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 2
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a1
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 2
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a1)
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 8
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB67_4
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB67_12: # %cond.store5
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 3
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a1
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 3
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a1)
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 16
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB67_5
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB67_13: # %cond.store7
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 4
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a1
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 4
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v12
-; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a1)
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 32
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB67_6
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB67_14: # %cond.store9
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 5
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a1
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 5
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v12
-; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a1)
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a1, a0, 64
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB67_7
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB67_15: # %cond.store11
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 6
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a1
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 6
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v12
-; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a1)
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a0, -128
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB67_8
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB67_16: # %cond.store13
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 7
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: ret
-;
; RV64ZVE32F-ZVFHMIN-LABEL: mscatter_v8f16:
; RV64ZVE32F-ZVFHMIN: # %bb.0:
; RV64ZVE32F-ZVFHMIN-NEXT: ld a3, 40(a0)
@@ -9533,284 +7308,23 @@ define void @mscatter_v8f16(<8 x half> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
}
define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
-; RV32V-ZVFH-LABEL: mscatter_baseidx_v8i8_v8f16:
-; RV32V-ZVFH: # %bb.0:
-; RV32V-ZVFH-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32V-ZVFH-NEXT: vsext.vf4 v10, v9
-; RV32V-ZVFH-NEXT: vadd.vv v10, v10, v10
-; RV32V-ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV32V-ZVFH-NEXT: vsoxei32.v v8, (a0), v10, v0.t
-; RV32V-ZVFH-NEXT: ret
-;
-; RV64V-ZVFH-LABEL: mscatter_baseidx_v8i8_v8f16:
-; RV64V-ZVFH: # %bb.0:
-; RV64V-ZVFH-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64V-ZVFH-NEXT: vsext.vf8 v12, v9
-; RV64V-ZVFH-NEXT: vadd.vv v12, v12, v12
-; RV64V-ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV64V-ZVFH-NEXT: vsoxei64.v v8, (a0), v12, v0.t
-; RV64V-ZVFH-NEXT: ret
-;
-; RV32V-ZVFHMIN-LABEL: mscatter_baseidx_v8i8_v8f16:
-; RV32V-ZVFHMIN: # %bb.0:
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vsext.vf4 v10, v9
-; RV32V-ZVFHMIN-NEXT: vadd.vv v10, v10, v10
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v0
-; RV32V-ZVFHMIN-NEXT: andi a2, a1, 1
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vadd.vx v10, v10, a0
-; RV32V-ZVFHMIN-NEXT: bnez a2, .LBB68_9
-; RV32V-ZVFHMIN-NEXT: # %bb.1: # %else
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 2
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB68_10
-; RV32V-ZVFHMIN-NEXT: .LBB68_2: # %else2
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 4
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB68_11
-; RV32V-ZVFHMIN-NEXT: .LBB68_3: # %else4
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 8
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB68_12
-; RV32V-ZVFHMIN-NEXT: .LBB68_4: # %else6
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 16
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB68_13
-; RV32V-ZVFHMIN-NEXT: .LBB68_5: # %else8
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 32
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB68_14
-; RV32V-ZVFHMIN-NEXT: .LBB68_6: # %else10
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 64
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB68_15
-; RV32V-ZVFHMIN-NEXT: .LBB68_7: # %else12
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, -128
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB68_16
-; RV32V-ZVFHMIN-NEXT: .LBB68_8: # %else14
-; RV32V-ZVFHMIN-NEXT: ret
-; RV32V-ZVFHMIN-NEXT: .LBB68_9: # %cond.store
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v10
-; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 2
-; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB68_2
-; RV32V-ZVFHMIN-NEXT: .LBB68_10: # %cond.store1
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 1
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 1
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 4
-; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB68_3
-; RV32V-ZVFHMIN-NEXT: .LBB68_11: # %cond.store3
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 2
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 2
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 8
-; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB68_4
-; RV32V-ZVFHMIN-NEXT: .LBB68_12: # %cond.store5
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 3
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 3
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 16
-; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB68_5
-; RV32V-ZVFHMIN-NEXT: .LBB68_13: # %cond.store7
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 4
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 4
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v12
-; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 32
-; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB68_6
-; RV32V-ZVFHMIN-NEXT: .LBB68_14: # %cond.store9
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 5
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 5
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v12
-; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 64
-; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB68_7
-; RV32V-ZVFHMIN-NEXT: .LBB68_15: # %cond.store11
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 6
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 6
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v12
-; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, -128
-; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB68_8
-; RV32V-ZVFHMIN-NEXT: .LBB68_16: # %cond.store13
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 7
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32V-ZVFHMIN-NEXT: ret
-;
-; RV64V-ZVFHMIN-LABEL: mscatter_baseidx_v8i8_v8f16:
-; RV64V-ZVFHMIN: # %bb.0:
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64V-ZVFHMIN-NEXT: vsext.vf8 v12, v9
-; RV64V-ZVFHMIN-NEXT: vadd.vv v12, v12, v12
-; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v0
-; RV64V-ZVFHMIN-NEXT: andi a2, a1, 1
-; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; RV64V-ZVFHMIN-NEXT: vadd.vx v12, v12, a0
-; RV64V-ZVFHMIN-NEXT: bnez a2, .LBB68_11
-; RV64V-ZVFHMIN-NEXT: # %bb.1: # %else
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 2
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB68_12
-; RV64V-ZVFHMIN-NEXT: .LBB68_2: # %else2
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 4
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB68_13
-; RV64V-ZVFHMIN-NEXT: .LBB68_3: # %else4
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 8
-; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB68_5
-; RV64V-ZVFHMIN-NEXT: .LBB68_4: # %cond.store5
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 3
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v10, v12, 3
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v10
-; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV64V-ZVFHMIN-NEXT: .LBB68_5: # %else6
-; RV64V-ZVFHMIN-NEXT: addi sp, sp, -320
-; RV64V-ZVFHMIN-NEXT: .cfi_def_cfa_offset 320
-; RV64V-ZVFHMIN-NEXT: sd ra, 312(sp) # 8-byte Folded Spill
-; RV64V-ZVFHMIN-NEXT: sd s0, 304(sp) # 8-byte Folded Spill
-; RV64V-ZVFHMIN-NEXT: .cfi_offset ra, -8
-; RV64V-ZVFHMIN-NEXT: .cfi_offset s0, -16
-; RV64V-ZVFHMIN-NEXT: addi s0, sp, 320
-; RV64V-ZVFHMIN-NEXT: .cfi_def_cfa s0, 0
-; RV64V-ZVFHMIN-NEXT: andi sp, sp, -64
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 16
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB68_14
-; RV64V-ZVFHMIN-NEXT: # %bb.6: # %else8
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 32
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB68_15
-; RV64V-ZVFHMIN-NEXT: .LBB68_7: # %else10
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 64
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB68_16
-; RV64V-ZVFHMIN-NEXT: .LBB68_8: # %else12
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, -128
-; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB68_10
-; RV64V-ZVFHMIN-NEXT: .LBB68_9: # %cond.store13
-; RV64V-ZVFHMIN-NEXT: mv a0, sp
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
-; RV64V-ZVFHMIN-NEXT: ld a0, 56(sp)
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v8
-; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a1
-; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV64V-ZVFHMIN-NEXT: .LBB68_10: # %else14
-; RV64V-ZVFHMIN-NEXT: addi sp, s0, -320
-; RV64V-ZVFHMIN-NEXT: ld ra, 312(sp) # 8-byte Folded Reload
-; RV64V-ZVFHMIN-NEXT: ld s0, 304(sp) # 8-byte Folded Reload
-; RV64V-ZVFHMIN-NEXT: addi sp, sp, 320
-; RV64V-ZVFHMIN-NEXT: ret
-; RV64V-ZVFHMIN-NEXT: .LBB68_11: # %cond.store
-; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v12
-; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 2
-; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB68_2
-; RV64V-ZVFHMIN-NEXT: .LBB68_12: # %cond.store1
-; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 1
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v12, 1
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 4
-; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB68_3
-; RV64V-ZVFHMIN-NEXT: .LBB68_13: # %cond.store3
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 2
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v10, v12, 2
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v10
-; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 8
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB68_4
-; RV64V-ZVFHMIN-NEXT: j .LBB68_5
-; RV64V-ZVFHMIN-NEXT: .LBB68_14: # %cond.store7
-; RV64V-ZVFHMIN-NEXT: addi a0, sp, 192
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
-; RV64V-ZVFHMIN-NEXT: ld a0, 224(sp)
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 4
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a2, v9
-; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a2
-; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 32
-; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB68_7
-; RV64V-ZVFHMIN-NEXT: .LBB68_15: # %cond.store9
-; RV64V-ZVFHMIN-NEXT: addi a0, sp, 128
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
-; RV64V-ZVFHMIN-NEXT: ld a0, 168(sp)
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 5
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a2, v9
-; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a2
-; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 64
-; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB68_8
-; RV64V-ZVFHMIN-NEXT: .LBB68_16: # %cond.store11
-; RV64V-ZVFHMIN-NEXT: addi a0, sp, 64
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
-; RV64V-ZVFHMIN-NEXT: ld a0, 112(sp)
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 6
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a2, v9
-; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a2
-; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, -128
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB68_9
-; RV64V-ZVFHMIN-NEXT: j .LBB68_10
+; RV32-LABEL: mscatter_baseidx_v8i8_v8f16:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: vsext.vf4 v10, v9
+; RV32-NEXT: vadd.vv v10, v10, v10
+; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
+; RV32-NEXT: ret
;
-; RV32ZVE32F-ZVFH-LABEL: mscatter_baseidx_v8i8_v8f16:
-; RV32ZVE32F-ZVFH: # %bb.0:
-; RV32ZVE32F-ZVFH-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32ZVE32F-ZVFH-NEXT: vsext.vf4 v10, v9
-; RV32ZVE32F-ZVFH-NEXT: vadd.vv v10, v10, v10
-; RV32ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV32ZVE32F-ZVFH-NEXT: vsoxei32.v v8, (a0), v10, v0.t
-; RV32ZVE32F-ZVFH-NEXT: ret
+; RV64V-LABEL: mscatter_baseidx_v8i8_v8f16:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vsext.vf8 v12, v9
+; RV64V-NEXT: vadd.vv v12, v12, v12
+; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
+; RV64V-NEXT: ret
;
; RV64ZVE32F-ZVFH-LABEL: mscatter_baseidx_v8i8_v8f16:
; RV64ZVE32F-ZVFH: # %bb.0:
@@ -9922,126 +7436,6 @@ define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %i
; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a0)
; RV64ZVE32F-ZVFH-NEXT: ret
;
-; RV32ZVE32F-ZVFHMIN-LABEL: mscatter_baseidx_v8i8_v8f16:
-; RV32ZVE32F-ZVFHMIN: # %bb.0:
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vsext.vf4 v10, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: vadd.vv v10, v10, v10
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v0
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 1
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vadd.vx v10, v10, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB68_9
-; RV32ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 2
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB68_10
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB68_2: # %else2
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 4
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB68_11
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB68_3: # %else4
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 8
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB68_12
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB68_4: # %else6
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 16
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB68_13
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB68_5: # %else8
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 32
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB68_14
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB68_6: # %else10
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 64
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB68_15
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB68_7: # %else12
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, -128
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB68_16
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB68_8: # %else14
-; RV32ZVE32F-ZVFHMIN-NEXT: ret
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB68_9: # %cond.store
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 2
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB68_2
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB68_10: # %cond.store1
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 1
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 1
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 4
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB68_3
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB68_11: # %cond.store3
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 2
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 2
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 8
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB68_4
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB68_12: # %cond.store5
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 3
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 3
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 16
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB68_5
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB68_13: # %cond.store7
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 4
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 4
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v12
-; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 32
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB68_6
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB68_14: # %cond.store9
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 5
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 5
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v12
-; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 64
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB68_7
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB68_15: # %cond.store11
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 6
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 6
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v12
-; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, -128
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB68_8
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB68_16: # %cond.store13
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 7
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: ret
-;
; RV64ZVE32F-ZVFHMIN-LABEL: mscatter_baseidx_v8i8_v8f16:
; RV64ZVE32F-ZVFHMIN: # %bb.0:
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -10173,284 +7567,23 @@ define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %i
}
define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
-; RV32V-ZVFH-LABEL: mscatter_baseidx_sext_v8i8_v8f16:
-; RV32V-ZVFH: # %bb.0:
-; RV32V-ZVFH-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32V-ZVFH-NEXT: vsext.vf4 v10, v9
-; RV32V-ZVFH-NEXT: vadd.vv v10, v10, v10
-; RV32V-ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV32V-ZVFH-NEXT: vsoxei32.v v8, (a0), v10, v0.t
-; RV32V-ZVFH-NEXT: ret
-;
-; RV64V-ZVFH-LABEL: mscatter_baseidx_sext_v8i8_v8f16:
-; RV64V-ZVFH: # %bb.0:
-; RV64V-ZVFH-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64V-ZVFH-NEXT: vsext.vf8 v12, v9
-; RV64V-ZVFH-NEXT: vadd.vv v12, v12, v12
-; RV64V-ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV64V-ZVFH-NEXT: vsoxei64.v v8, (a0), v12, v0.t
-; RV64V-ZVFH-NEXT: ret
-;
-; RV32V-ZVFHMIN-LABEL: mscatter_baseidx_sext_v8i8_v8f16:
-; RV32V-ZVFHMIN: # %bb.0:
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vsext.vf4 v10, v9
-; RV32V-ZVFHMIN-NEXT: vadd.vv v10, v10, v10
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v0
-; RV32V-ZVFHMIN-NEXT: andi a2, a1, 1
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vadd.vx v10, v10, a0
-; RV32V-ZVFHMIN-NEXT: bnez a2, .LBB69_9
-; RV32V-ZVFHMIN-NEXT: # %bb.1: # %else
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 2
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB69_10
-; RV32V-ZVFHMIN-NEXT: .LBB69_2: # %else2
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 4
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB69_11
-; RV32V-ZVFHMIN-NEXT: .LBB69_3: # %else4
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 8
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB69_12
-; RV32V-ZVFHMIN-NEXT: .LBB69_4: # %else6
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 16
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB69_13
-; RV32V-ZVFHMIN-NEXT: .LBB69_5: # %else8
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 32
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB69_14
-; RV32V-ZVFHMIN-NEXT: .LBB69_6: # %else10
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 64
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB69_15
-; RV32V-ZVFHMIN-NEXT: .LBB69_7: # %else12
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, -128
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB69_16
-; RV32V-ZVFHMIN-NEXT: .LBB69_8: # %else14
-; RV32V-ZVFHMIN-NEXT: ret
-; RV32V-ZVFHMIN-NEXT: .LBB69_9: # %cond.store
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v10
-; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 2
-; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB69_2
-; RV32V-ZVFHMIN-NEXT: .LBB69_10: # %cond.store1
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 1
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 1
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 4
-; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB69_3
-; RV32V-ZVFHMIN-NEXT: .LBB69_11: # %cond.store3
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 2
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 2
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 8
-; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB69_4
-; RV32V-ZVFHMIN-NEXT: .LBB69_12: # %cond.store5
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 3
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 3
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 16
-; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB69_5
-; RV32V-ZVFHMIN-NEXT: .LBB69_13: # %cond.store7
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 4
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 4
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v12
-; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 32
-; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB69_6
-; RV32V-ZVFHMIN-NEXT: .LBB69_14: # %cond.store9
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 5
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 5
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v12
-; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 64
-; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB69_7
-; RV32V-ZVFHMIN-NEXT: .LBB69_15: # %cond.store11
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 6
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 6
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v12
-; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, -128
-; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB69_8
-; RV32V-ZVFHMIN-NEXT: .LBB69_16: # %cond.store13
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 7
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32V-ZVFHMIN-NEXT: ret
-;
-; RV64V-ZVFHMIN-LABEL: mscatter_baseidx_sext_v8i8_v8f16:
-; RV64V-ZVFHMIN: # %bb.0:
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64V-ZVFHMIN-NEXT: vsext.vf8 v12, v9
-; RV64V-ZVFHMIN-NEXT: vadd.vv v12, v12, v12
-; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v0
-; RV64V-ZVFHMIN-NEXT: andi a2, a1, 1
-; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; RV64V-ZVFHMIN-NEXT: vadd.vx v12, v12, a0
-; RV64V-ZVFHMIN-NEXT: bnez a2, .LBB69_11
-; RV64V-ZVFHMIN-NEXT: # %bb.1: # %else
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 2
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB69_12
-; RV64V-ZVFHMIN-NEXT: .LBB69_2: # %else2
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 4
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB69_13
-; RV64V-ZVFHMIN-NEXT: .LBB69_3: # %else4
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 8
-; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB69_5
-; RV64V-ZVFHMIN-NEXT: .LBB69_4: # %cond.store5
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 3
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v10, v12, 3
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v10
-; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV64V-ZVFHMIN-NEXT: .LBB69_5: # %else6
-; RV64V-ZVFHMIN-NEXT: addi sp, sp, -320
-; RV64V-ZVFHMIN-NEXT: .cfi_def_cfa_offset 320
-; RV64V-ZVFHMIN-NEXT: sd ra, 312(sp) # 8-byte Folded Spill
-; RV64V-ZVFHMIN-NEXT: sd s0, 304(sp) # 8-byte Folded Spill
-; RV64V-ZVFHMIN-NEXT: .cfi_offset ra, -8
-; RV64V-ZVFHMIN-NEXT: .cfi_offset s0, -16
-; RV64V-ZVFHMIN-NEXT: addi s0, sp, 320
-; RV64V-ZVFHMIN-NEXT: .cfi_def_cfa s0, 0
-; RV64V-ZVFHMIN-NEXT: andi sp, sp, -64
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 16
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB69_14
-; RV64V-ZVFHMIN-NEXT: # %bb.6: # %else8
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 32
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB69_15
-; RV64V-ZVFHMIN-NEXT: .LBB69_7: # %else10
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 64
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB69_16
-; RV64V-ZVFHMIN-NEXT: .LBB69_8: # %else12
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, -128
-; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB69_10
-; RV64V-ZVFHMIN-NEXT: .LBB69_9: # %cond.store13
-; RV64V-ZVFHMIN-NEXT: mv a0, sp
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
-; RV64V-ZVFHMIN-NEXT: ld a0, 56(sp)
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v8
-; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a1
-; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV64V-ZVFHMIN-NEXT: .LBB69_10: # %else14
-; RV64V-ZVFHMIN-NEXT: addi sp, s0, -320
-; RV64V-ZVFHMIN-NEXT: ld ra, 312(sp) # 8-byte Folded Reload
-; RV64V-ZVFHMIN-NEXT: ld s0, 304(sp) # 8-byte Folded Reload
-; RV64V-ZVFHMIN-NEXT: addi sp, sp, 320
-; RV64V-ZVFHMIN-NEXT: ret
-; RV64V-ZVFHMIN-NEXT: .LBB69_11: # %cond.store
-; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v12
-; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 2
-; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB69_2
-; RV64V-ZVFHMIN-NEXT: .LBB69_12: # %cond.store1
-; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 1
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v12, 1
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 4
-; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB69_3
-; RV64V-ZVFHMIN-NEXT: .LBB69_13: # %cond.store3
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 2
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v10, v12, 2
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v10
-; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 8
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB69_4
-; RV64V-ZVFHMIN-NEXT: j .LBB69_5
-; RV64V-ZVFHMIN-NEXT: .LBB69_14: # %cond.store7
-; RV64V-ZVFHMIN-NEXT: addi a0, sp, 192
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
-; RV64V-ZVFHMIN-NEXT: ld a0, 224(sp)
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 4
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a2, v9
-; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a2
-; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 32
-; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB69_7
-; RV64V-ZVFHMIN-NEXT: .LBB69_15: # %cond.store9
-; RV64V-ZVFHMIN-NEXT: addi a0, sp, 128
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
-; RV64V-ZVFHMIN-NEXT: ld a0, 168(sp)
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 5
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a2, v9
-; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a2
-; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 64
-; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB69_8
-; RV64V-ZVFHMIN-NEXT: .LBB69_16: # %cond.store11
-; RV64V-ZVFHMIN-NEXT: addi a0, sp, 64
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
-; RV64V-ZVFHMIN-NEXT: ld a0, 112(sp)
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 6
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a2, v9
-; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a2
-; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, -128
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB69_9
-; RV64V-ZVFHMIN-NEXT: j .LBB69_10
+; RV32-LABEL: mscatter_baseidx_sext_v8i8_v8f16:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: vsext.vf4 v10, v9
+; RV32-NEXT: vadd.vv v10, v10, v10
+; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
+; RV32-NEXT: ret
;
-; RV32ZVE32F-ZVFH-LABEL: mscatter_baseidx_sext_v8i8_v8f16:
-; RV32ZVE32F-ZVFH: # %bb.0:
-; RV32ZVE32F-ZVFH-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32ZVE32F-ZVFH-NEXT: vsext.vf4 v10, v9
-; RV32ZVE32F-ZVFH-NEXT: vadd.vv v10, v10, v10
-; RV32ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV32ZVE32F-ZVFH-NEXT: vsoxei32.v v8, (a0), v10, v0.t
-; RV32ZVE32F-ZVFH-NEXT: ret
+; RV64V-LABEL: mscatter_baseidx_sext_v8i8_v8f16:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vsext.vf8 v12, v9
+; RV64V-NEXT: vadd.vv v12, v12, v12
+; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
+; RV64V-NEXT: ret
;
; RV64ZVE32F-ZVFH-LABEL: mscatter_baseidx_sext_v8i8_v8f16:
; RV64ZVE32F-ZVFH: # %bb.0:
@@ -10562,126 +7695,6 @@ define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a0)
; RV64ZVE32F-ZVFH-NEXT: ret
;
-; RV32ZVE32F-ZVFHMIN-LABEL: mscatter_baseidx_sext_v8i8_v8f16:
-; RV32ZVE32F-ZVFHMIN: # %bb.0:
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vsext.vf4 v10, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: vadd.vv v10, v10, v10
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v0
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 1
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vadd.vx v10, v10, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB69_9
-; RV32ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 2
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB69_10
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB69_2: # %else2
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 4
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB69_11
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB69_3: # %else4
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 8
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB69_12
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB69_4: # %else6
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 16
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB69_13
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB69_5: # %else8
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 32
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB69_14
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB69_6: # %else10
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 64
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB69_15
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB69_7: # %else12
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, -128
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB69_16
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB69_8: # %else14
-; RV32ZVE32F-ZVFHMIN-NEXT: ret
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB69_9: # %cond.store
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 2
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB69_2
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB69_10: # %cond.store1
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 1
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 1
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 4
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB69_3
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB69_11: # %cond.store3
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 2
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 2
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 8
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB69_4
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB69_12: # %cond.store5
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 3
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 3
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 16
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB69_5
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB69_13: # %cond.store7
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 4
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 4
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v12
-; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 32
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB69_6
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB69_14: # %cond.store9
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 5
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 5
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v12
-; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 64
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB69_7
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB69_15: # %cond.store11
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 6
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 6
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v12
-; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, -128
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB69_8
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB69_16: # %cond.store13
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 7
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: ret
-;
; RV64ZVE32F-ZVFHMIN-LABEL: mscatter_baseidx_sext_v8i8_v8f16:
; RV64ZVE32F-ZVFHMIN: # %bb.0:
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -10814,281 +7827,21 @@ define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
}
define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
-; RV32V-ZVFH-LABEL: mscatter_baseidx_zext_v8i8_v8f16:
-; RV32V-ZVFH: # %bb.0:
-; RV32V-ZVFH-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV32V-ZVFH-NEXT: vwaddu.vv v10, v9, v9
-; RV32V-ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV32V-ZVFH-NEXT: vsoxei16.v v8, (a0), v10, v0.t
-; RV32V-ZVFH-NEXT: ret
-;
-; RV64V-ZVFH-LABEL: mscatter_baseidx_zext_v8i8_v8f16:
-; RV64V-ZVFH: # %bb.0:
-; RV64V-ZVFH-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV64V-ZVFH-NEXT: vwaddu.vv v10, v9, v9
-; RV64V-ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV64V-ZVFH-NEXT: vsoxei16.v v8, (a0), v10, v0.t
-; RV64V-ZVFH-NEXT: ret
-;
-; RV32V-ZVFHMIN-LABEL: mscatter_baseidx_zext_v8i8_v8f16:
-; RV32V-ZVFHMIN: # %bb.0:
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vzext.vf4 v10, v9
-; RV32V-ZVFHMIN-NEXT: vadd.vv v10, v10, v10
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v0
-; RV32V-ZVFHMIN-NEXT: andi a2, a1, 1
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vadd.vx v10, v10, a0
-; RV32V-ZVFHMIN-NEXT: bnez a2, .LBB70_9
-; RV32V-ZVFHMIN-NEXT: # %bb.1: # %else
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 2
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB70_10
-; RV32V-ZVFHMIN-NEXT: .LBB70_2: # %else2
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 4
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB70_11
-; RV32V-ZVFHMIN-NEXT: .LBB70_3: # %else4
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 8
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB70_12
-; RV32V-ZVFHMIN-NEXT: .LBB70_4: # %else6
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 16
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB70_13
-; RV32V-ZVFHMIN-NEXT: .LBB70_5: # %else8
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 32
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB70_14
-; RV32V-ZVFHMIN-NEXT: .LBB70_6: # %else10
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 64
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB70_15
-; RV32V-ZVFHMIN-NEXT: .LBB70_7: # %else12
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, -128
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB70_16
-; RV32V-ZVFHMIN-NEXT: .LBB70_8: # %else14
-; RV32V-ZVFHMIN-NEXT: ret
-; RV32V-ZVFHMIN-NEXT: .LBB70_9: # %cond.store
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v10
-; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 2
-; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB70_2
-; RV32V-ZVFHMIN-NEXT: .LBB70_10: # %cond.store1
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 1
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 1
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 4
-; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB70_3
-; RV32V-ZVFHMIN-NEXT: .LBB70_11: # %cond.store3
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 2
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 2
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 8
-; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB70_4
-; RV32V-ZVFHMIN-NEXT: .LBB70_12: # %cond.store5
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 3
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 3
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 16
-; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB70_5
-; RV32V-ZVFHMIN-NEXT: .LBB70_13: # %cond.store7
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 4
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 4
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v12
-; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 32
-; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB70_6
-; RV32V-ZVFHMIN-NEXT: .LBB70_14: # %cond.store9
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 5
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 5
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v12
-; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 64
-; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB70_7
-; RV32V-ZVFHMIN-NEXT: .LBB70_15: # %cond.store11
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 6
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 6
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v12
-; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, -128
-; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB70_8
-; RV32V-ZVFHMIN-NEXT: .LBB70_16: # %cond.store13
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 7
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32V-ZVFHMIN-NEXT: ret
-;
-; RV64V-ZVFHMIN-LABEL: mscatter_baseidx_zext_v8i8_v8f16:
-; RV64V-ZVFHMIN: # %bb.0:
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64V-ZVFHMIN-NEXT: vzext.vf8 v12, v9
-; RV64V-ZVFHMIN-NEXT: vadd.vv v12, v12, v12
-; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v0
-; RV64V-ZVFHMIN-NEXT: andi a2, a1, 1
-; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; RV64V-ZVFHMIN-NEXT: vadd.vx v12, v12, a0
-; RV64V-ZVFHMIN-NEXT: bnez a2, .LBB70_11
-; RV64V-ZVFHMIN-NEXT: # %bb.1: # %else
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 2
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB70_12
-; RV64V-ZVFHMIN-NEXT: .LBB70_2: # %else2
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 4
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB70_13
-; RV64V-ZVFHMIN-NEXT: .LBB70_3: # %else4
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 8
-; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB70_5
-; RV64V-ZVFHMIN-NEXT: .LBB70_4: # %cond.store5
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 3
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v10, v12, 3
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v10
-; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV64V-ZVFHMIN-NEXT: .LBB70_5: # %else6
-; RV64V-ZVFHMIN-NEXT: addi sp, sp, -320
-; RV64V-ZVFHMIN-NEXT: .cfi_def_cfa_offset 320
-; RV64V-ZVFHMIN-NEXT: sd ra, 312(sp) # 8-byte Folded Spill
-; RV64V-ZVFHMIN-NEXT: sd s0, 304(sp) # 8-byte Folded Spill
-; RV64V-ZVFHMIN-NEXT: .cfi_offset ra, -8
-; RV64V-ZVFHMIN-NEXT: .cfi_offset s0, -16
-; RV64V-ZVFHMIN-NEXT: addi s0, sp, 320
-; RV64V-ZVFHMIN-NEXT: .cfi_def_cfa s0, 0
-; RV64V-ZVFHMIN-NEXT: andi sp, sp, -64
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 16
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB70_14
-; RV64V-ZVFHMIN-NEXT: # %bb.6: # %else8
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 32
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB70_15
-; RV64V-ZVFHMIN-NEXT: .LBB70_7: # %else10
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 64
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB70_16
-; RV64V-ZVFHMIN-NEXT: .LBB70_8: # %else12
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, -128
-; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB70_10
-; RV64V-ZVFHMIN-NEXT: .LBB70_9: # %cond.store13
-; RV64V-ZVFHMIN-NEXT: mv a0, sp
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
-; RV64V-ZVFHMIN-NEXT: ld a0, 56(sp)
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v8
-; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a1
-; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV64V-ZVFHMIN-NEXT: .LBB70_10: # %else14
-; RV64V-ZVFHMIN-NEXT: addi sp, s0, -320
-; RV64V-ZVFHMIN-NEXT: ld ra, 312(sp) # 8-byte Folded Reload
-; RV64V-ZVFHMIN-NEXT: ld s0, 304(sp) # 8-byte Folded Reload
-; RV64V-ZVFHMIN-NEXT: addi sp, sp, 320
-; RV64V-ZVFHMIN-NEXT: ret
-; RV64V-ZVFHMIN-NEXT: .LBB70_11: # %cond.store
-; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v12
-; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 2
-; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB70_2
-; RV64V-ZVFHMIN-NEXT: .LBB70_12: # %cond.store1
-; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 1
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v12, 1
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 4
-; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB70_3
-; RV64V-ZVFHMIN-NEXT: .LBB70_13: # %cond.store3
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 2
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v10, v12, 2
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v10
-; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 8
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB70_4
-; RV64V-ZVFHMIN-NEXT: j .LBB70_5
-; RV64V-ZVFHMIN-NEXT: .LBB70_14: # %cond.store7
-; RV64V-ZVFHMIN-NEXT: addi a0, sp, 192
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
-; RV64V-ZVFHMIN-NEXT: ld a0, 224(sp)
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 4
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a2, v9
-; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a2
-; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 32
-; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB70_7
-; RV64V-ZVFHMIN-NEXT: .LBB70_15: # %cond.store9
-; RV64V-ZVFHMIN-NEXT: addi a0, sp, 128
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
-; RV64V-ZVFHMIN-NEXT: ld a0, 168(sp)
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 5
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a2, v9
-; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a2
-; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 64
-; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB70_8
-; RV64V-ZVFHMIN-NEXT: .LBB70_16: # %cond.store11
-; RV64V-ZVFHMIN-NEXT: addi a0, sp, 64
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
-; RV64V-ZVFHMIN-NEXT: ld a0, 112(sp)
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 6
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a2, v9
-; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a2
-; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, -128
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB70_9
-; RV64V-ZVFHMIN-NEXT: j .LBB70_10
+; RV32-LABEL: mscatter_baseidx_zext_v8i8_v8f16:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT: vwaddu.vv v10, v9, v9
+; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV32-NEXT: vsoxei16.v v8, (a0), v10, v0.t
+; RV32-NEXT: ret
;
-; RV32ZVE32F-ZVFH-LABEL: mscatter_baseidx_zext_v8i8_v8f16:
-; RV32ZVE32F-ZVFH: # %bb.0:
-; RV32ZVE32F-ZVFH-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV32ZVE32F-ZVFH-NEXT: vwaddu.vv v10, v9, v9
-; RV32ZVE32F-ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV32ZVE32F-ZVFH-NEXT: vsoxei16.v v8, (a0), v10, v0.t
-; RV32ZVE32F-ZVFH-NEXT: ret
+; RV64V-LABEL: mscatter_baseidx_zext_v8i8_v8f16:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64V-NEXT: vwaddu.vv v10, v9, v9
+; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV64V-NEXT: vsoxei16.v v8, (a0), v10, v0.t
+; RV64V-NEXT: ret
;
; RV64ZVE32F-ZVFH-LABEL: mscatter_baseidx_zext_v8i8_v8f16:
; RV64ZVE32F-ZVFH: # %bb.0:
@@ -11208,126 +7961,6 @@ define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a0)
; RV64ZVE32F-ZVFH-NEXT: ret
;
-; RV32ZVE32F-ZVFHMIN-LABEL: mscatter_baseidx_zext_v8i8_v8f16:
-; RV32ZVE32F-ZVFHMIN: # %bb.0:
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vzext.vf4 v10, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: vadd.vv v10, v10, v10
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v0
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 1
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vadd.vx v10, v10, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB70_9
-; RV32ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 2
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB70_10
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB70_2: # %else2
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 4
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB70_11
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB70_3: # %else4
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 8
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB70_12
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB70_4: # %else6
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 16
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB70_13
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB70_5: # %else8
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 32
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB70_14
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB70_6: # %else10
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 64
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB70_15
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB70_7: # %else12
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, -128
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB70_16
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB70_8: # %else14
-; RV32ZVE32F-ZVFHMIN-NEXT: ret
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB70_9: # %cond.store
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 2
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB70_2
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB70_10: # %cond.store1
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 1
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 1
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 4
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB70_3
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB70_11: # %cond.store3
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 2
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 2
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 8
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB70_4
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB70_12: # %cond.store5
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 3
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 3
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 16
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB70_5
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB70_13: # %cond.store7
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 4
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 4
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v12
-; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 32
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB70_6
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB70_14: # %cond.store9
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 5
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 5
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v12
-; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 64
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB70_7
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB70_15: # %cond.store11
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 6
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 6
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v12
-; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, -128
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB70_8
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB70_16: # %cond.store13
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 7
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: ret
-;
; RV64ZVE32F-ZVFHMIN-LABEL: mscatter_baseidx_zext_v8i8_v8f16:
; RV64ZVE32F-ZVFHMIN: # %bb.0:
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -11468,279 +8101,21 @@ define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
}
define void @mscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
-; RV32V-ZVFH-LABEL: mscatter_baseidx_v8f16:
-; RV32V-ZVFH: # %bb.0:
-; RV32V-ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32V-ZVFH-NEXT: vwadd.vv v10, v9, v9
-; RV32V-ZVFH-NEXT: vsoxei32.v v8, (a0), v10, v0.t
-; RV32V-ZVFH-NEXT: ret
-;
-; RV64V-ZVFH-LABEL: mscatter_baseidx_v8f16:
-; RV64V-ZVFH: # %bb.0:
-; RV64V-ZVFH-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64V-ZVFH-NEXT: vsext.vf4 v12, v9
-; RV64V-ZVFH-NEXT: vadd.vv v12, v12, v12
-; RV64V-ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV64V-ZVFH-NEXT: vsoxei64.v v8, (a0), v12, v0.t
-; RV64V-ZVFH-NEXT: ret
-;
-; RV32V-ZVFHMIN-LABEL: mscatter_baseidx_v8f16:
-; RV32V-ZVFHMIN: # %bb.0:
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vwadd.vv v10, v9, v9
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a1, v0
-; RV32V-ZVFHMIN-NEXT: andi a2, a1, 1
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vadd.vx v10, v10, a0
-; RV32V-ZVFHMIN-NEXT: bnez a2, .LBB71_9
-; RV32V-ZVFHMIN-NEXT: # %bb.1: # %else
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 2
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB71_10
-; RV32V-ZVFHMIN-NEXT: .LBB71_2: # %else2
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 4
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB71_11
-; RV32V-ZVFHMIN-NEXT: .LBB71_3: # %else4
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 8
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB71_12
-; RV32V-ZVFHMIN-NEXT: .LBB71_4: # %else6
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 16
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB71_13
-; RV32V-ZVFHMIN-NEXT: .LBB71_5: # %else8
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 32
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB71_14
-; RV32V-ZVFHMIN-NEXT: .LBB71_6: # %else10
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 64
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB71_15
-; RV32V-ZVFHMIN-NEXT: .LBB71_7: # %else12
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, -128
-; RV32V-ZVFHMIN-NEXT: bnez a0, .LBB71_16
-; RV32V-ZVFHMIN-NEXT: .LBB71_8: # %else14
-; RV32V-ZVFHMIN-NEXT: ret
-; RV32V-ZVFHMIN-NEXT: .LBB71_9: # %cond.store
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v10
-; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 2
-; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB71_2
-; RV32V-ZVFHMIN-NEXT: .LBB71_10: # %cond.store1
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 1
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 1
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 4
-; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB71_3
-; RV32V-ZVFHMIN-NEXT: .LBB71_11: # %cond.store3
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 2
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 2
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 8
-; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB71_4
-; RV32V-ZVFHMIN-NEXT: .LBB71_12: # %cond.store5
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 3
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 3
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 16
-; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB71_5
-; RV32V-ZVFHMIN-NEXT: .LBB71_13: # %cond.store7
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 4
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 4
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v12
-; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 32
-; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB71_6
-; RV32V-ZVFHMIN-NEXT: .LBB71_14: # %cond.store9
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 5
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 5
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v12
-; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, 64
-; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB71_7
-; RV32V-ZVFHMIN-NEXT: .LBB71_15: # %cond.store11
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 6
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 6
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v12
-; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32V-ZVFHMIN-NEXT: andi a0, a1, -128
-; RV32V-ZVFHMIN-NEXT: beqz a0, .LBB71_8
-; RV32V-ZVFHMIN-NEXT: .LBB71_16: # %cond.store13
-; RV32V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32V-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32V-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 7
-; RV32V-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32V-ZVFHMIN-NEXT: ret
-;
-; RV64V-ZVFHMIN-LABEL: mscatter_baseidx_v8f16:
-; RV64V-ZVFHMIN: # %bb.0:
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64V-ZVFHMIN-NEXT: vsext.vf4 v12, v9
-; RV64V-ZVFHMIN-NEXT: vadd.vv v12, v12, v12
-; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v0
-; RV64V-ZVFHMIN-NEXT: andi a2, a1, 1
-; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; RV64V-ZVFHMIN-NEXT: vadd.vx v12, v12, a0
-; RV64V-ZVFHMIN-NEXT: bnez a2, .LBB71_11
-; RV64V-ZVFHMIN-NEXT: # %bb.1: # %else
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 2
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB71_12
-; RV64V-ZVFHMIN-NEXT: .LBB71_2: # %else2
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 4
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB71_13
-; RV64V-ZVFHMIN-NEXT: .LBB71_3: # %else4
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 8
-; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB71_5
-; RV64V-ZVFHMIN-NEXT: .LBB71_4: # %cond.store5
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 3
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v10, v12, 3
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v10
-; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV64V-ZVFHMIN-NEXT: .LBB71_5: # %else6
-; RV64V-ZVFHMIN-NEXT: addi sp, sp, -320
-; RV64V-ZVFHMIN-NEXT: .cfi_def_cfa_offset 320
-; RV64V-ZVFHMIN-NEXT: sd ra, 312(sp) # 8-byte Folded Spill
-; RV64V-ZVFHMIN-NEXT: sd s0, 304(sp) # 8-byte Folded Spill
-; RV64V-ZVFHMIN-NEXT: .cfi_offset ra, -8
-; RV64V-ZVFHMIN-NEXT: .cfi_offset s0, -16
-; RV64V-ZVFHMIN-NEXT: addi s0, sp, 320
-; RV64V-ZVFHMIN-NEXT: .cfi_def_cfa s0, 0
-; RV64V-ZVFHMIN-NEXT: andi sp, sp, -64
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 16
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB71_14
-; RV64V-ZVFHMIN-NEXT: # %bb.6: # %else8
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 32
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB71_15
-; RV64V-ZVFHMIN-NEXT: .LBB71_7: # %else10
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 64
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB71_16
-; RV64V-ZVFHMIN-NEXT: .LBB71_8: # %else12
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, -128
-; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB71_10
-; RV64V-ZVFHMIN-NEXT: .LBB71_9: # %cond.store13
-; RV64V-ZVFHMIN-NEXT: mv a0, sp
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
-; RV64V-ZVFHMIN-NEXT: ld a0, 56(sp)
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a1, v8
-; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a1
-; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV64V-ZVFHMIN-NEXT: .LBB71_10: # %else14
-; RV64V-ZVFHMIN-NEXT: addi sp, s0, -320
-; RV64V-ZVFHMIN-NEXT: ld ra, 312(sp) # 8-byte Folded Reload
-; RV64V-ZVFHMIN-NEXT: ld s0, 304(sp) # 8-byte Folded Reload
-; RV64V-ZVFHMIN-NEXT: addi sp, sp, 320
-; RV64V-ZVFHMIN-NEXT: ret
-; RV64V-ZVFHMIN-NEXT: .LBB71_11: # %cond.store
-; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v12
-; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 2
-; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB71_2
-; RV64V-ZVFHMIN-NEXT: .LBB71_12: # %cond.store1
-; RV64V-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 1
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v12, 1
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 4
-; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB71_3
-; RV64V-ZVFHMIN-NEXT: .LBB71_13: # %cond.store3
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 2
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v10, v12, 2
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a0, v10
-; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 8
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB71_4
-; RV64V-ZVFHMIN-NEXT: j .LBB71_5
-; RV64V-ZVFHMIN-NEXT: .LBB71_14: # %cond.store7
-; RV64V-ZVFHMIN-NEXT: addi a0, sp, 192
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
-; RV64V-ZVFHMIN-NEXT: ld a0, 224(sp)
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 4
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a2, v9
-; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a2
-; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 32
-; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB71_7
-; RV64V-ZVFHMIN-NEXT: .LBB71_15: # %cond.store9
-; RV64V-ZVFHMIN-NEXT: addi a0, sp, 128
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
-; RV64V-ZVFHMIN-NEXT: ld a0, 168(sp)
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 5
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a2, v9
-; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a2
-; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, 64
-; RV64V-ZVFHMIN-NEXT: beqz a0, .LBB71_8
-; RV64V-ZVFHMIN-NEXT: .LBB71_16: # %cond.store11
-; RV64V-ZVFHMIN-NEXT: addi a0, sp, 64
-; RV64V-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64V-ZVFHMIN-NEXT: vse64.v v12, (a0)
-; RV64V-ZVFHMIN-NEXT: ld a0, 112(sp)
-; RV64V-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 6
-; RV64V-ZVFHMIN-NEXT: vmv.x.s a2, v9
-; RV64V-ZVFHMIN-NEXT: fmv.h.x fa5, a2
-; RV64V-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV64V-ZVFHMIN-NEXT: andi a0, a1, -128
-; RV64V-ZVFHMIN-NEXT: bnez a0, .LBB71_9
-; RV64V-ZVFHMIN-NEXT: j .LBB71_10
+; RV32-LABEL: mscatter_baseidx_v8f16:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV32-NEXT: vwadd.vv v10, v9, v9
+; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
+; RV32-NEXT: ret
;
-; RV32ZVE32F-ZVFH-LABEL: mscatter_baseidx_v8f16:
-; RV32ZVE32F-ZVFH: # %bb.0:
-; RV32ZVE32F-ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32ZVE32F-ZVFH-NEXT: vwadd.vv v10, v9, v9
-; RV32ZVE32F-ZVFH-NEXT: vsoxei32.v v8, (a0), v10, v0.t
-; RV32ZVE32F-ZVFH-NEXT: ret
+; RV64V-LABEL: mscatter_baseidx_v8f16:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV64V-NEXT: vsext.vf4 v12, v9
+; RV64V-NEXT: vadd.vv v12, v12, v12
+; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
+; RV64V-NEXT: ret
;
; RV64ZVE32F-ZVFH-LABEL: mscatter_baseidx_v8f16:
; RV64ZVE32F-ZVFH: # %bb.0:
@@ -11851,125 +8226,6 @@ define void @mscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs,
; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a0)
; RV64ZVE32F-ZVFH-NEXT: ret
;
-; RV32ZVE32F-ZVFHMIN-LABEL: mscatter_baseidx_v8f16:
-; RV32ZVE32F-ZVFHMIN: # %bb.0:
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vwadd.vv v10, v9, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v0
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 1
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vadd.vx v10, v10, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB71_9
-; RV32ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 2
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB71_10
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB71_2: # %else2
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 4
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB71_11
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB71_3: # %else4
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 8
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB71_12
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB71_4: # %else6
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 16
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB71_13
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB71_5: # %else8
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 32
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB71_14
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB71_6: # %else10
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 64
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB71_15
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB71_7: # %else12
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, -128
-; RV32ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB71_16
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB71_8: # %else14
-; RV32ZVE32F-ZVFHMIN-NEXT: ret
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB71_9: # %cond.store
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v10
-; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 2
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB71_2
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB71_10: # %cond.store1
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 1
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 1
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 4
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB71_3
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB71_11: # %cond.store3
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 2
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 2
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 8
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB71_4
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB71_12: # %cond.store5
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 3
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 3
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 16
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB71_5
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB71_13: # %cond.store7
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 4
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 4
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v12
-; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 32
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB71_6
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB71_14: # %cond.store9
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 5
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 5
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v12
-; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, 64
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB71_7
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB71_15: # %cond.store11
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 6
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
-; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v12, v10, 6
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v12
-; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: andi a0, a1, -128
-; RV32ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB71_8
-; RV32ZVE32F-ZVFHMIN-NEXT: .LBB71_16: # %cond.store13
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
-; RV32ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 7
-; RV32ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
-; RV32ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
-; RV32ZVE32F-ZVFHMIN-NEXT: ret
-;
; RV64ZVE32F-ZVFHMIN-LABEL: mscatter_baseidx_v8f16:
; RV64ZVE32F-ZVFHMIN: # %bb.0:
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
@@ -16582,4 +12838,10 @@ define void @mscatter_shuffle_rotate(<8 x i16> %val, ptr %base) {
ret void
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV32V-ZVFH: {{.*}}
+; RV32V-ZVFHMIN: {{.*}}
+; RV32ZVE32F-ZVFH: {{.*}}
+; RV32ZVE32F-ZVFHMIN: {{.*}}
; RV64: {{.*}}
+; RV64V-ZVFH: {{.*}}
+; RV64V-ZVFHMIN: {{.*}}
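
For reference, a minimal sketch of the IR shape these autogenerated checks exercise -- a masked scatter of <8 x half> through sign-extended i8 offsets. The function name, alignment, and inbounds flag below are illustrative assumptions, not lines copied from the patch:

  declare void @llvm.masked.scatter.v8f16.v8p0(<8 x half>, <8 x ptr>, i32 immarg, <8 x i1>)

  define void @mscatter_baseidx_sext_v8i8_v8f16_sketch(<8 x half> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
    ; Widen the i8 indices, then form one address per lane from the common base.
    %eidxs = sext <8 x i8> %idxs to <8 x i16>
    %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs
    ; Store each lane of %val to its address wherever the corresponding mask bit is set.
    call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
    ret void
  }

Under +zvfh this kind of scatter selects the indexed vsoxei store forms seen in the RV32/RV64V check lines above; the removed ZVFHMIN blocks correspond to the earlier per-lane scalarized fallback.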